In [None]:
import pandas as pd
import numpy as np

# Load the two input tables: the per-play event log and the ball-tracking samples.
events = pd.read_csv(filepath_or_buffer="game_events_combined.csv")
ball_pos = pd.read_csv(filepath_or_buffer="ball_pos_combined.csv")

# Helper Function for Infield
def is_in_infield(x, y, max_dist=95, max_angle_deg=75):
    """Return True when the point (x, y) falls inside the infield wedge.

    The point is expressed in polar form around the origin: its straight-line
    distance from (0, 0), and its angle measured from the positive y-axis
    (``arctan2(x, y)``), so 0 degrees points along +y and positive angles
    lean toward +x.

    Parameters
    ----------
    x, y : scalar
        Coordinates of the point. If either one is missing (NaN / None /
        pd.NA) the function returns False.
    max_dist : float, default 95
        Largest accepted distance from the origin (inclusive), in the same
        units as ``x`` and ``y``.
    max_angle_deg : float, default 75
        Half-width of the accepted wedge in degrees (inclusive on both sides).

    Returns
    -------
    bool
        Always a plain Python ``bool``, on every code path.
    """
    # Missing tracking data can never count as an infield bounce.
    if pd.isna(x) or pd.isna(y):
        return False
    dist = np.sqrt(x**2 + y**2)
    # arctan2(x, y) (not (y, x)) makes the y-axis the zero-angle direction.
    angle_deg = np.degrees(np.arctan2(x, y))
    return bool(dist <= max_dist and -max_angle_deg <= angle_deg <= max_angle_deg)

# Keys used throughout to identify one play, plus the basic event columns.
play_keys = ["game_str", "play_id"]
event_cols = play_keys + ["timestamp"]

# Hits (event_code 4) and bounces (event_code 16), each with its timestamp
# renamed so the two can sit side by side after the merge below.
is_hit = events["event_code"] == 4
is_bounce = events["event_code"] == 16
hits = events.loc[is_hit, event_cols].rename(columns={"timestamp": "hit_time"})
bounces = events.loc[is_bounce, event_cols].rename(columns={"timestamp": "bounce_time"})

# Earliest bounce of every play: sort by time, keep the first row per play.
first_bounce = bounces.sort_values("bounce_time").drop_duplicates(subset=play_keys)

# Pair each hit with its play's first bounce; discard bounces that do not
# come strictly after the hit.
merged = hits.merge(first_bounce, on=play_keys, how="inner")
merged = merged.loc[merged["bounce_time"] > merged["hit_time"]]

# Look up the ball position at the exact bounce timestamp. A left join keeps
# plays with no matching tracking row; their coordinates are NaN.
ball_cols = play_keys + ["timestamp", "ball_position_x", "ball_position_y"]
bounce_with_pos = merged.merge(
    ball_pos[ball_cols],
    left_on=play_keys + ["bounce_time"],
    right_on=play_keys + ["timestamp"],
    how="left",
)

# Flag bounces that landed in the infield, then keep only those rows.
bounce_with_pos["infield_bounce"] = bounce_with_pos.apply(
    lambda play: is_in_infield(play["ball_position_x"], play["ball_position_y"]),
    axis="columns",
)
bounce_with_pos = bounce_with_pos.loc[bounce_with_pos["infield_bounce"]]

# First acquisition (event_code 2) of every play, together with who made it.
is_acquisition = events["event_code"] == 2
acquisitions = events.loc[is_acquisition, event_cols + ["player_position"]]
first_acq = acquisitions.sort_values("timestamp").drop_duplicates(subset=play_keys)

# Keep only plays whose first acquisition was by an infielder / pitcher / catcher.
infield_positions = [1, 2, 3, 4, 5, 6]
first_acq = first_acq.assign(
    fielder_is_inf=lambda acq: acq["player_position"].isin(infield_positions)
)
first_acq = first_acq.loc[first_acq["fielder_is_inf"]]

# Final set: infield bounces whose play also passed the fielder filter,
# reduced to a single row per play.
valid_bounces = (
    bounce_with_pos
    .merge(first_acq[play_keys], on=play_keys, how="inner")
    .drop_duplicates(subset=play_keys)
)


In [None]:
# Tidy the output columns. "timestamp" came from the ball-position merge, which
# joined it against "bounce_time", so it carries no extra information.
# errors="ignore" keeps this cell re-runnable: on a second execution the column
# is already gone and a plain drop() would raise KeyError. rename() already
# ignores keys that are no longer present.
valid_bounces = (
    valid_bounces
    .drop(columns=["timestamp"], errors="ignore")
    .rename(columns={"bounce_time": "bounce_timestamp"})
)

In [18]:
# Persist the selected plays. The DataFrame index is written as well (to_csv's
# default), so the file starts with an unnamed index column.
valid_bounces.to_csv(path_or_buf="grounders.csv")