# Setup 

In [97]:
import os 
import pandas as pd 
import numpy as np 
import plotly.graph_objects as go 

folder_path = os.environ.get("NFL_DATA_PATH") 
results_path = os.environ.get("NFL_RESULTS_PATH") 

pd.options.mode.chained_assignment = None 

## Helper Functions 

In [98]:
# function to get the distance between two points from coordinates in dataframe columns
def calc_distance(df, cols1, cols2): 
    return ((df[cols1[0]] - df[cols2[0]])**2 + (df[cols1[1]] - df[cols2[1]])**2)**0.5 

# function to transform the x coordinate 
def calc_new_x(df, ycol): 
    xnew = 53.3 - df[ycol] 

    # flip if needed 
    xnew = np.where(df["flip_y"], 53.3 - xnew, xnew)

    return xnew  

# function to transform the y coordinate 
def calc_new_y(df, xcol):
    ynew = df[xcol]

    # flip if needed 
    ynew = np.where(df["flip_y"], 120 - ynew, ynew) 

    return ynew 

# function to transform the angle 
def calc_new_angle(df, acol):
    anglenew = df[acol] - 90

    # flip if needed 
    anglenew = np.where(df["flip_y"], (anglenew + 180) % 360, anglenew) 
    
    return anglenew  

# Metrics 

## import_playdata 

In [99]:
# import the data 
def import_playdata(weeknum):

    # read in the dataframes 
    df1 = pd.read_csv(f"{folder_path}//train//input_2023_w{weeknum:02}.csv") 
    df2 = pd.read_csv(f"{folder_path}//train//output_2023_w{weeknum:02}.csv") 

    # create the keys 
    df1["game_play_key"] = df1["game_id"].astype(str) + "-" + df1["play_id"].astype(str) 
    df2["game_play_key"] = df2["game_id"].astype(str) + "-" + df2["play_id"].astype(str) 
    df1["play_player_key"] = df1["game_play_key"] + "-" + df1["nfl_id"].astype(str) 
    df2["play_player_key"] = df2["game_play_key"] + "-" + df2["nfl_id"].astype(str) 

    return df1, df2 

# df1, df2 = import_playdata(1) 
# df2.head() 

## add_play_metadata 

In [100]:
# add metadata for each play 
def add_play_metadata(df1, df2): 

    # get the ending frames for each play 
    df1_ends = df1.groupby("game_play_key")["frame_id"].max().reset_index().rename(columns={"frame_id": "before_frames"}) 
    df2_ends = df2.groupby("game_play_key")["frame_id"].max().reset_index().rename(columns={"frame_id": "after_frames"}) 
    df1 = df1.merge(df1_ends, on = "game_play_key", how = "left") 
    df2 = df2.merge(df2_ends, on = "game_play_key", how = "left") 
    df1["before_time"] = df1["before_frames"] / 10 
    df2["after_time"] = df2["after_frames"] / 10 

    # calculate the difference in x from start to end for each player 
    df1_start = df1.loc[df1["frame_id"] == 1][["play_player_key", "game_play_key", "x"]].rename(columns={"x": "start_x"}) 
    df2_start = df2.loc[df2["frame_id"] == 1][["play_player_key", "game_play_key", "x"]].rename(columns={"x": "end_x"}) 
    df_starts = df1_start.merge(df2_start, on = ["play_player_key", "game_play_key"], how = "inner") 
    df_starts["xdelta"] = df_starts["end_x"] - df_starts["start_x"] 

    # calculate if the play was going left or right (from the sideline perspective) 
    df_avg = df_starts.groupby("game_play_key")["xdelta"].mean().reset_index().rename(columns={"xdelta": "avg_xdelta"}) 
    df_avg["play_direction"] = np.where(df_avg["avg_xdelta"] > 0, "right", "left") 
    df_avg["flip_y"] = np.where(df_avg["play_direction"] == "left", True, False) 

    # merge everything together 
    df1 = df1.merge(df_avg[["game_play_key", "play_direction", "flip_y"]], on = "game_play_key", how = "left") 
    df2 = df2.merge(df_avg[["game_play_key", "play_direction", "flip_y"]], on = "game_play_key", how = "left") 

    # transform the coordinates and angles 
    df1["x_new"] = calc_new_x(df1, "y") 
    df1["y_new"] = calc_new_y(df1, "x")  
    df1["o_new"] = calc_new_angle(df1, "o") 
    df1["dir_new"] = calc_new_angle(df1, "dir") 
    df1["ball_land_x_new"] = calc_new_x(df1, "ball_land_y") 
    df1["ball_land_y_new"] = calc_new_y(df1, "ball_land_x") 
    df2["x_new"] = calc_new_x(df2, "y") 
    df2["y_new"] = calc_new_y(df2, "x") 
    
    # rename the position columns 
    df1 = (
        df1.drop(columns=["x", "y", "o", "dir", "ball_land_x", "ball_land_y"])
        .rename(columns={"x_new": "x", "y_new": "y", "o_new": "o", "dir_new": "dir", "ball_land_x_new": "ball_land_x", "ball_land_y_new": "ball_land_y"}) 
    ) 
    df2 = df2.drop(columns=["x", "y"]).rename(columns={"x_new": "x", "y_new": "y"}) 

    # get the center of the play 
    df_plays = (
        df1[(df1['player_position'] == 'QB') & (df1['frame_id'] == 1)]
        [['game_play_key', 'y']]
        .rename(columns={'y': 'qb_y'})
    ) 

    # merge to df1 and df2 
    df1 = df1.merge(df_plays, on = "game_play_key", how = "left") 
    df2 = df2.merge(df_plays, on = "game_play_key", how = "left") 

    return df1, df2 

# test the function 
df1, df2 = import_playdata(1) 
df1, df2 = add_play_metadata(df1, df2) 
df2.head() 

Unnamed: 0,game_id,play_id,nfl_id,frame_id,game_play_key,play_player_key,after_frames,after_time,play_direction,flip_y,x,y,qb_y
0,2023090700,101,46137,1,2023090700-101,2023090700-101-46137,21,2.1,right,False,36.02,56.22,37.36
1,2023090700,101,46137,2,2023090700-101,2023090700-101-46137,21,2.1,right,False,36.42,56.63,37.36
2,2023090700,101,46137,3,2023090700-101,2023090700-101-46137,21,2.1,right,False,36.84,57.06,37.36
3,2023090700,101,46137,4,2023090700-101,2023090700-101-46137,21,2.1,right,False,37.28,57.48,37.36
4,2023090700,101,46137,5,2023090700-101,2023090700-101-46137,21,2.1,right,False,37.74,57.91,37.36


## calc_starts_ends 

In [101]:
# calculate the starting and ending positions for each play 
def calc_starts_ends(df1, df2): 

    # get the start positions 
    df1_starts = df1.loc[df1["frame_id"] == 1].rename(columns = {
        "x": "before_x0", 
        "y": "before_y0", 
        "s": "before_s0", 
        "a": "before_a0", 
        "dir": "before_dir0", 
        "o": "before_o0" 
    }) 
    df1_ends = df1.loc[df1["frame_id"] == df1["before_frames"]][["play_player_key", "x", "y", "s", "a", "dir", "o"]].rename(columns = {
        "x": "before_x1", 
        "y": "before_y1", 
        "s": "before_s1",
        "a": "before_a1", 
        "dir": "before_dir1", 
        "o": "before_o1"
    }) 

    # get the end positions 
    df2_starts = df2.loc[df2["frame_id"] == 1][["play_player_key", "after_time", "x", "y"]].rename(columns = {
        "x": "after_x0", 
        "y": "after_y0" 
    }) 
    df2_ends = df2.loc[df2["frame_id"] == df2["after_frames"]][["play_player_key", "x", "y"]].rename(columns = {
        "x": "after_x1", 
        "y": "after_y1" 
    }) 

    # merge the dataframes together 
    df = (
        df1_starts.merge(df1_ends, on = "play_player_key", how = "left") 
        .merge(df2_starts, on = "play_player_key", how = "left") 
        .merge(df2_ends, on = "play_player_key", how = "left")
    ) 

    return df 

## air_speed_calcs 

In [102]:
# function to calculate speed and acceleration during the air portion of the play 
def air_speed_calcs(df1, df2): 

    # add the ball landing position to the dataframe 
    df_lands = df1[["game_play_key", "ball_land_x", "ball_land_y"]].drop_duplicates() 
    df2 = df2.merge(df_lands, on = "game_play_key", how = "left") 

    # get the player ending positions before the throw 
    df_ends = (
        df1.loc[df1["frame_id"] == df1["before_frames"]]
        [["play_player_key", "x", "y", "a", "s", "o", "dir", "ball_land_x", "ball_land_y"]] 
    )
    
    # calculate the angle to the ball landing position 
    df_ends["angle_to_ball"] = np.degrees(np.arctan2(
        df_ends["ball_land_x"] - df_ends["x"], 
        df_ends["ball_land_y"] - df_ends["y"]
    ))
    
    # calculate difference between movement direction and ball direction
    df_ends["dir_diff"] = df_ends["dir"] - df_ends["angle_to_ball"] 
    df_ends["dir_diff"] = ((df_ends["dir_diff"] + 180) % 360) - 180
    
    # calculate speed component toward ball (positive = toward, negative = away)
    df_ends["speed_to_ball_before"] = df_ends["s"] * np.cos(np.radians(df_ends["dir_diff"])) 

    # calculate the distance to the ball landing position at each frame 
    df2["dist_to_ball_land"] = calc_distance(df2, ["x", "y"], ["ball_land_x", "ball_land_y"]) 

    # lag the distances 
    df2 = df2.sort_values(by = ["play_player_key", "frame_id"]) 
    df2["dist_to_ball_land-1"] = df2.groupby("play_player_key")["dist_to_ball_land"].shift(1) 

    # calculate the frame-to-frame distance and speed
    df2["frame_dist"] = df2["dist_to_ball_land-1"] - df2["dist_to_ball_land"] 
    df2["frame_speed"] = df2["frame_dist"] * 10  

    # calculate the acceleration 
    df2["frame_speed-1"] = df2.groupby("play_player_key")["frame_speed"].shift(1) 
    df2["frame_accel"] = (df2["frame_speed"] - df2["frame_speed-1"]) * 10 

    # subset the columns 
    df2 = df2[["play_player_key", "frame_id", "frame_dist", "frame_speed", "frame_accel"]] 

    # aggregate the metrics 
    df2_agg = df2.groupby("play_player_key").agg(
        MAX_AIR_SPEED = ("frame_speed", "max"),
        AVG_AIR_SPEED = ("frame_speed", "mean"), 
        MIN_AIR_SPEED = ("frame_speed", "min"), 
        MAX_AIR_ACCEL = ("frame_accel", "max"),
        AVG_AIR_ACCEL = ("frame_accel", "mean"), 
        MIN_AIR_ACCEL = ("frame_accel", "min"), 
        TOTAL_AIR_DIST = ("frame_dist", "sum") 
    ).reset_index() 

    # add the before speed 
    df2_agg = df2_agg.merge(
        df_ends[["play_player_key", "speed_to_ball_before"]], 
        on = "play_player_key", 
        how = "left"
    ) 

    return df2, df2_agg 

## calc_matchup_distances 

In [103]:
# calculate distances between players at the start of the play 
def calc_matchup_distances(df): 

    # get the starting position of the targeted receiver 
    dfr = df.loc[df["player_to_predict"]][["game_play_key", "before_x0", "before_y0", "before_x1", "before_y1"]].rename(columns={
        "before_x0": "target_x", 
        "before_y0": "target_y", 
        "before_x1": "target_x1",
        "before_y1": "target_y1" 
    }) 

    # get the starting positions of all defenders 
    dfd = df.loc[df["player_side"] == "Defense"][["game_play_key", "play_player_key", "before_x0", "before_y0", "before_x1", "before_y1"]].rename(columns={
        "before_x0": "defender_x",
        "before_y0": "defender_y", 
        "before_x1": "defender_x1",
        "before_y1": "defender_y1" 
    }) 

    # calculate distances and rank them 
    dfd = dfr.merge(dfd, on = "game_play_key", how = "inner") 
    dfd["defender_start_dist"] = calc_distance(dfd, ["defender_x", "defender_y"], ["target_x", "target_y"]) 
    dfd["defender_rank"] = dfd.groupby("game_play_key")["defender_start_dist"].rank(method = "min") 
    dfd["defender_dist_before1"] = calc_distance(dfd, ["defender_x1", "defender_y1"], ["target_x1", "target_y1"]) 
    dfd["defender_dist_before1_rank"] = dfd.groupby("game_play_key")["defender_dist_before1"].rank(method = "min") 

    return dfd[["play_player_key", "defender_start_dist", "defender_rank", "defender_dist_before1", "defender_dist_before1_rank"]]

# calc_matchup_distances(df)

## calc_position_metrics 

In [104]:
# function to calculate position metrics 
def calc_position_metrics(df): 

    # calculate distances and speeds 
    df["ball_dist_before"] = calc_distance(df, ["before_x1", "before_y1"], ["ball_land_x", "ball_land_y"]) 
    df["ball_dist_after"] = calc_distance(df, ["after_x1", "after_y1"], ["ball_land_x", "ball_land_y"]) 
    df["air_move_dist"] = calc_distance(df, ["after_x0", "after_y0"], ["after_x1", "after_y1"]) 
    df["air_move_speed"] = df["air_move_dist"] / df["after_time"]

    # rank by defender distance 
    #df2 = df.loc[df["player_side"] == "Defense"]
    df["ball_dist_after_rank"] = df.groupby("game_play_key")["ball_dist_after"].rank(method = "min") 
    #df = df.merge(df2[["play_player_key", "ball_dist_after_rank"]], on = "play_player_key", how = "left") 

    # display(df)

    return df 

## calc_angles 

In [105]:
# function to calculate angles based on coordinates 
def calc_angles(xc0, yc0, xc1, yc1): 

    # calculate the angle in degrees 
    angle = np.degrees(np.arctan2(xc1 - xc0, yc1 - yc0)) 

    # normalize between -180 and 180 
    angle = ((angle + 180) % 360) - 180 

    return angle 

# calc_angles(0, 0, 0.11, -1)  # test the function 

# classify angles into categories 
def classify_angles(angles, labels = ["left", "forward", "right", "back"]):
    return np.where(
        (angles > -135) & (angles <= -45), labels[0], 
        np.where(
            (angles > -45) & (angles <= 45), labels[1], 
            np.where(
                (angles > 45) & (angles <= 135), labels[2], 
                labels[3]
            )
        ) 
    )

def calc_angle_cols(df): 

    # noarmalize the angles between -180 and 180 
    df["before_o1"] = ((df["before_o1"] + 180) % 360) - 180 
    df["before_dir1"] = ((df["before_dir1"] + 180) % 360) - 180 

    # calculate the angle to the ball landing point 
    df["angle_to_ball"] = calc_angles(df["before_x1"], df["before_y1"], df["ball_land_x"], df["ball_land_y"]) 

    # categorize the angles 
    df["face_before"] = classify_angles(df["before_o1"], labels = ["right", "back", "left", "forward"]) 
    df["move_before"] = classify_angles(df["before_dir1"], labels = ["right", "back", "left", "forward"]) 
    df["angle_to_ball_cat"] = classify_angles(df["angle_to_ball"], labels = ["right", "back", "left", "forward"]) 

    # calculate movement relative to player orientation 
    df["move_to_face_angle"] = df["before_dir1"] - df["before_o1"] 
    df["move_to_face_angle"] = ((df["move_to_face_angle"] + 180) % 360) - 180 
    df["move_to_face"] = classify_angles(df["move_to_face_angle"], labels = ["side left", "forward", "side right", "backpedal"]) 

    # calculate movement relative to ball landing point 
    df["move_to_ball_angle"] = df["before_dir1"] - df["angle_to_ball"] 
    df["move_to_ball_angle"] = ((df["move_to_ball_angle"] + 180) % 360) - 180 
    df["move_to_ball"] = classify_angles(df["move_to_ball_angle"], labels = ["side left", "forward", "side right", "backpedal"]) 

    return df 

## Put Everything Together 

In [106]:
weeknum = 1 

# loop through each week and process the data 
df_all = pd.DataFrame() 
for weeknum in range(1, 2): 
    print(f"Processing week {weeknum}") 

    # import the data 
    df1, df2 = import_playdata(weeknum) 

    # add metadata for each play 
    df1, df2 = add_play_metadata(df1, df2) 

    # calculate the starting and ending positions for each play 
    df = calc_starts_ends(df1, df2) 

    # calculate position metrics 
    df = calc_position_metrics(df) 

    # filter to just the defense 
    df = df.loc[df["player_side"] == "Defense"]

    # calculate air speed metrics 
    df2_metrics, df2_agg = air_speed_calcs(df1, df2) 
    df = df.merge(df2_agg, on = "play_player_key", how = "left") 

    # calculate matchup distances 
    df_matchups = calc_matchup_distances(df) 
    df = df.merge(df_matchups, on = "play_player_key", how = "left") 

    # calculate angle metrics 
    df = calc_angle_cols(df) 

    # # filter to just the closest defender 
    # df = df.loc[df["ball_dist_after_rank"] == 1] 

    # append to the overall dataframe 
    df_all = pd.concat([df_all, df], axis = 0) 

# save to a dataframe 
df_all.to_csv(f"{folder_path}//position_analysis.csv", index = False) 

# showcase the data 
df_all.head() 

Processing week 1


Unnamed: 0,game_id,play_id,player_to_predict,nfl_id,frame_id,play_direction_x,absolute_yardline_number,player_name,player_height,player_weight,...,defender_dist_before1,defender_dist_before1_rank,angle_to_ball,face_before,move_before,angle_to_ball_cat,move_to_face_angle,move_to_face,move_to_ball_angle,move_to_ball
0,2023090700,101,False,54527,1,right,42,Bryan Cook,6-1,210,...,17.975386,7.0,80.441708,forward,back,left,-118.46,side left,-37.181708,forward
1,2023090700,101,False,54527,1,right,42,Bryan Cook,6-1,210,...,24.918485,8.0,80.441708,forward,back,left,-118.46,side left,-37.181708,forward
2,2023090700,101,True,46137,1,right,42,Justin Reid,6-1,204,...,0.0,1.0,67.41881,left,back,left,-50.82,side left,-23.24881,forward
3,2023090700,101,True,46137,1,right,42,Justin Reid,6-1,204,...,9.399415,4.0,67.41881,left,back,left,-50.82,side left,-23.24881,forward
4,2023090700,101,True,52546,1,right,42,L'Jarius Sneed,6-1,193,...,9.399415,4.0,39.698237,forward,left,back,-117.29,side left,62.481763,side right


In [107]:
df2.head() 

Unnamed: 0,game_id,play_id,nfl_id,frame_id,game_play_key,play_player_key,after_frames,after_time,play_direction,flip_y,x,y,qb_y
0,2023090700,101,46137,1,2023090700-101,2023090700-101-46137,21,2.1,right,False,36.02,56.22,37.36
1,2023090700,101,46137,2,2023090700-101,2023090700-101-46137,21,2.1,right,False,36.42,56.63,37.36
2,2023090700,101,46137,3,2023090700-101,2023090700-101-46137,21,2.1,right,False,36.84,57.06,37.36
3,2023090700,101,46137,4,2023090700-101,2023090700-101-46137,21,2.1,right,False,37.28,57.48,37.36
4,2023090700,101,46137,5,2023090700-101,2023090700-101-46137,21,2.1,right,False,37.74,57.91,37.36


In [108]:
# get the percentiles 
vals = df["AVG_AIR_SPEED"].loc[~df["AVG_AIR_SPEED"].isna()] 
pcts = np.linspace(0, 100, 100)
pct_vals = np.percentile(vals, pcts) 

# showcase in a scatter plot 
fig = go.Figure() 
fig.add_trace(go.Scatter(
    x=pcts, 
    y=pct_vals, 
    mode='lines+markers', 
    name='AVG_AIR_SPEED Percentiles'
)) 

fig.update_layout(
    title='Average Air Speed Percentiles',
    xaxis_title='Percentile',
    yaxis_title='Average Air Speed (yards/second)',
    template='plotly_white'
) 

fig.show() 

# Other 

In [109]:
# weeknum = 1 

# # read in the dataframes 
# df1 = pd.read_csv(f"{folder_path}//train//input_2023_w{weeknum:02}.csv") 
# df2 = pd.read_csv(f"{folder_path}//train//output_2023_w{weeknum:02}.csv") 

# # create the keys 
# df1["game_play_key"] = df1["game_id"].astype(str) + "-" + df1["play_id"].astype(str) 
# df2["game_play_key"] = df2["game_id"].astype(str) + "-" + df2["play_id"].astype(str) 
# df1["play_player_key"] = df1["game_play_key"] + "-" + df1["nfl_id"].astype(str) 
# df2["play_player_key"] = df2["game_play_key"] + "-" + df2["nfl_id"].astype(str) 

# # get the ending frames for each play 
# df1_ends = df1.groupby("game_play_key")["frame_id"].max().reset_index().rename(columns={"frame_id": "before_frames"}) 
# df2_ends = df2.groupby("game_play_key")["frame_id"].max().reset_index().rename(columns={"frame_id": "after_frames"}) 
# df1 = df1.merge(df1_ends, on = "game_play_key", how = "left") 
# df2 = df2.merge(df2_ends, on = "game_play_key", how = "left") 
# df1["before_time"] = df1["before_frames"] / 10 
# df2["after_time"] = df2["after_frames"] / 10 

# # get the start positions 
# df1_starts = df1.loc[df1["frame_id"] == 1].rename(columns = {
#     "x": "before_x0", 
#     "y": "before_y0", 
#     "s": "before_s0", 
#     "a": "before_a0", 
#     "dir": "before_dir0", 
#     "o": "before_o0" 
# }) 
# df1_ends = df1.loc[df1["frame_id"] == df1["before_frames"]][["play_player_key", "x", "y", "s", "a", "dir", "o"]].rename(columns = {
#     "x": "before_x1", 
#     "y": "before_y1", 
#     "s": "before_s1",
#     "a": "before_a1", 
#     "dir": "before_dir1", 
#     "o": "before_o1"
# }) 

# # get the end positions 
# df2_starts = df2.loc[df2["frame_id"] == 1][["play_player_key", "after_time", "x", "y"]].rename(columns = {
#     "x": "after_x0", 
#     "y": "after_y0" 
# }) 
# df2_ends = df2.loc[df2["frame_id"] == df2["after_frames"]][["play_player_key", "x", "y"]].rename(columns = {
#     "x": "after_x1", 
#     "y": "after_y1" 
# }) 

# # merge the dataframes together 
# df = (
#     df1_starts.merge(df1_ends, on = "play_player_key", how = "left") 
#     .merge(df2_starts, on = "play_player_key", how = "left") 
#     .merge(df2_ends, on = "play_player_key", how = "left")
# ) 

# # df1_starts.head() 



# df.head() 

# Process Data 

In [110]:
# weeknum = 1 

# # loop through each week and process the data 
# df_all = pd.DataFrame() 
# for weeknum in range(1, 18): 
#     print(f"Processing week {weeknum}") 

#     # read in the dataframes 
#     df1 = pd.read_csv(f"{folder_path}//train//input_2023_w{weeknum:02}.csv") 
#     df2 = pd.read_csv(f"{folder_path}//train//output_2023_w{weeknum:02}.csv") 

#     # filter to the last frame of each play 
#     df1_last = df1.groupby(["game_id", "play_id"])["frame_id"].max().reset_index().rename(columns={"frame_id": "last_frame_id"}) 
#     df1 = df1.merge(df1_last, on = ["game_id", "play_id"], how = "inner") 
#     df1_last = df1.loc[df1["frame_id"] == df1["last_frame_id"]].drop(columns = ["last_frame_id"]).rename(columns = {"frame_id": "input_frames"}) 
#     df2_last = df2.groupby(["game_id", "play_id"])["frame_id"].max().reset_index().rename(columns={"frame_id": "last_frame_id"})  
#     df2 = df2.merge(df2_last, on = ["game_id", "play_id"], how = "inner") 
#     df2_last = df2.loc[df2["frame_id"] == df2["last_frame_id"]].drop(columns = ["last_frame_id"]).rename(columns = {"frame_id": "output_frames"}) 

#     # combine the last from for both dataframes 
#     df1_last = df1_last.rename(columns={"x": "x1", "y": "y1"}) 
#     df2_last = df2_last.rename(columns={"x": "x2", "y": "y2"}) 
#     df_last = df1_last.merge(df2_last, on = ["game_id", "play_id", "nfl_id"], how = "left") 

#     # flag whether or not the player has both input and output frames 
#     df_last["has_output"] = np.where(df_last["x2"].isna(), 0, 1) 

#     # calculate the play timing 
#     df_last["input_time"] = df_last["input_frames"] / 10 
#     df_last["output_time"] = df_last["output_frames"] / 10 
#     df_last["total_time"] = df_last["input_time"] + df_last["output_time"] 

#     # calculate the distances before and after the throw 
#     df_last["distance_before"] = calc_distance(df_last, ["x1", "y1"], ["ball_land_x", "ball_land_y"]) 
#     df_last["distance_after"] = calc_distance(df_last, ["x2", "y2"], ["ball_land_x", "ball_land_y"]) 

#     # append to the overall dataframe 
#     df_all = pd.concat([df_all, df_last], axis = 0) 

# # create some unique keys 
# df_all["game_play_key"] = df_all["game_id"].astype(str) + "-" + df_all["play_id"].astype(str) 
# df_all["play_player_key"] = df_all["game_play_key"] + "-" + df_all["nfl_id"].astype(str)

# # save to a csv 
# df_all.to_csv(f"{folder_path}//defender_analysis.csv", index = False) 

# df_all.head() 

# df_last.head() 

# df_last

# Defender Movement 

In [111]:
# weeknum = 1 

# # read in the dataframes 
# df1 = pd.read_csv(f"{folder_path}//train//input_2023_w{weeknum:02}.csv") 
# df2 = pd.read_csv(f"{folder_path}//train//output_2023_w{weeknum:02}.csv") 

# # get a unique list of players 
# df_players = df1[["nfl_id", "player_name", "player_position", "player_side", "player_to_predict"]].drop_duplicates().reset_index(drop=True) 
# # df_defenders = df_players[df_players["player_side"] == "Defense"].reset_index(drop=True)

# # add the player attributes to the output dataframe 
# df2 = df2.merge(df_players, on = "nfl_id", how = "left") 

# # create the keys 
# df1["game_play_key"] = df1["game_id"].astype(str) + "-" + df1["play_id"].astype(str) 
# df2["game_play_key"] = df2["game_id"].astype(str) + "-" + df2["play_id"].astype(str) 
# df1["play_player_key"] = df1["game_play_key"] + "-" + df1["nfl_id"].astype(str) 
# df2["play_player_key"] = df2["game_play_key"] + "-" + df2["nfl_id"].astype(str) 

# # get the center of the play 
# df_plays = (
#     df1[(df1['player_position'] == 'QB') & (df1['frame_id'] == 1)]
#     [['game_play_key', 'y', "ball_land_x", "ball_land_y"]]
#     .rename(columns={'y': 'qb_y'})
# ) 

# # calculate the difference in x from start to end for each player 
# df1_start = df1.loc[df1["frame_id"] == 1][["play_player_key", "game_play_key", "x"]].rename(columns={"x": "start_x"}) 
# df2_start = df2.loc[df2["frame_id"] == 1][["play_player_key", "game_play_key", "x"]].rename(columns={"x": "end_x"}) 
# df_starts = df1_start.merge(df2_start, on = ["play_player_key", "game_play_key"], how = "inner") 
# df_starts["xdelta"] = df_starts["end_x"] - df_starts["start_x"] 

# # calculate if the play was going left or right (from the sideline perspective) 
# df_avg = df_starts.groupby("game_play_key")["xdelta"].mean().reset_index().rename(columns={"xdelta": "avg_xdelta"}) 
# df_avg["play_direction"] = np.where(df_avg["avg_xdelta"] > 0, "right", "left") 

# # get the ending frames for each play 
# df1_ends = df1.groupby("game_play_key")["frame_id"].max().reset_index().rename(columns={"frame_id": "before_frames"}) 
# df2_ends = df2.groupby("game_play_key")["frame_id"].max().reset_index().rename(columns={"frame_id": "after_frames"}) 

# # put everything together 
# df_plays = (
#     df_plays.merge(df_avg[["game_play_key", "play_direction"]], on = "game_play_key", how = "inner") 
#     .merge(df1_ends, on = "game_play_key", how = "inner") 
#     .merge(df2_ends, on = "game_play_key", how = "inner") 
# ) 

# # add the play summary stuff to the dataframes 
# df1 = df1.merge(df_plays, on = "game_play_key", how = "left") 
# df2 = df2.merge(df_plays, on = "game_play_key", how = "left") 

# df_plays.head() 

In [112]:
# # filter to defenders only 

# # get the start and end positions for each defender while the ball is in the air 
# df2_def = df2.loc[df2["player_side"] == "Defense"] 
# df2_def_start = df2_def.loc[df2_def["frame_id"] == 1][["play_player_key", "game_play_key", "x", "y"]].rename(columns={"x": "start_x", "y": "start_y"}) 
# df2_def_end = df2_def.groupby("play_player_key").agg({"frame_id": "max"}).reset_index().rename(columns={"frame_id": "end_frame_id"}) 