# Setup 

In [22]:
import os 
import pandas as pd 
import numpy as np 

# Directory locations come from environment variables rather than being
# hard-coded; each name is None when its variable is not set.
# folder_path is the root that the weekly train CSVs are read from.
folder_path = os.getenv("NFL_DATA_PATH")
# NOTE(review): results_path is presumably where derived outputs belong — confirm.
results_path = os.getenv("NFL_RESULTS_PATH")

In [3]:
# Sanity check that NFL_DATA_PATH was picked up (prints None when it is unset).
# NOTE(review): the saved output of this cell exposes an absolute local path —
# consider clearing it before sharing the notebook.
print(folder_path)

C:\Users\rjfis\OneDrive - Richard Fischer\Documents\NFL Big Data Bowl\Data\nfl-big-data-bowl-2026-analytics\114239_nfl_competition_files_published_analytics_final


## Process Data 

In [19]:
def calc_distance(df, cols1, cols2):
    """Row-wise Euclidean distance between two points held in dataframe columns.

    Parameters
    ----------
    df : object indexable by column name (e.g. a pandas DataFrame)
        Holds the coordinate columns.
    cols1 : sequence
        ``cols1[0]`` / ``cols1[1]`` name the x / y columns of the first point.
    cols2 : sequence
        ``cols2[0]`` / ``cols2[1]`` name the x / y columns of the second point.

    Returns
    -------
    The element-wise value ``sqrt(dx**2 + dy**2)``; a missing coordinate in a
    row propagates as NaN for that row.
    """
    # per-axis offsets between the two points
    delta_x = df[cols1[0]] - df[cols2[0]]
    delta_y = df[cols1[1]] - df[cols2[1]]

    squared_length = delta_x**2 + delta_y**2
    return squared_length**0.5

In [31]:
# Build one row per (game_id, play_id, nfl_id) that pairs each player's position
# at the last input frame of a play (x1, y1) with their position at the last
# output frame (x2, y2), then derive play timing and distances to the ball
# landing spot. Per the distance comment below, input frames are treated as
# "before the throw" and output frames as "after the throw".

WEEKS = range(1, 18)          # weekly files are numbered w01 .. w17
FRAMES_PER_SECOND = 10        # frame counts are divided by this to get seconds
                              # NOTE(review): assumes 10 frames per second — confirm
PLAY_KEYS = ["game_id", "play_id"]


def _last_frame_rows(tracking, frames_col):
    """Return the rows of `tracking` that sit on each play's final frame.

    The final frame is the largest ``frame_id`` within each
    (game_id, play_id) group. In the result ``frame_id`` is renamed to
    `frames_col`, so the value doubles as the play's frame count.
    """
    last_frame_id = tracking.groupby(PLAY_KEYS)["frame_id"].transform("max")
    on_last_frame = tracking["frame_id"] == last_frame_id
    return tracking.loc[on_last_frame].rename(columns={"frame_id": frames_col})


# Collect the weekly frames in a list and concatenate once after the loop;
# concatenating onto a growing dataframe inside the loop re-copies every
# previously processed row on each iteration.
weekly_frames = []
for weeknum in WEEKS:
    print(f"Processing week {weeknum}")

    # read in the dataframes
    df1 = pd.read_csv(os.path.join(folder_path, "train", f"input_2023_w{weeknum:02}.csv"))
    df2 = pd.read_csv(os.path.join(folder_path, "train", f"output_2023_w{weeknum:02}.csv"))

    # filter to the last frame of each play and give the coordinates
    # distinct names so they do not collide in the merge below
    df1_last = _last_frame_rows(df1, "input_frames").rename(columns={"x": "x1", "y": "y1"})
    df2_last = _last_frame_rows(df2, "output_frames").rename(columns={"x": "x2", "y": "y2"})

    # combine the last frame for both dataframes; the left join keeps every
    # player from the input data even when they have no output rows
    df_last = df1_last.merge(df2_last, on=["game_id", "play_id", "nfl_id"], how="left")

    # flag whether or not the player has both input and output frames
    df_last["has_output"] = np.where(df_last["x2"].isna(), 0, 1)

    # calculate the play timing (output_time and total_time are NaN for
    # players without output frames)
    df_last["input_time"] = df_last["input_frames"] / FRAMES_PER_SECOND
    df_last["output_time"] = df_last["output_frames"] / FRAMES_PER_SECOND
    df_last["total_time"] = df_last["input_time"] + df_last["output_time"]

    # calculate the distances before and after the throw
    df_last["distance_before"] = calc_distance(df_last, ["x1", "y1"], ["ball_land_x", "ball_land_y"])
    df_last["distance_after"] = calc_distance(df_last, ["x2", "y2"], ["ball_land_x", "ball_land_y"])

    weekly_frames.append(df_last)

# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating each week's 0-based labels
df_all = pd.concat(weekly_frames, axis=0, ignore_index=True)

# create some unique keys
df_all["game_play_key"] = df_all["game_id"].astype(str) + "-" + df_all["play_id"].astype(str)
df_all["play_player_key"] = df_all["game_play_key"] + "-" + df_all["nfl_id"].astype(str)

# save to a csv
# NOTE(review): this writes into the input data folder; results_path is defined
# in the setup cell but not used here — confirm the intended destination.
df_all.to_csv(os.path.join(folder_path, "defender_analysis.csv"), index=False)

df_all.head()

Processing week 1
Processing week 2
Processing week 3
Processing week 4
Processing week 5
Processing week 6
Processing week 7
Processing week 8
Processing week 9
Processing week 10
Processing week 11
Processing week 12
Processing week 13
Processing week 14
Processing week 15
Processing week 16
Processing week 17


Unnamed: 0,game_id,play_id,player_to_predict,nfl_id,input_frames,play_direction,absolute_yardline_number,player_name,player_height,player_weight,...,x2,y2,has_output,input_time,output_time,total_time,distance_before,distance_after,game_play_key,play_player_key
0,2023090700,101,False,54527,26,right,42,Bryan Cook,6-1,210,...,,,0,2.6,,,36.314143,,2023090700-101,2023090700-101-54527
1,2023090700,101,True,46137,26,right,42,Justin Reid,6-1,204,...,62.87,4.63,1,2.6,2.1,4.7,19.375388,4.865655,2023090700-101,2023090700-101-46137
2,2023090700,101,True,52546,26,right,42,L'Jarius Sneed,6-1,193,...,52.48,5.38,1,2.6,2.1,4.7,19.820143,12.147772,2023090700-101,2023090700-101-52546
3,2023090700,101,False,53487,26,right,42,Nick Bolton,6-0,232,...,,,0,2.6,,,26.305413,,2023090700-101,2023090700-101-53487
4,2023090700,101,False,54486,26,right,42,Trent McDuffie,5-11,195,...,,,0,2.6,,,44.342922,,2023090700-101,2023090700-101-54486
