# Setup 

In [2]:
import os 
import pandas as pd 
import numpy as np 

# Root folder of the NFL tracking data, taken from the NFL_DATA_PATH
# environment variable (this is None when the variable is not set).
folder_path = os.getenv("NFL_DATA_PATH")

# Multiplier that turns yards per second into miles per hour:
# 3600 seconds in an hour divided by 1760 yards in a mile.
MPH_CONV = 3600 / 1760

# Silence the pandas chained-assignment warning (the pandas default is 'warn').
pd.set_option("mode.chained_assignment", None)

# Functions 

## import_playdata 

In [3]:
# import the data 
def import_playdata(weeknum):
    """Load one week of input/output tracking files and stitch them together.

    Parameters
    ----------
    weeknum : int
        Week number; it is zero-padded to two digits in the file names.

    Returns
    -------
    df_meta : pandas.DataFrame
        Distinct rows of play_player_key, game_play_key, nfl_id, player_side
        and player_to_predict taken from the input file.
    df_pos : pandas.DataFrame
        Input and output positions stacked together (play_player_key,
        frame_id, x, y, source), sorted by player key and frame, with the
        ball_land_x / ball_land_y columns joined on.
    df1 : pandas.DataFrame
        The input file as read, plus the game_play_key, play_player_key and
        source columns added here.
    """
    tracking_in = pd.read_csv(f"{folder_path}//train//input_2023_w{weeknum:02}.csv")
    tracking_out = pd.read_csv(f"{folder_path}//train//output_2023_w{weeknum:02}.csv")

    # build the play-level and player-in-play-level string keys on both frames
    for frame in (tracking_in, tracking_out):
        frame["game_play_key"] = frame["game_id"].astype(str) + "-" + frame["play_id"].astype(str)
        frame["play_player_key"] = frame["game_play_key"] + "-" + frame["nfl_id"].astype(str)

    # one metadata row per distinct combination of these columns
    meta_cols = ["play_player_key", "game_play_key", "nfl_id", "player_side", "player_to_predict"]
    df_meta = tracking_in[meta_cols].drop_duplicates()

    # offset the output frame ids by the last input frame id of the same play,
    # so the output frames continue the input numbering
    last_input_frame = (
        tracking_in
        .groupby("game_play_key")
        .agg(max_frame_id = ("frame_id", "max"))
        .reset_index()
    )
    tracking_out = (
        tracking_out
        .merge(last_input_frame, on = "game_play_key", how = "left")
        .assign(frame_id = lambda d: d["max_frame_id"] + d["frame_id"])
        .drop(columns = ["max_frame_id"])
    )

    # label where each row came from, then stack the two position tables
    tracking_in["source"] = "input"
    tracking_out["source"] = "output"
    pos_cols = ["play_player_key", "frame_id", "x", "y", "source"]
    df_pos = (
        pd.concat([tracking_in[pos_cols], tracking_out[pos_cols]])
        .sort_values(["play_player_key", "frame_id"])
        .reset_index(drop = True)
    )

    # attach the ball landing position recorded in the input file
    ball_landing = tracking_in[["play_player_key", "ball_land_x", "ball_land_y"]].drop_duplicates()
    df_pos = df_pos.merge(ball_landing, on = "play_player_key", how = "left")

    return df_meta, df_pos, tracking_in

# # test the import function 
# df_meta, df_pos, df1 = import_playdata(1) 
# df_pos.head() 

## calc_distance 

In [4]:
# function to get the distance between two points from coordinates in dataframe columns
def calc_distance(df, cols1, cols2):
    """Row-wise Euclidean distance between two points held in dataframe columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing all four coordinate columns.
    cols1 : sequence of str
        Column names of the first point; element 0 is x, element 1 is y.
    cols2 : sequence of str
        Column names of the second point; element 0 is x, element 1 is y.

    Returns
    -------
    pandas.Series
        Distance for every row, in the same units as the coordinates.
    """
    delta_x = df[cols1[0]] - df[cols2[0]]
    delta_y = df[cols1[1]] - df[cols2[1]]
    return (delta_x**2 + delta_y**2)**0.5

## calc_top_speed 

In [5]:
def calc_top_speed(df_pos, frames_per_second = 10):
    """Top frame-to-frame speed of every player in every play.

    Speed is the distance between consecutive rows of the same
    play_player_key multiplied by the frame rate, so it is expressed in
    coordinate units per second. The mph column applies MPH_CONV, the
    yards/second to miles/hour factor, which presumes x and y are in yards.

    The caller's dataframe is left untouched: the helper columns are built on
    a private copy instead of being written into df_pos.

    Parameters
    ----------
    df_pos : pandas.DataFrame
        Needs play_player_key, x and y. Rows must already be ordered by frame
        within each play_player_key, because the previous position is taken
        from the previous row.
    frames_per_second : int or float, default 10
        Tracking frame rate used to turn per-frame distance into speed.

    Returns
    -------
    pandas.DataFrame
        One row per play_player_key with top_speed (units per second) and
        top_speed_mph.
    """
    # copy only the needed columns so no extra columns leak into the caller's frame
    df_track = df_pos[["play_player_key", "x", "y"]].copy()

    # previous position of the same player in the same play
    # (NaN on each player's first row, so that row has no speed)
    prev_xy = df_track.groupby("play_player_key")[["x", "y"]].shift(1)
    df_track["x_last"] = prev_xy["x"]
    df_track["y_last"] = prev_xy["y"]

    # distance per frame * frames per second = units (yards) per second
    df_track["speed"] = calc_distance(df_track, ["x", "y"], ["x_last", "y_last"]) * frames_per_second

    # get the top speed for each player in each play
    df_speed = df_track.groupby("play_player_key").agg(
        top_speed = ("speed", "max")
    ).reset_index()

    # add the speed measurement in mph
    df_speed["top_speed_mph"] = df_speed["top_speed"] * MPH_CONV

    return df_speed

# # test the function 
# df_meta, df_pos, df1 = import_playdata(1) 
# df_speed = calc_top_speed(df_pos) 
# df_speed.head() 

## calc_peak_accel 

In [6]:
def calc_peak_accel(df_pos, frames_per_second = 10):
    """Peak acceleration toward the ball landing spot during the output frames.

    For every row the distance to (ball_land_x, ball_land_y) is computed.
    Closing speed is the drop in that distance since the previous row of the
    same play_player_key times the frame rate, and acceleration is the change
    in closing speed times the frame rate. Units follow the coordinates
    (presumably yards, giving yards/second and yards/second squared).

    The derivatives are computed over all rows before filtering, so the first
    output frames can difference against the preceding input frames. The
    caller's dataframe is left untouched; helper columns live on a private copy.

    Parameters
    ----------
    df_pos : pandas.DataFrame
        Needs play_player_key, source, x, y, ball_land_x and ball_land_y.
        Rows must already be ordered by frame within each play_player_key.
    frames_per_second : int or float, default 10
        Tracking frame rate used for both derivatives.

    Returns
    -------
    pandas.DataFrame
        One row per play_player_key that has rows with source == "output",
        with the maximum acceleration toward the ball in peak_accel.
    """
    # copy only the needed columns so no extra columns leak into the caller's frame
    needed_cols = ["play_player_key", "source", "x", "y", "ball_land_x", "ball_land_y"]
    df_track = df_pos[needed_cols].copy()

    # calculate the distance to the ball landing position at each frame
    df_track["dist_to_ball_land"] = calc_distance(df_track, ["x", "y"], ["ball_land_x", "ball_land_y"])

    # closing speed: positive when the player got closer to the landing spot
    dist_last = df_track.groupby("play_player_key")["dist_to_ball_land"].shift(1)
    df_track["speed_to_ball"] = (dist_last - df_track["dist_to_ball_land"]) * frames_per_second

    # acceleration: change in closing speed per second
    df_track["accel_to_ball"] = df_track.groupby("play_player_key")["speed_to_ball"].diff() * frames_per_second

    # filter to just the output frames
    df_output = df_track.loc[df_track["source"] == "output"]

    # calculate the peak acceleration in each play
    df_accel = df_output.groupby("play_player_key").agg(
        peak_accel = ("accel_to_ball", "max")
    ).reset_index()

    return df_accel

# # test the function 
# df_meta, df_pos, df1 = import_playdata(1) 
# df_accel = calc_peak_accel(df_pos) 
# df_accel.head() 

## calc_separation 

In [7]:
def calc_separation(df_meta, df_pos):
    """Closest defender to each receiver at the last tracked frame of a play.

    Only players flagged player_to_predict in df_meta are considered. Each
    offensive player is paired with every defensive player of the same play,
    and the pair with the smallest distance is kept.

    Parameters
    ----------
    df_meta : pandas.DataFrame
        Needs play_player_key, game_play_key, nfl_id, player_side and a
        boolean player_to_predict column.
    df_pos : pandas.DataFrame
        Needs play_player_key, frame_id, x, y, ball_land_x and ball_land_y.

    Returns
    -------
    pandas.DataFrame
        One row per (game_play_key, rec_nfl_id) with receiver and defender
        positions, their separation, and each one's distance to the ball
        landing position (rec_ball_end_dist, def_ball_end_dist).
    """
    # restrict the tracking rows to the players flagged for prediction
    predicted_keys = df_meta.loc[df_meta["player_to_predict"]][["play_player_key"]]
    df_tracked = df_pos.merge(predicted_keys, on = "play_player_key", how = "inner")

    # rank frames from latest to earliest; rank 1 is each player's last frame
    frame_rank = df_tracked.groupby("play_player_key")["frame_id"].rank(method = "first", ascending = False)
    df_tracked["desc_rank"] = frame_rank
    df_last = df_tracked.loc[df_tracked["desc_rank"] == 1]

    # bring in the side and ids of each player
    df_last = df_last.merge(df_meta, on = "play_player_key", how = "left")

    # split into receivers (offense) and defenders (defense)
    on_offense = df_last["player_side"] == "Offense"
    on_defense = df_last["player_side"] == "Defense"
    receiver_cols = ["game_play_key", "nfl_id", "x", "y", "ball_land_x", "ball_land_y"]
    defender_cols = ["game_play_key", "nfl_id", "x", "y"]
    df_receivers = df_last.loc[on_offense, receiver_cols].rename(
        columns = {"nfl_id": "rec_nfl_id", "x": "rec_x", "y": "rec_y"}
    )
    df_defenders = df_last.loc[on_defense, defender_cols].rename(
        columns = {"nfl_id": "def_nfl_id", "x": "def_x", "y": "def_y"}
    )

    # every receiver/defender pairing within the same play, with its distance
    df_pairs = df_receivers.merge(df_defenders, on = "game_play_key", how = "inner")
    df_pairs["separation"] = calc_distance(df_pairs, ["rec_x", "rec_y"], ["def_x", "def_y"])

    # sort so the nearest defender comes first, then keep the first per receiver
    receiver_key = ["game_play_key", "rec_nfl_id"]
    df_pairs = df_pairs.sort_values(receiver_key + ["separation"])
    df_closest = df_pairs.groupby(receiver_key).first().reset_index()

    # distance from the receiver and from the defender to the ball landing position
    landing_cols = ["ball_land_x", "ball_land_y"]
    df_closest["rec_ball_end_dist"] = calc_distance(df_closest, ["rec_x", "rec_y"], landing_cols)
    df_closest["def_ball_end_dist"] = calc_distance(df_closest, ["def_x", "def_y"], landing_cols)

    return df_closest

# # test the function 
# df_meta, df_pos, df1 = import_playdata(1) 
# df_separation = calc_separation(df_meta, df_pos) 
# df_separation.head() 

# Putting Everything Together 

In [8]:
# per-week results are collected in lists and concatenated once after the loop;
# concatenating inside the loop re-copies every earlier week on each pass
speed_parts = []
accel_parts = []
separation_parts = []

# loop through each week and calculate the metrics
for weeknum in range(1, 18):
    print(f"Processing week {weeknum}...")
    df_meta, df_pos, df1 = import_playdata(weeknum)

    # calculate top speed
    speed_parts.append(calc_top_speed(df_pos))

    # calculate defender acceleration to ball
    accel_parts.append(calc_peak_accel(df_pos))

    # calculate separation at catch
    separation_parts.append(calc_separation(df_meta, df_pos))

# combine the weeks; ignore_index gives each combined frame a unique 0..n-1
# index instead of repeating every week's own 0..k labels
df_speed = pd.concat(speed_parts, ignore_index = True)
df_accel = pd.concat(accel_parts, ignore_index = True)
df_separation = pd.concat(separation_parts, ignore_index = True)

# showcase the results
print("\nTop Speed:")
display(df_speed.head())
print("\nDefender Acceleration to Ball:")
display(df_accel.head())
print("\nSeparation at Catch:")
display(df_separation.head())

Processing week 1...
Processing week 2...
Processing week 3...
Processing week 4...
Processing week 5...
Processing week 6...
Processing week 7...
Processing week 8...
Processing week 9...
Processing week 10...
Processing week 11...
Processing week 12...
Processing week 13...
Processing week 14...
Processing week 15...
Processing week 16...
Processing week 17...

Top Speed:


Unnamed: 0,play_player_key,top_speed,top_speed_mph
0,2023090700-1001-41325,6.902898,14.119564
1,2023090700-1001-44822,2.19545,4.490693
2,2023090700-1001-44834,6.103278,12.483977
3,2023090700-1001-44888,3.612478,7.38916
4,2023090700-1001-44906,3.671512,7.509911



Defender Acceleration to Ball:


Unnamed: 0,play_player_key,peak_accel
0,2023090700-1001-44888,5.941749
1,2023090700-1001-47888,5.855379
2,2023090700-1001-53953,4.106192
3,2023090700-1001-55910,6.517535
4,2023090700-1001-55920,3.728402



Separation at Catch:


Unnamed: 0,game_play_key,rec_nfl_id,rec_x,rec_y,ball_land_x,ball_land_y,def_nfl_id,def_x,def_y,separation,rec_ball_end_dist,def_ball_end_dist
0,2023090700-1001,55920,117.57,36.21,115.610001,34.799999,55910,115.55,39.65,3.989236,2.414477,4.850372
1,2023090700-101,44930,62.49,2.83,63.259998,-0.22,46137,62.87,4.63,1.839674,3.145695,4.865655
2,2023090700-1069,53959,80.19,28.88,82.360001,29.030001,53487,76.14,25.49,5.281534,2.175179,7.156816
3,2023090700-1154,53541,51.36,15.51,52.959999,15.24,54486,48.51,13.26,3.631116,1.62262,4.870615
4,2023090700-1201,55899,48.84,13.01,51.389999,16.469999,54486,46.99,9.33,4.118847,4.29815,8.38687


## Join and Save 

In [9]:
# get a dataframe with the primary defender (the closest defender per receiver);
# .assign returns a new frame, so the key column is never written onto a
# slice of df_separation
df_def = df_separation[["game_play_key", "def_nfl_id", "separation"]].assign(
    play_player_key = lambda d: d["game_play_key"] + "-" + d["def_nfl_id"].astype(str)
)

# join in the speed and acceleration metrics
# (left joins keep every defender row even when a metric is missing)
df_def = (
    df_def.rename(columns = {"def_nfl_id": "nfl_id"})
    .merge(
        df_speed[["play_player_key", "top_speed", "top_speed_mph"]],
        on = "play_player_key", how = "left"
    )
    .merge(
        df_accel[["play_player_key", "peak_accel"]],
        on = "play_player_key", how = "left"
    )
)

# save the final results
df_def.to_csv(f"{folder_path}//defender_metrics.csv", index = False)

# showcase the data
display(df_def.head())

Unnamed: 0,game_play_key,nfl_id,separation,play_player_key,top_speed,top_speed_mph,peak_accel
0,2023090700-1001,55910,3.989236,2023090700-1001-55910,6.220129,12.72299,6.517535
1,2023090700-101,46137,1.839674,2023090700-101-46137,8.31685,17.011739,5.667892
2,2023090700-1069,53487,5.281534,2023090700-1069-53487,3.801316,7.775418,6.149146
3,2023090700-1154,54486,3.631116,2023090700-1154-54486,6.935416,14.186079,8.192108
4,2023090700-1201,54486,4.118847,2023090700-1201-54486,2.692582,5.507555,6.151817
