In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the three raw CSV tables (injury records, per-frame player tracking,
# and the play list) in one pass; the names are bound in the same order as
# the paths below.
injuryrecord, playertrackdata, playlist = map(
    pd.read_csv,
    (
        'datasets/nfl-playing-surface-analytics/InjuryRecord.csv',
        'datasets/nfl-playing-surface-analytics/PlayerTrackData.csv',
        'datasets/nfl-playing-surface-analytics/PlayList.csv',
    ),
)

In [3]:
# Preview the first rows of the raw tracking table to check its columns.
playertrackdata.head()

Unnamed: 0,PlayKey,time,event,x,y,dir,dis,o,s
0,26624-1-1,0.0,huddle_start_offense,87.46,28.93,288.24,0.01,262.33,0.13
1,26624-1-1,0.1,,87.45,28.92,283.91,0.01,261.69,0.12
2,26624-1-1,0.2,,87.44,28.92,280.4,0.01,261.17,0.12
3,26624-1-1,0.3,,87.44,28.92,278.79,0.01,260.66,0.1
4,26624-1-1,0.4,,87.44,28.92,275.44,0.01,260.27,0.09


In [8]:
# Display the play-level table; the bare expression renders a truncated
# view (first and last rows) rather than every row.
playlist

Unnamed: 0,PlayerKey,GameID,PlayKey,RosterPosition,PlayerDay,PlayerGame,StadiumType,FieldType,Temperature,Weather,PlayType,PlayerGamePlay,Position,PositionGroup
0,26624,26624-1,26624-1-1,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,1,QB,QB
1,26624,26624-1,26624-1-2,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,2,QB,QB
2,26624,26624-1,26624-1-3,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Rush,3,QB,QB
3,26624,26624-1,26624-1-4,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Rush,4,QB,QB
4,26624,26624-1,26624-1-5,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,5,QB,QB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267000,47888,47888-13,47888-13-51,Cornerback,99,13,Outdoor,Synthetic,33,Sunny and cold,Pass,51,DB,DB
267001,47888,47888-13,47888-13-52,Cornerback,99,13,Outdoor,Synthetic,33,Sunny and cold,Pass,52,DB,DB
267002,47888,47888-13,47888-13-53,Cornerback,99,13,Outdoor,Synthetic,33,Sunny and cold,Pass,53,DB,DB
267003,47888,47888-13,47888-13-54,Cornerback,99,13,Outdoor,Synthetic,33,Sunny and cold,Pass,54,DB,DB


In [5]:
# feature engineering for motion data
def _wrapped_angle_delta(angle_deg: pd.Series) -> pd.Series:
    """Row-to-row change of an angle series given in degrees.

    The result is in radians and wrapped into [-pi, pi), so crossing the
    0/360 boundary (e.g. 350 -> 10) counts as a small turn instead of a
    near-full rotation. The first row has no predecessor and is NaN.
    """
    angle_rad = np.deg2rad(angle_deg)
    return (angle_rad.diff() + np.pi) % (2 * np.pi) - np.pi


def engineer_motion_features(play: pd.DataFrame) -> pd.DataFrame:
    """Add finite-difference motion features to the tracking rows of one play.

    Parameters
    ----------
    play : pd.DataFrame
        Rows containing at least the columns ``time``, ``s``, ``dir`` and
        ``o``. ``dir`` and ``o`` are passed through ``np.deg2rad``, i.e. they
        are treated as angles in degrees.

    Returns
    -------
    pd.DataFrame
        A copy of ``play`` sorted by ``time`` (the input is not modified)
        with these columns added:

        - ``dt``: difference between consecutive ``time`` values
        - ``acceleration``: change in ``s`` per unit of time
        - ``jerk``: change in ``acceleration`` per unit of time
        - ``turn_rate``: wrapped change in ``dir`` (radians) per unit of time
        - ``orient_rate``: wrapped change in ``o`` (radians) per unit of time

        The first row of every derived column is NaN (``jerk`` needs two
        predecessors, so its first two rows are NaN). Rows whose ``dt`` is
        zero get NaN rates instead of +/-inf.
    """
    # Stable sort: rows that share a timestamp keep their input order, so the
    # differences below are deterministic.
    play = play.sort_values('time', kind='mergesort').copy()

    # time delta
    play['dt'] = play['time'].diff()

    # Denominator for all rates. A repeated timestamp gives dt == 0, which
    # would turn every rate into +/-inf and poison later aggregates; mask
    # those steps to NaN instead. The dt column itself keeps the raw value.
    step = play['dt'].where(play['dt'] != 0)

    # acceleration & jerk
    play['acceleration'] = play['s'].diff() / step
    play['jerk'] = play['acceleration'].diff() / step

    # turn_rate: change in motion direction
    play['turn_rate'] = _wrapped_angle_delta(play['dir']) / step

    # orient_rate: change in facing orientation
    play['orient_rate'] = _wrapped_angle_delta(play['o']) / step

    return play


# Run the feature engineering separately on each play (one group per
# PlayKey), then replace the inherited row labels with a fresh index.
df_features = (
    playertrackdata
    .groupby('PlayKey', group_keys=False)
    .apply(engineer_motion_features)
    .reset_index(drop=True)
)

# df_features holds the original columns plus dt, acceleration, jerk,
# turn_rate and orient_rate.
print(df_features.head())

  df_features = (playertrackdata.groupby('PlayKey', group_keys=False).apply(engineer_motion_features).reset_index(drop=True))


     PlayKey  time                 event      x      y     dir   dis       o  \
0  26624-1-1   0.0  huddle_start_offense  87.46  28.93  288.24  0.01  262.33   
1  26624-1-1   0.1                   NaN  87.45  28.92  283.91  0.01  261.69   
2  26624-1-1   0.2                   NaN  87.44  28.92  280.40  0.01  261.17   
3  26624-1-1   0.3                   NaN  87.44  28.92  278.79  0.01  260.66   
4  26624-1-1   0.4                   NaN  87.44  28.92  275.44  0.01  260.27   

      s   dt  acceleration  jerk  turn_rate  orient_rate  
0  0.13  NaN           NaN   NaN        NaN          NaN  
1  0.12  0.1          -0.1   NaN  -0.755728    -0.111701  
2  0.12  0.1           0.0   1.0  -0.612611    -0.090757  
3  0.10  0.1          -0.2  -2.0  -0.280998    -0.089012  
4  0.09  0.1          -0.1   1.0  -0.584685    -0.068068  


In [12]:
# Preview the first rows of the injury table, including the PlayKey column
# used below to select injury plays.
injuryrecord.head()

Unnamed: 0,PlayerKey,GameID,PlayKey,BodyPart,Surface,DM_M1,DM_M7,DM_M28,DM_M42
0,39873,39873-4,39873-4-32,Knee,Synthetic,1,1,1,1
1,46074,46074-7,46074-7-26,Knee,Natural,1,1,0,0
2,36557,36557-1,36557-1-70,Ankle,Synthetic,1,1,1,1
3,46646,46646-3,46646-3-30,Ankle,Natural,1,0,0,0
4,43532,43532-5,43532-5-69,Ankle,Synthetic,1,1,1,1


In [14]:
# Plain Python list of the PlayKey values recorded in the injury table.
injury_keys = injuryrecord.loc[:, 'PlayKey'].tolist()

What the engineered features capture:
- Speed and acceleration dynamics around the injury moment (sharp decelerations and jerks).
- Turn rates and orientation changes, showing how abruptly the player pivoted or twisted.
- Distance and elapsed time (the `dis` and `time` columns), giving overall context for how long and how far the player moved before the injury.

In [15]:
# Keep only the tracking rows whose PlayKey is listed in injury_keys; the
# copy makes `injuries` independent of df_features.
injuries = df_features.loc[
    df_features['PlayKey'].isin(injury_keys)
].copy()

In [16]:
# Preview the first rows of the injury-play subset; the index labels are
# carried over from df_features.
injuries.head()

Unnamed: 0,PlayKey,time,event,x,y,dir,dis,o,s,dt,acceleration,jerk,turn_rate,orient_rate
2085848,31070-3-7,0.0,line_set,44.07,32.14,23.31,0.0,174.83,0.03,,,,,
2085849,31070-3-7,0.1,,44.08,32.14,20.18,0.0,175.09,0.03,0.1,0.0,,-0.546288,0.045379
2085850,31070-3-7,0.2,,44.08,32.14,16.53,0.0,175.35,0.03,0.1,0.0,0.0,-0.637045,0.045379
2085851,31070-3-7,0.3,,44.08,32.14,13.23,0.0,175.6,0.02,0.1,-0.1,-1.0,-0.575959,0.043633
2085852,31070-3-7,0.4,,44.08,32.14,9.78,0.0,175.82,0.02,0.1,0.0,1.0,-0.602139,0.038397


In [17]:
# Write both engineered tables to CSV without the row index: first the
# full feature table, then the injury-play subset.
df_features.to_csv(
    path_or_buf='datasets/nfl-playing-surface-analytics/PlayerTrackData_Engineered.csv',
    index=False,
)
injuries.to_csv(
    path_or_buf='datasets/nfl-playing-surface-analytics/Injuries_Engineered.csv',
    index=False,
)