In [1]:
import pandas as pd
import numpy as np

In [2]:
import warnings
warnings.filterwarnings('ignore', category=pd.errors.SettingWithCopyWarning)

def process_motion_chunk_v2(chunk_df):
    """Summarise per-play tracking samples into ten time-slice means per metric.

    Each play's samples are cut into ten equal-width bins over that play's own
    ``time`` range, every metric is averaged inside each bin, and the result is
    pivoted to one row per ``PlayKey``.

    Parameters
    ----------
    chunk_df : pandas.DataFrame
        Must contain ``PlayKey``, ``time`` and the metric columns ``dis``,
        ``x``, ``y``, ``o``, ``dir`` and ``s``. The caller's frame is not
        modified (a copy is taken).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``PlayKey`` with 60 columns named ``{metric}_slice_{i}``
        (metrics in alphabetical order, ``i`` from 0 to 9). The full column
        set is always present, even when no play in the chunk has samples in
        a given slice.
    """
    metrics = ['dis', 'x', 'y', 'o', 'dir', 's']
    slice_labels = [f'slice_{i}' for i in range(10)]

    # Make an explicit copy so the helper columns never leak into the input
    chunk_df = chunk_df.copy()

    # Time is already in seconds, just use directly as relative_time
    chunk_df['relative_time'] = chunk_df['time']

    # Label every sample with one of ten equal-width bins of its own play
    chunk_df['time_decile'] = chunk_df.groupby(['PlayKey'])['relative_time'].transform(
        lambda x: pd.cut(x, bins=10, labels=slice_labels)
    )

    # Pivot to wide: one row per play, one column per (metric, slice)
    # NOTE(review): `o` and `dir` look like headings in degrees; a plain mean
    # ignores the 0/360 wrap-around - confirm this is acceptable.
    motion_wide = chunk_df.pivot_table(
        index=['PlayKey'],
        columns='time_decile',
        values=metrics,
        aggfunc='mean',
        observed=True
    )

    # Flatten the (metric, slice) MultiIndex into 'metric_slice_i' names
    motion_wide.columns = [f'{metric}_{time}' for metric, time in motion_wide.columns]

    # pivot_table omits slices that no play in this chunk populated (and drops
    # all-NaN columns), which used to make the column selection below raise
    # KeyError for small chunks. Restore the full layout, in the same order
    # pivot_table produces when nothing is missing.
    expected_cols = [
        f'{metric}_{label}' for metric in sorted(metrics) for label in slice_labels
    ]
    motion_wide = motion_wide.reindex(columns=expected_cols)

    # Fill missing values by interpolating across time slices
    for metric in metrics:
        metric_cols = [f'{metric}_{label}' for label in slice_labels]
        motion_wide[metric_cols] = motion_wide[metric_cols].interpolate(axis=1, method='linear')

    return motion_wide

# Process the tracking data in chunks of PlayKeys (there is no game key column to chunk by)
def process_playertrack_data(df, chunk_size=1000):
    """Build the wide per-play motion table by processing plays in chunks.

    The unique ``PlayKey`` values of ``df`` are split into consecutive groups
    of ``chunk_size``; the rows of each group are passed to
    ``process_motion_chunk_v2`` and the per-chunk results are stacked.

    Parameters
    ----------
    df : pandas.DataFrame
        Tracking samples; must contain a ``PlayKey`` column plus whatever
        ``process_motion_chunk_v2`` requires.
    chunk_size : int, default 1000
        Number of distinct plays handled per chunk. Must be at least 1.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-chunk results, one row per play. For input
        with no plays, an empty frame indexed by ``PlayKey`` is returned.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    unique_plays = df['PlayKey'].unique()

    # pd.concat refuses an empty list, so handle "no plays" explicitly
    if len(unique_plays) == 0:
        return pd.DataFrame(index=pd.Index([], name='PlayKey'))

    motion_wide_list = []
    for i in range(0, len(unique_plays), chunk_size):
        play_chunk = unique_plays[i:i+chunk_size]
        # Each chunk re-scans the full frame to pick out its plays' rows
        chunk_df = df[df['PlayKey'].isin(play_chunk)]

        motion_wide_chunk = process_motion_chunk_v2(chunk_df)
        motion_wide_list.append(motion_wide_chunk)

        print(f"Processed {i+len(play_chunk)} of {len(unique_plays)} plays")

    # Combine all chunks
    motion_wide = pd.concat(motion_wide_list, axis=0)
    return motion_wide

# Usage:
# motion_wide = process_playertrack_data(df)

In [None]:
# Build the wide per-play motion table from the raw tracking samples
df = pd.read_csv('datasets/nfl-playing-surface-analytics/PlayerTrackData.csv')
motion_wide = process_playertrack_data(df)

# From here on `df` is rebound to the PlayList table (later cells rely on this)
df = pd.read_csv('datasets/nfl-playing-surface-analytics/PlayList.csv')

# Keep only the play-level descriptors that get attached to the motion features
playlist_short = df[[
    'PlayerKey', 'GameID', 'PlayKey', 'RosterPosition',
    'PlayerDay', 'PlayerGame', 'FieldType', 'PlayType',
]].copy()

# Left merge keeps every play in motion_wide; a missing PlayType becomes 'Unknown'
df_final = pd.merge(motion_wide, playlist_short, on='PlayKey', how='left')
df_final['PlayType'] = df_final['PlayType'].fillna('Unknown')

# Per-column count of values that are still missing after the merge
print(df_final.isna().sum())

In [12]:
# Show the first five rows of whatever `df` currently holds
df.head(n=5)

Unnamed: 0,PlayerKey,GameID,PlayKey,RosterPosition,PlayerDay,PlayerGame,StadiumType,FieldType,Temperature,Weather,PlayType,PlayerGamePlay,Position,PositionGroup
0,26624,26624-1,26624-1-1,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,1,QB,QB
1,26624,26624-1,26624-1-2,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,2,QB,QB
2,26624,26624-1,26624-1-3,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Rush,3,QB,QB
3,26624,26624-1,26624-1-4,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Rush,4,QB,QB
4,26624,26624-1,26624-1-5,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,5,QB,QB


In [13]:
# Show the first five plays of the wide motion table
motion_wide.head(n=5)

Unnamed: 0_level_0,dir_slice_0,dir_slice_1,dir_slice_2,dir_slice_3,dir_slice_4,dir_slice_5,dir_slice_6,dir_slice_7,dir_slice_8,dir_slice_9,...,y_slice_0,y_slice_1,y_slice_2,y_slice_3,y_slice_4,y_slice_5,y_slice_6,y_slice_7,y_slice_8,y_slice_9
PlayKey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
26624-1-1,191.485333,224.039667,276.865,270.013667,233.042667,102.27,116.333667,173.968,192.327,78.342667,...,28.917667,29.141333,29.742333,29.769333,29.764667,29.638276,28.824333,27.708,24.982667,23.769667
26624-1-10,321.335,223.575357,258.757407,289.139286,113.188148,50.242143,101.534444,217.098929,199.318519,216.5775,...,22.4175,22.472857,22.573704,23.596429,23.738889,24.554286,27.278889,26.3,24.49,22.715714
26624-1-11,260.44027,137.356757,291.104595,188.302973,88.25027,128.555556,262.251892,300.738919,210.378919,147.169459,...,22.573243,22.851081,23.447568,23.764865,23.79973,25.973333,24.604324,26.081622,27.830541,26.09027
26624-1-12,110.672778,110.170833,200.163429,66.518611,216.207222,248.813429,117.101944,214.901429,233.532778,228.200278,...,22.271667,22.356944,22.452571,23.066944,23.4275,23.511429,23.77,24.062857,22.196667,21.098333
26624-1-13,125.639231,232.7,38.018,184.952692,181.7476,199.4592,155.309231,149.863077,232.2016,267.487692,...,22.128846,22.281154,22.9744,23.421923,23.4144,23.4692,23.483462,23.850769,22.6776,21.692308


In [14]:
# Show the first five rows of the trimmed play list
playlist_short.head(n=5)

Unnamed: 0,PlayerKey,GameID,PlayKey,RosterPosition,PlayerDay,PlayerGame,FieldType,PlayType
0,26624,26624-1,26624-1-1,Quarterback,1,1,Synthetic,Pass
1,26624,26624-1,26624-1-2,Quarterback,1,1,Synthetic,Pass
2,26624,26624-1,26624-1-3,Quarterback,1,1,Synthetic,Rush
3,26624,26624-1,26624-1-4,Quarterback,1,1,Synthetic,Rush
4,26624,26624-1,26624-1-5,Quarterback,1,1,Synthetic,Pass


In [15]:
# Attach the play descriptors to every row of motion_wide (left merge on PlayKey)
df_final = pd.merge(motion_wide, playlist_short, how='left', on='PlayKey')

In [18]:
# Replace a missing PlayType with the placeholder label 'Unknown'
df_final['PlayType'] = df_final['PlayType'].fillna(value='Unknown')

In [20]:
# Load the injury records table
injury_record = pd.read_csv(
    'datasets/nfl-playing-surface-analytics/InjuryRecord.csv'
)

In [22]:
# Ten PlayerKey values that appear most often in the injury records
injury_record['PlayerKey'].value_counts().iloc[:10]

PlayerKey
43540    2
45950    2
33337    2
44449    2
47307    2
46646    1
43532    1
41145    1
44806    1
36557    1
Name: count, dtype: int64

In [23]:
# Injury rows for which no PlayKey was recorded
injury_record[injury_record['PlayKey'].isnull()]

Unnamed: 0,PlayerKey,GameID,PlayKey,BodyPart,Surface,DM_M1,DM_M7,DM_M28,DM_M42
46,33337,33337-2,,Foot,Natural,1,1,1,1
47,45099,45099-5,,Knee,Natural,1,1,1,1
48,36591,36591-9,,Knee,Natural,1,1,1,1
49,45950,45950-6,,Toes,Synthetic,1,1,0,0
50,39653,39653-4,,Ankle,Synthetic,1,0,0,0
51,38253,38253-10,,Toes,Synthetic,1,1,1,0
52,38214,38214-12,,Toes,Synthetic,1,0,0,0
53,43119,43119-12,,Ankle,Synthetic,1,0,0,0
54,35648,35648-12,,Ankle,Natural,1,0,0,0
55,40051,40051-13,,Ankle,Natural,1,0,0,0
