In [1]:
import pandas as pd
import nfl_data_py as nfl

### Import injury, snap data

In [2]:
# Load the 2022 injury reports, keep only regular-season rows, and trim the
# frame to the columns used later in the notebook.
injury_cols = ['season','team','week','full_name','position','report_status','gsis_id']
injury_raw = nfl.import_injuries([2022])
is_regular_season = injury_raw['game_type'] == 'REG'
injury_df = injury_raw.loc[is_regular_season, injury_cols]

In [3]:
# Load the 2022 snap counts, ordered by player name, position and week, with a
# fresh 0..n-1 index.
snap_df = (
    nfl.import_snap_counts([2022])
    .sort_values(['player','position','week'])
    .reset_index(drop=True)
)

In [4]:
def cartesian_product(d):
    """Build a DataFrame holding every combination of the values in ``d``.

    Parameters
    ----------
    d : dict
        Maps each output column name to an iterable of values for that column.

    Returns
    -------
    pandas.DataFrame
        One row per combination and one column per key of ``d``, with a fresh
        0..n-1 index. The values of the last key vary fastest.
    """
    level_names = list(d.keys())
    level_values = list(d.values())
    combos = pd.MultiIndex.from_product(level_values, names=level_names)
    # Start from an index-only frame, then flatten each level into a column.
    return pd.DataFrame(index=combos).reset_index()

### Aggregate mean snaps for each player (on pfr id)

We calculate the expanding mean (i.e., mean to that point) for each player

We do this before filling in missing weeks: weeks in which a player has no snap record (e.g., Keenan Allen while injured) are forward-filled later and should not enter the mean.

In [5]:
# Order rows by player id and then week, and renumber the index, so each
# player's games are contiguous and chronological for the steps that follow.
sort_keys = ['pfr_player_id','week']
snap_sorted = snap_df.sort_values(by=sort_keys)
snap_df = snap_sorted.reset_index(drop=True)

In [6]:
# Expanding (running) mean of offensive and defensive snaps within each player.
# The grouped result is indexed by (pfr_player_id, original snap_df row label);
# only the player level is moved into a column, so the original row labels stay
# as the index. This avoids depending on the auto-generated `level_1` column
# name and on the rows happening to come back in snap_df's positional order.
mean_df = (
    snap_df.groupby(['pfr_player_id'])[['offense_snaps','defense_snaps']]
    .expanding()
    .mean()
    .reset_index(level='pfr_player_id')
)

# Recover week info by aligning on the shared row labels. The inner join keeps
# only rows that actually have a computed mean.
ms_df = pd.concat(
    [
        mean_df.rename(columns={'offense_snaps':'mean_off_snaps','defense_snaps':'mean_def_snaps'}),
        snap_df['week'],
    ],
    axis=1,
    join='inner',
)

### Recover full week info for each player-position combo

We use a cartesian product so that each player (by ID) has one row for each of weeks 1–20.

In [7]:
# Pair every distinct player id with each week number from 1 through 20.
player_ids = ms_df['pfr_player_id'].drop_duplicates()
week_numbers = range(1,21)
cp_df = cartesian_product({'pfr_player_id': player_ids, 'week': week_numbers})

In [8]:
# Peek at the first two rows of the player-by-week grid.
cp_df.head(2)

Unnamed: 0,pfr_player_id,week
0,AbduAm00,1
1,AbduAm00,2


#### Merge expanding mean df, truncated to relevant columns, into cartesian product

In [9]:
# Left-join the means onto the grid so every (player, week) pair is kept;
# pairs with no snap record get NaN means.
join_keys = ['pfr_player_id','week']
mc_fs = cp_df.merge(ms_df, on=join_keys, how='left')

In [10]:
# Count rows that are exact duplicates of an earlier row after the merge.
mc_fs.duplicated().sum()

0

### Forward-fill within each player, then fill any remaining gaps (weeks before a player's first recorded snaps) with zero

In [11]:
# Inspect one player's rows for weeks 5 through 7 before any filling.
is_player = mc_fs['pfr_player_id'] == 'AbduAm00'
in_weeks = (mc_fs['week'] >= 5) & (mc_fs['week'] <= 7)
mc_fs[is_player & in_weeks]

Unnamed: 0,pfr_player_id,week,mean_off_snaps,mean_def_snaps
4,AbduAm00,5,4.4,0.0
5,AbduAm00,6,,
6,AbduAm00,7,4.833333,0.0


In [12]:
# Within each player, carry the last known values forward over missing weeks;
# anything still missing afterwards is set to 0.
forward_filled = mc_fs.groupby(['pfr_player_id']).ffill()
mc_ff = forward_filled.fillna(0)

# Re-attach the player id and week from mc_fs next to the filled mean columns.
key_part = mc_fs[['pfr_player_id','week']]
mean_part = mc_ff[['mean_off_snaps','mean_def_snaps']]
mc_rp = pd.concat([key_part, mean_part], axis=1)

In [13]:
# Same player and weeks as before, now showing the forward-filled means.
is_player = mc_rp['pfr_player_id'] == 'AbduAm00'
in_weeks = (mc_rp['week'] >= 5) & (mc_rp['week'] <= 7)
mc_rp[is_player & in_weeks]

Unnamed: 0,pfr_player_id,week,mean_off_snaps,mean_def_snaps
4,AbduAm00,5,4.4,0.0
5,AbduAm00,6,4.4,0.0
6,AbduAm00,7,4.833333,0.0


#### Load ID table for future reference

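This is where we shift `week` forward by one, so each row pairs a week with the player's mean snaps from the weeks before it. If we shifted after the team merge instead, traded players could end up with the wrong team.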

In [14]:
# Load the player ID table and keep only the columns whose name contains 'id'.
id_pre = nfl.import_ids()
id_columns = [col for col in id_pre if 'id' in col]
id_df = id_pre[id_columns]

# Shift weeks forward by one. The value is derived from mc_fs['week'], which is
# never modified and shares mc_rp's row index, instead of incrementing in place.
# Re-running this cell therefore cannot shift the weeks a second time.
mc_rp['week'] = mc_fs['week'] + 1

Merge IDs into the player snap-mean data:

In [15]:
# Attach each player's gsis_id by matching the snap data's pfr_player_id to the
# ID table's pfr_id, then drop the now-redundant pfr_id column.
id_lookup = id_df[['pfr_id','gsis_id']]
with_ids = mc_rp.merge(id_lookup, how='left', left_on='pfr_player_id', right_on='pfr_id')
mc_wk = with_ids.drop(columns=['pfr_id'])

In [16]:
# Peek at the first row to confirm gsis_id was attached.
mc_wk.head(1)

Unnamed: 0,pfr_player_id,week,mean_off_snaps,mean_def_snaps,gsis_id
0,AbduAm00,2,7.0,0.0,00-0032104


In [17]:
# Count missing values per column; gsis_id is NaN wherever the left merge found
# no matching pfr_id in the ID table.
mc_wk.isna().sum()

pfr_player_id         0
week                  0
mean_off_snaps        0
mean_def_snaps        0
gsis_id           13280
dtype: int64

In [18]:
# Peek at the injury table's columns ahead of the merge.
injury_df.head(1)

Unnamed: 0,season,team,week,full_name,position,report_status,gsis_id
0,2022,ARI,1,Rodney Hudson,C,,00-0027993


## Merge in injuries

Also specially handle Christian McCaffrey (`McCaCh01`), who is entered twice for week 7 because of a trade; the fix is applied after the merge:

In [19]:
# pandas matches NaN join keys with each other, and mc_wk has many rows with no
# gsis_id. Drop injury rows lacking a gsis_id first, so they cannot be attached
# to every unmapped player in the same week.
injury_keys = injury_df[['gsis_id','week','team','report_status']].dropna(subset=['gsis_id'])

# Left-join so every player-week row is kept; rows with no injury report get
# NaN team and report_status.
inj_df = mc_wk.merge(injury_keys, how='left', on=['gsis_id','week'])

In [23]:
# Show three rows chosen at random; the fixed seed makes the displayed sample
# the same on every run.
inj_df.sample(3, random_state=0)

Unnamed: 0,pfr_player_id,week,mean_off_snaps,mean_def_snaps,gsis_id,team,report_status
36201,SmitSh03,2,41.0,0.0,00-0036572,CAR,Questionable
4450,BridTe00,12,24.25,0.0,00-0031237,MIA,Questionable
16086,HarrAn01,8,0.0,0.0,,,


In [24]:
# Force the team on McCaCh01's week-7 rows to SF, then count how many
# (player, week) pairs still occur more than once.
mccaffrey_week7 = (inj_df['pfr_player_id'] == 'McCaCh01') & (inj_df['week'] == 7)
inj_df.loc[mccaffrey_week7, 'team'] = 'SF'

player_week = inj_df[['pfr_player_id','week']]
player_week.duplicated().sum()

1

In [25]:
# Remove rows that are identical in every column, keeping the first occurrence.
# NOTE(review): rows for the same player-week that differ in report_status
# would both survive this step.
inj_df = inj_df.drop_duplicates()

### Aggregate team-week injury stats (based on mean snaps for team members)

In [42]:
# Keep only player-weeks whose report status is Out or Doubtful.
unavailable_statuses = ['Out','Doubtful']
is_unavailable = inj_df['report_status'].isin(unavailable_statuses)
out_df = inj_df[is_unavailable]

Sanity check our logic:

In [43]:
# Each (player, week) pair should appear at most once; a non-zero count here
# would mean a player's snaps get summed twice below.
player_week = out_df[['pfr_player_id','week']]
player_week.duplicated().sum()

0

In [45]:
# Peek at the first five Out/Doubtful rows.
out_df.head(5)

Unnamed: 0,pfr_player_id,week,mean_off_snaps,mean_def_snaps,gsis_id,team,report_status
141,AdamMa00,3,0.0,18.0,00-0034424,CHI,Out
142,AdamMa00,4,0.0,18.0,00-0034424,CHI,Doubtful
230,AddeNa00,12,0.0,57.4,00-0035677,LAC,Doubtful
280,AdebPa00,2,0.0,0.0,00-0036937,NO,Out
296,AdebPa00,18,0.0,62.461538,00-0036937,NO,Out


In [46]:
# Total the mean snaps of unavailable players for each team-week. The two
# numeric columns are selected before summing, so the aggregation never runs
# over the string columns (player ids, report status).
snap_cols = ['mean_off_snaps','mean_def_snaps']
df_final = out_df.groupby(['team','week'])[snap_cols].sum().reset_index()

In [47]:
# Relabel the summed means as snaps lost to injury.
lost_snap_names = {'mean_off_snaps':'off_snaps_lost','mean_def_snaps':'def_snaps_lost'}
df_final = df_final.rename(columns=lost_snap_names)

In [48]:
# Peek at the first two team-week totals.
df_final.head(2)

Unnamed: 0,team,week,off_snaps_lost,def_snaps_lost
0,ARI,2,21.0,0.0
1,ARI,3,0.0,8.5


In [49]:
import os
# Working directory of the running kernel; used as the base of the output path.
root_dir = os.getcwd()

In [50]:
# Write the team-week totals to <root_dir>/data/snaps_lost_injury.csv without
# the index column. The target directory is created first, so the write does
# not fail on a checkout where `data/` does not exist yet.
out_path = os.path.join(root_dir,'data/snaps_lost_injury.csv')
os.makedirs(os.path.dirname(out_path), exist_ok=True)
df_final.to_csv(out_path,index=False)