In [1]:
import pandas as pd
import numpy as np
import nfl_data_py as nfl

### Import injury, snap data

In [2]:
# Load the 2022 injury reports, then keep only regular-season ('REG') rows and
# the six columns listed below.
injury_cols = ['season','team','week','full_name','position','report_status']
injury_raw = nfl.import_injuries([2022])
is_regular_season = injury_raw['game_type'] == 'REG'
injury_df = injury_raw.loc[is_regular_season, injury_cols]

In [3]:
# Load the 2022 snap counts, order them by player, position and week, and
# renumber the rows 0..n-1 in that order.
snap_raw = nfl.import_snap_counts([2022])
snap_sorted = snap_raw.sort_values(by=['player','position','week'])
snap_df = snap_sorted.reset_index(drop=True)

In [4]:
def cartesian_product(d):
    """Build a DataFrame holding every combination of the values in ``d``.

    Parameters
    ----------
    d : dict
        Maps a column name to an iterable of values for that column.

    Returns
    -------
    pandas.DataFrame
        One row per combination and one column per key of ``d`` (in key
        order), with a default 0..n-1 index.
    """
    column_names = list(d.keys())
    value_lists = list(d.values())
    grid = pd.MultiIndex.from_product(value_lists, names=column_names)
    # An index-only frame: resetting the index turns each level into a column.
    empty_frame = pd.DataFrame(index=grid)
    return empty_frame.reset_index()

### Aggregate mean

We first compute each player's expanding (running) mean snap counts, so that they can be forward-filled into weeks where the player has no data.

In [5]:
# Expanding (running) mean of offensive and defensive snaps within each
# (player, position) group, in the row order of `snap_df`.
snap_cols = ['offense_snaps','defense_snaps']
running_mean = snap_df.groupby(['player','position'])[snap_cols].expanding().mean()
# reset_index() turns the group keys into columns; the third, unnamed index
# level (the original row label) comes out as 'level_2' and is discarded.
mean_df = running_mean.reset_index().drop(columns='level_2')

In [6]:
# `mean_df` (the per-player/position expanding means) was already built in the
# previous cell with this exact expression, so it is reused here instead of
# being recomputed.
renamed_means = mean_df.rename(columns={'offense_snaps':'mean_off_snaps','defense_snaps':'mean_def_snaps'})

# Attach the week of each game next to its running means.
# NOTE(review): concat pairs rows by index label. That lines up only because
# `snap_df` was sorted by player/position/week with a fresh 0..n-1 index and the
# groupby emits its groups in the same sorted order. Confirm there are no
# missing player/position keys, which groupby would silently drop.
ms_df = pd.concat([renamed_means, snap_df['week']], axis=1)

### Set up all 20 weeks for players

We use a Cartesian product so that every player has a row for each of the 20 weeks.

In [7]:
# Only pair each player with the position(s) they actually appear at in the
# snap data. Crossing every player with every position in the data set builds
# a grid many times larger than needed, and those extra pairings are discarded
# by the later inner merge against the snap data anyway.
player_pos = ms_df[['player','position']].drop_duplicates().reset_index(drop=True)

# Cross the row labels of `player_pos` with weeks 1-20, then swap each label
# back for its (player, position) pair. Columns end up as player, position, week.
d = {'pair_id': player_pos.index, 'week': range(1,21)}
cp_df = (
    cartesian_product(d)
    .merge(player_pos, left_on='pair_id', right_index=True)
    [['player','position','week']]
    .reset_index(drop=True)
)

In [8]:
# Left-join the running means onto the full player/position/week grid; grid
# rows with no matching game keep NaN in the two mean columns.
merge_keys = ['player','week','position']
ms_trunc = ms_df[merge_keys + ['mean_off_snaps','mean_def_snaps']]
mc_fs = pd.merge(cp_df, ms_trunc, how='left', on=merge_keys)

### Break out dupes to "real" positions

Also forward-fill within group, filling by default with zero for players who have no snaps

In [9]:
# Keep only the (player, position) pairs that occur in the snap data. The pairs
# are de-duplicated first so the inner merge cannot multiply each grid row by
# the number of games that player appears in; the final frame is unchanged.
real_pairs = snap_df[['player','position']].drop_duplicates()
mc_pre = (
    mc_fs.merge(real_pairs, how='inner', on=['player','position'])
    .drop_duplicates(['player','position','week'])
    .reset_index(drop=True)
)

In [10]:
# Within each (player, position) group, carry the last known values forward;
# anything still missing afterwards (no earlier value in the group) becomes 0.
by_player_pos = mc_pre.groupby(['player','position'])
mc_ff = by_player_pos.ffill().fillna(value=0)

In [11]:
# Put the identifying columns from `mc_pre` back beside the filled means from
# `mc_ff`; the two pieces are matched on their index labels.
id_cols = mc_pre[['player','position','week']]
filled_means = mc_ff[['mean_off_snaps','mean_def_snaps']]
mc_rp = pd.concat([id_cols, filled_means], axis='columns')

### Incorporate club code (i.e., team name)

In [12]:
# Load the 2022 depth charts and drop the rows whose game_type is 'SBBYE'.
dc_raw = nfl.import_depth_charts([2022])

# `.assign` builds a new frame, so the integer cast of `week` is not written
# into a boolean-filtered slice of the raw frame (assigning a column onto such
# a slice is what triggers pandas' SettingWithCopyWarning).
dc_df = (
    dc_raw.loc[dc_raw['game_type'] != 'SBBYE']
    .assign(week=lambda frame: frame['week'].astype(int))
)

This is where we shift the week forward by one. If we did it after the team merge, players could be assigned to the wrong team because of trades.

In [13]:
# Preview the first two rows.
mc_rp.head(n=2)

Unnamed: 0,player,position,week,mean_off_snaps,mean_def_snaps
0,A'Shawn Robinson,DT,1,0.0,38.0
1,A'Shawn Robinson,DT,2,0.0,38.5


In [14]:
# Shift every row one week later. The new value is derived from `mc_pre`
# (the frame `mc_rp` was assembled from, sharing its index) instead of being
# incremented in place, so re-running this cell cannot shift the weeks twice.
mc_rp = mc_rp.assign(week=mc_pre['week'] + 1)

In [15]:
# Look up each player's club for the (shifted) week from the depth chart.
# Rows left with any missing value after the left join are removed by dropna().
dc_lookup = dc_df[['club_code','week','position','full_name']]
mc_wk = pd.merge(
    mc_rp,
    dc_lookup,
    how='left',
    left_on=['player','position','week'],
    right_on=['full_name','position','week'],
).dropna()

In [16]:
# Preview the first two rows after the club lookup.
mc_wk.head(n=2)

Unnamed: 0,player,position,week,mean_off_snaps,mean_def_snaps,club_code,full_name
0,A'Shawn Robinson,DT,2,0.0,38.0,LA,A'Shawn Robinson
1,A'Shawn Robinson,DT,3,0.0,38.5,LA,A'Shawn Robinson


In [17]:
# Remove exact duplicate rows, then discard `full_name`, which only served as
# the join key matched against `player`.
# errors='ignore' keeps the cell re-runnable: on a second run `full_name` is
# already gone and a plain drop would raise KeyError.
mc_wk = mc_wk.drop_duplicates().drop(columns=['full_name'], errors='ignore')

### Merge in injuries

The week was already shifted up by 1 (above), so each row carries a player's mean snaps from the *previous* weeks; joining the injury report on that week lets us estimate how many snaps are missed to injury.

In [18]:
# Attach each player's injury report row (if any) for the same position and
# week; players with no matching report keep NaN in the injury columns.
injury_reports = injury_df.drop(columns=['team'])
inj_df = pd.merge(
    mc_wk,
    injury_reports,
    how='left',
    left_on=['player','position','week'],
    right_on=['full_name','position','week'],
).drop(columns=['full_name'])

In [19]:
# Collapse fully identical rows, keeping the first occurrence of each.
inj_df = inj_df.drop_duplicates(keep='first')

In [20]:
# Manual override: force club_code to 'SF' on Christian McCaffrey's week-7 row(s).
# NOTE(review): presumably this corrects the club for a mid-season trade — confirm.
is_cmc_week7 = inj_df['player'].eq('Christian McCaffrey') & inj_df['week'].eq(7)
inj_df.loc[is_cmc_week7, 'club_code'] = 'SF'

In [21]:
# Show Christian McCaffrey's week-7 row(s) to check the override.
cmc_week7 = (inj_df['player'] == 'Christian McCaffrey') & (inj_df['week'] == 7)
inj_df.loc[cmc_week7]

Unnamed: 0,player,position,week,mean_off_snaps,mean_def_snaps,club_code,season,report_status
4098,Christian McCaffrey,RB,7,47.833333,0.0,SF,2022.0,


### Aggregate team-week injury stats (based on mean snaps for team members)

In [22]:
# Keep only the rows whose injury report status is exactly 'Out'.
is_out = inj_df['report_status'].eq('Out')
out_df = inj_df.loc[is_out]

In [23]:
# Total the mean snaps of 'Out' players per club and week.
# The two snap columns are selected before aggregating so that only they are
# summed; summing the whole frame first also tries to add up the text columns.
snap_cols = ['mean_off_snaps','mean_def_snaps']
df_final = out_df.groupby(['club_code','week'])[snap_cols].sum().reset_index()

In [24]:
# Rename the summed columns to say what they now represent.
lost_snap_names = {'mean_off_snaps':'off_snaps_lost','mean_def_snaps':'def_snaps_lost'}
df_final = df_final.rename(columns=lost_snap_names)

In [25]:
# Preview the first two rows of the team-week table.
df_final.head(n=2)

Unnamed: 0,club_code,week,off_snaps_lost,def_snaps_lost
0,ARI,2,21.0,0.0
1,ARI,3,0.0,0.0


In [26]:
# CSV export of df_final; commented out, so running the notebook writes no file.
#df_final.to_csv('created-data-db-25/snaps_lost_injury_.csv',index=False)