In [37]:
import pandas as pd
import numpy as np
import nfl_data_py as nfl

# Proces team pass ratios

We'll get to player-based expected pass ratios later, but first, we'll start with team data:

In [38]:
snap_df = nfl.import_snap_counts([2021])

In [39]:
nfl_pre = nfl.import_pbp_data([2021])

2021 done.
Downcasting floats.


In [40]:
nfl_pre['pass'].value_counts()

0.0    26756
1.0    23166
Name: pass, dtype: int64

In [41]:
nfl_pbp = nfl_pre[(nfl_pre['play_type_nfl'] == 'RUSH') | (nfl_pre['play_type_nfl'] == 'PASS') ]

In [42]:
nfl_pbp['play_type_nfl'].value_counts()

PASS    19158
RUSH    15104
Name: play_type_nfl, dtype: int64

In [43]:
snap_pbp = nfl_pbp[['posteam','defteam','pass']].copy()

Aggregate pass rate on offense and defense for teams in 2021:

In [44]:
pro_df = snap_pbp.groupby(['posteam']).agg(pass_ct=('pass','sum'),
                                                     snap_ct=('pass','count')).reset_index()
pro_df['pass_rate_off'] = pro_df['pass_ct']/pro_df['snap_ct']
pro_df.drop(columns=['pass_ct','snap_ct'],inplace=True)

In [45]:
prd_df = snap_pbp.groupby(['defteam']).agg(pass_ct=('pass','sum'),
                                                     snap_ct=('pass','count')).reset_index()
prd_df['pass_rate_def'] = prd_df['pass_ct']/prd_df['snap_ct']
prd_df.drop(columns=['pass_ct','snap_ct'],inplace=True)

In [46]:
team_pass_out = prd_df.merge(pro_df,how='left',left_on='defteam',right_on='posteam').drop(columns='posteam').rename(columns={'defteam':'possessionTeam'})

In [47]:
team_pass_out.sort_values(by='pass_rate_off',ascending=False)

Unnamed: 0,possessionTeam,pass_rate_def,pass_rate_off
29,TB,0.675629,0.659128
15,KC,0.624167,0.655253
17,LAC,0.544076,0.640408
14,JAX,0.547665,0.629857
24,NYJ,0.546279,0.62539
18,LV,0.591764,0.623971
26,PIT,0.561372,0.620231
3,BUF,0.585343,0.619126
23,NYG,0.565378,0.607071
19,MIA,0.597837,0.605187


# Get xpass (player-weighted expected pass),

missing players fillna w/mean

In [48]:
nfl_pbp['oid_ls'] = nfl_pbp['offense_players'].str.split(';',expand=False)
nfl_pbp['did_ls'] = nfl_pbp['defense_players'].str.split(';',expand=False)
nfl_pbp[[len(x) == 1 for x in nfl_pbp['oid_ls']]]['oid_ls'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nfl_pbp['oid_ls'] = nfl_pbp['offense_players'].str.split(';',expand=False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nfl_pbp['did_ls'] = nfl_pbp['defense_players'].str.split(';',expand=False)


Series([], Name: oid_ls, dtype: int64)

### Offensive players

In [49]:
# remove rows w/no ids, then explode each player id to own row
off_pbp = nfl_pbp[[len(x) > 1 for x in nfl_pbp['oid_ls']]]
off_exp = off_pbp.explode('oid_ls')

#flag pass as 'dropback'
off_exp['isDropback'] = 0
off_exp.loc[off_exp['play_type'] =='pass', 'isDropback'] = 1

# get each player's 2021 pass snaps vs overall snaps, on offense
off_xp = off_exp.groupby(['oid_ls']).agg(pass_ct=('isDropback','sum'),
                                                     snap_ct=('isDropback','count')).reset_index()

In [50]:
off_exp[['game_id','play_id','oid_ls']].duplicated().sum()

0

In [51]:
off_xp.head(1)

Unnamed: 0,oid_ls,pass_ct,snap_ct
0,00-0019596,795,1197


### Rpeat for defensive players

In [52]:
def_pbp = nfl_pbp[[len(x) > 1 for x in nfl_pbp['did_ls']]]
def_exp = def_pbp.explode('did_ls')
def_exp['isDropback'] = 0
def_exp.loc[def_exp['play_type'] =='pass', 'isDropback'] = 1
def_xp = def_exp.groupby(['did_ls']).agg(pass_ct=('isDropback','sum'),
                                                     snap_ct=('isDropback','count')).reset_index()

In [53]:
def_xp.head(2)

Unnamed: 0,did_ls,pass_ct,snap_ct
0,00-0025424,123,174
1,00-0026190,357,554


### Reconcile ID's, get names

We want names for our players as keys to merge into big data bowl data. To do this, we perform a series of merges.

In [54]:
id_pre = nfl.import_ids()
id_df = id_pre[['gsis_id','pfr_id']]

In [55]:
# pfr_id is our common key
snap_names = snap_df[['pfr_player_id','player','position']]

In [56]:
snap_names.head(2)

Unnamed: 0,pfr_player_id,player,position
0,WirfTr00,Tristan Wirfs,T
1,MarpAl00,Ali Marpet,G


Merge in pfr ID:

In [57]:
xp_did = def_xp.merge(id_df,how='left',left_on='did_ls',right_on='gsis_id').drop(columns=['did_ls'])
xp_oid = off_xp.merge(id_df,how='left',left_on='oid_ls',right_on='gsis_id').drop(columns=['oid_ls'])

In [58]:
xp_did.head(1)

Unnamed: 0,pass_ct,snap_ct,gsis_id,pfr_id
0,123,174,00-0025424,WeddEr99


Join name, snap data on pfr_id, drop extraneous columns:

In [78]:
snap_names.head(3)

Unnamed: 0,pfr_player_id,player,position
0,WirfTr00,Tristan Wirfs,T
1,MarpAl00,Ali Marpet,G
2,CappAl00,Alex Cappa,G


In [59]:
def_ratio = xp_did.merge(snap_names,how='left',left_on='pfr_id',right_on='pfr_player_id').dropna().drop_duplicates()[['player','position','pass_ct','snap_ct']]
off_ratio = xp_oid.merge(snap_names,how='left',left_on='pfr_id',right_on='pfr_player_id').dropna().drop_duplicates()[['player','position','pass_ct','snap_ct']]

In [85]:
def_ratio['position'].value_counts()

LB    250
CB    199
DE    145
DT    118
SS     72
FS     69
NT     30
DB     10
RB      1
WR      1
Name: position, dtype: int64

In [60]:
def_ratio.head(1)

Unnamed: 0,player,position,pass_ct,snap_ct
0,Eric Weddle,FS,123,174


# Coverage processing

First we take play-by-play data, then split out coverage as one-hot

In [61]:
cov_pbp = pd.concat([nfl_pbp[['defteam','posteam']],pd.get_dummies(nfl_pbp['defense_man_zone_type']),pd.get_dummies(nfl_pbp['defense_coverage_type'])],axis=1)

### Rename columns to match BDB data

Note: "Other" is 2 man + prevent



In [62]:
rn_dict = {'MAN_COVERAGE':'Man', 'ZONE_COVERAGE':'Zone', 'COVER_0':'cover_0', 'COVER_1':'cover_1', 
           'COVER_2':'cover_2','COVER_3':'cover_3', 'COVER_6':'cover_6','COVER_4':'Quarters'
           }

In [63]:
cov_pbp = cov_pbp.rename(columns=rn_dict)
cov_pbp['Other'] = cov_pbp['PREVENT']+cov_pbp['2_MAN']
cov_pbp.drop(columns=['PREVENT','2_MAN'],inplace=True)

### Get expected coverage figures

Essentially, we divide total coverage snaps by 17, to get an expecation for w1 next year

In [64]:
cg_def = cov_pbp.groupby('defteam').sum().reset_index()
def_rates = pd.concat([cg_def['defteam'],cg_def.iloc[:,1:]/17],axis=1)
def_rates.columns = ['defteam']+[x+'_def' for x in def_rates.columns[1:]]

In [65]:
cg_off = cov_pbp.groupby('posteam').sum().reset_index()
off_rates = pd.concat([cg_off['posteam'],cg_off.iloc[:,1:]/17],axis=1)
off_rates.columns = ['offteam']+[x+'_off' for x in off_rates.columns[1:]]

In [66]:
def_rates.head(1)

Unnamed: 0,defteam,Man_def,Zone_def,cover_0_def,cover_1_def,cover_2_def,cover_3_def,Quarters_def,cover_6_def,Other_def
0,ARI,9.529412,23.647059,3.117647,5.352941,4.0,12.588235,4.117647,2.882353,1.117647


# Write out files

First, merge defensive/offensisve coverage stats

In [67]:
rates_out = def_rates.merge(off_rates,how='left',left_on='defteam',right_on='offteam').drop(columns='offteam').rename(columns={'defteam':'possessionTeam'})

In [68]:
rates_out.head(1)

Unnamed: 0,possessionTeam,Man_def,Zone_def,cover_0_def,cover_1_def,cover_2_def,cover_3_def,Quarters_def,cover_6_def,Other_def,Man_off,Zone_off,cover_0_off,cover_1_off,cover_2_off,cover_3_off,Quarters_off,cover_6_off,Other_off
0,ARI,9.529412,23.647059,3.117647,5.352941,4.0,12.588235,4.117647,2.882353,1.117647,7.411765,28.352941,1.823529,5.588235,4.588235,14.0,5.882353,3.705882,0.176471


In [69]:
team_pass_out.head(2)

Unnamed: 0,possessionTeam,pass_rate_def,pass_rate_off
0,ARI,0.573372,0.573464
1,ATL,0.548059,0.603575


In [75]:
off_ratio.head(10)

Unnamed: 0,player,position,pass_ct,snap_ct
0,Tom Brady,QB,795,1197
20,Ben Roethlisberger,QB,637,1037
37,Aaron Rodgers,QB,552,959
54,Ryan Fitzpatrick,QB,6,14
55,Marcedes Lewis,TE,188,476
74,Adrian Peterson,RB,25,68
78,Danny Amendola,WR,133,170
86,Matt Ryan,QB,548,926
103,Joe Flacco,QB,42,63
106,DeSean Jackson,WR,191,295


In [76]:
def_ratio.head(10)

Unnamed: 0,player,position,pass_ct,snap_ct
0,Eric Weddle,FS,123,174
4,Calais Campbell,DT,357,554
5,Calais Campbell,DE,357,554
20,Malcolm Jenkins,SS,545,935
36,Jason McCourty,FS,171,289
44,Devin McCourty,FS,509,980
62,Carlos Dunlap,LB,286,427
79,Corey Peters,DT,154,363
94,Everson Griffen,DE,249,398
103,Al Woods,DT,274,572


In [72]:
rates_out.to_csv('data_21/cov_21.csv',index=False)
team_pass_out.to_csv('data_21/team_pr_21.csv',index=False)
off_ratio.to_csv('data_21/off_xpass.csv',index=False)
def_ratio.to_csv('data_21/def_xpass.csv',index=False)