In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Project root: the parent directory of the current working directory.
root_dir = os.path.dirname(os.getcwd())

In [2]:
# Load the four raw tables (plays, per-player play rows, games, players)
# from the data folder under the project root.
df_play, df_player_play, df_games, df_players = (
    pd.read_csv(os.path.join(root_dir, rel_path))
    for rel_path in (
        'data/plays.csv',
        'data/player_play.csv',
        'data/games.csv',
        'data/players.csv',
    )
)

In [3]:
# How many plays carry each PFF pass-coverage label.
df_play.loc[:, 'pff_passCoverage'].value_counts()

Cover-3                 4956
Cover-1                 3300
Quarters                2073
Cover-2                 1852
Cover 6-Left             692
Cover-6 Right            690
Cover-3 Seam             636
Cover-0                  605
Red Zone                 537
2-Man                    186
Goal Line                146
Bracket                   75
Cover-1 Double            54
Prevent                   46
Cover-3 Cloud Right       31
Cover-3 Cloud Left        30
Miscellaneous             14
Cover-3 Double Cloud       9
Name: pff_passCoverage, dtype: int64

In [4]:
# How many plays carry each PFF man/zone label.
df_play.loc[:, 'pff_manZone'].value_counts()

Zone     10969
Man       4145
Other      818
Name: pff_manZone, dtype: int64

# Split out coverages, group

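Note: the Cover-3 variants are collapsed into a single Cover-3 group, and likewise the Cover-1 and Cover-6 variants; Quarters, Cover-2 and Cover-0 are kept as they are, and every other coverage is captured only through the Man / Zone / Other flags.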

In [5]:
# Attach each play's week (from the games table) to its game/play/team keys.
# validate='many_to_one' makes the merge fail loudly if the games table ever
# holds a duplicated gameId; a silent row fan-out here would break the later
# cells, which attach coverage counts to df_condensed by row position.
df_condensed = df_play[['gameId', 'playId', 'possessionTeam', 'defensiveTeam']].merge(
    df_games[['gameId', 'week']],
    how='left',
    on=['gameId'],
    validate='many_to_one',
)

# One-hot encode the coverage label: one 0/1 integer column per distinct
# pff_passCoverage value, row-aligned with df_play.
coverage_df = pd.get_dummies(df_play['pff_passCoverage']).astype(int)

In [6]:
#df_play[df_play['gameId'] == 2022091808].possessionTeam.unique()
#df_play[df_play['gameId'] == 2022091808]['defensiveTeam'].unique()

Aggregate variants of Cover-1, -3, and -6, then concat

In [7]:
# Collect the one-hot columns that belong to each coverage family by
# substring match on the column name.
cols_c3 = [col for col in coverage_df.columns if 'Cover-3' in col]
cols_c1 = [col for col in coverage_df.columns if 'Cover-1' in col]
# Cover-6 is spelled both with a hyphen and with a space in the labels.
cols_c6 = [
    col for col in coverage_df.columns
    if any(tag in col for tag in ('Cover-6', 'Cover 6'))
]

# Row-wise sum over each family's columns gives one indicator per family.
# Raw arrays are used, so values are attached by row position, not by index.
df_condensed = df_condensed.assign(**{
    'cover_3': coverage_df[cols_c3].sum(axis=1).to_numpy(),
    'cover_6': coverage_df[cols_c6].sum(axis=1).to_numpy(),
    'cover_1': coverage_df[cols_c1].sum(axis=1).to_numpy(),
})

In [8]:
# Append the coverages that are kept as-is (no variants to collapse)
# next to the grouped cover_1 / cover_3 / cover_6 columns.
df_covered = pd.concat(
    objs=[
        df_condensed,
        coverage_df[['Quarters', 'Cover-2', 'Cover-0']],
    ],
    axis='columns',
)

Add one-hot Man / Zone / Other flags from `pff_manZone` (plays that are neither man nor zone fall under 'Other')

In [9]:
# One-hot indicators for the PFF man/zone label (one 0/1 column per label).
man_zone_df = pd.get_dummies(df_play['pff_manZone']).astype(int)

# This cell rebuilds df_covered from itself, so first drop any indicator
# columns left over from a previous run; otherwise re-running the cell would
# append duplicate columns. On a fresh kernel the drop is a no-op.
df_covered = pd.concat(
    [df_covered.drop(columns=man_zone_df.columns, errors='ignore'), man_zone_df],
    axis=1,
)
                                           

In [10]:
# Peek at the first row to confirm the combined column layout.
df_covered.iloc[:1]

Unnamed: 0,gameId,playId,possessionTeam,defensiveTeam,week,cover_3,cover_6,cover_1,Quarters,Cover-2,Cover-0,Man,Other,Zone
0,2022102302,2655,CIN,ATL,7,1,0,0,0,0,0,0,0,1


Define coverages teams faced on offense, vs. ones they employed on defense

In [11]:
# Per-team, per-week coverage totals.
# The opposing team's column is dropped explicitly before summing: it is a
# string column, and relying on groupby().sum() to discard it silently only
# works on older pandas (pandas >= 2.0 sums object columns instead, which
# would leave a concatenated-string column in the result).

# Coverages each offense faced.
cov_faced = (
    df_covered
    .drop(columns=['gameId', 'playId', 'defensiveTeam'])
    .groupby(['possessionTeam', 'week'])
    .sum()
    .reset_index()
)

# Coverages each defense used.
cov_used = (
    df_covered
    .drop(columns=['gameId', 'playId', 'possessionTeam'])
    .groupby(['defensiveTeam', 'week'])
    .sum()
    .reset_index()
)

In [12]:
# First row of the per-defense weekly totals.
cov_used.iloc[:1]

Unnamed: 0,defensiveTeam,week,cover_3,cover_6,cover_1,Quarters,Cover-2,Cover-0,Man,Other,Zone
0,ARI,1,21,2,21,7,4,3,26,4,34


## Aggregate cumulative snap counts

Here we get cumulative snap counts (rolling week-to-week) for the coverages each team uses

We use a multiindex to account for byes, merging into it, then filling cumulative sum forward

CF = coverages faced (offense), CU = coverages used  (defense)


In [13]:
# Define cartesian product
off_ind = pd.DataFrame(index=pd.MultiIndex.from_product([range(1,10),cov_faced.possessionTeam.unique()])).reset_index().rename(columns={'level_0':'week','level_1':'possessionTeam'})
def_ind = pd.DataFrame(index=pd.MultiIndex.from_product([range(1,10),cov_used.defensiveTeam.unique()])).reset_index().rename(columns={'level_0':'week','level_1':'defensiveTeam'})

In [14]:
# Get cumulative sums for weeks we do have
# Running (cumulative) totals per team, in the row order of the weekly
# tables. `week` is excluded from the cumulative sum and carried over
# unchanged alongside the team key.
df_cum_cf = pd.concat(
    [
        cov_faced[['possessionTeam', 'week']],
        cov_faced.drop(columns=['week']).groupby('possessionTeam').cumsum(),
    ],
    axis=1,
)

df_cum_cu = pd.concat(
    [
        cov_used[['defensiveTeam', 'week']],
        cov_used.drop(columns=['week']).groupby('defensiveTeam').cumsum(),
    ],
    axis=1,
)

### Get defensive coverages used

We merge into the cartesian product of weeks + teams s.t. team bye week data is filled forward 

This is mainly so byes don't trip up backward-looking merges (due to the +1 week shift)

In [15]:
# First two rows of the cumulative defensive totals.
df_cum_cu.iloc[:2]

Unnamed: 0,defensiveTeam,week,cover_3,cover_6,cover_1,Quarters,Cover-2,Cover-0,Man,Other,Zone
0,ARI,1,21,2,21,7,4,3,26,4,34
1,ARI,2,35,14,32,16,14,4,39,6,79


In [16]:
# Left-join the cumulative defensive totals onto the full week x team grid;
# (week, team) pairs with no data come through as NaN rows.
dm = pd.merge(
    def_ind,
    df_cum_cu,
    how='left',
    on=['week', 'defensiveTeam'],
).reset_index(drop=True)

We add ~17 rows of data absent due to byes, etc.

In [17]:
# Number of grid rows that had no matching cumulative data.
dm.shape[0] - df_cum_cu.shape[0]

17

Note Vegas example of week 6 bye, which we will fill forward for backward-looking references

In [18]:
# LV rows for weeks 5 through 7, before forward filling.
dm.loc[dm['defensiveTeam'].eq('LV') & dm['week'].gt(4) & dm['week'].lt(8)]

Unnamed: 0,week,defensiveTeam,cover_3,cover_6,cover_1,Quarters,Cover-2,Cover-0,Man,Other,Zone
146,5,LV,77.0,19.0,75.0,33.0,55.0,24.0,101.0,13.0,184.0
178,6,LV,,,,,,,,,
210,7,LV,99.0,21.0,89.0,51.0,63.0,24.0,115.0,13.0,234.0


In [19]:
# forward fill within each team
dmg = dm.groupby(['defensiveTeam']).ffill().reset_index(drop=True)
dmg['defensiveTeam'] = dm['defensiveTeam']

Voilà:

In [20]:
# Same LV slice (weeks 5 through 7) after forward filling.
dmg.loc[dmg['week'].gt(4) & dmg['week'].lt(8) & dmg['defensiveTeam'].eq('LV')]

Unnamed: 0,week,cover_3,cover_6,cover_1,Quarters,Cover-2,Cover-0,Man,Other,Zone,defensiveTeam
146,5,77.0,19.0,75.0,33.0,55.0,24.0,101.0,13.0,184.0,LV
178,6,77.0,19.0,75.0,33.0,55.0,24.0,101.0,13.0,184.0,LV
210,7,99.0,21.0,89.0,51.0,63.0,24.0,115.0,13.0,234.0,LV


### Get offensive coverages faced

Repeat the process for offense

In [21]:
# Left-join the cumulative offensive totals onto the full week x team grid;
# (week, team) pairs with no data come through as NaN rows.
om = pd.merge(
    off_ind,
    df_cum_cf,
    how='left',
    on=['week', 'possessionTeam'],
).reset_index(drop=True)

In [22]:
# forward fill within each team
omg = om.groupby(['possessionTeam']).ffill().reset_index(drop=True)
omg['possessionTeam'] = om['possessionTeam']

### Upshift week

So that the cumulative totals through week one merge into week two, and so on (look-back: each week only sees earlier weeks)

In [23]:
# Shift the week label forward by one so that totals through week N line up
# with week N+1 when merged later (look-back only).
# The shifted value is derived from the unshifted grid `om` (same row order
# and index as `omg`) instead of incrementing in place, so re-running this
# cell does not shift the weeks a second time.
omg['week'] = om['week'] + 1
omg.head(3)

Unnamed: 0,week,cover_3,cover_6,cover_1,Quarters,Cover-2,Cover-0,Man,Other,Zone,possessionTeam
0,2,8.0,3.0,11.0,16.0,14.0,2.0,13.0,3.0,41.0,ARI
1,2,17.0,2.0,10.0,21.0,14.0,0.0,11.0,3.0,54.0,ATL
2,2,23.0,2.0,6.0,15.0,1.0,1.0,7.0,0.0,41.0,BAL


In [24]:
# Shift the week label forward by one so that totals through week N line up
# with week N+1 when merged later (look-back only).
# The shifted value is derived from the unshifted grid `dm` (same row order
# and index as `dmg`) instead of incrementing in place, so re-running this
# cell does not shift the weeks a second time.
dmg['week'] = dm['week'] + 1
dmg.head(3)

Unnamed: 0,week,cover_3,cover_6,cover_1,Quarters,Cover-2,Cover-0,Man,Other,Zone,defensiveTeam
0,2,21.0,2.0,21.0,7.0,4.0,3.0,26.0,4.0,34.0,ARI
1,2,16.0,1.0,13.0,3.0,15.0,2.0,16.0,3.0,35.0,ATL
2,2,26.0,3.0,10.0,15.0,8.0,3.0,13.0,7.0,52.0,BAL


In [25]:
# Tag every count column with "_def" (coverages used by the defense).
# Columns are selected by name rather than by position, so the rename does
# not depend on column order, and already-suffixed columns are skipped so
# re-running the cell does not produce "_def_def".
dmg.columns = [
    col if (col in ('week', 'defensiveTeam') or col.endswith('_def')) else col + '_def'
    for col in dmg.columns
]

In [26]:
# Check the renamed defensive columns.
dmg.iloc[:3]

Unnamed: 0,week,cover_3_def,cover_6_def,cover_1_def,Quarters_def,Cover-2_def,Cover-0_def,Man_def,Other_def,Zone_def,defensiveTeam
0,2,21.0,2.0,21.0,7.0,4.0,3.0,26.0,4.0,34.0,ARI
1,2,16.0,1.0,13.0,3.0,15.0,2.0,16.0,3.0,35.0,ATL
2,2,26.0,3.0,10.0,15.0,8.0,3.0,13.0,7.0,52.0,BAL


In [27]:
# Tag every count column with "_off" (coverages faced by the offense).
# Columns are selected by name rather than by position, so the rename does
# not depend on column order, and already-suffixed columns are skipped so
# re-running the cell does not produce "_off_off".
omg.columns = [
    col if (col in ('week', 'possessionTeam') or col.endswith('_off')) else col + '_off'
    for col in omg.columns
]

In [28]:
# Check the renamed offensive columns.
omg.iloc[:1]

Unnamed: 0,week,cover_3_off,cover_6_off,cover_1_off,Quarters_off,Cover-2_off,Cover-0_off,Man_off,Other_off,Zone_off,possessionTeam
0,2,8.0,3.0,11.0,16.0,14.0,2.0,13.0,3.0,41.0,ARI


In [29]:
#omg.to_csv('data/coverages_faced.csv',index=False)

In [30]:
#dmg.to_csv('data/coverages_used.csv',index=False)