#### Opponents' Defense

The primary task in this notebook is to ensure that team abbreviations are consistent between the defense and players dataframes.

In [1]:
import os
import pandas as pd

In [2]:
# Read in player data: one dataframe per CSV file in `path`, keyed by the
# file name without its extension
players = {}
path = '../data/data_modified/players'
for file in os.listdir(path):
    # os.path.splitext handles entries without an extension (e.g. sub-directories),
    # for which file.split('.')[1] would raise IndexError, and keeps names that
    # contain additional dots intact
    name, ext = os.path.splitext(file)
    if ext == '.csv':
        players[name] = pd.read_csv(os.path.join(path, file))


In [3]:
# Load the raw defensive statistics and preview the first rows
defense_path = '../data/data_raw/teams/defensive_stats.csv'
defense = pd.read_csv(defense_path)
defense.head()

Unnamed: 0,team,date,fumbles_forced,interceptions,pass_yards_allowed,rush_yards_allowed,sacks,points_allowed
0,NWE,9/12/10,1,1,345,87,1,24
1,NWE,9/19/10,0,0,220,136,3,28
2,NWE,9/26/10,0,2,247,134,1,30
3,NWE,10/4/10,0,4,320,92,2,14
4,NWE,10/17/10,0,0,285,99,3,20


In [4]:
# Collect every value of the 'opp' column across all player dataframes
players_abbrs = [abbr for frame in players.values() for abbr in frame['opp'].tolist()]

# Count the collected entries whose abbreviation does not occur in the
# defense data's 'team' column (0 would mean the conventions already agree)
known_teams = defense['team'].unique()
unmatched = ~pd.Series(players_abbrs).isin(known_teams)
sum(unmatched)

17

In [5]:
# Identify the abbreviations that appear on one side only
player_teams = pd.Series(players_abbrs)
defense_teams = pd.Series(defense['team'].unique())

# Used in the players data but missing from the defense data
print(player_teams[~player_teams.isin(defense_teams)].unique())
# Used in the defense data but missing from the players data
print(defense_teams[~defense_teams.isin(player_teams)].unique())

['TEN' 'IND' 'HOU' 'STL' 'ARI' 'BAL' 'OAK']
['CLT' 'RAI' 'HTX' 'RAV' 'OTI' 'TAM' 'RAM' 'CRD' 'CAR']


In [6]:
# Map the abbreviations used in the defense data onto the ones used in the
# players data. The pairs were matched by hand (domain knowledge), so applying
# this to more players would require extending or automating the mapping.
abbr_mappings = {
    'OTI':'TEN',
    'CLT':'IND',
    'HTX':'HOU',
    'RAM':'STL',
    'CRD':'ARI',
    'RAV':'BAL',
    'RAI':'OAK'}
# Replace only within the 'team' column: calling replace on the whole frame
# would rewrite any matching value in every column.
defense['team'] = defense['team'].replace(abbr_mappings)
# Number of player-side abbreviations still missing from the defense data
sum(~pd.Series(players_abbrs).isin(defense['team'].unique()))

0

In [7]:
# Save the defense data with the converted abbreviations. The output directory
# is created first so the export does not fail when it does not exist yet.
output_path = '../data/data_modified/teams/defensive_stats.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
defense.to_csv(output_path, index=False)