In [1]:
# import libraries
import pandas as pd
import numpy as np

In [2]:
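# Cleaning function
# Cleans one raw team-season table: removes the bye-week row, the first two rows
# and everything from the playoffs marker onward, drops unnecessary columns and
# renames the remaining ones (the per-team merge happens in later cells)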
def clean_nfl_data(df):
    """Clean one raw team-season table.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with integer column labels (0-24 are referenced here) and
        integer row labels that include 0 and 1, e.g. as produced by
        ``pd.read_csv(path, header=None)``.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index and the columns ``Week``,
        ``TmScore``, ``OppScore``, ``off1stDn``, ``offTotYd``, ``offPassYd``,
        ``offRushYd``, ``offTO``, ``def1stDn``, ``defTotYdsAllowed``,
        ``defPassYdsAllowed``, ``defRushYdsAllowed`` and ``defTOGained``.
        Missing values are replaced by 0.  Values are not converted to a
        numeric dtype here.

    Raises
    ------
    KeyError
        If a referenced column label, or row label 0 or 1, is missing.
    """
    # drop bye week row (flagged in column 9)
    df = df.drop(df.index[df[9] == 'Bye Week'])

    # drop nonrelevant columns
    df = df.drop(columns=[1, 3, 4, 5, 6, 7, 8, 9, 22, 23, 24])
    # drop rows 0 and 1 (presumably the two header lines of the export -- TODO
    # confirm) and renumber so that row labels equal row positions below
    df = df.drop([0, 1]).reset_index(drop=True)

    # identify the playoffs marker row and drop it and everything after it.
    # A single mask is used for both detecting and locating the marker; going
    # through astype(str) with na=False keeps this working when column 2 holds
    # NaN or is not string-typed at all.
    is_playoffs_marker = df[2].astype(str).str.contains('Playoffs', regex=False, na=False)
    if is_playoffs_marker.any():
        first_marker_pos = int(is_playoffs_marker.to_numpy().argmax())
        df = df.iloc[:first_marker_pos]
    # column 2 was only needed to find the marker
    df = df.drop(columns=[2])

    # rename columns
    df = df.rename(columns={0: 'Week', 10: 'TmScore', 11: 'OppScore', 12: 'off1stDn', 13: 'offTotYd',
                            14: 'offPassYd', 15: 'offRushYd', 16: 'offTO', 17: 'def1stDn',
                            18: 'defTotYdsAllowed', 19: 'defPassYdsAllowed',
                            20: 'defRushYdsAllowed', 21: 'defTOGained'})

    # reset the index
    df = df.reset_index(drop=True)

    # fillna with 0
    df = df.fillna(0)

    return df

In [3]:
# Make column and index function
def nfl_team_year(idf, team, year):
    """Clean a raw season table and index it by (Team, Year, Week).

    Parameters
    ----------
    idf : pandas.DataFrame
        Raw table, passed unchanged to ``clean_nfl_data``.
    team, year
        Values written to every row of the new ``Team`` and ``Year`` columns.

    Returns
    -------
    pandas.DataFrame
        The cleaned table with a three-level index ``['Team', 'Year', 'Week']``.
    """
    cleaned = clean_nfl_data(idf)

    # Tag every row with its team and season, then promote the identifying
    # columns to the index.
    tagged = cleaned.assign(Team=team, Year=year)
    return tagged.set_index(['Team', 'Year', 'Week'])

In [4]:
# Load, clean and index every team-season CSV into `data`, keyed '<abbr><yy>'
# (e.g. 'ari08').
# Each entry is (key prefix, team name -- also the directory name under ./NFL,
# file-name suffix).  The paths use two file-name patterns: '20YYseason.csv'
# for the first group of teams and '20YY.csv' for the second.
# NOTE(review): 31 teams are listed; the Seattle Seahawks do not appear --
# confirm this is intentional.
team_files = [
    ('ari', 'Arizona Cardinals', 'season'),
    ('atl', 'Atlanta Falcons', 'season'),
    ('bal', 'Baltimore Ravens', 'season'),
    ('buf', 'Buffalo Bills', 'season'),
    ('car', 'Carolina Panthers', 'season'),
    ('chi', 'Chicago Bears', 'season'),
    ('cin', 'Cincinnati Bengals', 'season'),
    ('cle', 'Cleveland Browns', 'season'),
    ('dal', 'Dallas Cowboys', 'season'),
    ('den', 'Denver Broncos', 'season'),
    ('det', 'Detroit Lions', 'season'),
    ('gnb', 'Green Bay Packers', 'season'),
    ('hou', 'Houston Texans', 'season'),
    ('ind', 'Indianapolis Colts', 'season'),
    ('jax', 'Jacksonville Jaguars', 'season'),
    ('kan', 'Kansas City Chiefs', 'season'),
    ('lac', 'Los Angeles Chargers', 'season'),
    ('lar', 'Los Angeles Rams', 'season'),
    ('mia', 'Miami Dolphins', ''),
    ('min', 'Minnesota Vikings', ''),
    ('nwe', 'New England Patriots', ''),
    ('nor', 'New Orleans Saints', ''),
    ('nyg', 'New York Giants', ''),
    ('nyj', 'New York Jets', ''),
    ('oak', 'Oakland Raiders', ''),
    ('phi', 'Philadelphia Eagles', ''),
    ('pit', 'Pittsburgh Steelers', ''),
    ('sfo', 'San Francisco 49ers', ''),
    ('tam', 'Tampa Bay Buccaneers', ''),
    ('ten', 'Tennessee Titans', ''),
    ('was', 'Washington Redskins', ''),
]

data = {}
# Seasons are the outer loop so that keys are inserted season by season, team
# by team within each season.
for yy in ["08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18"]:
    season = '20{}'.format(yy)
    for abbr, team_name, suffix in team_files:
        csv_path = './NFL/{}/{}{}.csv'.format(team_name, season, suffix)
        raw_season = pd.read_csv(csv_path, header=None)
        data[abbr + yy] = nfl_team_year(raw_season, team_name, season)


In [5]:
# Create data frames per team
# Each <abbr>list holds a team's 2009-2018 season frames, and each team frame
# stacks the 2008 season followed by those seasons.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0.
later_seasons = ['09', '10', '11', '12', '13', '14', '15', '16', '17', '18']

# Arizona Cardinals
arilist = [data['ari' + yy] for yy in later_seasons]
ari = pd.concat([data['ari08']] + arilist)

# Atlanta Falcons
atllist = [data['atl' + yy] for yy in later_seasons]
atl = pd.concat([data['atl08']] + atllist)

# Baltimore Ravens
ballist = [data['bal' + yy] for yy in later_seasons]
bal = pd.concat([data['bal08']] + ballist)

# Buffalo Bills
buflist = [data['buf' + yy] for yy in later_seasons]
buf = pd.concat([data['buf08']] + buflist)

# Carolina Panthers
carlist = [data['car' + yy] for yy in later_seasons]
car = pd.concat([data['car08']] + carlist)

# Chicago Bears
chilist = [data['chi' + yy] for yy in later_seasons]
chi = pd.concat([data['chi08']] + chilist)

# Cincinnati Bengals
cinlist = [data['cin' + yy] for yy in later_seasons]
cin = pd.concat([data['cin08']] + cinlist)

# Cleveland Browns
clelist = [data['cle' + yy] for yy in later_seasons]
cle = pd.concat([data['cle08']] + clelist)

# Dallas Cowboys
dallist = [data['dal' + yy] for yy in later_seasons]
dal = pd.concat([data['dal08']] + dallist)

# Denver Broncos
denlist = [data['den' + yy] for yy in later_seasons]
den = pd.concat([data['den08']] + denlist)

# Detroit Lions
detlist = [data['det' + yy] for yy in later_seasons]
det = pd.concat([data['det08']] + detlist)

# Green Bay Packers
gnblist = [data['gnb' + yy] for yy in later_seasons]
gnb = pd.concat([data['gnb08']] + gnblist)

# Houston Texans
houlist = [data['hou' + yy] for yy in later_seasons]
hou = pd.concat([data['hou08']] + houlist)

# Indianapolis Colts
indlist = [data['ind' + yy] for yy in later_seasons]
ind = pd.concat([data['ind08']] + indlist)

# Jacksonville Jaguars
jaxlist = [data['jax' + yy] for yy in later_seasons]
jax = pd.concat([data['jax08']] + jaxlist)

# Kansas City Chiefs
kanlist = [data['kan' + yy] for yy in later_seasons]
kan = pd.concat([data['kan08']] + kanlist)

# Los Angeles Chargers
laclist = [data['lac' + yy] for yy in later_seasons]
lac = pd.concat([data['lac08']] + laclist)

# Los Angeles Rams
larlist = [data['lar' + yy] for yy in later_seasons]
lar = pd.concat([data['lar08']] + larlist)

# Miami Dolphins
mialist = [data['mia' + yy] for yy in later_seasons]
mia = pd.concat([data['mia08']] + mialist)

# Minnesota Vikings (frame is named `minn`, which avoids shadowing the builtin min)
minlist = [data['min' + yy] for yy in later_seasons]
minn = pd.concat([data['min08']] + minlist)

# New England Patriots
nwelist = [data['nwe' + yy] for yy in later_seasons]
nwe = pd.concat([data['nwe08']] + nwelist)

# New Orleans Saints
norlist = [data['nor' + yy] for yy in later_seasons]
nor = pd.concat([data['nor08']] + norlist)

# New York Giants
nyglist = [data['nyg' + yy] for yy in later_seasons]
nyg = pd.concat([data['nyg08']] + nyglist)

# New York Jets
nyjlist = [data['nyj' + yy] for yy in later_seasons]
nyj = pd.concat([data['nyj08']] + nyjlist)

# Oakland Raiders
oaklist = [data['oak' + yy] for yy in later_seasons]
oak = pd.concat([data['oak08']] + oaklist)

# Philadelphia Eagles
philist = [data['phi' + yy] for yy in later_seasons]
phi = pd.concat([data['phi08']] + philist)

# Pittsburgh Steelers
pitlist = [data['pit' + yy] for yy in later_seasons]
pit = pd.concat([data['pit08']] + pitlist)

# San Francisco 49ers
sfolist = [data['sfo' + yy] for yy in later_seasons]
sfo = pd.concat([data['sfo08']] + sfolist)

# Tampa Bay Buccaneers
tamlist = [data['tam' + yy] for yy in later_seasons]
tam = pd.concat([data['tam08']] + tamlist)

# Tennessee Titans
tenlist = [data['ten' + yy] for yy in later_seasons]
ten = pd.concat([data['ten08']] + tenlist)

# Washington Redskins
waslist = [data['was' + yy] for yy in later_seasons]
was = pd.concat([data['was08']] + waslist)

In [6]:
# Stack the per-team dataframes into one total dataframe: Arizona first, then
# the remaining teams in list order.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0.
teamlist = [atl, bal, buf, car, chi, cin, cle, dal, den, det, gnb, hou, ind, jax, kan, lac, lar, mia, minn, nwe, nor, nyg, nyj,
            oak, phi, pit, sfo, tam, ten, was]

team_data = pd.concat([ari] + teamlist)

In [7]:
# Sanity check: number of rows in the combined frame (one per Team/Year/Week index entry)
len(team_data)

5456

In [8]:
# Preview the first rows of the combined frame to check the index levels and column names
team_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,TmScore,OppScore,off1stDn,offTotYd,offPassYd,offRushYd,offTO,def1stDn,defTotYdsAllowed,defPassYdsAllowed,defRushYdsAllowed,defTOGained
Team,Year,Week,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Arizona Cardinals,2008,1,23,13,18,285,176,109,0,13,291,183,108,5
Arizona Cardinals,2008,2,31,10,22,445,364,81,0,17,236,164,72,0
Arizona Cardinals,2008,3,17,24,19,313,197,116,2,21,323,187,136,0
Arizona Cardinals,2008,4,35,56,33,468,426,42,7,23,373,284,89,1
Arizona Cardinals,2008,5,41,17,28,373,250,123,0,13,287,203,84,4
