In [1]:
# import here
import pandas as pd
from sqlalchemy import create_engine
from references_dict import Team_Dictionary

# connect to database
kaggle_engine = create_engine('mysql+pymysql://root:@localhost:3306/kaggle')
kaggle_conn = kaggle_engine.connect()
nfldb_engine = create_engine('mysql+pymysql://root:@localhost:3306/nfl_db')
nfldb_conn = nfldb_engine.connect()
file = ("D:\\NFLDB\\game_info.csv")

# trim csv file to relevant stats for weeks 1-16, 2009-2018
df = pd.read_csv(file)

# drop playoff weeks
indexNames = df[ df['schedule_playoff'] == True ].index
df.drop(indexNames,inplace=True)

# drop stats older than 2009
indexNames = df[ df['schedule_season'] < 2009 ].index
df.drop(indexNames,inplace=True)

In [8]:
def get_home_favorite(row):
	home_team = row['team_home']
	home_abbrev = Team_Dictionary().kaggle_games_abbrev[home_team]
	if home_abbrev == row['team_favorite_id']:
		return 1
	else:
		return 0

def get_spread_result(row):
    score_fav = 0
    score_und = 0
    spread = row['spread_favorite']*-1
    if(row['home_favorite']==1):
        score_fav = row['score_home']
        score_und = row['score_away']
    else:
        score_fav = row['score_away']
        score_und = row['score_home']
    diff = score_fav-score_und
    if( diff > spread ):
        return 1
    elif( diff < spread ):
        return -1
    else:
        return 0

def get_OU_result(row):
    OU = float(row['over_under_line'])
    total = row['score_home']+row['score_away']
    if( total > OU ):
        return 1
    elif( total < OU ):
        return -1
    else:
        return 0

def get_index(row):
    date = row['schedule_date']
    comps = date.split('/')
    date = comps[2]+'-'+comps[0]+'-'+comps[1]
    return date+Team_Dictionary().kaggle_games_abbrev[row['team_home']]

# # generate metrics for dataset, set index
df['home_favorite'] = df.apply (lambda row: get_home_favorite(row), axis=1)
df['spread_result'] = df.apply(lambda row: get_spread_result(row),axis=1)
df['OU_result'] = df.apply(lambda row: get_OU_result(row),axis=1)
df['idx'] = df.apply(lambda row: get_index(row),axis=1)
df.set_index('idx',inplace=True)

def get_pbpindex(row):
    team_dict = dict([[v,k] for k,v in Team_Dictionary().kaggle_plays_abbrev.items()])
    team_name = team_dict[row['home_team']]
    team_name = Team_Dictionary().kaggle_games_abbrev[team_name]
    comps = row['game_date'].split('/')
    print(row,comps)
    date = comps[2]+'-'+comps[0]+'-'+comps[1]
    idx = date+team_name
    idx = row['game_date']+team_name
    return idx

In [9]:
# sql statement for getting gameids
sql = "select distinct(pbp.game_id) as game_id, pbp.home_team as home_team, pbp.game_date as game_date \
       from nfl_pbp pbp \
       order by pbp.game_id"
df_gameids = pd.read_sql_query(sql, kaggle_conn, index_col=None)
df_gameids['idx'] = df_gameids.apply(lambda row: get_pbpindex(row),axis=1)
df_gameids.head()

game_id      2009091000
home_team           PIT
game_date     9/10/2009
Name: 0, dtype: object ['9', '10', '2009']
game_id      2009091300
home_team           ATL
game_date     9/13/2009
Name: 1, dtype: object ['9', '13', '2009']
game_id      2009091301
home_team           BAL
game_date     9/13/2009
Name: 2, dtype: object ['9', '13', '2009']
game_id      2009091302
home_team           CAR
game_date     9/13/2009
Name: 3, dtype: object ['9', '13', '2009']
game_id      2009091303
home_team           CIN
game_date     9/13/2009
Name: 4, dtype: object ['9', '13', '2009']
game_id      2009091304
home_team           CLE
game_date     9/13/2009
Name: 5, dtype: object ['9', '13', '2009']
game_id      2009091305
home_team           HOU
game_date     9/13/2009
Name: 6, dtype: object ['9', '13', '2009']
game_id      2009091306
home_team           IND
game_date     9/13/2009
Name: 7, dtype: object ['9', '13', '2009']
game_id      2009091307
home_team            NO
game_date     9/13/2009
Name: 8,

Name: 279, dtype: object ['9', '19', '2010']
game_id      2010091908
home_team           CAR
game_date     9/19/2010
Name: 280, dtype: object ['9', '19', '2010']
game_id      2010091909
home_team           DEN
game_date     9/19/2010
Name: 281, dtype: object ['9', '19', '2010']
game_id      2010091910
home_team           OAK
game_date     9/19/2010
Name: 282, dtype: object ['9', '19', '2010']
game_id      2010091911
home_team           NYJ
game_date     9/19/2010
Name: 283, dtype: object ['9', '19', '2010']
game_id      2010091912
home_team           CIN
game_date     9/19/2010
Name: 284, dtype: object ['9', '19', '2010']
game_id      2010091913
home_team            SD
game_date     9/19/2010
Name: 285, dtype: object ['9', '19', '2010']
game_id      2010091914
home_team           IND
game_date     9/19/2010
Name: 286, dtype: object ['9', '19', '2010']
game_id      2010092000
home_team            SF
game_date     9/20/2010
Name: 287, dtype: object ['9', '20', '2010']
game_id      201009

Name: 549, dtype: object ['9', '25', '2011']
game_id      2011092506
home_team           MIN
game_date     9/25/2011
Name: 550, dtype: object ['9', '25', '2011']
game_id      2011092507
home_team           CIN
game_date     9/25/2011
Name: 551, dtype: object ['9', '25', '2011']
game_id      2011092508
home_team           OAK
game_date     9/25/2011
Name: 552, dtype: object ['9', '25', '2011']
game_id      2011092509
home_team            SD
game_date     9/25/2011
Name: 553, dtype: object ['9', '25', '2011']
game_id      2011092510
home_team           STL
game_date     9/25/2011
Name: 554, dtype: object ['9', '25', '2011']
game_id      2011092511
home_team           CHI
game_date     9/25/2011
Name: 555, dtype: object ['9', '25', '2011']
game_id      2011092512
home_team           SEA
game_date     9/25/2011
Name: 556, dtype: object ['9', '25', '2011']
game_id      2011092513
home_team            TB
game_date     9/25/2011
Name: 557, dtype: object ['9', '25', '2011']
game_id      201109

game_id      2012100703
home_team           NYG
game_date     10/7/2012
Name: 835, dtype: object ['10', '7', '2012']
game_id      2012100704
home_team           MIN
game_date     10/7/2012
Name: 836, dtype: object ['10', '7', '2012']
game_id      2012100705
home_team           CIN
game_date     10/7/2012
Name: 837, dtype: object ['10', '7', '2012']
game_id      2012100706
home_team            KC
game_date     10/7/2012
Name: 838, dtype: object ['10', '7', '2012']
game_id      2012100707
home_team           CAR
game_date     10/7/2012
Name: 839, dtype: object ['10', '7', '2012']
game_id      2012100708
home_team           JAX
game_date     10/7/2012
Name: 840, dtype: object ['10', '7', '2012']
game_id      2012100709
home_team            NE
game_date     10/7/2012
Name: 841, dtype: object ['10', '7', '2012']
game_id      2012100710
home_team            SF
game_date     10/7/2012
Name: 842, dtype: object ['10', '7', '2012']
game_id      2012100711
home_team            NO
game_date     10

Name: 1110, dtype: object ['10', '13', '2013']
game_id      2013101310
home_team            NE
game_date    10/13/2013
Name: 1111, dtype: object ['10', '13', '2013']
game_id      2013101311
home_team            SF
game_date    10/13/2013
Name: 1112, dtype: object ['10', '13', '2013']
game_id      2013101312
home_team           DAL
game_date    10/13/2013
Name: 1113, dtype: object ['10', '13', '2013']
game_id      2013101400
home_team            SD
game_date    10/14/2013
Name: 1114, dtype: object ['10', '14', '2013']
game_id      2013101700
home_team           ARI
game_date    10/17/2013
Name: 1115, dtype: object ['10', '17', '2013']
game_id      2013102000
home_team           ATL
game_date    10/20/2013
Name: 1116, dtype: object ['10', '20', '2013']
game_id      2013102001
home_team           CAR
game_date    10/20/2013
Name: 1117, dtype: object ['10', '20', '2013']
game_id      2013102002
home_team           DET
game_date    10/20/2013
Name: 1118, dtype: object ['10', '20', '2013']
g

Name: 1389, dtype: object ['10', '26', '2014']
game_id      2014102604
home_team            KC
game_date    10/26/2014
Name: 1390, dtype: object ['10', '26', '2014']
game_id      2014102605
home_team            NE
game_date    10/26/2014
Name: 1391, dtype: object ['10', '26', '2014']
game_id      2014102606
home_team           NYJ
game_date    10/26/2014
Name: 1392, dtype: object ['10', '26', '2014']
game_id      2014102607
home_team            TB
game_date    10/26/2014
Name: 1393, dtype: object ['10', '26', '2014']
game_id      2014102608
home_team           TEN
game_date    10/26/2014
Name: 1394, dtype: object ['10', '26', '2014']
game_id      2014102609
home_team           ARI
game_date    10/26/2014
Name: 1395, dtype: object ['10', '26', '2014']
game_id      2014102610
home_team           CLE
game_date    10/26/2014
Name: 1396, dtype: object ['10', '26', '2014']
game_id      2014102611
home_team           PIT
game_date    10/26/2014
Name: 1397, dtype: object ['10', '26', '2014']
g

Name: 1673, dtype: object ['11', '15', '2015']
game_id      2015111506
home_team           BAL
game_date    11/15/2015
Name: 1674, dtype: object ['11', '15', '2015']
game_id      2015111507
home_team           PIT
game_date    11/15/2015
Name: 1675, dtype: object ['11', '15', '2015']
game_id      2015111508
home_team           OAK
game_date    11/15/2015
Name: 1676, dtype: object ['11', '15', '2015']
game_id      2015111509
home_team           NYG
game_date    11/15/2015
Name: 1677, dtype: object ['11', '15', '2015']
game_id      2015111510
home_team           DEN
game_date    11/15/2015
Name: 1678, dtype: object ['11', '15', '2015']
game_id      2015111511
home_team           SEA
game_date    11/15/2015
Name: 1679, dtype: object ['11', '15', '2015']
game_id      2015111600
home_team           CIN
game_date    11/16/2015
Name: 1680, dtype: object ['11', '16', '2015']
game_id      2015111900
home_team           JAX
game_date    11/19/2015
Name: 1681, dtype: object ['11', '19', '2015']
g

Name: 1954, dtype: object ['11', '24', '2016']
game_id      2016112700
home_team           ATL
game_date    11/27/2016
Name: 1955, dtype: object ['11', '27', '2016']
game_id      2016112701
home_team           BAL
game_date    11/27/2016
Name: 1956, dtype: object ['11', '27', '2016']
game_id      2016112702
home_team           BUF
game_date    11/27/2016
Name: 1957, dtype: object ['11', '27', '2016']
game_id      2016112703
home_team           CHI
game_date    11/27/2016
Name: 1958, dtype: object ['11', '27', '2016']
game_id      2016112705
home_team           HOU
game_date    11/27/2016
Name: 1959, dtype: object ['11', '27', '2016']
game_id      2016112706
home_team           MIA
game_date    11/27/2016
Name: 1960, dtype: object ['11', '27', '2016']
game_id      2016112707
home_team            NO
game_date    11/27/2016
Name: 1961, dtype: object ['11', '27', '2016']
game_id      2016112708
home_team            TB
game_date    11/27/2016
Name: 1962, dtype: object ['11', '27', '2016']
g

Name: 164, dtype: object ['11', '29', '2009']
game_id      2009112902
home_team           CIN
game_date    11/29/2009
Name: 165, dtype: object ['11', '29', '2009']
game_id      2009112903
home_team           HOU
game_date    11/29/2009
Name: 166, dtype: object ['11', '29', '2009']
game_id      2009112904
home_team           MIN
game_date    11/29/2009
Name: 167, dtype: object ['11', '29', '2009']
game_id      2009112905
home_team           NYJ
game_date    11/29/2009
Name: 168, dtype: object ['11', '29', '2009']
game_id      2009112906
home_team           PHI
game_date    11/29/2009
Name: 169, dtype: object ['11', '29', '2009']
game_id      2009112907
home_team           STL
game_date    11/29/2009
Name: 170, dtype: object ['11', '29', '2009']
game_id      2009112908
home_team           TEN
game_date    11/29/2009
Name: 171, dtype: object ['11', '29', '2009']
game_id      2009112909
home_team            SD
game_date    11/29/2009
Name: 172, dtype: object ['11', '29', '2009']
game_id   

Name: 410, dtype: object ['11', '21', '2010']
game_id      2010112110
home_team           STL
game_date    11/21/2010
Name: 411, dtype: object ['11', '21', '2010']
game_id      2010112111
home_team            SF
game_date    11/21/2010
Name: 412, dtype: object ['11', '21', '2010']
game_id      2010112112
home_team            NE
game_date    11/21/2010
Name: 413, dtype: object ['11', '21', '2010']
game_id      2010112113
home_team           PHI
game_date    11/21/2010
Name: 414, dtype: object ['11', '21', '2010']
game_id      2010112200
home_team            SD
game_date    11/22/2010
Name: 415, dtype: object ['11', '22', '2010']
game_id      2010112500
home_team           DET
game_date    11/25/2010
Name: 416, dtype: object ['11', '25', '2010']
game_id      2010112501
home_team           DAL
game_date    11/25/2010
Name: 417, dtype: object ['11', '25', '2010']
game_id      2010112502
home_team           NYJ
game_date    11/25/2010
Name: 418, dtype: object ['11', '25', '2010']
game_id   

Name: 654, dtype: object ['11', '13', '2011']
game_id      2011111312
home_team            SF
game_date    11/13/2011
Name: 655, dtype: object ['11', '13', '2011']
game_id      2011111313
home_team           NYJ
game_date    11/13/2011
Name: 656, dtype: object ['11', '13', '2011']
game_id      2011111400
home_team            GB
game_date    11/14/2011
Name: 657, dtype: object ['11', '14', '2011']
game_id      2011111700
home_team           DEN
game_date    11/17/2011
Name: 658, dtype: object ['11', '17', '2011']
game_id      2011112000
home_team           MIN
game_date    11/20/2011
Name: 659, dtype: object ['11', '20', '2011']
game_id      2011112001
home_team           ATL
game_date    11/20/2011
Name: 660, dtype: object ['11', '20', '2011']
game_id      2011112002
home_team           MIA
game_date    11/20/2011
Name: 661, dtype: object ['11', '20', '2011']
game_id      2011112003
home_team           CLE
game_date    11/20/2011
Name: 662, dtype: object ['11', '20', '2011']
game_id   

Name: 895, dtype: object ['11', '4', '2012']
game_id      2012110410
home_team           NYG
game_date     11/4/2012
Name: 896, dtype: object ['11', '4', '2012']
game_id      2012110411
home_team           ATL
game_date     11/4/2012
Name: 897, dtype: object ['11', '4', '2012']
game_id      2012110500
home_team            NO
game_date     11/5/2012
Name: 898, dtype: object ['11', '5', '2012']
game_id      2012110800
home_team           JAX
game_date     11/8/2012
Name: 899, dtype: object ['11', '8', '2012']
game_id      2012111100
home_team           CAR
game_date    11/11/2012
Name: 900, dtype: object ['11', '11', '2012']
game_id      2012111101
home_team            TB
game_date    11/11/2012
Name: 901, dtype: object ['11', '11', '2012']
game_id      2012111102
home_team           MIA
game_date    11/11/2012
Name: 902, dtype: object ['11', '11', '2012']
game_id      2012111103
home_team            NE
game_date    11/11/2012
Name: 903, dtype: object ['11', '11', '2012']
game_id      20

Name: 1141, dtype: object ['10', '27', '2013']
game_id      2013102800
home_team           STL
game_date    10/28/2013
Name: 1142, dtype: object ['10', '28', '2013']
game_id      2013103100
home_team           MIA
game_date    10/31/2013
Name: 1143, dtype: object ['10', '31', '2013']
game_id      2013110300
home_team           BUF
game_date     11/3/2013
Name: 1144, dtype: object ['11', '3', '2013']
game_id      2013110301
home_team           CAR
game_date     11/3/2013
Name: 1145, dtype: object ['11', '3', '2013']
game_id      2013110302
home_team           DAL
game_date     11/3/2013
Name: 1146, dtype: object ['11', '3', '2013']
game_id      2013110303
home_team           NYJ
game_date     11/3/2013
Name: 1147, dtype: object ['11', '3', '2013']
game_id      2013110304
home_team           STL
game_date     11/3/2013
Name: 1148, dtype: object ['11', '3', '2013']
game_id      2013110305
home_team           WAS
game_date     11/3/2013
Name: 1149, dtype: object ['11', '3', '2013']
game_id

Name: 1376, dtype: object ['10', '19', '2014']
game_id      2014101906
home_team           JAX
game_date    10/19/2014
Name: 1377, dtype: object ['10', '19', '2014']
game_id      2014101907
home_team           STL
game_date    10/19/2014
Name: 1378, dtype: object ['10', '19', '2014']
game_id      2014101908
home_team           WAS
game_date    10/19/2014
Name: 1379, dtype: object ['10', '19', '2014']
game_id      2014101909
home_team            SD
game_date    10/19/2014
Name: 1380, dtype: object ['10', '19', '2014']
game_id      2014101910
home_team           DAL
game_date    10/19/2014
Name: 1381, dtype: object ['10', '19', '2014']
game_id      2014101911
home_team           OAK
game_date    10/19/2014
Name: 1382, dtype: object ['10', '19', '2014']
game_id      2014101912
home_team           DEN
game_date    10/19/2014
Name: 1383, dtype: object ['10', '19', '2014']
game_id      2014102000
home_team           PIT
game_date    10/20/2014
Name: 1384, dtype: object ['10', '20', '2014']
g

Name: 1626, dtype: object ['10', '22', '2015']
game_id      2015102500
home_team           JAX
game_date    10/25/2015
Name: 1627, dtype: object ['10', '25', '2015']
game_id      2015102501
home_team           STL
game_date    10/25/2015
Name: 1628, dtype: object ['10', '25', '2015']
game_id      2015102502
home_team           MIA
game_date    10/25/2015
Name: 1629, dtype: object ['10', '25', '2015']
game_id      2015102503
home_team            NE
game_date    10/25/2015
Name: 1630, dtype: object ['10', '25', '2015']
game_id      2015102504
home_team            KC
game_date    10/25/2015
Name: 1631, dtype: object ['10', '25', '2015']
game_id      2015102505
home_team           WAS
game_date    10/25/2015
Name: 1632, dtype: object ['10', '25', '2015']
game_id      2015102506
home_team           DET
game_date    10/25/2015
Name: 1633, dtype: object ['10', '25', '2015']
game_id      2015102507
home_team           IND
game_date    10/25/2015
Name: 1634, dtype: object ['10', '25', '2015']
g

Name: 1870, dtype: object ['10', '16', '2016']
game_id      2016101602
home_team           DET
game_date    10/16/2016
Name: 1871, dtype: object ['10', '16', '2016']
game_id      2016101603
home_team           MIA
game_date    10/16/2016
Name: 1872, dtype: object ['10', '16', '2016']
game_id      2016101604
home_team            NE
game_date    10/16/2016
Name: 1873, dtype: object ['10', '16', '2016']
game_id      2016101605
home_team            NO
game_date    10/16/2016
Name: 1874, dtype: object ['10', '16', '2016']
game_id      2016101606
home_team           NYG
game_date    10/16/2016
Name: 1875, dtype: object ['10', '16', '2016']
game_id      2016101607
home_team           TEN
game_date    10/16/2016
Name: 1876, dtype: object ['10', '16', '2016']
game_id      2016101608
home_team           WAS
game_date    10/16/2016
Name: 1877, dtype: object ['10', '16', '2016']
game_id      2016101609
home_team           OAK
game_date    10/16/2016
Name: 1878, dtype: object ['10', '16', '2016']
g

IndexError: ('list index out of range', 'occurred at index 2046')

In [4]:
df.head()

Unnamed: 0_level_0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,home_favorite,spread_result,OU_result
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2009-09-10PIT,09/10/2009,2009,1,False,Pittsburgh Steelers,13,10,Tennessee Titans,PIT,-6.5,35.0,Heinz Field,False,67.0,9.0,64.0,,1,-1,-1
2009-09-13ARI,09/13/2009,2009,1,False,Arizona Cardinals,16,20,San Francisco 49ers,ARI,-4.5,45.0,University of Phoenix Stadium,False,72.0,0.0,,DOME,1,-1,-1
2009-09-13ATL,09/13/2009,2009,1,False,Atlanta Falcons,19,7,Miami Dolphins,ATL,-4.0,44.5,Georgia Dome,False,72.0,0.0,,DOME,1,1,-1
2009-09-13BAL,09/13/2009,2009,1,False,Baltimore Ravens,38,24,Kansas City Chiefs,BAL,-13.0,36.5,M&T Bank Stadium,False,69.0,7.0,74.0,,1,1,1
2009-09-13CAR,09/13/2009,2009,1,False,Carolina Panthers,10,38,Philadelphia Eagles,PHI,-2.5,43.5,Bank of America Stadium,False,77.0,7.0,59.0,,0,1,1
