In [None]:
#TODO:
#Use player scores to inform priors
#Use first few weeks to assess "market confidence"

#Incorporate data from:
#- Sequential Home/Away
#- Player Scores
#- Injuries

In [21]:
import numpy as np
import pandas as pd
import datetime

In [30]:
#Import basic game stats for each NFL game from 2013-2017 seasons
#
#For every season this cell:
#  1. loads the raw per-game stats and derives Key / Date / Spread,
#  2. attaches each team's cumulative pre-game averages to the home and away side,
#  3. adds timezone, last-game and stadium features,
#  4. writes processed_data/<year>processed_extra.csv.
#
#NOTE: this cell calls weekly_team_stats() and time_since_last_game(); both must
#already be defined in the kernel when it runs.
cols = ['Key','Pts','FD','Fum','FumL','PY','PA','PI','PS', 'PSY', 'RA', 'RY']

for y in range(2013,2018):
    #Imports and creates initial season game files
    game_temp = pd.read_csv("base_data/nflstats" + str(y) + ".csv")
    #NOTE(review): presumably the raw CSV repeats the HPS/HPSY headers (pandas suffixes
    #the second occurrence with ".1") and the first pair belongs to the away side -- confirm.
    #index = str turns the row labels into strings; kept as-is because slices of this
    #frame are handed to the helper functions.
    game_temp = game_temp.rename(index = str, columns = {'HPS': 'APS', 'HPSY': 'APSY', 'HPS.1':'HPS', 'HPSY.1':'HPSY'})
    game_temp['Datetime'] = pd.to_datetime(game_temp['Start'])
    game_temp['Date'] = game_temp['Datetime'].dt.date
    #Key = kickoff date (YYYYMMDD) followed by the home team code
    game_temp['Key'] = game_temp['Datetime'].dt.strftime('%Y%m%d') + game_temp['Home']
    #Spread = home points minus away points
    game_temp['Spread'] = game_temp['HPts'] - game_temp['APts']
    date_temp = game_temp[['Key', 'Home', 'Away', 'Date']]

    #New base dataframe to join everything onto
    game_final = game_temp[['Key','Datetime','Home','Away','Spread']]
    first_team = True

    #Calculate all the cumulative stats
    teams = game_temp['Home'].unique()
    for t in teams:
        team_stats = weekly_team_stats(game_temp, t, cols)

        #Home merge: only rows where t is the home team can match
        right = team_stats.add_prefix('H').rename(columns = {'HKey': 'Key'})
        right['Home'] = t
        game_final = game_final.merge(right, how = 'left', on = ['Key', 'Home'])

        #Away merge: only rows where t is the away team can match
        right = team_stats.add_prefix('A').rename(columns = {'AKey': 'Key'})
        right['Away'] = t
        game_final = game_final.merge(right, how = 'left', on = ['Key', 'Away'])

        #Take care of dup columns
        #The first team creates the stat columns; every later merge collides with them
        #and produces _x/_y pairs, which are collapsed back into a single column here.
        if first_team:
            first_team = False
            continue

        for col in cols:
            if col == 'Key':
                continue
            #Row-wise sum skips NaN, so whichever side of the pair is filled wins
            game_final['A'+col] = game_final[['A'+col+'_x', 'A'+col+'_y']].sum(axis = 1)
            game_final['H'+col] = game_final[['H'+col+'_x', 'H'+col+'_y']].sum(axis = 1)
            game_final = game_final.drop(['A'+col+'_x', 'A'+col+'_y', 'H'+col+'_x', 'H'+col+'_y'], axis = 1)

    #Timezone Features
    timezones = pd.read_csv("timezone_data/timezones" + str(y) + ".csv")
    zone_cols = timezones[['Team_Code','Team_Zone','Zone_Value']]

    Away_timezones = zone_cols.rename(index=str, columns = {"Team_Code":"Away","Team_Zone":"AZone","Zone_Value":"AZoneVal"})
    Home_timezones = zone_cols.rename(index=str, columns = {"Team_Code":"Home","Team_Zone":"HZone","Zone_Value":"HZoneVal"})

    game_final = game_final.join(Away_timezones.set_index('Away'), on='Away',how = 'left')
    game_final = game_final.join(Home_timezones.set_index('Home'), on='Home',how = 'left')

    #Timezone diff (home zone value minus away zone value)
    game_final = game_final.assign(AZDiff = lambda df: df.HZoneVal - df.AZoneVal)

    #Days since last Game
    Home_date, Away_date = time_since_last_game(date_temp)
    game_final = game_final.join(Away_date.set_index('Key'), on='Key',how = 'left')
    game_final = game_final.join(Home_date.set_index('Key'), on='Key',how = 'left')
    #Difference between the two teams' "played at home last week" flags
    game_final = game_final.assign(H_LWGAdv = lambda df: df.H_LWHomeG - df.A_LWHomeG)

    #Stadium Features (only join on Home)
    stadiums = pd.read_csv("stadiums_data/stadiums" + str(y) + ".csv")
    stadiums = stadiums[['Team_Code','Turf','Grass','Hybrid','Roof_Open','Roof_Fixed','Roof_Retract']]
    stadiums = stadiums.rename(index = str, columns={'Team_Code':'Home'})

    game_final = game_final.join(stadiums.set_index('Home'), on = 'Home', how = 'left')

    game_final.to_csv('processed_data/'+str(y)+"processed_extra.csv", index = False)

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
#Takes game stats for each week and breaks them into dataframes of cumulative prior week averages for each team
#This is the data we want to use for prediction
def weekly_team_stats(stats, team, cols, prior_weeks=3):
    """Build the table of pre-game cumulative stat averages for one team.

    For every game `team` appears in (as 'Home' or 'Away'), one row is emitted
    holding the averages accumulated *before* that game, keyed by the game's
    'Key'. After the row is recorded, that game's own stats are folded into the
    running averages. Games are processed in the row order of `stats`; no
    sorting is done here.

    Parameters
    ----------
    stats : DataFrame
        Per-game stats with 'Key', 'Home', 'Away' columns and, for every name
        in `cols` other than 'Key', an 'H'- and 'A'-prefixed column.
    team : str
        Team code to build the table for.
    cols : list of str
        Stat names (without the H/A prefix); 'Key' identifies the game.
    prior_weeks : int, optional
        Number of pseudo-games the prior from team_stat_priors() counts as in
        the running average. Defaults to 3.

    Returns
    -------
    DataFrame
        One row per game of `team`, with the columns in `cols`.

    Raises
    ------
    KeyError
        If a stat in `cols` has no matching prefixed column in `stats`.
    """
    team_games = stats[(stats['Home'] == team) | (stats['Away'] == team)]

    #Get our "prior" for the season; 'Key' is a label, not a statistic
    running = {k: v for k, v in team_stat_priors(team, cols).items() if k != 'Key'}
    games_counted = prior_weeks

    #Collect rows in a list and build the frame once instead of appending row by row
    rows = []

    for _, week in team_games.iterrows():
        week = week.to_dict()

        #Add avg stats so far as entry for current week
        row = dict(running)
        row['Key'] = week['Key']
        rows.append(row)

        #Get stats for this week: strip the side prefix from the team's own columns
        prefix = 'H' if team == week['Home'] else 'A'
        w_stats = {k[1:]: v for k, v in week.items()
                   if k[:1] == prefix and k not in ('Home', 'Away')}

        #Compute cumulative avgs including this week
        games_counted += 1
        weight = 1.0 / games_counted
        running = {stat: avg * (1 - weight) + w_stats[stat] * weight
                   for stat, avg in running.items()}

    out_cols = list(cols)
    if 'Key' not in out_cols:
        out_cols.append('Key')
    return pd.DataFrame(rows, columns = out_cols)

In [4]:
#Generates 3 weeks of "prior" data to use in the cumulative stat averaging.
#Helps to make predictions early in the season; its contribution
#to the average diminishes as the season progresses.
def team_stat_priors(team, cols):
    """Return the starting ("prior") value for every stat of `team`.

    Currently a placeholder: every name in `cols` maps to 0 and `team` is
    not consulted.
    """
    return dict.fromkeys(cols, 0)

In [29]:
#Generates columns for time since last game feature. Adapted from time since last game notebook

def time_since_last_game(date_frame):
    """Compute last-game features for the home and away side of every game.

    For each team (taken from the unique values of 'Home') its games are
    sorted by 'Date' and three features are derived per game:

    * LW_HomeGame -- 1 if the team's previous game was at home, else 0
      (the team's first game is set to 1);
    * LW_AwayGame -- 1 if the team's previous game was away, else 0
      (the team's first game is set to 0);
    * DaysSLG     -- whole days since the team's previous game
      (the team's first game is set to 14, acting as a rest week).

    Previous games are looked up by position in the date-sorted table, so the
    result does not depend on the index type or row order of `date_frame`.

    Parameters
    ----------
    date_frame : DataFrame
        Must contain 'Key', 'Home', 'Away' and 'Date' columns. 'Date' values
        must support subtraction yielding an object with total_seconds().

    Returns
    -------
    (Home_team, Away_team) : tuple of DataFrame
        Home_team has one row per game, described from the home team's point
        of view, with columns 'Key', 'H_LWHomeG', 'H_LWAwayG', 'H_DaysSLG'.
        Away_team is the same from the away team's point of view with the
        'A_' prefix. Both have string index labels.
    """
    feature_cols = ['LW_HomeGame', 'LW_AwayGame', 'DaysSLG']
    first_game_rest_days = 14
    seconds_per_day = 60 * 60 * 24

    home_parts = []
    away_parts = []

    for team in date_frame['Home'].unique():
        #team specific table, in chronological order with a clean positional index
        is_team_game = (date_frame['Away'] == team) | (date_frame['Home'] == team)
        t_table = (date_frame.loc[is_team_game]
                   .sort_values(by = ['Date'])
                   .reset_index(drop = True))

        homes = t_table['Home'].tolist()
        aways = t_table['Away'].tolist()
        dates = t_table['Date'].tolist()

        #flag for Last Week Home Game -- week 1 starts as 1 for every team
        lw_home = [1] + [int(prev == team) for prev in homes[:-1]]

        #flag for Last Week Away Game -- week 1 starts as 0 for every team
        lw_away = [0] + [int(prev == team) for prev in aways[:-1]]

        #integer days since last game -- first week set to 14 to act as rest week
        days_slg = [first_game_rest_days] + [
            round((cur - prev).total_seconds() / seconds_per_day)
            for prev, cur in zip(dates, dates[1:])
        ]

        t_table = t_table.assign(LW_HomeGame = lw_home,
                                 LW_AwayGame = lw_away,
                                 DaysSLG = days_slg)

        #breakup into home games and away games
        home_parts.append(t_table.loc[t_table['Home'] == team])
        away_parts.append(t_table.loc[t_table['Away'] == team])

    def _collect(parts, side):
        #Stack the per-team pieces, drop helper columns and prefix the features
        if parts:
            table = pd.concat(parts, ignore_index = True)
        else:
            table = pd.DataFrame(columns = list(date_frame.columns) + feature_cols)
        table = table.drop(['Away', 'Date', 'Home'], axis = 1)
        return table.rename(index = str, columns = {'DaysSLG': side + "_DaysSLG",
                                                    'LW_HomeGame': side + "_LWHomeG",
                                                    'LW_AwayGame': side + "_LWAwayG"})

    Home_team = _collect(home_parts, 'H')
    Away_team = _collect(away_parts, 'A')

    return Home_team, Away_team

In [19]:
#Debugging: build the cumulative-average table of every team into a dict keyed by team code
#NOTE(review): game_stats is not defined in any visible cell -- presumably one season's
#raw stats frame; it must exist in the kernel before this cell runs.
teams = game_stats['Home'].unique()
team_stats = {}
for t in teams:
    #weekly_team_stats requires the list of stat columns as its third argument
    team_stats[t] = weekly_team_stats(game_stats, t, cols)

In [21]:
#Debugging: show the table stored in team_stats under the key 'NYJ'
print(team_stats['NYJ'])

            Key      Pts       FD      Fum      FumL       PY       PA  \
0             0        0        0        0         0        0        0   
1   20140907NYJ       19       20        2         1      190       29   
2   20140914GNB     21.5     19.5      1.5       0.5      178     30.5   
3   20140922NYJ  20.6667       20  2.66667  0.666667  218.667  34.6667   
4   20140928NYJ    19.75    19.25      2.5      0.75      215    34.25   
5   20141005SDG     15.8     17.6      2.2       0.8      184     33.6   
6   20141012NYJ       16  17.1667  2.16667  0.833333  182.167  35.1667   
7   20141016NWE  17.2857  18.7143        2  0.714286  185.429       35   
8   20141026NYJ       18   19.125     2.25     0.875  179.375   36.125   
9   20141102KAN  17.1111  19.4444  2.11111  0.777778  184.444  36.1111   
10  20141109NYJ     17.4       19      1.9       0.7    178.5     34.3   
11  20141124BUF  16.0909  18.2727  1.72727  0.636364  173.727       34   
12  20141201NYJ  15.8333    18.25  1.6