In [1]:
import numpy as np
import pandas as pd
import random
import statsmodels.api as sm
from joblib import dump, load
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Download and prep Data

In [2]:
#download data
!aws s3 cp s3://capstone-nfl-data/NFL_PBP_Data_2010_2022_original.parquet NFL_PBP_Data_2010_2022_original.parquet

download: s3://capstone-nfl-data/NFL_PBP_Data_2010_2022_original.parquet to ./NFL_PBP_Data_2010_2022_original.parquet


In [3]:
#Load Data
df = pd.read_parquet('NFL_PBP_Data_2010_2022_original.parquet', engine='pyarrow')
df.head()
print(df.shape)

(588078, 384)


In [4]:
df = df[df['play_type'].isin(['pass', 'run', 'punt', 'extra_point', 'field_goal']) ]
df = df[~(df['game_half'] == 'Overtime' )]

In [5]:
def transform_off_personnel(row):

   rb_count = 0
   te_count = 0
   wr_count = 0
   ol_count = 0
   dl_count = 0
   db_count = 0

   if not pd.isna(row['offense_personnel']):
       personnel = row['offense_personnel'].split(', ')
       for p in personnel:
           if p[2:4] == 'RB':
               rb_count = int(p[0])
           elif p[2:4] == 'TE':
                te_count = int(p[0])
           elif p[2:4] == 'WR':
                wr_count = int(p[0])
           elif p[2:4] == 'OL':
                ol_count = int(p[0])
           elif p[2:4] == 'DL':
                dl_count = int(p[0])
           elif p[2:4] == 'DB':
               db_count = int(p[0])
       
   formation = str(rb_count) + str(te_count)
   return pd.Series([rb_count,te_count,wr_count,ol_count,dl_count, db_count, formation])

df[['off_rb_count','off_te_count','off_wr_count','off_ol_count','off_dl_count', 'off_db_count', 'formation']] = df.apply(transform_off_personnel, axis=1)

In [6]:
def transform_def_personnel(row):

   dl_count = 0
   db_count = 0
   lb_count = 0
   rb_count = 0
   wr_count = 0
   ol_count = 0
   
   if not pd.isna(row['defense_personnel']):
       personnel = row['defense_personnel'].split(', ')
       for p in personnel:
           if p[2:4] == 'LB':
                lb_count = int(p[0])
           elif p[2:4] == 'DL':
                dl_count = int(p[0])
           elif p[2:4] == 'DB':
                db_count = int(p[0])
           elif p[2:4] == 'WR':
                wr_count = int(p[0])
           elif p[2:4] == 'RB':
                rb_count = int(p[0])
           elif p[2:4] == 'OL':
                ol_count = int(p[0])
                
   return pd.Series([dl_count,db_count,lb_count,rb_count,wr_count,ol_count])

df[['def_dl_count','def_db_count','def_lb_count','def_rb_count','def_wr_count','def_ol_count']] = df.apply(transform_def_personnel, axis=1)

In [7]:
def PlayType_normalized(s):
  if s['play_type_nfl'] == 'PASS' and s['pass_location'] != None and s['pass_length'] != None: 
    return 'PASS' + '_' + str(s['pass_location']).upper() + '_' + str(s['pass_length']).upper()
  elif s['play_type_nfl'] == 'RUSH' and s['run_location'] != None: 
    return 'RUSH' + '_' + str(s['run_location']).upper()
  else:
    return 'OTHER'
df['PlayType_normalized'] = df.apply(PlayType_normalized, axis=1)

In [8]:
df = df[~(df['PlayType_normalized'] == 'OTHER' )]

In [9]:
bins = [0,10,20,30,40,50,60,70,80,90,100]
df['yardline_binned'] = pd.cut(df['yardline_100'], bins)

In [10]:
def red_zone(row): 
    if row.yardline_100 <= 20: 
        return 1
    else: 
        return 0

df['red_zone'] = df.apply(red_zone, axis=1)
# plt.hist(df.red_zone)

In [11]:
def ydstogo_binning(row): 
    if not row.red_zone: 
        if row.ydstogo > 10: 
            return 'long'
        elif row.ydstogo > 5: 
            return '10'
        else:
            return '5'
    
    if row.red_zone:
        if row.ydstogo > 10: 
            return 'long'
        elif row.ydstogo > 5:
            return '10'
        elif row.ydstogo > 3:
            return '5'
        else:
            return '3'
        
df['ydstogo_binned'] = df.apply(ydstogo_binning, axis=1)
# plt.hist(df[['ydstogo_binned']])

In [12]:
def score_diff_possession(row): 
    diff = row.score_differential
    if diff > 7: 
        return 2
    elif diff > 0 and diff < 7: 
        return 1
    elif diff == 0: 
        return 0
    elif diff < 0 and diff > -7: 
        return -1
    elif diff < -7: 
        return -2
    else:
        return None

df['poss_differential'] = df.apply(score_diff_possession, axis=1)
# plt.hist(df.poss_differential)

In [13]:
operson = np.array(df['offense_personnel'])

num_rb = []
num_te = []
num_wr = []

def personnel (formation):
    """Finding the number of RB's, TE's, WR's in each formation
    """
    for i in range(len(formation)):
        # takes the personnel and saves it as 'form'
        form = formation[i]
        try:
            rbloc = form.find('RB')-2
            num_rb.append(int(form[rbloc]))
        except: 
            num_rb.append(int(0))
        try:
            teloc = form.find('TE')-2
            num_te.append(int(form[teloc]))
        except: 
            num_te.append(int(0))
        try:
            wrloc = form.find('WR')-2
            num_wr.append(int(form[wrloc]))
        except: 
            num_wr.append(int(0))
            
personnel(operson)

# appending the new columns to the df
df['num_rb'] = num_rb
df['num_te'] = num_te
df['num_wr'] = num_wr

# get dummy for down
dummy = pd.get_dummies(df['down'])
dummy.columns = ['firstdown', 'seconddown', 'thirddown', 'fourthdown']
df = pd.concat([df, dummy], axis=1) 
# drop fourthdown to avoid multicollinearity
df = df.drop(columns=['fourthdown'])

# get dummy for offensive team
posteamdummy = pd.get_dummies(df['posteam'])
posteamdummy.columns = ['oARI', 'oATL', 'oBAL', 'oBUF', 'oCAR', 'oCHI', 'oCIN', 'oCLE', 'oDAL', 
                        'oDEN','oDET', 'oGB', 'oHOU', 'oIND', 'oJAX', 'oKC', 'oLA', 'oLAC', 'oLV', 
                        'oMIA', 'oMIN','oNE', 'oNO', 'oNYG', 'oNYJ', 'oPHI', 'oPIT', 'oSEA', 'oSF', 
                        'oTB', 'oTEN', 'oWAS']
df = pd.concat([df, posteamdummy], axis=1) 
# drop oWAS to avoid multicollinearity
df = df.drop(columns=['oWAS'])

# get dummy for defensive team
defteamdummy = pd.get_dummies(df['defteam'])
defteamdummy.columns = ['dARI', 'dATL', 'dBAL', 'dBUF', 'dCAR', 'dCHI', 'dCIN', 'dCLE', 'dDAL', 
                        'dDEN','dDET', 'dGB', 'dHOU', 'dIND', 'dJAX', 'dKC', 'dLA', 'dLAC', 'dLV', 
                        'dMIA', 'dMIN','dNE', 'dNO', 'dNYG', 'dNYJ', 'dPHI', 'dPIT', 'dSEA', 'dSF', 
                        'dTB', 'dTEN', 'dWAS']
df = pd.concat([df, defteamdummy], axis=1) 
# drop dWAS to avoid multicollinearity
df = df.drop(columns=['dWAS'])

# get dummy for each season
# it looks like 2016 is the start of when they tracked personnel
seasondummy = pd.get_dummies(df['season'])
seasondummy.columns = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022']
df = pd.concat([df, seasondummy], axis=1) 
# drop 2022 to avoid multicollinearity
df = df.drop(columns=['2022'])

df.head()

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
2,58.0,2010_01_ARI_STL,2010091208,LA,ARI,REG,1,ARI,away,LA,...,0,0,0,0,0,0,0,0,0,0
3,82.0,2010_01_ARI_STL,2010091208,LA,ARI,REG,1,ARI,away,LA,...,0,0,0,0,0,0,0,0,0,0
4,103.0,2010_01_ARI_STL,2010091208,LA,ARI,REG,1,ARI,away,LA,...,0,0,0,0,0,0,0,0,0,0
5,132.0,2010_01_ARI_STL,2010091208,LA,ARI,REG,1,ARI,away,LA,...,0,0,0,0,0,0,0,0,0,0
6,156.0,2010_01_ARI_STL,2010091208,LA,ARI,REG,1,ARI,away,LA,...,0,0,0,0,0,0,0,0,0,0


In [14]:
df['poss_differential_2'] = df['poss_differential'].map(lambda p: 1 if p == 2 else 0)
df['poss_differential_1'] = df['poss_differential'].map(lambda p: 1 if p == 1 else 0)
df['poss_differential_0'] = df['poss_differential'].map(lambda p: 1 if p == 0 else 0)
df['poss_differential_-1']= df['poss_differential'].map(lambda p: 1 if p == -1 else 0)
df['poss_differential_-2']= df['poss_differential'].map(lambda p: 1 if p == -2 else 0)

In [15]:
df['first_down_flag'] = df['down'].map(lambda p: 1 if p == 1 else 0)
df['second_down_flag']= df['down'].map(lambda p: 1 if p == 2 else 0)
df['third_down_flag'] = df['down'].map(lambda p: 1 if p == 3 else 0)
df['forth_down_flag'] = df['down'].map(lambda p: 1 if p == 4 else 0)

  df['second_down_flag']= df['down'].map(lambda p: 1 if p == 2 else 0)
  df['third_down_flag'] = df['down'].map(lambda p: 1 if p == 3 else 0)
  df['forth_down_flag'] = df['down'].map(lambda p: 1 if p == 4 else 0)


In [16]:
def clock_binned(row): 
    if row.play_clock == None: 
        return None
    diff = int(row.play_clock)
    if diff > 20: 
        return 'Greater_than_20 sec'
    elif diff >= 11 and diff <= 20: 
        return 'Between 11 and 20 sec'
    elif diff == 0: 
        return 'clock run-out'
    elif diff >= 5 and diff <= 10: 
        return 'Between 5 and 10 sec'
    elif diff > 0 and diff <= 5 : 
        return 'Between 0 and 5'
    else:
        return None

#applyig on the data set    
df['Play_clock_categorized'] = df.apply(clock_binned, axis=1)

#Creating binary columns for Play_clock_categorized

df['Play_clock_Greater_than_20'] = df['Play_clock_categorized'].map(lambda p: 1 if p == 'Greater_than_20 sec' else 0)
df['Play_clock_Between_11_and_20'] = df['Play_clock_categorized'].map(lambda p: 1 if p == 'Between 11 and 20 sec' else 0)
df['Play_clock_equal_0'] = df['Play_clock_categorized'].map(lambda p: 1 if p == 'clock run-out' else 0)
df['Play_clock_Between_5_and_10'] = df['Play_clock_categorized'].map(lambda p: 1 if p == 'Between 5 and 10 sec' else 0)
df['Play_clock_errors'] = df['Play_clock_categorized'].map(lambda p: 1 if p == None else 0)

In [17]:
temp_df = df[['posteam']]
temp_df = pd.get_dummies(temp_df['posteam'], prefix='pos')
df = pd.concat([df, temp_df], axis=1)

In [18]:
temp_df = df[['defteam']]
temp_df= pd.get_dummies(temp_df['defteam'], prefix='def')
df = pd.concat([df, temp_df], axis=1)

In [19]:
temp_df = df[['season']]
temp_df= pd.get_dummies(temp_df['season'])
df = pd.concat([df, temp_df], axis=1)

In [24]:
# Creating more detailed pass & run categories
# temp_df = df[['PlayType_normalized','pass_location','pass_length', 'run_location','run_gap']]

def Play_type_detailed(s):
  if 'PASS' in s['PlayType_normalized'] and s['pass_location'] != None and s['pass_length'] != None: 
    return 'PASS' + '_' + str(s['pass_location']).upper() + '_' + str(s['pass_length']).upper()
  elif 'RUSH' in s['PlayType_normalized'] and s['run_location'] != None and s['run_gap'] != None: 
    return 'RUSH' + '_' + str(s['run_location']).upper()+ '_' + str(s['run_gap']).upper()
  else:
    return 'OTHER'
# temp_df['Play_type_detailed'] = temp_df.apply(Play_type_detailed, axis=1)
# temp_df = temp_df[~(temp_df['Play_type_detailed'] == 'OTHER' )]

# #validation
# temp_df = temp_df.drop_duplicates(subset=['Play_type_detailed'])
# temp_df

# applyin on the data model

df['Play_type_detailed'] = df.apply(Play_type_detailed, axis=1)
# df = df[~(df['Play_type_detailed'] == 'OTHER' )]

In [25]:
print(df.Play_type_detailed.unique())

['PASS_RIGHT_SHORT' 'RUSH_LEFT_END' 'RUSH_LEFT_GUARD' 'RUSH_RIGHT_TACKLE'
 'RUSH_LEFT_TACKLE' 'PASS_LEFT_SHORT' 'RUSH_RIGHT_END' 'PASS_MIDDLE_SHORT'
 'PASS_RIGHT_DEEP' 'PASS_LEFT_DEEP' 'OTHER' 'PASS_MIDDLE_DEEP'
 'RUSH_RIGHT_GUARD' 'RUSH_MIDDLE_END']


In [26]:
temp_df = df[['Play_type_detailed']]
temp_df= pd.get_dummies(temp_df['Play_type_detailed'], prefix='PTDetailed')
df = pd.concat([df, temp_df], axis=1)

# Create Bandit

In [17]:
class BaselineContextualBandit: 
    def __init__(self, arms, e, yg_model, yg_features): 
        self.arms = arms
        self.e = e
        self.n = 0
        self.mean_reward = 0
        self.context_rewards = {}
        self.context_n = {}
        self.match_count = 0
        self.success_count = 0
        self.yg_model = yg_model
        self.yg_features = yg_features
        
    def get_defaults(self, red_zone, down, yds_to_go): 
            
            yards_gained = -1
            first_down = -1
            touchdown = -1
            
            
            if red_zone == 0: 

                if down == 1 and (yds_to_go == 'long' or yds_to_go == '10'): 
                    yards_gained = 0.9
                    first_down = 0.1
                    touchdown = 0
                elif down == 1 and yds_to_go == '5':
                    yards_gained = 0.5
                    first_down = 0.5
                    touchdown = 0
                elif down == 2 and yds_to_go == 'long': 
                    yards_gained = 0.8
                    first_down = 0.2
                    touchdown = 0
                elif down == 2 and yds_to_go == '10': 
                    yards_gained = 0.75
                    first_down = 0.25
                    touchdown = 0
                elif down == 2 and yds_to_go == '5': 
                    yards_gained = 0.5
                    first_down = 0.5
                    touchdown = 0
                elif down == 3 and yds_to_go == 'long': 
                    yards_gained = 0.5
                    first_down = 0.5
                    touchdown = 0
                elif down == 3 and yds_to_go == '10': 
                    yards_gained = 0.1
                    first_down = 0.9
                    touchdown = 0
                elif down == 3 and yds_to_go == '5': 
                    yards_gained = 0
                    first_down = 1
                    touchdown = 0
                elif down == 4 : 
                    yards_gained = 0
                    first_down = 1
                    touchdown = 0
            
            elif red_zone == 1: 
                
                if yds_to_go == 'long' or yds_to_go == '10': 
                    yards_gained = 0.5
                    first_down = 0
                    touchdown = 0.5
                elif yds_to_go == '5':
                    yards_gained = 0
                    first_down = 0
                    touchdown = 1
    
            return (yards_gained, first_down, touchdown)
    
    def get_reward_weights(self, quarter, down, yds_to_go, red_zone, poss_diff): 
        reward_weights = {}
        
        yards_gained = -1
        first_down = -1
        touchdown = -1
        
        if poss_diff == 2 and quarter == 1:
            
            if red_zone == 0 and down == 3 and yds_to_go == 10: 
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            
            elif red_zone == 1: 
                if yds_to_go == '10' or yds_to_go == 'long': 
                    yards_gained = 0.75
                    first_down = 0
                    touchdown = 0.25
                elif yds_to_go == '5':
                    yards_gained = 0.5
                    first_down = 0
                    touchdown = 0.5
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        
        elif poss_diff == -1 and quarter == 1: 
            if down == 3 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        
        elif poss_diff == -2 and quarter == 1: 
            if down == 2 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 3 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.2
                first_down = 0.8
                touchdown = 0
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == 2 and (quarter == 2 or quarter == 3):
            if down == 3 and yds_to_go == '10' and red_zone == 0:
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            elif red_zone == 1 and yds_to_go == '10':
                yards_gained = 0.75
                first_down = 0
                touchdown = 0.25
            elif red_zone == 1 and yds_to_go == '5':
                yards_gained = 0.5
                first_down = 0
                touchdown = 0.5
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == -1 and (quarter == 2 or quarter == 3): 
            if down == 1 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif down == 3 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            elif red_zone == 1 and yds_to_go == 'long': 
                yards_gained = 0.25
                first_down = 0.5
                touchdown = 0.25
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == -2 and (quarter == 2 or quarter == 3): 
            if down == 1 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif down == 2 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 3 and yds_to_go == 'long' and red_zone == 0:
                yards_gained = 0.2
                first_down = 0.8
                touchdown = 0
            elif down == 3 and yds_to_go == '10' and red_zone == 0: 
                yards_gained = 0.75
                first_down = 0.25
                touchdown = 0
            elif red_zone == 1 and yds_to_go == '10': 
                yards_gained = 0.25
                first_down = 0
                touchdown = 0.75
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == 2 and quarter == 4: 
            if down == 3 and yds_to_go == '10' and red_zone == 0: 
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            elif red_zone == 1 and (yds_to_go == '10' or yds_to_go == 'long'): 
                yards_gained = 1
                first_down = 0
                touchdown = 0
            elif red_zone == 1 and yds_to_go == '5':
                yards_gained = 0.5
                first_down = 0
                touchdown = 0.5
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == 1 and quarter == 4: 
            if red_zone == 1 and yds_to_go == 'long': 
                yards_gained = 0.25
                first_down = 0.5
                touchdown = 0.25
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == -1 and quarter == 4: 
            if down == 1 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif red_zone == 1 and yds_to_go == '10':
                yards_gained = 0.25
                first_down = 0.25
                touchdown = 0.75
            else:
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == -2 and quarter == 4: 
            if down == 1 and (yds_to_go == '10' or yds_to_go == 'long') and red_zone == 0: 
                yards_gained = 1
                first_down = 0
                touchdown = 0
            elif down == 1 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif down == 2 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif down == 2 and yds_to_go == '10' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 2 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 3 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 3 and yds_to_go == '10' and red_zone == 0: 
                yards_gained = 0.75
                first_down = 0.25
                touchdown = 0
            elif red_zone == 1 and (yds_to_go == '10' or yds_to_go == 'long'): 
                yards_gained = 0
                first_down = 0
                touchdown = 1
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go)         
    
        reward_weights['yards_gained'] = yards_gained
        reward_weights['first_down'] = first_down
        reward_weights['touchdown'] = touchdown
        return reward_weights
    
    def pull(self, context, row, weights):
        '''
        Use a row in the dataframe to update average rewards given context
        '''
        
        #Current rewards for context
        curr_context_rewards = self.context_rewards[context]
        
        #decide to use highest reward or try something new
        p = np.random.rand()
        if p < self.e: 
            selected_arm_idx = random.randint(0, len(self.arms) - 1)
        else: 
            selected_arm_idx = np.argmax(curr_context_rewards)
        
        #Get reward for decision
        reward = self.get_reward(self.arms[selected_arm_idx], context, row, weights)

        #Update counts
        self.n += 1
        self.context_n[context][selected_arm_idx] += 1
        
        #Update average rewards
        self.mean_reward = self.mean_reward + (reward - self.mean_reward) / self.n
        curr_context_rewards[selected_arm_idx] = curr_context_rewards[selected_arm_idx] + (reward - curr_context_rewards[selected_arm_idx]) / self.context_n[context][selected_arm_idx]
    
    def get_reward(self, arm, context, row, weights):
        '''
        Find expected reward for chosen arm in context
        '''
        # yds_gained = np.random.normal(loc=6.0, scale = 8.6)
        yds_gained = self.yg_model.predict(row[self.yg_features].to_numpy().reshape(1, -1))
        touchdown_pct = np.random.normal(loc=0.04, scale = 0.20)
        first_down_pct = np.random.normal(loc=0.31, scale = 0.46)
        
        yds_gained_weight = weights['yards_gained']
        first_down_weight = weights['first_down']
        touchdown_weight = weights['touchdown']
        
        return (yds_gained_weight * (yds_gained / 100)) + (touchdown_weight * touchdown_pct) + (first_down_weight * first_down_pct)
    
    def train(self, df): 
        
        #Iterate over dataframe
        for index, row in df.iterrows():
            
            #Pull context
            context_key, weights = self.get_context_key(row)
            
            #Initialize rewards if this is new context
            if context_key not in self.context_rewards.keys(): 
                self.context_rewards[context_key] = np.zeros(len(self.arms))
                self.context_n[context_key] = np.zeros(len(self.arms))
            
            #Update rewards
            self.pull(context_key, row, weights)

    def predict(self, df, evaluate=True, generate_output = False): 
        '''
        Predict play for each context based on max reward
        '''
        predictions = []
        unseen_context = 0
        
        #Iterate over dataframe
        for index, row in df.iterrows(): 

            #Pull context
            context_key = self.get_context_key(row, test=1)[0]
            
            #Pull relevant rewards vector
            if context_key in self.context_rewards.keys(): 
                rewards = self.context_rewards[context_key]
            else: 
                rewards = list(np.array([1])) + list(np.zeros(len(self.arms) - 1))
                # print(context_key)
                unseen_context += 1

            if not generate_output: 
                #find highest rewards
                max_idx = np.argmax(rewards)
                #Select associated arm with highest rewards
                arm = self.arms[max_idx] 
                #Add to predictions
                predictions.append(arm)
                
            else : 
                idx = sorted(range(len(rewards)), key=lambda i: rewards[i])[-3:]
                arms = []
                for i in idx: 
                    arms.append(self.arms[i])
                predictions.append(pd.Series(arms))
                
            if evaluate: 
                if row.PlayType_normalized == arm:
                    self.match_count += 1
                    if row.success: 
                        self.success_count += 1
 

        # print(unseen_context)
        return predictions

    def get_context_key(self, row, test = 0): 
        yds_to_go = row.ydstogo_binned
        red_zone = row.red_zone
        poss_diff = row.poss_differential
        down = row.down
        quarter = row.qtr
        
        if red_zone: 
            down = 0.0
        if quarter == 3.0: 
            quarter = 2.0
        
        context_key = str(quarter) + "_" + str(down) + "_" + str(yds_to_go) + "_" + str(poss_diff) + "_" + str(red_zone)
        
        if not test: 
            weights = self.get_reward_weights(quarter, down, yds_to_go, red_zone, poss_diff) 
        else: 
            weights = None
        
        return context_key, weights

    def get_evaluation_metrics(self):
        return self.match_count, self.success_count, self.success_count / self.match_count
    

In [15]:
def define_successful_play(row): 
    if row.yards_gained > 3 or row.touchdown or row.first_down:
        return 1
    else:
        return 0
    
df['success'] = df.apply(define_successful_play, axis=1)

# Download Models

In [31]:
from huggingface_hub import notebook_login
from huggingface_hub import hf_hub_download
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [26]:
#Yards Gained Model
model_location = hf_hub_download(repo_id="ic-hua/yards-gained-model", filename="yards-gained-clf.joblib")
download_yards_gained_model = load(model_location) 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [35]:
yg_features = ['yardline_100', 'firstdown', 'seconddown', 'thirddown', 
            'ydstogo', 'game_seconds_remaining', 'score_differential', 
            'num_rb', 'num_te', 'num_wr','posteam_timeouts_remaining', 'defteam_timeouts_remaining',
            'oARI', 'oATL', 'oBAL', 'oBUF', 'oCAR', 'oCHI', 'oCIN', 'oCLE', 'oDAL', 
            'oDEN','oDET', 'oGB', 'oHOU', 'oIND', 'oJAX', 'oKC', 'oLA', 'oLAC', 'oLV', 
            'oMIA', 'oMIN','oNE', 'oNO', 'oNYG', 'oNYJ', 'oPHI', 'oPIT', 'oSEA', 'oSF', 
            'oTB', 'oTEN',
            'dARI', 'dATL', 'dBAL', 'dBUF', 'dCAR', 'dCHI', 'dCIN', 'dCLE', 'dDAL', 
            'dDEN','dDET', 'dGB', 'dHOU', 'dIND', 'dJAX', 'dKC', 'dLA', 'dLAC', 'dLV', 
            'dMIA', 'dMIN','dNE', 'dNO', 'dNYG', 'dNYJ', 'dPHI', 'dPIT', 'dSEA', 'dSF', 
            'dTB', 'dTEN',
            '2016', '2017', '2018', '2019', '2020', '2021']

predictions = download_yards_gained_model.predict(df[yg_features].to_numpy())

390510
390510


In [32]:
fd_model_location = hf_hub_download(repo_id="Adi-khurana-berk/regressor_first_down", filename="regressor_first_down.joblib")
download_first_down_model = load(fd_model_location) 

Downloading:   0%|          | 0.00/3.93k [00:00<?, ?B/s]

In [46]:
fd_features = ['ydstogo', 'yardline_100', 'season', 'game_seconds_remaining', 'score_differential', 
               'poss_differential_2', 'poss_differential_1', 'poss_differential_0', 'poss_differential_-1', 'poss_differential_-2', 
  'first_down_flag', 'second_down_flag', 'third_down_flag', 'forth_down_flag', 
  'Play_clock_errors', 'Play_clock_equal_0', 'Play_clock_Between_5_and_10', 'Play_clock_Between_11_and_20', 'Play_clock_Greater_than_20',
  'off_rb_count', 'off_te_count', 'off_wr_count', 'off_ol_count', 'off_dl_count', 'off_db_count',
  'def_dl_count', 'def_db_count', 'def_lb_count', 'def_rb_count', 'def_wr_count', 'def_ol_count',
               # 'defenders_in_box', 'n_offense', 'n_defense',
 'pos_ARI', 'pos_ATL', 'pos_BAL', 'pos_BUF', 'pos_CAR', 'pos_CHI', 'pos_CIN', 'pos_CLE', 'pos_DAL', 'pos_DEN', 'pos_DET', 'pos_GB',
 'pos_HOU', 'pos_IND', 'pos_JAX', 'pos_KC', 'pos_LA', 'pos_LAC', 'pos_LV', 'pos_MIA', 'pos_MIN', 'pos_NE', 'pos_NO', 'pos_NYG', 'pos_NYJ',
 'pos_PHI', 'pos_PIT', 'pos_SEA', 'pos_SF', 'pos_TB', 'pos_TEN', 'pos_WAS', 'pos_ARI', 'pos_ATL', 'pos_BAL', 'pos_BUF', 'pos_CAR', 'pos_CHI', 'pos_CIN',
 'pos_CLE', 'pos_DAL', 'pos_DEN', 'pos_DET', 'pos_GB', 'pos_HOU', 'pos_IND', 'pos_JAX', 'pos_KC', 'pos_LA', 'pos_LAC', 'pos_LV',
 'pos_MIA', 'pos_MIN', 'pos_NE', 'pos_NO', 'pos_NYG', 'pos_NYJ', 'pos_PHI', 'pos_PIT', 'pos_SEA', 'pos_SF', 'pos_TB', 'pos_TEN', 'pos_WAS',
 'def_ARI', 'def_ATL', 'def_BAL', 'def_BUF', 'def_CAR', 'def_CHI', 'def_CIN', 'def_CLE', 'def_DAL', 'def_DEN', 'def_DET', 'def_GB',
 'def_HOU', 'def_IND', 'def_JAX', 'def_KC', 'def_LA', 'def_LAC', 'def_LV', 'def_MIA', 'def_MIN', 'def_NE', 'def_NO', 'def_NYG', 'def_NYJ',
 'def_PHI', 'def_PIT', 'def_SEA', 'def_SF', 'def_TB', 'def_TEN', 'def_WAS',  2016, 2017, 2018, 2019, 2020, 2021,
  'PTDetailed_PASS_LEFT_DEEP', 'PTDetailed_PASS_LEFT_SHORT', 'PTDetailed_PASS_MIDDLE_DEEP', 'PTDetailed_PASS_MIDDLE_SHORT', 'PTDetailed_PASS_RIGHT_DEEP',
 'PTDetailed_PASS_RIGHT_SHORT', 'PTDetailed_RUSH_LEFT_END', 'PTDetailed_RUSH_LEFT_GUARD', 'PTDetailed_RUSH_LEFT_TACKLE', 'PTDetailed_RUSH_MIDDLE_END',
 'PTDetailed_RUSH_RIGHT_END', 'PTDetailed_RUSH_RIGHT_GUARD', 'PTDetailed_RUSH_RIGHT_TACKLE']

display(df[fd_features].head())
# print(len(fd_features))
# print(df[fd_features].to_numpy().shape)
predictions = download_first_down_model.predict(df[fd_features].to_numpy())
# download_first_down_model.rank_

Unnamed: 0,ydstogo,yardline_100,season,game_seconds_remaining,score_differential,poss_differential_2,poss_differential_1,poss_differential_0,poss_differential_-1,poss_differential_-2,...,PTDetailed_PASS_MIDDLE_SHORT,PTDetailed_PASS_RIGHT_DEEP,PTDetailed_PASS_RIGHT_SHORT,PTDetailed_RUSH_LEFT_END,PTDetailed_RUSH_LEFT_GUARD,PTDetailed_RUSH_LEFT_TACKLE,PTDetailed_RUSH_MIDDLE_END,PTDetailed_RUSH_RIGHT_END,PTDetailed_RUSH_RIGHT_GUARD,PTDetailed_RUSH_RIGHT_TACKLE
2,10.0,78.0,2010,3595.0,0.0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
3,10.0,78.0,2010,3564.0,0.0,0,0,1,0,0,...,0,0,0,1,0,0,0,0,0,0
4,5.0,73.0,2010,3523.0,0.0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
5,10.0,55.0,2010,3497.0,0.0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
6,10.0,38.0,2010,3472.0,0.0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0


ValueError: X has 146 features, but LinearRegression is expecting 210 features as input.

In [29]:
train_df, test_df = train_test_split(df, test_size=.20, random_state = 123)

In [38]:
possible_plays = list(df.PlayType_normalized.unique())

bandit = BaselineContextualBandit(possible_plays, 0.1, download_yards_gained_model, yg_features)
bandit.train(train_df)

# print(bandit.context_rewards)
# print("==========")
# print(bandit.context_n)
# print("==========")
print(bandit.mean_reward)

{'4.0_3.0_5_-2.0_0': array([ 0.30623482,  0.24665543,  0.3156042 ,  0.31195406,  0.24258357,
        0.29860315, -0.06053754,  0.30288096,  0.30553806]), '2.0_1.0_10_nan_0': array([0.06870136, 0.06762046, 0.06945802, 0.06898567, 0.06700272,
       0.07054091, 0.07137818, 0.06876992, 0.069302  ]), '4.0_1.0_10_nan_0': array([0.06963763, 0.07097234, 0.06822603, 0.05536831, 0.0701157 ,
       0.05673232, 0.04992918, 0.06669056, 0.06461693]), '2.0_2.0_10_0.0_0': array([0.10922899, 0.09933856, 0.10444306, 0.11047516, 0.10624395,
       0.10923559, 0.09045883, 0.09655063, 0.10335579]), '1.0_1.0_10_0.0_0': array([0.07080875, 0.06898762, 0.06860621, 0.07120411, 0.07156399,
       0.07001123, 0.07106548, 0.07070981, 0.07045266]), '2.0_1.0_10_-2.0_0': array([0.07074069, 0.07145679, 0.07156085, 0.07049566, 0.07140338,
       0.07132628, 0.07140405, 0.06566474, 0.06163295]), '4.0_0.0_10_-2.0_1': array([ 0.02897868,  0.04036614,  0.03748496,  0.0438576 ,  0.0370471 ,
       -0.00454277, -0.00678372,

In [39]:
predictions = bandit.predict(test_df)

In [40]:
print(bandit.get_evaluation_metrics())

(8605, 4552, 0.5289947704822777)


In [382]:
# keys = list(bandit.context_rewards.keys())
# keys.sort()
# print(keys)

In [383]:
all_predicted_plays = bandit.predict(df)
df['pred_play'] = all_predicted_plays
train_df, test_df = train_test_split(df, test_size=.20, random_state = 123)

In [384]:
#Uses what actually happened to establish a baseline
class FormationContextualBandit: 
    def __init__(self, arms, e): 
        self.arms = arms
        self.e = e
        self.n = 0
        self.mean_reward = 0
        self.context_rewards = {}
        self.context_n = {}
        self.match_count = 0
        self.success_count = 0 
        
    def get_defaults(self, red_zone, down, yds_to_go): 
            
            yards_gained = -1
            first_down = -1
            touchdown = -1
            
            
            if red_zone == 0: 

                if down == 1 and (yds_to_go == 'long' or yds_to_go == '10'): 
                    yards_gained = 0.9
                    first_down = 0.1
                    touchdown = 0
                elif down == 1 and yds_to_go == '5':
                    yards_gained = 0.5
                    first_down = 0.5
                    touchdown = 0
                elif down == 2 and yds_to_go == 'long': 
                    yards_gained = 0.8
                    first_down = 0.2
                    touchdown = 0
                elif down == 2 and yds_to_go == '10': 
                    yards_gained = 0.75
                    first_down = 0.25
                    touchdown = 0
                elif down == 2 and yds_to_go == '5': 
                    yards_gained = 0.5
                    first_down = 0.5
                    touchdown = 0
                elif down == 3 and yds_to_go == 'long': 
                    yards_gained = 0.5
                    first_down = 0.5
                    touchdown = 0
                elif down == 3 and yds_to_go == '10': 
                    yards_gained = 0.1
                    first_down = 0.9
                    touchdown = 0
                elif down == 3 and yds_to_go == '5': 
                    yards_gained = 0
                    first_down = 1
                    touchdown = 0
                elif down == 4 : 
                    yards_gained = 0
                    first_down = 1
                    touchdown = 0
            
            elif red_zone == 1: 
                
                if yds_to_go == 'long' or yds_to_go == '10': 
                    yards_gained = 0.5
                    first_down = 0
                    touchdown = 0.5
                elif yds_to_go == '5':
                    yards_gained = 0
                    first_down = 0
                    touchdown = 1
    
            return (yards_gained, first_down, touchdown)
    
    def get_reward_weights(self, quarter, down, yds_to_go, red_zone, poss_diff): 
        reward_weights = {}
        
        yards_gained = -1
        first_down = -1
        touchdown = -1
        
        if poss_diff == 2 and quarter == 1:
            
            if red_zone == 0 and down == 3 and yds_to_go == 10: 
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            
            elif red_zone == 1: 
                if yds_to_go == '10' or yds_to_go == 'long': 
                    yards_gained = 0.75
                    first_down = 0
                    touchdown = 0.25
                elif yds_to_go == '5':
                    yards_gained = 0.5
                    first_down = 0
                    touchdown = 0.5
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        
        elif poss_diff == -1 and quarter == 1: 
            if down == 3 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        
        elif poss_diff == -2 and quarter == 1: 
            if down == 2 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 3 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.2
                first_down = 0.8
                touchdown = 0
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == 2 and (quarter == 2 or quarter == 3):
            if down == 3 and yds_to_go == '10' and red_zone == 0:
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            elif red_zone == 1 and yds_to_go == '10':
                yards_gained = 0.75
                first_down = 0
                touchdown = 0.25
            elif red_zone == 1 and yds_to_go == '5':
                yards_gained = 0.5
                first_down = 0
                touchdown = 0.5
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == -1 and (quarter == 2 or quarter == 3): 
            if down == 1 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif down == 3 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            elif red_zone == 1 and yds_to_go == 'long': 
                yards_gained = 0.25
                first_down = 0.5
                touchdown = 0.25
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == -2 and (quarter == 2 or quarter == 3): 
            if down == 1 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif down == 2 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 3 and yds_to_go == 'long' and red_zone == 0:
                yards_gained = 0.2
                first_down = 0.8
                touchdown = 0
            elif down == 3 and yds_to_go == '10' and red_zone == 0: 
                yards_gained = 0.75
                first_down = 0.25
                touchdown = 0
            elif red_zone == 1 and yds_to_go == '10': 
                yards_gained = 0.25
                first_down = 0
                touchdown = 0.75
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == 2 and quarter == 4: 
            if down == 3 and yds_to_go == '10' and red_zone == 0: 
                yards_gained = 0.25
                first_down = 0.75
                touchdown = 0
            elif red_zone == 1 and (yds_to_go == '10' or yds_to_go == 'long'): 
                yards_gained = 1
                first_down = 0
                touchdown = 0
            elif red_zone == 1 and yds_to_go == '5':
                yards_gained = 0.5
                first_down = 0
                touchdown = 0.5
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == 1 and quarter == 4: 
            if red_zone == 1 and yds_to_go == 'long': 
                yards_gained = 0.25
                first_down = 0.5
                touchdown = 0.25
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == -1 and quarter == 4: 
            if down == 1 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif red_zone == 1 and yds_to_go == '10':
                yards_gained = 0.25
                first_down = 0.25
                touchdown = 0.75
            else:
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        elif poss_diff == -2 and quarter == 4: 
            if down == 1 and (yds_to_go == '10' or yds_to_go == 'long') and red_zone == 0: 
                yards_gained = 1
                first_down = 0
                touchdown = 0
            elif down == 1 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif down == 2 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.9
                first_down = 0.1
                touchdown = 0
            elif down == 2 and yds_to_go == '10' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 2 and yds_to_go == '5' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 3 and yds_to_go == 'long' and red_zone == 0: 
                yards_gained = 0.8
                first_down = 0.2
                touchdown = 0
            elif down == 3 and yds_to_go == '10' and red_zone == 0: 
                yards_gained = 0.75
                first_down = 0.25
                touchdown = 0
            elif red_zone == 1 and (yds_to_go == '10' or yds_to_go == 'long'): 
                yards_gained = 0
                first_down = 0
                touchdown = 1
            else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go) 
        else: 
                yards_gained, first_down, touchdown = self.get_defaults(red_zone, down, yds_to_go)         
    
        reward_weights['yards_gained'] = yards_gained
        reward_weights['first_down'] = first_down
        reward_weights['touchdown'] = touchdown
        return reward_weights
    
    def pull(self, context, row, weights):
        '''
        Use a row in the dataframe to update average rewards given context
        '''
        
        #Current rewards for context
        curr_context_rewards = self.context_rewards[context]
        
        #decide to use highest reward or try something new
        p = np.random.rand()
        if p < self.e: 
            selected_arm_idx = random.randint(0, len(self.arms) - 1)
        else: 
            selected_arm_idx = np.argmax(curr_context_rewards)
        
        #Get reward for decision
        reward = self.get_reward(self.arms[selected_arm_idx], context, row, weights)

        #Update counts
        self.n += 1
        self.context_n[context][selected_arm_idx] += 1
        
        #Update average rewards
        self.mean_reward = self.mean_reward + (reward - self.mean_reward) / self.n
        curr_context_rewards[selected_arm_idx] = curr_context_rewards[selected_arm_idx] + (reward - curr_context_rewards[selected_arm_idx]) / self.context_n[context][selected_arm_idx]
    
    def get_reward(self, arm, context, row, weights):
        '''
        Find expected reward for chosen arm in context
        '''
        
        if arm == '21':
            yds_gained = np.random.normal(loc=5.87, scale = 8.55)
            touchdown_pct = np.random.normal(loc=0.03, scale = 0.18)
            first_down_pct = np.random.normal(loc=0.29, scale = 0.45)
        elif arm == '12':
            yds_gained = np.random.normal(loc=5.73, scale = 8.44)
            touchdown_pct = np.random.normal(loc=0.04, scale = 0.19)
            first_down_pct = np.random.normal(loc=0.29, scale = 0.45)
        elif arm == '11':
            yds_gained = np.random.normal(loc=6.30, scale = 8.64)
            touchdown_pct = np.random.normal(loc=0.04, scale = 0.20)
            first_down_pct = np.random.normal(loc=0.32, scale = 0.47)
        elif arm == '22':
            yds_gained = np.random.normal(loc=4.51, scale = 7.46)
            touchdown_pct = np.random.normal(loc=0.09, scale = 0.29)
            first_down_pct = np.random.normal(loc=0.33, scale = 0.47)
        elif arm == '13':
            yds_gained = np.random.normal(loc=5.05, scale = 8.30)
            touchdown_pct = np.random.normal(loc=0.06, scale = 0.25)
            first_down_pct = np.random.normal(loc=0.31, scale = 0.46)
        
        yds_gained_weight = weights['yards_gained']
        first_down_weight = weights['first_down']
        touchdown_weight = weights['touchdown']
        
        return (yds_gained_weight * (yds_gained / 100)) + (touchdown_weight * touchdown_pct) + (first_down_weight * first_down_pct)
    
    def train(self, df): 
        possible_plays = ['PASS_LEFT_SHORT', 'PASS_MIDDLE_SHORT', 'PASS_RIGHT_SHORT', 'PASS_LEFT_DEEP', 'PASS_MIDDLE_DEEP', 'PASS_RIGHT_DEEP', 'RUSH_LEFT', 'RUSH_MIDDLE', 'RUSH_RIGHT']
        #Iterate over dataframe
        for index, row in df.iterrows():
            
            for play in possible_plays: 
                #Pull context
                context_key, weights = self.get_context_key(row, play_override=play)

                #Initialize rewards if this is new context
                if context_key not in self.context_rewards.keys(): 
                    self.context_rewards[context_key] = np.zeros(len(self.arms))
                    self.context_n[context_key] = np.zeros(len(self.arms))

                #Update rewards
                self.pull(context_key, row, weights)

    def predict(self, df, evaluate=True, generate_output = False): 
        '''
        Predict play for each context based on max reward
        '''
        predictions = []
        unseen_context = 0
        
        #Iterate over dataframe
        for index, row in df.iterrows(): 
        
            if not generate_output: 

                #Pull context
                context_key = self.get_context_key(row, test=1)[0]

                #Pull relevant rewards vector
                if context_key in self.context_rewards.keys(): 
                    rewards = self.context_rewards[context_key]
                else: 
                    rewards = list(np.array([1])).append(np.zeros(len(self.arms) - 1))
                    unseen_context += 1



                #find highest rewards
                max_idx = np.argmax(rewards)
                #Select associated arm with highest rewards
                arm = self.arms[max_idx] 
                #Add to predictions
                predictions.append(arm)

                if evaluate: 
                    if row.formation == arm:
                        self.match_count += 1
                        if row.success: 
                            self.success_count += 1


            else: 
                row_predictions = []
                for i in range(1,4): 
                    context_key = self.get_context_key(row, test=1, play=i)[0]
                    
                    #Pull relevant rewards vector
                    if context_key in self.context_rewards.keys(): 
                        rewards = self.context_rewards[context_key]
                    else: 
                        rewards = list(np.array([1])) + list(np.zeros(len(self.arms) - 1))
                        # print(context_key)
                        unseen_context += 1

                    #find highest rewards
                    max_idx = np.argmax(rewards)
                    #Select associated arm with highest rewards
                    arm = self.arms[max_idx] 
                    #Add to predictions
                    row_predictions.append(arm)

                predictions.append(pd.Series(row_predictions))
                if evaluate: 
                    if row.formation == arm:
                        self.match_count += 1
                        if row.success: 
                            self.success_count += 1
                
                
        # print(unseen_context)    
        return predictions

    def get_context_key(self, row, test = 0, play_override=None, play=None): 
        yds_to_go = row.ydstogo_binned
        red_zone = row.red_zone
        poss_diff = row.poss_differential
        down = row.down
        quarter = row.qtr
        
        if play_override: 
            pred_play = play_override
        
        else: 
            if not play: 
                pred_play = row.pred_play
            else: 
                pred_play = row[['pred_play_' + str(play)]][0]
        
        if red_zone: 
            down = 0.0
        if quarter == 3.0: 
            quarter = 2.0
        
        context_key = str(quarter) + "_" + str(down) + "_" + str(yds_to_go) + "_" + str(poss_diff) + "_" + str(red_zone) + "_" + str(pred_play)
        
        if not test: 
            weights = self.get_reward_weights(quarter, down, yds_to_go, red_zone, poss_diff) 
        else: 
            weights = None
        
        return context_key, weights

    def get_evaluation_metrics(self):
        return self.match_count, self.success_count, self.success_count / self.match_count
    

In [385]:
formation_bandit = FormationContextualBandit(['21', '12', '11', '22', '13'], 0.1)
formation_bandit.train(train_df)
print(formation_bandit.mean_reward)

0.10949935854164027


In [386]:
# formation_predictions = formation_bandit.predict(test_df)
# print(formation_bandit.get_evaluation_metrics())

# Generate Output

In [387]:
!aws s3 cp s3://capstone-nfl-data/input_sheet.csv input_sheet.csv

download: s3://capstone-nfl-data/input_sheet.csv to ./input_sheet.csv


In [388]:
# output = {
#     'qtr' : [1.0] * 65 + [2.0] * 65 + [3.0] * 65 + [4.0] * 65,
#     'down' : (([1.0] * 2 + [2.0] * 3 + [3.0] * 3 + [4.0] * 2 + [0.0] * 3) * 5) * 4,
#     'poss_differential' : ([-2.0] * 13 + [-1.0] * 13 + [0.0] * 13 + [1.0] * 13 + [2.0] * 13) * 4,
#     'ydstogo_binned' : (['long', '5', 'long', '10', '5', 'long', '10', '5', '10', '5', 'long', '10', '5'] * 5) * 4,
#     'red_zone' : (([0] * 10 + [1] * 3) * 5) * 4
# }
output_df = pd.read_csv("input_sheet.csv")
display(output_df)

Unnamed: 0,qtr,down,poss_differential,ydstogo_binned,red_zone,title
0,1,1,-2,long,0,1st & XL (15-20)
1,1,1,-2,10,0,1st & 10
2,1,1,-2,5,0,1st & Short (1-5)
3,1,2,-2,long,0,2nd & XL (10+)
4,1,2,-2,10,0,2nd & Long (6-10)
...,...,...,...,...,...,...
220,4,4,2,5,0,4th & Short (1-5)
221,4,0,2,long,1,Red Zone (+20 - +11)
222,4,0,2,10,1,Red Zone (+10 - +6)
223,4,0,2,5,1,Red Zone (+3 - +5)


In [389]:
output_df[['qtr']] = output_df[['qtr']].astype(float)
output_df[['down']] = output_df[['down']].astype(float)
output_df[['poss_differential']] = output_df[['poss_differential']].astype(float)

In [390]:
output_df[['pred_play_1', 'pred_play_2', 'pred_play_3']] = bandit.predict(output_df, evaluate=False, generate_output=True)
display(output_df)

Unnamed: 0,qtr,down,poss_differential,ydstogo_binned,red_zone,title,pred_play_1,pred_play_2,pred_play_3
0,1.0,1.0,-2.0,long,0,1st & XL (15-20),PASS_RIGHT_SHORT,PASS_MIDDLE_DEEP,PASS_LEFT_DEEP
1,1.0,1.0,-2.0,10,0,1st & 10,PASS_LEFT_DEEP,PASS_MIDDLE_DEEP,PASS_RIGHT_DEEP
2,1.0,1.0,-2.0,5,0,1st & Short (1-5),RUSH_MIDDLE,PASS_MIDDLE_DEEP,RUSH_RIGHT
3,1.0,2.0,-2.0,long,0,2nd & XL (10+),RUSH_MIDDLE,PASS_MIDDLE_DEEP,PASS_RIGHT_SHORT
4,1.0,2.0,-2.0,10,0,2nd & Long (6-10),PASS_RIGHT_DEEP,PASS_RIGHT_SHORT,PASS_LEFT_SHORT
...,...,...,...,...,...,...,...,...,...
220,4.0,4.0,2.0,5,0,4th & Short (1-5),PASS_RIGHT_SHORT,RUSH_RIGHT,RUSH_MIDDLE
221,4.0,0.0,2.0,long,1,Red Zone (+20 - +11),PASS_RIGHT_DEEP,RUSH_LEFT,RUSH_RIGHT
222,4.0,0.0,2.0,10,1,Red Zone (+10 - +6),PASS_RIGHT_DEEP,PASS_MIDDLE_SHORT,PASS_RIGHT_SHORT
223,4.0,0.0,2.0,5,1,Red Zone (+3 - +5),PASS_LEFT_DEEP,PASS_RIGHT_SHORT,RUSH_LEFT


In [391]:
# print(bandit.context_rewards)

In [392]:
# df[['qtr', 'down', 'ydstogo_binned', 'red_zone', 'poss_differential']].dtypes

In [393]:
# output_df.dtypes

In [394]:
# for index, row in output_df.iterrows(): 
#     output = str(row[['pred_play_1']][0])
#     print(row.pred_play_1)
#     print(output)
#     break

In [395]:
output_df[['pred_formation_1', 'pred_formation_2', 'pred_formation_3']] = formation_bandit.predict(output_df, evaluate=False, generate_output=True)
display(output_df)

Unnamed: 0,qtr,down,poss_differential,ydstogo_binned,red_zone,title,pred_play_1,pred_play_2,pred_play_3,pred_formation_1,pred_formation_2,pred_formation_3
0,1.0,1.0,-2.0,long,0,1st & XL (15-20),PASS_RIGHT_SHORT,PASS_MIDDLE_DEEP,PASS_LEFT_DEEP,21,22,21
1,1.0,1.0,-2.0,10,0,1st & 10,PASS_LEFT_DEEP,PASS_MIDDLE_DEEP,PASS_RIGHT_DEEP,11,11,12
2,1.0,1.0,-2.0,5,0,1st & Short (1-5),RUSH_MIDDLE,PASS_MIDDLE_DEEP,RUSH_RIGHT,22,21,12
3,1.0,2.0,-2.0,long,0,2nd & XL (10+),RUSH_MIDDLE,PASS_MIDDLE_DEEP,PASS_RIGHT_SHORT,21,12,13
4,1.0,2.0,-2.0,10,0,2nd & Long (6-10),PASS_RIGHT_DEEP,PASS_RIGHT_SHORT,PASS_LEFT_SHORT,21,11,22
...,...,...,...,...,...,...,...,...,...,...,...,...
220,4.0,4.0,2.0,5,0,4th & Short (1-5),PASS_RIGHT_SHORT,RUSH_RIGHT,RUSH_MIDDLE,13,22,13
221,4.0,0.0,2.0,long,1,Red Zone (+20 - +11),PASS_RIGHT_DEEP,RUSH_LEFT,RUSH_RIGHT,21,21,11
222,4.0,0.0,2.0,10,1,Red Zone (+10 - +6),PASS_RIGHT_DEEP,PASS_MIDDLE_SHORT,PASS_RIGHT_SHORT,11,11,11
223,4.0,0.0,2.0,5,1,Red Zone (+3 - +5),PASS_LEFT_DEEP,PASS_RIGHT_SHORT,RUSH_LEFT,11,13,13


In [396]:
output_df.to_csv("output_sheet.csv")

# All Teams Output

In [397]:
teams = ['ARI', 'ATL', 'BAL', 'BUF', 'CAR', 'CHI', 'CIN', 'CLE', 'DAL', 'DEN','DET', 'GB', 'HOU', 'IND', 'JAX', 'KC', 'LA', 'LAC', 'LV', 'MIA', 'MIN', 'NE', 'NO', 'NYG', 'NYJ', 'PHI', 'PIT', 'SEA', 'SF', 'TB', 'TEN', 'WAS']

for team1 in teams: 
    for team2 in teams: 

        if team1 == team2: 
            continue
        else: 
            print("Generating " + team1 + " vs " + team2 + "...")
            
            team_output_df = pd.read_csv("input_sheet.csv")
            team_output_df[['qtr']] = team_output_df[['qtr']].astype(float)
            team_output_df[['down']] = team_output_df[['down']].astype(float)
            team_output_df[['poss_differential']] = team_output_df[['poss_differential']].astype(float)
            
            team_output_df[['pred_play_1', 'pred_play_2', 'pred_play_3']] = bandit.predict(team_output_df, evaluate=False, generate_output=True)
            team_output_df[['pred_formation_1', 'pred_formation_2', 'pred_formation_3']] = formation_bandit.predict(team_output_df, evaluate=False, generate_output=True)
            team_output_df.to_csv("outputs/" + team1 + "_" + team2 + ".csv")

Generating ARI vs ATL...
Generating ARI vs BAL...
Generating ARI vs BUF...
Generating ARI vs CAR...
Generating ARI vs CHI...
Generating ARI vs CIN...
Generating ARI vs CLE...
Generating ARI vs DAL...
Generating ARI vs DEN...
Generating ARI vs DET...
Generating ARI vs GB...
Generating ARI vs HOU...
Generating ARI vs IND...
Generating ARI vs JAX...
Generating ARI vs KC...
Generating ARI vs LA...
Generating ARI vs LAC...
Generating ARI vs LV...
Generating ARI vs MIA...
Generating ARI vs MIN...
Generating ARI vs NE...
Generating ARI vs NO...
Generating ARI vs NYG...
Generating ARI vs NYJ...
Generating ARI vs PHI...
Generating ARI vs PIT...
Generating ARI vs SEA...
Generating ARI vs SF...
Generating ARI vs TB...
Generating ARI vs TEN...
Generating ARI vs WAS...
Generating ATL vs ARI...
Generating ATL vs BAL...
Generating ATL vs BUF...
Generating ATL vs CAR...
Generating ATL vs CHI...
Generating ATL vs CIN...
Generating ATL vs CLE...
Generating ATL vs DAL...
Generating ATL vs DEN...
Generati

In [437]:
%%bash

bash_teams=('ARI' 'ATL' 'BAL' 'BUF' 'CAR' 'CHI' 'CIN' 'CLE' 'DAL' 'DEN' 'DET' 'GB' 'HOU' 'IND' 'JAX' 'KC' 'LA' 'LAC' 'LV' 'MIA' 'MIN' 'NE' 'NO' 'NYG' 'NYJ' 'PHI' 'PIT' 'SEA' 'SF' 'TB' 'TEN' 'WAS')

for i in "${bash_teams[@]}"
do
    for j in "${bash_teams[@]}"
    do
        if [ "$i" = "$j" ]
        then
            continue
        else
            aws s3 cp outputs/${i}_${j}.csv s3://capstone-nfl-data/outputs/${i}_${j}.csv
        fi
    done
done

Uploading ARI_ATL.csv ...
upload: outputs/ARI_ATL.csv to s3://capstone-nfl-data/outputs/ARI_ATL.csv
Uploading ARI_BAL.csv ...
upload: outputs/ARI_BAL.csv to s3://capstone-nfl-data/outputs/ARI_BAL.csv
Uploading ARI_BUF.csv ...
upload: outputs/ARI_BUF.csv to s3://capstone-nfl-data/outputs/ARI_BUF.csv
Uploading ARI_CAR.csv ...
upload: outputs/ARI_CAR.csv to s3://capstone-nfl-data/outputs/ARI_CAR.csv
Uploading ARI_CHI.csv ...
upload: outputs/ARI_CHI.csv to s3://capstone-nfl-data/outputs/ARI_CHI.csv
Uploading ARI_CIN.csv ...
upload: outputs/ARI_CIN.csv to s3://capstone-nfl-data/outputs/ARI_CIN.csv
Uploading ARI_CLE.csv ...
upload: outputs/ARI_CLE.csv to s3://capstone-nfl-data/outputs/ARI_CLE.csv
Uploading ARI_DAL.csv ...
upload: outputs/ARI_DAL.csv to s3://capstone-nfl-data/outputs/ARI_DAL.csv
Uploading ARI_DEN.csv ...
upload: outputs/ARI_DEN.csv to s3://capstone-nfl-data/outputs/ARI_DEN.csv
Uploading ARI_DET.csv ...
upload: outputs/ARI_DET.csv to s3://capstone-nfl-data/outputs/ARI_DET.csv


# Next Steps

* ~~Evaluation of bandit~~
* ~~Move context pulling to helper function~~
* ~~Create baseline bandit for formation calling~~
* ~~Bucket yards gained into finer buckets~~
* ~~Combine bandits into output production script~~
* ~~Find feature for Left, Middle, Right~~
* Combine with rewards models
* ~~Combine Quarter 2 & 3~~
* ~~Three outputs~~
* ~~Get Input from S3~~
* Publish output to S3
* Code Cleanup

##### 