In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import numpy as np
import glob

In [2]:
# Load every play-by-play CSV for the 2008-2017 seasons into one DataFrame (`data`).

# NOTE(review): machine-specific absolute path -- point this at your local copy of the data.
data_root = '/Users/ericherrmann/GitHub/CalFootball_4thDown/NCAA-Football-Data/'

# Only these columns are read from each CSV.
cols = ['gameId', 'driveIndex', 'year', 'week', 'clock', 'quarter', 'down', 'yardLine', 'distance', 'yardsGained',
        'offenseTeam', 'defenseTeam', 'description', 'type',
        'homeScore', 'awayScore', 'homeTeam', 'awayTeam']

temp = []  # one combined DataFrame per season
for year in range(2008, 2018):
    path = data_root + str(year) + 'PXP'
    # glob returns files in arbitrary order; sort so the row order is reproducible.
    allFiles = sorted(glob.glob(path + "/*.csv"))
    if not allFiles:
        # Skip the season instead of silently re-appending the previous season's frame.
        print("No CSV files found in " + path + "; skipping " + str(year))
        continue
    list_ = [pd.read_csv(file_, index_col=None, header=0, usecols=cols) for file_ in allFiles]
    # Concatenate once per season rather than once per file.
    subpxp = pd.concat(list_, ignore_index=True)
    temp.append(subpxp)

if not temp:
    raise FileNotFoundError("No play-by-play CSV files found under " + data_root)

# Concatenate once, after all seasons are loaded.
data = pd.concat(temp, ignore_index=True)

In [3]:
# Project-local helper module; its implementation is not visible in this notebook.
import cal_football as cf
# Derive the working play-by-play frame from the raw data.
# Presumably this adds the `yrdline100` column read on the next line -- TODO confirm in cal_football.
pxp = cf.extract_data(data)
# Boolean mask: True where yrdline100 is neither >= 100 nor <= 0.
# NOTE(review): despite the name "badyard", True marks the rows that are NOT out of range
# (and, if the column can hold NaN, NaN rows also come out True because both comparisons
# are False). The mask is not used in any cell visible here.
badyard = ~((pxp['yrdline100']>=100)|(pxp['yrdline100']<=0))
# Disabled experiment, kept for reference (note it refers to `ko_and_pos`, not `ko_and_poss`):
# possession_values = cf.get_possession_value(ko_and_pos)

In [4]:
# Split the play-by-play frame into the subsets each model is fitted on.
# All four extractors are cal_football helpers taking `pxp`; what they filter on is
# presumably what their names say -- TODO confirm in cal_football.
ko_and_poss = cf.extract_kickoffs_and_possessions(pxp)
fgs = cf.extract_field_goals(pxp)
third_downs = cf.extract_third_downs(pxp)
punts = cf.extract_punts(pxp)

# Fit the lookup models consumed by build_decision_maker below.
# cf.epv_model returns a pair: `ekv` (subtracted from the points of a score in the decision
# maker) and `epv_model` (indexed there by yrdline100).
ekv,epv_model = cf.epv_model(ko_and_poss)
# Indexed in the decision maker by (region, ydstogo).
# NOTE(review): fitted on third-down plays but used for fourth-down decisions -- presumably intentional; confirm.
exp_conv_pct_model = cf.exp_conv_pct_model(third_downs)
# Indexed in the decision maker by yrdline100.
exp_net_punt_dist_model = cf.exp_net_punt_dist_model(punts)
# Indexed in the decision maker by field-goal distance.
exp_fg_prob_model = cf.exp_fg_prob_model(fgs)

In [5]:
# Persist each fitted model as a pickle in the current working directory.
for _model, _pkl_path in (
    (epv_model, "./epv_model.pkl"),
    (exp_conv_pct_model, "./exp_conv_pct_model.pkl"),
    (exp_net_punt_dist_model, "./exp_net_punt_dist_model.pkl"),
    (exp_fg_prob_model, "./exp_fg_prob_model.pkl"),
):
    _model.to_pickle(_pkl_path)

In [6]:
def build_decision_maker(ekv, epv_model, exp_conv_pct_model, 
                         exp_net_punt_dist_model, exp_fg_prob_model):
    """Build a fourth-down decision function from the fitted lookup models.

    Parameters
    ----------
    ekv : number
        Value subtracted from the points of a score (6 for a touchdown, 3 for a
        field goal). Presumably the expected value of the ensuing kickoff -- confirm
        against cal_football.epv_model.
    epv_model : indexable by int yrdline100
        Expected point value of having the ball at a given yrdline100.
    exp_conv_pct_model : indexable by (region, ydstogo)
        Conversion probability; region is 'Inside10', '10to20' or 'Beyond20'.
    exp_net_punt_dist_model : indexable by int yrdline100
        Expected net punt distance from a given yrdline100.
    exp_fg_prob_model : indexable by int field-goal distance
        Probability of making a field goal of that distance.

    Returns
    -------
    callable
        ``decision_maker(yrdline100, ydstogo, print_message=False)``.
    """

    YRDSTOGO_CAP = 10  # Model restriction
    PUNT_LIM = 29      # Model restriction
    FG_LIM = 63        # Model restriction
    FG_OFFSET = 8      # Field goal placement distance (not including the 10 for the endzone)
    FG_FAIL_RESET = 0  # Where the ball is placed after a failed field goal.  If 0, at the spot.  
                       # Otherwise, at the spot or at the reset spot, whichever is better.
                       # NOTE(review): declared but not applied anywhere below; a missed kick is
                       # always valued at the spot of the kick.

    def decision_maker(yrdline100, ydstogo, print_message=False):
        """Pick the option with the highest expected value.

        Parameters
        ----------
        yrdline100 : int
            Yards from the goal line being attacked (yrdline100 == ydstogo is
            treated as goal-to-go).
        ydstogo : int
            Yards needed for a first down.
        print_message : bool
            If True, print one comma-separated row: yard line, go-for-it EV,
            punt EV, field-goal EV. Unavailable options print as '!go',
            '!punt' and '!kick'.

        Returns
        -------
        str or None
            'go for it', 'punt' or 'kick'; None if no option has a comparable
            expected value.
        """
        if yrdline100 < 10:
            region = 'Inside10'
        elif yrdline100 < 20:
            region = '10to20'
        else:
            region = 'Beyond20'

        # 1. Expected value of going for it
        if ydstogo < YRDSTOGO_CAP:
            # Value of failing (approximately turning over at same spot)
            conv_fail_yrdline100 = 100 - yrdline100
            conv_fail_epv = -epv_model[conv_fail_yrdline100]

            # Value of converting (approximately at the first down marker)
            if yrdline100 == ydstogo:
                # Goal-to-go: converting is a touchdown.
                conv_succ_epv = 6 - ekv
            else:
                first_down_yrdline100 = yrdline100 - ydstogo
                # NOTE(review): the "- 1" adjustment is carried over unchanged; its rationale is not documented.
                conv_succ_epv = epv_model[first_down_yrdline100] - 1

            # Conversion Pct
            exp_conv_pct = exp_conv_pct_model[(region, ydstogo)]

            # Overall expected value of going for it
            go_ev = exp_conv_pct * conv_succ_epv + (1 - exp_conv_pct) * conv_fail_epv
        else:
            # The conversion model does not cover this distance. Previously this path
            # multiplied by None and raised TypeError; now the option is simply unavailable.
            go_ev = None

        # 2. Expected value of punting
        if yrdline100 >= PUNT_LIM:
            # Expected next yardline, from the receiving team's point of view
            exp_net_punt_dist = exp_net_punt_dist_model[yrdline100]
            exp_yrdline100 = 100 - yrdline100 + int(exp_net_punt_dist)

            # Overall expected value of punting
            punt_ev = -epv_model[exp_yrdline100]
        else:
            punt_ev = None

        # 3. Expected value of kicking a field goal
        fg_dist = yrdline100 + 10 + FG_OFFSET
        if fg_dist <= FG_LIM:
            # Probability of success
            exp_fg_prob = exp_fg_prob_model[fg_dist]

            # Expected value of field goal success
            fg_succ_epv = 3 - ekv

            # EPV of field goal fail
            fg_fail_yrdline100 = 100 - yrdline100 - FG_OFFSET
            fg_fail_epv = -epv_model[fg_fail_yrdline100]

            # Overall expected value kicking
            fg_ev = fg_succ_epv * exp_fg_prob + fg_fail_epv * (1 - exp_fg_prob)
        else:
            fg_ev = None

        # Pick the best available option. Start from -inf (not an arbitrary -100) and
        # pre-bind `decision` so the return can never hit an unbound local.
        choices = [('go for it', go_ev), ('punt', punt_ev), ('kick', fg_ev)]
        decision = None
        max_val = float('-inf')
        for choice, ev in choices:
            if ev is None:
                continue
            if ev > max_val:
                max_val = ev
                decision = choice

        if print_message:
            out = " {:.2f}".format(yrdline100) + ","
            for ev, missing in ((go_ev, "!go"), (punt_ev, "!punt"), (fg_ev, "!kick")):
                if ev is not None:
                    out += " {:.2f}".format(ev) + ","
                else:
                    out += " " + missing + ","
            print(out)

        return decision

    return decision_maker

# Instantiate the decision function from the fitted models
decision_maker = build_decision_maker(
    ekv=ekv,
    epv_model=epv_model,
    exp_conv_pct_model=exp_conv_pct_model,
    exp_net_punt_dist_model=exp_net_punt_dist_model,
    exp_fg_prob_model=exp_fg_prob_model,
)

In [7]:
# Smoke test: print the expected-value row for each yard line at a fixed distance to go.
dist = 4  # user input here
rg_min = dist       # first yard line tried: goal-to-go from exactly `dist` yards out
rg_max = dist + 91  # exclusive upper bound passed to range()
for x in range(rg_min, rg_max):
    decision_maker(x, dist, print_message=True)

 4.00, 2.58, !punt, 3.31,
 5.00, 1.58, !punt, 3.25,
 6.00, 1.52, !punt, 3.19,
 7.00, 1.47, !punt, 3.13,
 8.00, 1.41, !punt, 3.07,
 9.00, 1.36, !punt, 3.00,
 10.00, 1.84, !punt, 2.93,
 11.00, 1.79, !punt, 2.87,
 12.00, 1.74, !punt, 2.80,
 13.00, 1.68, !punt, 2.72,
 14.00, 1.63, !punt, 2.64,
 15.00, 1.57, !punt, 2.56,
 16.00, 1.52, !punt, 2.48,
 17.00, 1.46, !punt, 2.40,
 18.00, 1.41, !punt, 2.31,
 19.00, 1.35, !punt, 2.22,
 20.00, 1.36, !punt, 2.13,
 21.00, 1.30, !punt, 2.03,
 22.00, 1.25, !punt, 1.93,
 23.00, 1.19, !punt, 1.83,
 24.00, 1.14, !punt, 1.73,
 25.00, 1.08, !punt, 1.62,
 26.00, 1.03, !punt, 1.50,
 27.00, 0.97, !punt, 1.38,
 28.00, 0.92, !punt, 1.26,
 29.00, 0.86, -0.90, 1.12,
 30.00, 0.81, -0.90, 0.97,
 31.00, 0.75, -0.84, 0.82,
 32.00, 0.70, -0.84, 0.66,
 33.00, 0.64, -0.79, 0.47,
 34.00, 0.59, -0.79, 0.28,
 35.00, 0.54, -0.73, 0.09,
 36.00, 0.48, -0.73, -0.11,
 37.00, 0.43, -0.68, -0.31,
 38.00, 0.37, -0.62, -0.51,
 39.00, 0.32, -0.62, -0.72,
 40.00, 0.26, -0.57, -0.93,
 4