In [75]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import numpy as np
import glob

In [76]:
# Columns read from every play-by-play CSV; all other columns are skipped.
cols = ['gameId', 'driveIndex', 'year', 'week', 'clock', 'quarter', 'down', 'yardLine', 'distance', 'yardsGained',
        'offenseTeam', 'defenseTeam', 'description', 'type',
        'homeScore', 'awayScore', 'homeTeam', 'awayTeam']

temp = []  # one concatenated DataFrame per season that actually has CSV files
for year in range(2008,2018):
    path ='/Users/ericherrmann/GitHub/CalFootball_4thDown/NCAA-Football-Data/' + str(year) + 'PXP' # use your path
    # glob returns files in arbitrary order; sort so the row order is reproducible.
    allFiles = sorted(glob.glob(path + "/*.csv"))
    if not allFiles:
        # Skip seasons with no files instead of re-appending the previous
        # season's frame (which would silently duplicate its rows).
        continue
    list_ = [pd.read_csv(file_, index_col=None, header=0, usecols=cols)
             for file_ in allFiles]
    # Concatenate once per season rather than once per file.
    subpxp = pd.concat(list_, ignore_index=True)
    temp.append(subpxp)

if not temp:
    raise FileNotFoundError(
        "No play-by-play CSV files were found; check the data path in this cell.")

# Concatenate once after the loop: all seasons stacked into a single frame.
data = pd.concat(temp, ignore_index=True)

In [77]:
import cal_football as cf

# Run the project's extraction step over the combined play-by-play data.
pxp = cf.extract_data(data)

# Mask that is True for rows whose yrdline100 is neither >= 100 nor <= 0.
# NOTE(review): the name suggests the opposite (bad yard lines), and the mask
# is not used in the cells visible here -- confirm intent before relying on it.
_yrdline100 = pxp['yrdline100']
_off_field = (_yrdline100 >= 100) | (_yrdline100 <= 0)
badyard = ~_off_field
# possession_values = cf.get_possession_value(ko_and_pos)

In [78]:
# Split the extracted play-by-play data into the subsets each model is fit on.
# The cf.extract_* helpers live in the cal_football module; what each one
# filters on is presumably indicated by its name -- verify against that module.
ko_and_poss = cf.extract_kickoffs_and_possessions(pxp)
fgs = cf.extract_field_goals(pxp)
third_downs = cf.extract_third_downs(pxp)
punts = cf.extract_punts(pxp)

# Fit one model per subset. cf.epv_model returns a pair, unpacked here as
# (ekv, epv_model); the other helpers return a single model object each.
# NOTE(review): the conversion model is fit on third-down plays but is used
# below for fourth-down decisions -- presumably as a proxy; confirm.
ekv,epv_model = cf.epv_model(ko_and_poss)
exp_conv_pct_model = cf.exp_conv_pct_model(third_downs)
exp_net_punt_dist_model = cf.exp_net_punt_dist_model(punts)
exp_fg_prob_model = cf.exp_fg_prob_model(fgs)

In [79]:
# Persist every fitted model to a pickle file in the current working directory.
for _model, _pickle_path in (
    (epv_model, "./epv_model.pkl"),
    (exp_conv_pct_model, "./exp_conv_pct_model.pkl"),
    (exp_net_punt_dist_model, "./exp_net_punt_dist_model.pkl"),
    (exp_fg_prob_model, "./exp_fg_prob_model.pkl"),
):
    _model.to_pickle(_pickle_path)

In [80]:
def build_decision_maker(ekv, epv_model, exp_conv_pct_model,
                         exp_net_punt_dist_model, exp_fg_prob_model):
    """Build a fourth-down decision function from the fitted models.

    Parameters
    ----------
    ekv : number
        Value subtracted from the points of a score (6 for a touchdown,
        3 for a field goal) to get the net value of scoring.
    epv_model : indexable by yrdline100
        Expected point value of having the ball at a given yrdline100.
    exp_conv_pct_model : indexable by (region, ydstogo)
        Conversion probability, where region is one of 'Inside10',
        '10to20' or 'Beyond20'.
    exp_net_punt_dist_model : indexable by yrdline100
        Expected net punt distance from a given yrdline100.
    exp_fg_prob_model : indexable by field-goal distance
        Probability of making a field goal of that distance.

    Returns
    -------
    callable
        ``decision_maker(yrdline100, ydstogo, print_message=False)``, which
        returns 'go for it', 'punt' or 'kick' (or None if no option could
        be evaluated).
    """

    YRDSTOGO_CAP = 10  # Model restriction
    PUNT_LIM = 29      # Model restriction
    FG_LIM = 63        # Model restriction
    FG_OFFSET = 8      # Field goal placement distance (not including the 10 for the endzone)
    FG_FAIL_RESET = 0  # Where the ball is placed after a failed field goal.  If 0, at the spot.
                       # Otherwise, at the spot or at the reset spot, whichever is better.
                       # NOTE(review): this constant is not applied anywhere below yet.

    def decision_maker(yrdline100, ydstogo, print_message=False):
        """Return the option with the highest expected value.

        Options outside a model's range are given an expected value of None
        and skipped: going for it when ydstogo >= YRDSTOGO_CAP, punting when
        yrdline100 < PUNT_LIM, kicking when the kick would exceed FG_LIM.
        With print_message=True the expected values are also printed.
        """
        if yrdline100 < 10:
            region = 'Inside10'
        elif yrdline100 < 20:
            region = '10to20'
        else:
            region = 'Beyond20'

        # 1. Expected value of going for it
        if ydstogo < YRDSTOGO_CAP:
            # Value of failing (approximately turning over at same spot)
            conv_fail_yrdline100 = 100 - yrdline100
            conv_fail_epv = -epv_model[conv_fail_yrdline100]

            # Value of converting (approximately at the first down marker)
            if yrdline100 == ydstogo:
                # Converting from here is a touchdown.
                conv_succ_epv = 6 - ekv
            else:
                first_down_yrdline100 = yrdline100 - ydstogo
                # NOTE(review): the "- 1" adjustment is undocumented -- confirm intent.
                conv_succ_epv = epv_model[first_down_yrdline100] - 1

            # Conversion Pct
            exp_conv_pct = exp_conv_pct_model[(region, ydstogo)]

            # Overall expected value of going for it
            go_ev = exp_conv_pct * conv_succ_epv + (1 - exp_conv_pct) * conv_fail_epv
        else:
            # Outside the conversion model's range: treat the option as
            # unavailable instead of multiplying by None (TypeError).
            go_ev = None

        # 2. Expected value of punting
        if yrdline100 >= PUNT_LIM:
            # Expected next yardline
            exp_net_punt_dist = exp_net_punt_dist_model[yrdline100]
            exp_yrdline100 = 100 - yrdline100 + int(exp_net_punt_dist)

            # Overall expected value of punting
            punt_ev = -epv_model[exp_yrdline100]
        else:
            punt_ev = None

        # 3. Expected value of kicking a field goal
        fg_dist = yrdline100 + 10 + FG_OFFSET
        if fg_dist <= FG_LIM:
            # Probability of success
            exp_fg_prob = exp_fg_prob_model[fg_dist]

            # Expected value of field goal success
            fg_succ_epv = 3 - ekv

            # EPV of field goal fail
            fg_fail_yrdline100 = 100 - yrdline100 - FG_OFFSET
            fg_fail_epv = -epv_model[fg_fail_yrdline100]

            # Overall expected value kicking
            fg_ev = fg_succ_epv * exp_fg_prob + fg_fail_epv * (1 - exp_fg_prob)
        else:
            fg_ev = None

        # Pick the available option with the highest expected value; on ties
        # the earlier entry in this list wins.
        choices = [('go for it', go_ev), ('punt', punt_ev), ('kick', fg_ev)]
        decision = None
        max_val = float('-inf')
        for choice, ev in choices:
            if ev is None:
                continue
            if ev > max_val:
                max_val = ev
                decision = choice

        if print_message:
            print("Expected Values at {:.2f}".format(yrdline100) + " yard line and dist to go {:.2f}".format(ydstogo))
            if go_ev is not None:
                print("Go for it: {:.2f}".format(go_ev))
            else:
                print("Go for it: TOO FAR TO CONVERT")
            if punt_ev is not None:
                print("Punt: {:.2f}".format(punt_ev))
            else:
                print("Punt: TOO CLOSE TO PUNT")
            if fg_ev is not None:
                print("FG: {:.2f}".format(fg_ev))
            else:
                print("FG: TOO FAR TO KICK")
            print()

        return decision

    return decision_maker

# Instantiate the decision function from the models fitted above.
decision_maker = build_decision_maker(
    ekv=ekv,
    epv_model=epv_model,
    exp_conv_pct_model=exp_conv_pct_model,
    exp_net_punt_dist_model=exp_net_punt_dist_model,
    exp_fg_prob_model=exp_fg_prob_model,
)

In [81]:
# Smoke test: print the expected values for one distance-to-go at every
# yard line from `dist` up to (but not including) 101 - dist.
dist = 5  # user input here
rg_min, rg_max = 0 + dist, 101 - dist
for x in range(rg_min, rg_max):
    decision_maker(x, dist, print_message=True)

Expected Values at 5.00 yard line and dist to go 5.00
Go for it: 2.22
Punt: TOO CLOSE TO PUNT
FG: 3.25

Expected Values at 6.00 yard line and dist to go 5.00
Go for it: 1.35
Punt: TOO CLOSE TO PUNT
FG: 3.19

Expected Values at 7.00 yard line and dist to go 5.00
Go for it: 1.29
Punt: TOO CLOSE TO PUNT
FG: 3.13

Expected Values at 8.00 yard line and dist to go 5.00
Go for it: 1.24
Punt: TOO CLOSE TO PUNT
FG: 3.07

Expected Values at 9.00 yard line and dist to go 5.00
Go for it: 1.18
Punt: TOO CLOSE TO PUNT
FG: 3.00

Expected Values at 10.00 yard line and dist to go 5.00
Go for it: 1.67
Punt: TOO CLOSE TO PUNT
FG: 2.93

Expected Values at 11.00 yard line and dist to go 5.00
Go for it: 1.62
Punt: TOO CLOSE TO PUNT
FG: 2.87

Expected Values at 12.00 yard line and dist to go 5.00
Go for it: 1.56
Punt: TOO CLOSE TO PUNT
FG: 2.80

Expected Values at 13.00 yard line and dist to go 5.00
Go for it: 1.51
Punt: TOO CLOSE TO PUNT
FG: 2.72

Expected Values at 14.00 yard line and dist to go 5.00
Go fo