In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.integrate import nquad
from keras.models import load_model
import time


# All punt plays of interest
plays = pd.read_csv('../data/plays.csv').query('specialTeamsPlayType == "Punt"')
punt_plays = plays[plays['specialTeamsResult']!='Non-Special Teams Result'][['gameId','playId','kickerId']]


# Load all the models we will use to make predictions
outcome_models = [] # Models for predicting probabilities for Fair Catch, Return, No Field outcomes
field_models = [] # Models for predicting whether a punt is Fielded or not
returnlen_models = [] # Models for predicting the return length of a punt
for i in range(5):
    outcome_models.append(load_model(f'./Models/ThreeOutcomeModel{i}.h5'))
    field_models.append(load_model(f'./Models/FieldingModel{i}.h5'))
    returnlen_models.append(load_model(f'./Models/ReturnModel{i}.h5'))
    
mdn_model = load_model('./Models/MDN_BivariateNorm.h5',compile=False) # Model for finding location of bouncing ball

# Normalization statistics for input features for each model
field_norm_stats = pd.read_csv('../ReducedData/training_statistics.csv').values # Outcome/Field models
return_norm_stats = pd.read_csv('../ReducedData/return_training_statistics.csv').values # Return models
bounce_norm_stats = pd.read_csv('../ReducedData/bounce_training_statistics.csv').values # Return models


# Raw feature data for punts
data = pd.read_csv('../ReducedData/ExtraPuntLocations.csv') # Untrained punts
trained_data = pd.read_csv('../ReducedData/BC_AllPlayers.csv') # Raw data for the punt training data
fb_land = pd.read_csv('../ReducedData/fb_land.csv') # Actual landing spot for all punts in the previous two files

# The landing data is not corrected so all punts go to the right (larger X), so we must rotate the field
fb_land_corrected_dir = fb_land.copy()
fb_land_corrected_dir.loc[fb_land_corrected_dir['playDirection']=='left','x'] = 120-fb_land_corrected_dir.loc[fb_land_corrected_dir['playDirection']=='left','x']
fb_land_corrected_dir.loc[fb_land_corrected_dir['playDirection']=='left','y'] = (160/3-fb_land_corrected_dir.loc[fb_land_corrected_dir['playDirection']=='left','y']).round(2)

# Add landing location of the football and hangtime for the punt in untrained data
data = data.merge(fb_land_corrected_dir[['gameId','playId','x','y','hangTime']]).rename({'x':'x_land','y':'y_land'},axis=1)

# Combine all the raw feature information for the punts
all_punt_data = pd.concat([trained_data,data],axis=0).drop(columns=['Type','specialTeamsResult'])

In [2]:
# Calculate the magnitude of the velocity of a punt given its distance travelled and hangtime
def calc_const(dx, dy, ht):
    g = 32.1/3 # Gravitational constant in yards/s**2
    dist = np.sqrt(dx**2 + dy**2)
    v_const = (dist/ht)**2 + (g*ht/2)**2
    return np.sqrt(v_const)

# Calculate the hangtime of a punt with a given velocity (magnitude) and the distance travelled
# Return two solutions corresponding to low-kick and high-kick solution
def calc_time(del_x, v_const):
    g = 32.1/3 # Gravitational constant in yards/s**2
    p1 = 2/g**2*v_const**2
    p2 = 2/g**2*np.sqrt(v_const**4 - g**2 * del_x**2)
    return [np.sqrt(p1-p2),np.sqrt(p1+p2)]

# Calculate the average velocity of an NFL punt from the punter's location and the landing spot
dx = trained_data['x_punt'] - trained_data['x_land']
dy = trained_data['y_punt'] - trained_data['y_land']
ht = trained_data['hangTime']
v_const = calc_const(dx,dy,ht)
vc_avg = v_const.describe()['mean']

In [6]:
# Select a year and punter to evaluate
eval_year_punts = all_punt_data[all_punt_data['gameId']//1e6>=2020]
num_punts_kicker = eval_year_punts['nflId_punt'].value_counts()

min_punts = 16
punts_to_eval = eval_year_punts[eval_year_punts['nflId_punt'].isin(num_punts_kicker[num_punts_kicker>=min_punts].index.values)]

# Raw feature data for all punts of a particular punter
kickerId_to_eval = 50975

# Either evaluate all hypothetical punts by one particular punter, or evaluate the expected value of a real punt.
#kicker_punts = punts_to_eval.query('nflId_punt == @kickerId_to_eval').sort_values(['gameId','playId'])
kicker_punts = trained_data[trained_data['x_punt']>45].sample()

# Consider a grid of possible landing positions for each punt
x_loc = np.arange(11,110,1)
y_loc = np.arange(1/6,160/3, 1)
x_grid, y_grid = np.meshgrid(x_loc,y_loc)
xy_mesh = pd.DataFrame(zip(x_grid.flatten(),y_grid.round(2).flatten()),columns=['x_land','y_land'])

# Apply grid to each punt, keeping only punts that move 20-yards past the original line of scrimmage
potential_land = kicker_punts.drop(columns=['x_land','y_land','hangTime']).merge(xy_mesh, how='cross')
potential_land = potential_land[potential_land['x_land'] >= potential_land['x_fb'] + 20]
potential_land['dist'] = np.sqrt((potential_land['x_land']-potential_land['x_punt'])**2 + (potential_land['y_land']-potential_land['y_punt'])**2)

# Evaluate each punt-location based on what a league-average punter would achieve given the situation
# Note, for hangtime calculation, we assume the larger hangtime solution
potential_land = potential_land[potential_land['dist'] <= 3*vc_avg**2/32.1]
_,potential_land['hangTime'] = calc_time(potential_land['dist'],vc_avg)

In [8]:
# Several Functions that will be useful in converting the raw features of a punt to the specific features of each model


# Calculate the array of coordinate differences between two sets of teams
def diff_coord(team1, team2):
    t1 = team1.stack().reset_index()
    t2 = team2.stack().reset_index()
    t1xt2 = t1.merge(t2,on='level_0')
    t1xt2['diff'] = t1xt2['0_y'] - t1xt2['0_x']
    return t1xt2.set_index(['level_0','level_1_y','level_1_x'])['diff']


# Calculate the array of coordinate differences between a particular team and some coordinate
def diff_team_from_loc(team, coord):
    return -team.sub(coord,axis=0)


# Standardize features given the normalization statistics
def standardize_features(tensor, norm_stats):
    norm_tensor = np.copy(tensor)
    for i in range(tensor.shape[-1]):
        norm_tensor[...,i] = (norm_tensor[...,i] - norm_stats[i,0])/norm_stats[i,1]
    return norm_tensor


# Isolate each teams x and y coordinates
def team_coords(df):
    punt_x = df.iloc[:,5:24:2]
    punt_y = df.iloc[:,6:25:2]
    ret_x = df.iloc[:,25:46:2]
    ret_y = df.iloc[:,26:47:2]
    return [punt_x,punt_y,ret_x,ret_y]


# Convert the raw data into the specific features used in the outcome_models and field_models
# Predict probability of a punt being fielded, fair caught, returned, or not fielded
def convert_to_field_ft(df):
    features = np.zeros((len(df),11,10,9))
    punt_x,punt_y,ret_x,ret_y = team_coords(df)
    
    features[:,:,:,0] = diff_coord(punt_x,ret_x).unstack().values.reshape(len(df),11,10)
    features[:,:,:,1] = diff_coord(punt_y,ret_y).unstack().values.reshape(len(df),11,10)
    
    features[:,:,:,2] = np.tile((diff_team_from_loc(ret_x, df['x_punt']).values)[:,:,np.newaxis],10)
    features[:,:,:,3] = np.tile((diff_team_from_loc(ret_y, df['y_punt']).values)[:,:,np.newaxis],10)
    
    features[:,:,:,4] = np.tile((diff_team_from_loc(ret_x, df['x_land']).values)[:,:,np.newaxis],10)
    features[:,:,:,5] = np.tile((diff_team_from_loc(ret_y, df['y_land']).values)[:,:,np.newaxis],10)
    
    features[:,:,:,6] = np.tile(df['x_fb'].values,(10,11,1)).T
    features[:,:,:,7] = np.tile(df['y_fb'].values,(10,11,1)).T
    
    features[:,:,:,8] = np.tile(df['hangTime'].values,(10,11,1)).T
    
    return standardize_features(features, field_norm_stats)


# Convert the raw data into the specific features used in the returnlen_models
# Length of return predictions
def convert_to_return_ft(df):
    features = np.zeros((len(df),10,11,7))
    punt_x,punt_y,ret_x,ret_y = team_coords(df)
    
    features[:,:,:,0] = diff_coord(ret_x,punt_x).unstack().values.reshape(len(df),10,11)
    features[:,:,:,1] = diff_coord(ret_y,punt_y).unstack().values.reshape(len(df),10,11)
    
    features[:,:,:,2] = np.tile((diff_team_from_loc(ret_x, df['x_punt']).values)[:,np.newaxis],(1,10,1))
    features[:,:,:,3] = np.tile((diff_team_from_loc(ret_y, df['y_punt']).values)[:,np.newaxis],(1,10,1))
    
    features[:,:,:,4] = np.tile((diff_team_from_loc(punt_x, df['x_land']).values)[:,:,np.newaxis],11)
    features[:,:,:,5] = np.tile((diff_team_from_loc(punt_y, df['y_land']).values)[:,:,np.newaxis],11)
    
    features[:,:,:,6] = np.tile(df['hangTime'].values,(11,10,1)).T
    
    return standardize_features(features, return_norm_stats)


# Convert the raw data into the specific features used in the mdn_model
# Bounce location statistic predictions
def convert_to_bounce_ft(df):
    features = np.zeros((len(df),5))
    
    dx = (df['x_land']-df['x_punt']).values
    dy = (df['y_land']-df['y_punt']).values
    v_c = calc_const(dx, dy, df['hangTime'].values)
    dist = np.sqrt(dx**2 + dy**2)
    
    features[:,0] = df['x_fb'].values
    features[:,1] = (df['y_land'] - df['y_punt']).values
    features[:,2] = df['hangTime'].values
    features[:,3] = v_c
    features[:,4] = dist/(v_c*df['hangTime'].values)
    
    return standardize_features(features, bounce_norm_stats)


# Return the three multi-dim arrays to be fed into models for predictions
# outcome_models/field_models , returnlen_models , mdn_model
def get_model_features(df):
    return [convert_to_field_ft(df),convert_to_return_ft(df),convert_to_bounce_ft(df)]

In [9]:
# Convert punts into features and get model predictions for each
field_ft, return_ft, bounce_ft = get_model_features(potential_land)

num_models = 5
field_pred = np.zeros((field_ft.shape[0], 4*num_models))
return_pred = np.zeros((return_ft.shape[0], num_models))
bounce_pred = mdn_model.predict(bounce_ft)

In [10]:
for i in range(num_models):
    # Each model in outcome_models will write to the array in the form (P_FairCatch, P_NoField, P_Return)
    # Each model in field_models will write to the array  1-P_Field
    # The field prediction array will have each pair of models recording (P_FC, P_NF, P_R, 1-P_F)
    field_pred[:,4*i:4*i+3] = outcome_models[i].predict(field_ft)
    field_pred[:,4*i+3] = field_models[i].predict(field_ft).reshape(-1,)
    
    # Return length will just record each model's prediction
    return_pred[:,i] = returnlen_models[i].predict(return_ft).reshape(-1,)

    

# For the fielding probabilities, we take an ensemble average over the model predictions
ens_avg_pred = np.zeros((field_pred.shape[0],4))

# Average probability that the punt will not be fielded
ens_avg_pred[:,0] = 1/10*(field_pred[:,1::4].sum(axis=1) + field_pred[:,3::4].sum(axis=1))

# Average probability that the punt will be fielded
# i.e. [0] + [1] = 1.00
ens_avg_pred[:,1] = 1/10*((1-field_pred[:,3::4]).sum(axis=1) + field_pred[:,0::4].sum(axis=1) + field_pred[:,2::4].sum(axis=1))

# Average probability for a Fair Catch (2) and a return (3)
# i.e. [2] + [3] = [1]
ens_avg_pred[:,2] = ens_avg_pred[:,1] * field_pred[:,0::4].sum(axis=1)/(field_pred[:,0::4].sum(axis=1)+field_pred[:,2::4].sum(axis=1))
ens_avg_pred[:,3] = ens_avg_pred[:,1] * field_pred[:,2::4].sum(axis=1)/(field_pred[:,0::4].sum(axis=1)+field_pred[:,2::4].sum(axis=1))

# Combine fielding and return predictions for each punt into a DataFrame
ens_df = pd.concat([potential_land[['gameId','playId','x_fb','y_fb','x_land','y_land']].reset_index(drop=True),pd.DataFrame(ens_avg_pred, columns=['NF_avg','Field_avg','FC_avg','R_avg'])],axis=1)
ens_df['R_len'] = np.sinh(return_pred).mean(axis=1)

In [17]:
# Change of basis from x-y coordinates into those defined by the direction of the punt (punter -> landing location)
cos = ((potential_land['x_land'] - potential_land['x_punt'])/potential_land['dist']).values
sin = ((potential_land['y_land'] - potential_land['y_punt'])/potential_land['dist']).values

# Mean predicted bounce length in the punt-direction coordinates
mu_v = bounce_pred[:,0]
mu_vp = bounce_pred[:,1]

# Roughly approximate the expected x,y values of the bounces
ens_df['x_exp_bounce'] = mu_v*cos - mu_vp*sin
ens_df['y_exp_bounce'] = mu_v*sin + mu_v*cos

# Additional statistics about the punt and bounce
ens_df['x_to_ez'] = 110 - ens_df['x_land']
ens_df['y_to_oob'] = (80/3-np.abs(80/3 - ens_df['y_land'])) * np.sign(ens_df['y_land']-80/3)
ens_df['y_rat'] = ens_df['y_to_oob']/ens_df['y_exp_bounce']
ens_df['x_exp_notb'] = ens_df['x_exp_bounce']*ens_df['y_rat']
ens_df.loc[(ens_df['y_rat']>1)|(ens_df['y_rat']<0),'x_exp_notb'] = ens_df['x_exp_bounce']

In [42]:
# Bounce model outputs statistics of the bivariate normal distributions
# Functions to be used in calculating the probabilities and expected values

# Standardize the random variables in the exponential of distributions
def standardize(x,mu,sigma):
    return (x-mu)/sigma

# Given a set of distribution parameters, return the bivariate normal distribution function
def get_bivnorm(mx,my, sx,sy, r):
    def biv_norm(x,y):
        x_stand = standardize(x,mx,sx)
        y_stand = standardize(y,my,sy)
        return 1/(2*np.pi*sx*sy*np.sqrt(1-r**2)) * np.exp(-1/(2*(1-r**2))*(x_stand**2+y_stand**2+2*r*x_stand*y_stand))
    return biv_norm

# Rotate on-field coordinates to punt-direction coordinates before acquiring PDF
def get_rot_pdf(mx,my, sx,sy, r, ctheta,stheta):
    pdf = get_bivnorm(mx,my, sx,sy, r)
    def rotated_biv_norm(x,y):
        x_rot = x*ctheta + y*stheta
        y_rot = y*ctheta - x*stheta
        return pdf(x_rot,y_rot)
    return rotated_biv_norm

# Calculate the expected value of x-field coordinate
def get_rot_ExpX(mx,my, sx,sy, r, ctheta,stheta):
    pdf = get_bivnorm(mx,my, sx,sy, r)
    def rotated_expX(x,y):
        x_rot = x*ctheta + y*stheta
        y_rot = y*ctheta - x*stheta
        return x * pdf(x_rot,y_rot)
    return rotated_expX

# Calculate expected value of x-field coordinate when the ball bounces out of bound
# Assuming a straight path from the bounce location to the final location, the x-value is truncated when the ball leaves play
def get_rot_truncatedExpX(mx,my, sx,sy, r, ctheta,stheta, y_thres):
    pdf = get_bivnorm(mx,my, sx,sy, r)
    def rotated_truncexpX(x,y):
        x_rot = x*ctheta + y*stheta
        y_rot = y*ctheta - x*stheta
        return x*y_thres/y * pdf(x_rot,y_rot)
    return rotated_truncexpX

# Returns y-field integration bounds depending on whether the punt is closer to the y = 0 or y = 160/3 sidelines
def y_oob(y_togo):
    ranges = [[-np.inf,y_togo],[y_togo,np.inf]]
    if y_togo>0:
        return ranges
    return [ranges[1],ranges[0]]

# Return x-field integration bounds, for integrating both touchbacks and out of bounds
def get_XBounds(x_ez,y_oob):
    slope = x_ez/y_oob
    def tb_xbound(y):
        x_minbound = slope*(y-y_oob)+x_ez
        return [x_minbound, np.inf]

    def oob_xbound(y):
        x_upbound = slope*(y-y_oob)+x_ez
        return [x_ez, x_upbound]
    return [tb_xbound,oob_xbound]

In [43]:
ens_df['p_tb'] = 0
ens_df['ex_ntb'] = (1-ens_df['p_tb'])*ens_df['x_exp_notb']

# The Field Value is the absolute yardline of the average position of the ball after the punt
# Fair Catch : Value -> x_land
# Return : Value -> x_land - return_length
# No Field : Touchback Value -> 90 = x_land + (90 - x_land)
# No Field : Other, Value -> x_land + x_bounce
ens_df['Field_Val'] = ens_df['x_land'] - ens_df['R_len']*ens_df['R_avg'] + ens_df['NF_avg']*(ens_df['p_tb']*(90-ens_df['x_land']) + ens_df['ex_ntb'])

ens_df['has_calcd'] = False


# Split the punts into two categories, based on how quickly the algorithm can calculate the true value
fb_split = 55
max_FV = ens_df.sort_values('x_fb',ascending=False).drop_duplicates(subset=['gameId','playId']).sort_values(['gameId','playId'])
midfield_punts = ens_df.reset_index().merge(max_FV.query('x_fb < @fb_split')[['gameId','playId']]).set_index('index')
pin_punts = ens_df.reset_index().merge(max_FV.query('x_fb >= @fb_split')[['gameId','playId']]).set_index('index')

print(midfield_punts.drop_duplicates(subset=['gameId','playId']).shape, pin_punts.drop_duplicates(subset=['gameId','playId']).shape)

(16, 21) (4, 21)


In [44]:
# For each punt, loop over highest field value locations and perform the full integration of expected values
# If the Field Value remains maximal, the location is the true maximum field value
# This works since we over-approximating the Field Value with earlier approximation
# When the full evaluation remains maximal, or we arrive at a point which we have already calculated, we are done

games_to_eval = midfield_punts.copy()
optimal_values = np.zeros((games_to_eval.drop_duplicates(subset=['gameId','playId']).shape[0],6))
# index, x_land, y_land, Field Value, P_TB, ex_ntb

start_all = time.perf_counter()
for i,((g,p), land_stats) in enumerate(games_to_eval.groupby(['gameId','playId'])):
    print(i,g,p)
    highest_newVal = 0
    
    start = time.perf_counter()
    converge = False
    while not converge:
        best_id = land_stats.sort_values('Field_Val',ascending=False).head(1).index[0]
        
        xl = land_stats.loc[best_id,'x_land']
        yl = land_stats.loc[best_id,'y_land']
    
        x_to_ez = land_stats.loc[best_id,'x_to_ez']
        y_to_oob = land_stats.loc[best_id, 'y_to_oob']
        fv_old = land_stats.loc[best_id,'Field_Val']
        
        # Only evaluate the integral if we have not explicitly integrated this point already
        if not land_stats.loc[best_id, 'has_calcd']:
            y_bounds = y_oob(y_to_oob)
            x_bound_tb, x_bound_oob = get_XBounds(x_to_ez, y_to_oob)

            # Get relevant functionals
            rotated_PDF = get_rot_pdf(*bounce_pred[best_id], cos[best_id], sin[best_id])
            rotated_ExpX = get_rot_ExpX(*bounce_pred[best_id], cos[best_id], sin[best_id])
            rot_trunc_ExpX = get_rot_truncatedExpX(*bounce_pred[best_id], cos[best_id], sin[best_id],y_to_oob)
            
            # Evaluate the integrals
            # Note, it is faster to split the conditional integration bounds due to the decrease in function calls
            ex_ip = nquad(rotated_ExpX, [[-110,x_to_ez],y_bounds[0]])[0]
            p_tb_new = nquad(rotated_PDF, [[x_to_ez,np.inf],y_bounds[0]])[0] + nquad(rotated_PDF, [x_bound_tb,y_bounds[1]])[0]
            ex_oob = nquad(rot_trunc_ExpX,[[-110,x_to_ez],y_bounds[1]])[0] + nquad(rot_trunc_ExpX,[x_bound_oob,y_bounds[1]])[0]

            # When evaluating a point near the sideline, can update touchback probabilities in the middle of the field
            # Unless already explicitly evaluated those locations
            # The logic is identical to the previous step
            # Note, this is only a useful speed-up when punting near the endzone
            interior_locations = land_stats.loc[
                (land_stats['x_land']==xl) \
                & (~land_stats['has_calcd']) \
                & (land_stats['y_land'].between(*sorted([80/3, land_stats.loc[best_id, 'y_land']]),inclusive='both'))].index.values

            # Update the integrated values for locations closer to the middle of the field than the current location
            land_stats.loc[interior_locations, 'p_tb'] = p_tb_new

            # E(X|no TB) * P(no TB) unsure whether it should generally increase as we move towards from the middle of the field
            # Therefore, do not update this value except when exactly calculating it
            land_stats.loc[best_id, 'ex_ntb'] = (ex_ip + ex_oob)
            
            # Update expected value of field position for any interior point
            land_stats.loc[interior_locations,'Field_Val'] = land_stats.loc[interior_locations,'x_land'] \
                    - land_stats.loc[interior_locations,'R_avg']*land_stats.loc[interior_locations,'R_len'] \
                    + land_stats.loc[interior_locations,'NF_avg']*( \
                                land_stats.loc[interior_locations,'p_tb']*(90-land_stats.loc[interior_locations,'x_land']) \
                                + land_stats.loc[interior_locations,'ex_ntb'])

        land_stats.loc[best_id, 'has_calcd'] = True
        fv = land_stats.loc[best_id,'Field_Val']
        
        # Print message when we have a local maxima in checked values
        # As Old - New goes to zeros, the current loop is closer to finishing
        if fv > highest_newVal:
            highest_newVal = fv
            print(f'Field Value ({best_id}): {fv_old:.2f} -> {fv:.2f}')

        if best_id == land_stats.sort_values('Field_Val',ascending=False).head(1).index[0]:
            end = time.perf_counter()
            print(f'{xl:.2f}, {yl:.2f}, {fv:.2f}')
            optimal_values[i, 0] = best_id
            optimal_values[i, 1] = xl
            optimal_values[i, 2] = yl
            optimal_values[i, 3] = fv
            optimal_values[i, 4] = land_stats.loc[best_id,'p_tb']
            optimal_values[i, 5] = land_stats.loc[best_id,'ex_ntb']
            converge = True

    print(f'Eval Time: {end - start:.1f}')
    print()

end_all = time.perf_counter()
print(f'Total Eval Time: {end_all - start_all:.1f}')

0 2020110810 775
Field Value (62): 89.80 -> 89.01
Field Value (259): 89.18 -> 89.03
93.00, 7.17, 89.03
Eval Time: 11.5

1 2020110810 3645
Field Value (3047): 104.07 -> 97.72
Field Value (3285): 103.41 -> 98.26
Field Value (3417): 102.31 -> 98.80
Field Value (3449): 100.63 -> 98.85
Field Value (3481): 100.40 -> 99.00
Field Value (3513): 100.13 -> 99.14
Field Value (3545): 99.78 -> 99.17
101.00, 51.17, 99.17
Eval Time: 162.7

2 2020112601 3413
Field Value (4764): 93.79 -> 93.19
Field Value (5110): 93.64 -> 93.20
97.00, 42.17, 93.20
Eval Time: 17.1

3 2020120800 293
Field Value (5951): 98.33 -> 96.03
Field Value (5780): 97.79 -> 96.11
Field Value (5647): 97.45 -> 96.21
99.00, 5.17, 96.21
Eval Time: 61.4

4 2020121302 2370
Field Value (8381): 104.43 -> 97.76
Field Value (7786): 104.41 -> 97.83
Field Value (7581): 103.64 -> 98.41
Field Value (7448): 102.82 -> 99.12
Field Value (7351): 102.01 -> 99.49
101.00, 2.17, 99.49
Eval Time: 166.5

5 2020122013 268
Field Value (10782): 94.89 -> 93.93


In [45]:
sideline_locs = pin_punts.reset_index().query('x_land >= 100').sort_values('y_land').groupby(['gameId','playId','x_land']).agg(['first', 'last']).stack().reset_index().drop(columns='level_3').set_index('index').copy()

start_all = time.perf_counter()
start = time.perf_counter()
for i, cur_id in enumerate(sideline_locs.index.values):
    if i%50 == 0:
        end = time.perf_counter()
        print(f'{(i/len(sideline_locs)):.2f} Time: {end-start:.2f}')
        start = time.perf_counter()

    x_to_ez = sideline_locs.loc[cur_id,'x_to_ez']
    y_to_oob = sideline_locs.loc[cur_id, 'y_to_oob']

    y_bounds = y_oob(y_to_oob)
    x_bound_tb, x_bound_oob = get_XBounds(x_to_ez, y_to_oob)

    # Get relevant functionals
    rotated_PDF = get_rot_pdf(*bounce_pred[cur_id], cos[cur_id], sin[cur_id])
    rotated_ExpX = get_rot_ExpX(*bounce_pred[cur_id], cos[cur_id], sin[cur_id])
    rot_trunc_ExpX = get_rot_truncatedExpX(*bounce_pred[cur_id], cos[cur_id], sin[cur_id],y_to_oob)

    # Evaluate the integrals
    # Note, it is faster to split the conditional integration bounds due to the decrease in function calls
    ex_ip = nquad(rotated_ExpX, [[-110,x_to_ez],y_bounds[0]])[0]
    p_tb_new = nquad(rotated_PDF, [[x_to_ez,np.inf],y_bounds[0]])[0] + nquad(rotated_PDF, [x_bound_tb,y_bounds[1]])[0]
    ex_oob = nquad(rot_trunc_ExpX,[[-110,x_to_ez],y_bounds[1]])[0] + nquad(rot_trunc_ExpX,[x_bound_oob,y_bounds[1]])[0]

    # Update the integrated values for locations closer to the middle of the field than the current location
    sideline_locs.loc[cur_id, 'p_tb'] = p_tb_new

    # E(X|no TB) * P(no TB) unsure whether it should generally increase as we move towards from the middle of the field
    # Therefore, do not update this value except when exactly calculating it
    sideline_locs.loc[cur_id, 'ex_ntb'] = (ex_ip + ex_oob)
    
    
    sideline_locs.loc[cur_id, 'has_calcd'] = True
    
sideline_locs['Field_Val'] = sideline_locs['x_land'] - sideline_locs['R_avg']*sideline_locs['R_len'] + sideline_locs['NF_avg']*(sideline_locs['p_tb']*(90-sideline_locs['x_land']) + sideline_locs['ex_ntb'])

end_all = time.perf_counter()
print(f'Total Eval Time: {end_all - start_all:.1f}')

0.00 Time: 0.00
0.62 Time: 90.96
Total Eval Time: 136.0


In [46]:
midfield_locs = midfield_punts.copy()
midfield_locs.loc[optimal_values[:,0],['Field_Val','p_tb','ex_ntb']] = optimal_values[:,3:]
midfield_locs.loc[optimal_values[:,0], 'has_calcd'] = True
check_low = midfield_locs.loc[optimal_values[:,0]]

check_high = sideline_locs.sort_values('Field_Val',ascending=False).drop_duplicates(subset=['gameId','playId']).sort_values(['gameId','playId'])

pd.concat([check_low,check_high]).to_csv(f'../PredictedData/OptimalLocation/OptLoc_{kickerId_to_eval}.csv',index=True)