## Make predictions on upcoming Tournament

In [14]:
import pandas as pd
import numpy as np

from config import majors, metrics, feat_cols, target_col
from utils import create_rolling_agg_features_by_golfer

import pickle

In [20]:
# Build the feature rows for the upcoming event, score them with the saved
# random forest model, and attach the predictions to the pre-tournament data.

# Reading in the historical event and pre tournament data
event_df = pd.read_csv('../data/historical_event_data.csv')
year = 2021
event_name = 'bmw_championship'
pre_tourney = pd.read_csv(f'../data/{year}_{event_name}_pre_tourney_snapshot.csv')
# The snapshot stores the odds under 'bet365'; rename to 'close_odds' so the
# column lines up with the historical frame it is concatenated onto below
# (assumes the historical data uses 'close_odds' -- TODO confirm).
pre_tourney = pre_tourney.rename(columns={'bet365': 'close_odds'})

# Appending the new event onto the historical data.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# with the same `sort` flag is the drop-in equivalent (original index labels
# are kept, exactly as append did).
new_event_cols = ['dg_id', 'player_name', 'event_name', 'close_odds']
new_event_df = pd.concat([event_df, pre_tourney[new_event_cols]], sort=False)
new_event_df['major'] = np.where(new_event_df.event_name.isin(majors), 1, 0)

# Creating the lagging aggregate features: one column per (field, period)
# pair described in the `metrics` config.
for metric in metrics:
    field = metric.get('field')
    agg = metric.get('agg')
    for period in metric.get('periods'):
        col = f"{field}_in_prev_{period}_events"
        new_event_df[col] = create_rolling_agg_features_by_golfer(new_event_df, field, 1, period, agg)

# Filtering for only the new event: the appended rows carry no `fin_num`.
# NOTE(review): this assumes every historical row has a non-null fin_num --
# confirm, otherwise historical rows leak into the prediction set.
# `.copy()` makes the subset an independent frame so the `sg_preds`
# assignment below does not raise SettingWithCopyWarning.
new_event_df = new_event_df.loc[pd.isnull(new_event_df.fin_num)].copy()

# Subsetting feature columns
X_test = new_event_df[feat_cols]

# Loading in the random forest model.
# The context manager closes the file handle once the model is loaded.
# NOTE: pickle can execute arbitrary code on load -- only load model files
# produced by this project.
with open("../models/rf_model.pkl", "rb") as model_file:
    rf = pickle.load(model_file)

# Making Predictions
new_event_df['sg_preds'] = rf.predict(X_test)

# Merging in the prediction data with the pre-tourney data
final_df = pre_tourney.merge(new_event_df[['dg_id', 'sg_preds']], how='left', on='dg_id')

### Strokes gained predictions relative to DraftKings Salary
The `rnk_diff` field indicates whether the predictions suggest we should consider betting on a golfer or staying away from him.
- If `rnk_diff` is extremely **positive**, it means his predicted strokes gained ranks much better than his salary, thus he may be a good bet
- If `rnk_diff` is extremely **negative**, it means his predicted strokes gained ranks much lower than his salary, thus we should not bet on him

In [58]:
# Compare each golfer's salary rank with his predicted strokes-gained rank.
# Both ranks are descending (rank 1 = highest salary / highest prediction),
# so rnk_diff > 0 means the prediction ranks better than the salary does.
final_df['dk_salary'] = final_df['dk_salary'].astype(int)

salary_rank = final_df['dk_salary'].rank(ascending=False)
prediction_rank = final_df['sg_preds'].rank(ascending=False)

final_df['dk_salary_rnk'] = salary_rank
final_df['sg_preds_rnk'] = prediction_rank
final_df['rnk_diff'] = salary_rank - prediction_rank

# Columns shown in the summary tables
summary_cols = [
    'dg_id',
    'player_name',
    'dk_salary',
    'sg_preds',
    'dk_salary_rnk',
    'sg_preds_rnk',
    'rnk_diff',
]
print_df = final_df[summary_cols]

print('Top 10 Golfers with positive Rnk_Diff')
(
    print_df
    .sort_values('rnk_diff', ascending=False)
    .reset_index(drop=True)
    .head(10)
)

Top 10 Golfers with positive Rnk_Diff


Unnamed: 0,dg_id,player_name,dk_salary,sg_preds,dk_salary_rnk,sg_preds_rnk,rnk_diff
0,12808,"Grillo, Emiliano",6500,0.972802,53.5,44.0,9.5
1,5768,"Hoffman, Charley",7000,1.173585,43.5,34.0,9.5
2,7108,"Casey, Paul",8200,3.317274,23.0,15.0,8.0
3,19841,"DeChambeau, Bryson",9300,4.183491,12.0,6.0,6.0
4,13900,"Lowry, Shane",7900,3.133729,26.0,20.0,6.0
5,5665,"Cink, Stewart",6600,0.865861,51.5,46.0,5.5
6,14013,"Tringale, Cameron",7500,1.770246,33.5,28.0,5.5
7,1547,"Mickelson, Phil",6300,0.759738,58.0,53.0,5.0
8,15575,"Hoge, Tom",6000,-0.176624,67.5,63.0,4.5
9,7452,"Na, Kevin",7600,2.193589,31.5,27.0,4.5


In [57]:
# Golfers whose predicted strokes gained rank furthest below their salary rank
print('Bottom 10 Golfers with negative Rnk_Diff')
(
    print_df
    .sort_values(by='rnk_diff')  # ascending is the default
    .reset_index(drop=True)
    .head(10)
)

Bottom 10 Golfers with negative Rnk_Diff


Unnamed: 0,dg_id,player_name,dk_salary,sg_preds,dk_salary_rnk,sg_preds_rnk,rnk_diff
0,13508,"Vegas, Jhonattan",7100,0.779403,41.5,51.0,-9.5
1,18841,"Hovland, Viktor",9100,3.090208,14.0,22.0,-8.0
2,14735,"Ortiz, Carlos",6600,0.384003,51.5,59.0,-7.5
3,17550,"Van Rooyen, Erik",7500,1.013864,33.5,41.0,-7.5
4,7649,"Leishman, Marc",6700,0.508251,49.5,57.0,-7.5
5,13562,"Matsuyama, Hideki",8800,2.966931,17.0,24.0,-7.0
6,16243,"Koepka, Brooks",10200,3.654264,6.0,11.0,-5.0
7,13470,"Kizzire, Patton",6200,-0.422936,61.0,66.0,-5.0
8,12337,"Kokrak, Jason",7800,1.204271,27.5,32.0,-4.5
9,7672,"Oosthuizen, Louis",9600,3.596776,9.0,13.0,-4.0
