## Make predictions on upcoming Tournament

In [1]:
import pandas as pd
import numpy as np

from config import majors, metrics, feat_cols, target_col
from utils import create_rolling_agg_features_by_golfer

import pickle

In [2]:
# Build strokes-gained predictions for one upcoming tournament:
# load history + the pre-tournament snapshot, stack them, compute lagging
# per-golfer features, score the new event's rows with the saved model and
# merge the predictions back onto the snapshot.

# Reading in the historical event and pre tournament data
event_df = pd.read_csv('../data/historical_event_data.csv')
year = 2021
event_name = 'tour_championship'
# The snapshot's bet365 odds column is renamed to the 'close_odds' name used below.
pre_tourney = (
    pd.read_csv(f'../data/{year}_{event_name}_pre_tourney_snapshot.csv')
    .rename(columns={'bet365': 'close_odds'})
)

#Appending the new event onto the historical data
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# The index is deliberately left as-is (no ignore_index) to match the
# behaviour of the original append call.
new_event_df = pd.concat(
    [event_df, pre_tourney[['dg_id', 'player_name', 'event_name', 'close_odds']]],
    sort=False,
)
new_event_df['major'] = np.where(new_event_df.event_name.isin(majors), 1, 0)

#Creating the lagging aggregate features
# Each entry of `metrics` supplies a source field, an aggregation and a list
# of look-back periods; one feature column is produced per (field, period).
for metric in metrics:
    field = metric.get('field')
    agg = metric.get('agg')
    for period in metric.get('periods'):
        col = f"{field}_in_prev_{period}_events"
        new_event_df[col] = create_rolling_agg_features_by_golfer(new_event_df, field, 1, period, agg)

#Filtering for only the new event
# Rows with no fin_num are kept (the snapshot columns appended above do not
# include fin_num). .copy() makes the subset an independent frame so the
# 'sg_preds' assignment below does not raise SettingWithCopyWarning.
new_event_df = new_event_df.loc[pd.isnull(new_event_df.fin_num)].copy()

#Subsetting feature columns
# NOTE(review): missing features are filled with 75 - presumably this matches
# the imputation used when the model was trained; confirm.
X_test = new_event_df[feat_cols].fillna(75)

#Loading in the random forest model
# The context manager closes the file handle once the model is loaded.
# NOTE: pickle can execute arbitrary code - only load model files you trust.
with open("../models/rf_model.pkl", "rb") as model_file:
    rf = pickle.load(model_file)

#Making Predictions
new_event_df['sg_preds'] = rf.predict(X_test)

#Merging in the prediction data with the pre-tourney data
final_df = pre_tourney.merge(new_event_df[['dg_id','sg_preds']], how='left', on='dg_id')

### Strokes gained predictions relative to DraftKings Salary
The `rnk_diff` field indicates whether the predictions suggest we should consider betting on a golfer or staying away from him.
- If `rnk_diff` is extremely **positive**, it means his predicted strokes gained ranks much better than his salary, thus he may be a good bet
- If `rnk_diff` is extremely **negative**, it means his predicted strokes gained ranks much lower than his salary, thus we should not bet on him

In [3]:
# Surfacing Potential Golfers to bet on or stay away from
# Built as a single assign/query chain so a fresh frame is produced rather
# than assigning columns onto the result of .query(), which raises
# SettingWithCopyWarning.
final_df = (
    final_df
    .assign(dk_salary=lambda df: df['dk_salary'].astype(int))
    # Keep only golfers with a positive DraftKings salary.
    .query("dk_salary > 0")
    .assign(
        # Rank 1 = highest salary / highest predicted strokes gained.
        dk_salary_rnk=lambda df: df['dk_salary'].rank(ascending=False),
        sg_preds_rnk=lambda df: df['sg_preds'].rank(ascending=False),
        # Positive = predicted strokes gained ranks better than salary does.
        rnk_diff=lambda df: df['dk_salary_rnk'] - df['sg_preds_rnk'],
    )
)

# Columns shown in the summary tables (also used by the following cell).
print_df = final_df[['dg_id', 'player_name', 'dk_salary', 'sg_preds',
                     'dk_salary_rnk', 'sg_preds_rnk', 'rnk_diff']]

print('Top 10 Golfers with positive Rnk_Diff')
print_df.sort_values('rnk_diff', ascending=False).reset_index(drop=True).head(10)

Top 10 Golfers with positive Rnk_Diff


Unnamed: 0,dg_id,player_name,dk_salary,sg_preds,dk_salary_rnk,sg_preds_rnk,rnk_diff
0,17488,"Im, Sungjae",7300,4.214691,18.0,10.0,8.0
1,18417,"Scheffler, Scottie",6800,4.141973,19.0,12.0,7.0
2,18238,"Ancer, Abraham",9600,5.338575,9.0,3.0,6.0
3,18841,"Hovland, Viktor",8000,4.253895,15.0,9.0,6.0
4,7672,"Oosthuizen, Louis",7600,4.21457,17.0,11.0,6.0
5,22085,"Morikawa, Collin",8300,4.300189,14.0,8.0,6.0
6,19895,"Schauffele, Xander",8900,4.343199,11.0,6.0,5.0
7,14139,"Thomas, Justin",11000,5.35654,5.0,1.0,4.0
8,15856,"Smith, Cameron",10100,4.912073,8.0,4.0,4.0
9,7452,"Na, Kevin",6000,2.879928,23.0,19.0,4.0


In [4]:
# Golfers whose predicted strokes-gained rank trails their salary rank the most:
# sort by rnk_diff from most negative upward and show the first ten rows.
print('Bottom 10 Golfers with negative Rnk_Diff')
lowest_rnk_diff_first = print_df.sort_values(by='rnk_diff').reset_index(drop=True)
lowest_rnk_diff_first.head(10)

Bottom 10 Golfers with negative Rnk_Diff


Unnamed: 0,dg_id,player_name,dk_salary,sg_preds,dk_salary_rnk,sg_preds_rnk,rnk_diff
0,15466,"Cantlay, Patrick",13400,3.52257,1.0,18.0,-17.0
1,12422,"Johnson, Dustin",10700,3.751289,6.0,17.0,-11.0
2,19841,"DeChambeau, Bryson",12300,4.119099,3.0,13.0,-10.0
3,14636,"Spieth, Jordan",10400,3.97433,7.0,14.0,-7.0
4,13562,"Matsuyama, Hideki",6400,0.622401,21.0,26.0,-5.0
5,16243,"Koepka, Brooks",7800,2.807302,16.0,20.0,-4.0
6,19483,"Burns, Sam",8500,3.754448,13.0,16.0,-3.0
7,12337,"Kokrak, Jason",6600,1.388072,20.0,23.0,-3.0
8,19195,"Rahm, Jon",13000,4.818204,2.0,5.0,-3.0
9,14577,"English, Harris",8700,3.835979,12.0,15.0,-3.0
