***

# Imports

In [1]:
# Imports all of the libraries that will be utilized throughout this project
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from datetime import datetime, timedelta

from io import StringIO
import requests

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import Ridge, Lasso
from sklearn.decomposition import PCA
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor


# Enables view of all columns when viewing Pandas DataFrames
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Private API Key utilized in the paramater variables below to extract data
api_key = ''


# itables import below
# from itables import init_notebook_mode
# init_notebook_mode(all_interactive=True)
# from itables import show

In [2]:
# Retrieves main set of data
api_url = "https://feeds.datagolf.com/historical-raw-data/rounds"

df_list = []

for year in range (2017, 2025):
    params = {
        'tour': 'pga',
        'event_id': 'all',
        'year': year,
        'file_format': 'csv',
        'key': api_key
    }

    response = requests.get(api_url, params=params)

    if response.status_code == 200:
        # Use StringIO to convert the response content into a file-like object for read_csv
        data = StringIO(response.text)
        df = pd.read_csv(data)
        df_list.append(df)
        print(f"Data added for year: {year}")
    else:
        print("Failed to fetch data from the API. Status code:", response.status_code)

tn_df = pd.concat(df_list, ignore_index=True)

print("\nData Retrieved!")

Data added for year: 2017
Data added for year: 2018
Data added for year: 2019
Data added for year: 2020
Data added for year: 2021
Data added for year: 2022
Data added for year: 2023
Data added for year: 2024

Data Retrieved!


In [3]:
# Retrieves main set of data
api_url = "https://feeds.datagolf.com/historical-raw-data/rounds"

df_list = []

for year in range (2024, 2025):
    params = {
        'tour': 'pga',
        'event_id': 'all',
        'year': year,
        'file_format': 'csv',
        'key': api_key
    }

    response = requests.get(api_url, params=params)

    if response.status_code == 200:
        # Use StringIO to convert the response content into a file-like object for read_csv
        data = StringIO(response.text)
        df = pd.read_csv(data)
        df_list.append(df)
        print(f"Data added for year: {year}")
    else:
        print("Failed to fetch data from the API. Status code:", response.status_code)

tt_df = pd.concat(df_list, ignore_index=True)

print("\nData Retrieved!")

Data added for year: 2024

Data Retrieved!


In [4]:
tn_df['year'].value_counts()

year
2019    19049
2023    18717
2022    18486
2021    18236
2018    18168
2017    18161
2020    14652
2024     4938
Name: count, dtype: int64

In [5]:
tt_df['year'].value_counts()

year
2024    4938
Name: count, dtype: int64

In [6]:
api_url = "https://feeds.datagolf.com/get-player-list"

params = {
    'file_format': 'csv',
    'key': api_key
}

response = requests.get(api_url, params=params)

if response.status_code == 200:
    # Use StringIO to convert the response content into a file-like object for read_csv
    data = StringIO(response.text)
    player_df = pd.read_csv(data)
else:
    print("Failed to fetch data from the API. Status code:", response.status_code)

player_df.to_csv('rank_df.csv', index=False)

In [7]:
api_url = 'https://feeds.datagolf.com/field-updates'

params = {
    'tour': 'pga',
    'file_format': 'csv',
    'key': api_key
}

response = requests.get(api_url, params=params)

if response.status_code == 200:
    # Use StringIO to convert the response content into a file-like object for read_csv
    data = StringIO(response.text)
    field_df = pd.read_csv(data)
else:
    print("Failed to fetch data from the API. Status code:", response.status_code)

field_df.to_csv('rank_df.csv', index=False)


***

# EDA

In [37]:
train_df = tn_df.copy()
test_df = tt_df.copy()

## Train Data Preprocessing

In [38]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 130407 entries, 0 to 130406
Data columns (total 30 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   tour             130407 non-null  object 
 1   year             130407 non-null  int64  
 2   season           130407 non-null  int64  
 3   event_completed  130407 non-null  object 
 4   event_name       130407 non-null  object 
 5   event_id         130407 non-null  int64  
 6   player_name      130407 non-null  object 
 7   dg_id            130407 non-null  int64  
 8   fin_text         130407 non-null  object 
 9   round_num        130407 non-null  int64  
 10  course_name      130407 non-null  object 
 11  course_num       130407 non-null  int64  
 12  course_par       130407 non-null  int64  
 13  start_hole       130407 non-null  int64  
 14  teetime          130407 non-null  object 
 15  round_score      129947 non-null  float64
 16  sg_putt          103238 non-null  floa

In [39]:
# convert dates
train_df['event_completed'] = train_df['event_completed'].astype('datetime64[ns]')

In [40]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 130407 entries, 0 to 130406
Data columns (total 30 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   tour             130407 non-null  object        
 1   year             130407 non-null  int64         
 2   season           130407 non-null  int64         
 3   event_completed  130407 non-null  datetime64[ns]
 4   event_name       130407 non-null  object        
 5   event_id         130407 non-null  int64         
 6   player_name      130407 non-null  object        
 7   dg_id            130407 non-null  int64         
 8   fin_text         130407 non-null  object        
 9   round_num        130407 non-null  int64         
 10  course_name      130407 non-null  object        
 11  course_num       130407 non-null  int64         
 12  course_par       130407 non-null  int64         
 13  start_hole       130407 non-null  int64         
 14  teetime          130

In [41]:
# Remove nulls
train_df = train_df.dropna(axis=0)

In [42]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 98625 entries, 0 to 130406
Data columns (total 30 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   tour             98625 non-null  object        
 1   year             98625 non-null  int64         
 2   season           98625 non-null  int64         
 3   event_completed  98625 non-null  datetime64[ns]
 4   event_name       98625 non-null  object        
 5   event_id         98625 non-null  int64         
 6   player_name      98625 non-null  object        
 7   dg_id            98625 non-null  int64         
 8   fin_text         98625 non-null  object        
 9   round_num        98625 non-null  int64         
 10  course_name      98625 non-null  object        
 11  course_num       98625 non-null  int64         
 12  course_par       98625 non-null  int64         
 13  start_hole       98625 non-null  int64         
 14  teetime          98625 non-null  object   

In [43]:
def round_date(train_df):
    if train_df['round_num'] == 1:
        return train_df['event_completed'] - pd.Timedelta(days=3)
    elif train_df['round_num'] == 2:
        return train_df['event_completed'] - pd.Timedelta(days=2)
    elif train_df['round_num'] == 3:
        return train_df['event_completed'] - pd.Timedelta(days=1)
    else:
        return train_df['event_completed']


train_df['round_completed'] = train_df.apply(round_date,axis=1)

In [44]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 98625 entries, 0 to 130406
Data columns (total 31 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   tour             98625 non-null  object        
 1   year             98625 non-null  int64         
 2   season           98625 non-null  int64         
 3   event_completed  98625 non-null  datetime64[ns]
 4   event_name       98625 non-null  object        
 5   event_id         98625 non-null  int64         
 6   player_name      98625 non-null  object        
 7   dg_id            98625 non-null  int64         
 8   fin_text         98625 non-null  object        
 9   round_num        98625 non-null  int64         
 10  course_name      98625 non-null  object        
 11  course_num       98625 non-null  int64         
 12  course_par       98625 non-null  int64         
 13  start_hole       98625 non-null  int64         
 14  teetime          98625 non-null  object   

In [45]:
train_df.head()

Unnamed: 0,tour,year,season,event_completed,event_name,event_id,player_name,dg_id,fin_text,round_num,course_name,course_num,course_par,start_hole,teetime,round_score,sg_putt,sg_arg,sg_app,sg_ott,sg_t2g,sg_total,driving_dist,driving_acc,gir,scrambling,prox_rgh,prox_fw,great_shots,poor_shots,round_completed
0,pga,2017,2017,2017-01-08,SBS Tournament of Champions,16,"Herman, Jim",12846,T12,1,Plantation Course at Kapalua,656,73,1,11:20am,67.0,1.248,-0.605,2.353,0.347,2.095,3.344,292.8,0.867,0.889,0.667,27.051,21.601,3.0,2.0,2017-01-05
1,pga,2017,2017,2017-01-08,SBS Tournament of Champions,16,"Gomez, Fabian",8571,20,1,Plantation Course at Kapalua,656,73,1,11:30am,70.0,1.832,0.357,-1.338,-0.507,-1.489,0.344,281.6,0.733,0.722,0.625,70.45,40.227,4.0,1.0,2017-01-05
2,pga,2017,2017,2017-01-08,SBS Tournament of Champions,16,"Knox, Russell",13831,T17,4,Plantation Course at Kapalua,656,73,1,9:35am,73.0,0.885,0.378,-1.844,-1.294,-2.76,-1.875,288.2,0.533,0.778,0.5,28.339,33.636,1.0,6.0,2017-01-08
4,pga,2017,2017,2017-01-08,SBS Tournament of Champions,16,"Knox, Russell",13831,T17,2,Plantation Course at Kapalua,656,73,1,11:10am,69.0,1.211,1.257,-1.412,0.037,-0.118,1.094,282.6,0.933,0.667,1.0,26.804,39.292,2.0,4.0,2017-01-06
5,pga,2017,2017,2017-01-08,SBS Tournament of Champions,16,"Knox, Russell",13831,T17,1,Plantation Course at Kapalua,656,73,1,12:30pm,71.0,-0.126,-0.373,1.473,-1.629,-0.53,-0.656,280.7,0.733,0.833,1.0,23.548,31.604,3.0,3.0,2017-01-05


In [46]:
train_df['month'] = train_df['round_completed'].dt.month
train_df['day'] = train_df['round_completed'].dt.day

In [47]:
train_df.tail()

Unnamed: 0,tour,year,season,event_completed,event_name,event_id,player_name,dg_id,fin_text,round_num,course_name,course_num,course_par,start_hole,teetime,round_score,sg_putt,sg_arg,sg_app,sg_ott,sg_t2g,sg_total,driving_dist,driving_acc,gir,scrambling,prox_rgh,prox_fw,great_shots,poor_shots,round_completed,month,day
130402,pga,2024,2024,2024-03-24,Valspar Championship,475,"Tosti, Alejandro",17032,T75,4,Innisbrook Resort (Copperhead Course),665,71,1,8:07AM,77.0,-4.623,0.296,-0.968,-0.361,-1.032,-5.649,297.6,0.462,0.444,0.462,57.422,32.144,3.0,7.0,2024-03-24,3,24
130403,pga,2024,2024,2024-03-24,Valspar Championship,475,"Hoffman, Charley",5768,CUT,1,Innisbrook Resort (Copperhead Course),665,71,10,2:02PM,72.0,-1.867,0.61,1.317,-0.988,0.94,-0.928,285.0,0.231,0.5,0.727,42.518,23.829,3.0,5.0,2024-03-21,3,21
130404,pga,2024,2024,2024-03-24,Valspar Championship,475,"Hoffman, Charley",5768,CUT,2,Innisbrook Resort (Copperhead Course),665,71,1,8:52AM,71.0,-1.663,0.543,1.001,0.983,2.527,0.864,284.4,0.615,0.667,0.857,60.479,34.941,4.0,5.0,2024-03-22,3,22
130405,pga,2024,2024,2024-03-24,Valspar Championship,475,"Coody, Parker",27870,T67,4,Innisbrook Resort (Copperhead Course),665,71,1,8:16AM,72.0,0.073,-0.283,-0.552,0.106,-0.728,-0.649,277.9,0.462,0.444,0.667,34.259,45.341,2.0,1.0,2024-03-24,3,24
130406,pga,2024,2024,2024-03-24,Valspar Championship,475,"Pendrith, Taylor",17780,CUT,2,Innisbrook Resort (Copperhead Course),665,71,10,7:51AM,72.0,1.527,-0.595,-1.355,0.287,-1.663,-0.136,300.6,0.462,0.556,0.636,54.621,32.916,5.0,5.0,2024-03-22,3,22


In [48]:
train_df['fin_num'] = train_df['fin_text'].str.extract('(\d+)')
train_df.loc[train_df['fin_num'].isna(), 'fin_num'] = '0'
train_df['fin_num'] = train_df['fin_num'].astype(int)
train_df = train_df.drop(['fin_text'], axis=1)

In [49]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 98625 entries, 0 to 130406
Data columns (total 33 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   tour             98625 non-null  object        
 1   year             98625 non-null  int64         
 2   season           98625 non-null  int64         
 3   event_completed  98625 non-null  datetime64[ns]
 4   event_name       98625 non-null  object        
 5   event_id         98625 non-null  int64         
 6   player_name      98625 non-null  object        
 7   dg_id            98625 non-null  int64         
 8   round_num        98625 non-null  int64         
 9   course_name      98625 non-null  object        
 10  course_num       98625 non-null  int64         
 11  course_par       98625 non-null  int64         
 12  start_hole       98625 non-null  int64         
 13  teetime          98625 non-null  object        
 14  round_score      98625 non-null  float64  

In [50]:
#
train_df['teetime_cleaned'] = pd.to_datetime(train_df['teetime'], format='%I:%M%p').dt.time
train_df['teetime_numeric'] = train_df['teetime_cleaned'].apply(lambda x: x.hour + x.minute / 60)
train_df = train_df.drop(['teetime','teetime_cleaned'], axis=1)

In [51]:
train_df['ohe_win'] = np.where(train_df['fin_num'] == 1, 1, 0)
train_df['ohe_top_five'] = np.where(train_df['fin_num'] <= 5, 1, 0)
train_df['ohe_top_ten'] = np.where(train_df['fin_num'] <= 10, 1, 0)
train_df['ohe_top_twenty'] = np.where(train_df['fin_num'] <= 20, 1, 0)
train_df['ohe_make_cut'] = np.where(train_df['fin_num'] == 0, 1, 0)

In [52]:
train_df_sorted = train_df.sort_values(by=['year', 'dg_id','round_completed'], ascending=[True,True,True])

In [53]:
train_df_sorted['L20_moving_avg_sg_putt'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_sg_putt'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_sg_putt'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_sg_putt'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_sg_putt'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


train_df_sorted['L20_moving_avg_sg_arg'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_sg_arg'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_sg_arg'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_sg_arg'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_sg_arg'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


train_df_sorted['L20_moving_avg_sg_app'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_sg_app'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_sg_app'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_sg_app'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_sg_app'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


train_df_sorted['L20_moving_avg_sg_ott'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_sg_ott'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_sg_ott'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_sg_ott'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_sg_ott'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


train_df_sorted['L20_moving_avg_sg_t2g'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_sg_t2g'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_sg_t2g'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_sg_t2g'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_sg_t2g'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


train_df_sorted['L20_moving_avg_sg_total'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_sg_total'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_sg_total'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_sg_total'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_sg_total'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


train_df_sorted['L20_moving_avg_driving_dist'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_driving_dist'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_driving_dist'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_driving_dist'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_driving_dist'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


train_df_sorted['L20_moving_avg_driving_acc'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_driving_acc'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_driving_acc'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_driving_acc'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_driving_acc'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


train_df_sorted['L20_moving_avg_gir'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_gir'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_gir'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_gir'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_gir'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


train_df_sorted['L20_moving_avg_scrambling'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg_scrambling'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg_scrambling'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg_scrambling'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg_scrambling'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())

In [54]:
train_df_sorted['career_avg'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).mean())
train_df_sorted['L20_moving_avg'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
train_df_sorted['L16_moving_avg'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
train_df_sorted['L12_moving_avg'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
train_df_sorted['L8_moving_avg'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
train_df_sorted['L4_moving_avg'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())

train_df_sorted['career_min'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).min())
train_df_sorted['L20_moving_min'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=20, min_periods=1).min())
train_df_sorted['L16_moving_min'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=16, min_periods=1).min())
train_df_sorted['L12_moving_min'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=12, min_periods=1).min())
train_df_sorted['L8_moving_min'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=8, min_periods=1).min())
train_df_sorted['L4_moving_min'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=4, min_periods=1).min())

train_df_sorted['career_max'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).max())
train_df_sorted['L20_moving_max'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=20, min_periods=1).max())
train_df_sorted['L16_moving_max'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=16, min_periods=1).max())
train_df_sorted['L12_moving_max'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=12, min_periods=1).max())
train_df_sorted['L8_moving_max'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=8, min_periods=1).max())
train_df_sorted['L4_moving_max'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=4, min_periods=1).max())

In [55]:
# train_df_sorted['career_round_score_std_dev'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_round_score_std_dev'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_round_score_std_dev'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_round_score_std_dev'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_round_score_std_dev'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_round_score_std_dev'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_sg_putt_std_dev'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_sg_putt_std_dev'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16__sg_putt_std_dev'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_putt_std_dev'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_putt_std_dev'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_sg_putt_std_dev'] = train_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_sg_arg_std_dev'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_sg_arg_std_dev'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_sg_arg_std_dev'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_sg_arg_std_dev'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_sg_arg_std_dev'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_sg_arg_std_dev'] = train_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_sg_app_std_dev'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_sg_app_std_dev'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_app_std_dev'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_app_std_dev'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_sg_app_std_dev'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_sg_app_std_dev'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_sg_t2g_std_dev'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_sg_t2g_std_dev'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_sg_t2g_std_dev'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_sg_t2g_std_dev'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_sg_t2g_std_dev'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_sg_t2g_std_dev'] = train_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_sg_total_std_dev'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_sg_total_std_dev'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_sg_total_std_dev'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_sg_total_std_dev'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_total_std_dev'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_sg_total_std_dev'] = train_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_driving_dist_std_dev'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_driving_dist_std_dev'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_driving_dist_std_dev'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_driving_dist_std_dev'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_driving_dist_std_dev'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_driving_dist_std_dev'] = train_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_driving_acc_std_dev'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_driving_acc_std_dev'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_driving_acc_std_dev'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_driving_acc_std_dev'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_driving_acc_std_dev'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_driving_acc_std_dev'] = train_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_gir_std_dev'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_gir_std_dev'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_gir_std_dev'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_gir_std_dev'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_gir_std_dev'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_gir_std_dev'] = train_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# train_df_sorted['career_scrambling_std_dev'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.expanding(min_periods=1).std())
train_df_sorted['L20_scrambling_std_dev'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
train_df_sorted['L16_scrambling_std_dev'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
train_df_sorted['L12_scrambling_std_dev'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
train_df_sorted['L8_scrambling_std_dev'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
train_df_sorted['L4_scrambling_std_dev'] = train_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

  train_df_sorted['L12_app_std_dev'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
  train_df_sorted['L8_sg_app_std_dev'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
  train_df_sorted['L4_sg_app_std_dev'] = train_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=4, min_periods=1).std())
  train_df_sorted['L20_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
  train_df_sorted['L16_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
  train_df_sorted['L12_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
  train_df_sorted['L8_sg_ott_std_dev'] = train_df_sorted.groupby('player_name')['sg

In [56]:
now = datetime.now()

# Calculate days since each round
train_df_sorted['Days_Since'] = (now - train_df_sorted['round_completed']).dt.days

train_df_sorted['Last_365_Days'] = np.where(train_df_sorted['Days_Since'] <= 365, 1, 0)
train_df_sorted['Last_180_Days'] = np.where(train_df_sorted['Days_Since'] <= 180, 1, 0)
train_df_sorted['Last_90_Days'] = np.where(train_df_sorted['Days_Since'] <= 90, 1, 0)
train_df_sorted['Last_60_Days'] = np.where(train_df_sorted['Days_Since'] <= 60, 1, 0)
train_df_sorted['Last_30_Days'] = np.where(train_df_sorted['Days_Since'] <= 30, 1, 0)
train_df_sorted['Last_10_Days'] = np.where(train_df_sorted['Days_Since'] <= 10, 1, 0)
train_df_sorted['Last_5_Days'] = np.where(train_df_sorted['Days_Since'] <= 5, 1, 0)

  train_df_sorted['Days_Since'] = (now - train_df_sorted['round_completed']).dt.days
  train_df_sorted['Last_365_Days'] = np.where(train_df_sorted['Days_Since'] <= 365, 1, 0)
  train_df_sorted['Last_180_Days'] = np.where(train_df_sorted['Days_Since'] <= 180, 1, 0)
  train_df_sorted['Last_90_Days'] = np.where(train_df_sorted['Days_Since'] <= 90, 1, 0)
  train_df_sorted['Last_60_Days'] = np.where(train_df_sorted['Days_Since'] <= 60, 1, 0)
  train_df_sorted['Last_30_Days'] = np.where(train_df_sorted['Days_Since'] <= 30, 1, 0)
  train_df_sorted['Last_10_Days'] = np.where(train_df_sorted['Days_Since'] <= 10, 1, 0)
  train_df_sorted['Last_5_Days'] = np.where(train_df_sorted['Days_Since'] <= 5, 1, 0)


In [57]:
# train_df_sorted['career_rounds_played'] = train_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).count())

In [58]:
train_df_sorted['lagged_year'] = train_df_sorted.groupby('dg_id')['year'].shift(1)
train_df_sorted['lagged_season'] = train_df_sorted.groupby('dg_id')['season'].shift(1)
train_df_sorted['lagged_event_id'] = train_df_sorted.groupby('dg_id')['event_id'].shift(1)
train_df_sorted['lagged_round_num'] = train_df_sorted.groupby('dg_id')['round_num'].shift(1)
train_df_sorted['lagged_course_num'] = train_df_sorted.groupby('dg_id')['course_num'].shift(1)
train_df_sorted['lagged_course_par'] = train_df_sorted.groupby('dg_id')['course_par'].shift(1)
train_df_sorted['lagged_start_hole'] = train_df_sorted.groupby('dg_id')['start_hole'].shift(1)
train_df_sorted['lagged_round_score'] = train_df_sorted.groupby('dg_id')['round_score'].shift(1)
train_df_sorted['lagged_sg_putt'] = train_df_sorted.groupby('dg_id')['sg_putt'].shift(1)
train_df_sorted['lagged_sg_arg'] = train_df_sorted.groupby('dg_id')['sg_arg'].shift(1)
train_df_sorted['lagged_sg_app'] = train_df_sorted.groupby('dg_id')['sg_app'].shift(1)
train_df_sorted['lagged_sg_ott'] = train_df_sorted.groupby('dg_id')['sg_ott'].shift(1)
train_df_sorted['lagged_sg_t2g'] = train_df_sorted.groupby('dg_id')['sg_t2g'].shift(1)
train_df_sorted['lagged_sg_total'] = train_df_sorted.groupby('dg_id')['sg_total'].shift(1)
train_df_sorted['lagged_driving_dist'] = train_df_sorted.groupby('dg_id')['driving_dist'].shift(1)
train_df_sorted['lagged_driving_acc'] = train_df_sorted.groupby('dg_id')['driving_acc'].shift(1)
train_df_sorted['lagged_gir'] = train_df_sorted.groupby('dg_id')['gir'].shift(1)
train_df_sorted['lagged_scrambling'] = train_df_sorted.groupby('dg_id')['scrambling'].shift(1)
train_df_sorted['lagged_prox_rgh'] = train_df_sorted.groupby('dg_id')['prox_rgh'].shift(1)
train_df_sorted['lagged_prox_fw'] = train_df_sorted.groupby('dg_id')['prox_fw'].shift(1)
train_df_sorted['lagged_great_shots'] = train_df_sorted.groupby('dg_id')['great_shots'].shift(1)
train_df_sorted['lagged_poor_shots'] = train_df_sorted.groupby('dg_id')['poor_shots'].shift(1)
train_df_sorted['lagged_month'] = train_df_sorted.groupby('dg_id')['month'].shift(1)
train_df_sorted['lagged_day'] = train_df_sorted.groupby('dg_id')['day'].shift(1)
train_df_sorted['lagged_fin_num'] = train_df_sorted.groupby('dg_id')['fin_num'].shift(1)
train_df_sorted['lagged_teetime_numeric'] = train_df_sorted.groupby('dg_id')['teetime_numeric'].shift(1)
train_df_sorted['lagged_ohe_win'] = train_df_sorted.groupby('dg_id')['ohe_win'].shift(1)
train_df_sorted['lagged_ohe_top_five'] = train_df_sorted.groupby('dg_id')['ohe_top_five'].shift(1)
train_df_sorted['lagged_ohe_top_ten'] = train_df_sorted.groupby('dg_id')['ohe_top_ten'].shift(1)
train_df_sorted['lagged_ohe_top_twenty'] = train_df_sorted.groupby('dg_id')['ohe_top_twenty'].shift(1)
train_df_sorted['lagged_ohe_make_cut'] = train_df_sorted.groupby('dg_id')['ohe_make_cut'].shift(1)

  train_df_sorted['lagged_year'] = train_df_sorted.groupby('dg_id')['year'].shift(1)
  train_df_sorted['lagged_season'] = train_df_sorted.groupby('dg_id')['season'].shift(1)
  train_df_sorted['lagged_event_id'] = train_df_sorted.groupby('dg_id')['event_id'].shift(1)
  train_df_sorted['lagged_round_num'] = train_df_sorted.groupby('dg_id')['round_num'].shift(1)
  train_df_sorted['lagged_course_num'] = train_df_sorted.groupby('dg_id')['course_num'].shift(1)
  train_df_sorted['lagged_course_par'] = train_df_sorted.groupby('dg_id')['course_par'].shift(1)
  train_df_sorted['lagged_start_hole'] = train_df_sorted.groupby('dg_id')['start_hole'].shift(1)
  train_df_sorted['lagged_round_score'] = train_df_sorted.groupby('dg_id')['round_score'].shift(1)
  train_df_sorted['lagged_sg_putt'] = train_df_sorted.groupby('dg_id')['sg_putt'].shift(1)
  train_df_sorted['lagged_sg_arg'] = train_df_sorted.groupby('dg_id')['sg_arg'].shift(1)
  train_df_sorted['lagged_sg_app'] = train_df_sorted.groupby('dg_id'

In [59]:
train_df_sorted.info()

<class 'pandas.core.frame.DataFrame'>
Index: 98625 entries, 1092 to 128293
Columns: 200 entries, tour to lagged_ohe_make_cut
dtypes: datetime64[ns](2), float64(170), int32(2), int64(22), object(4)
memory usage: 150.5+ MB


In [60]:
train_df_sorted.isna().sum()

tour                              0
year                              0
season                            0
event_completed                   0
event_name                        0
event_id                          0
player_name                       0
dg_id                             0
round_num                         0
course_name                       0
course_num                        0
course_par                        0
start_hole                        0
round_score                       0
sg_putt                           0
sg_arg                            0
sg_app                            0
sg_ott                            0
sg_t2g                            0
sg_total                          0
driving_dist                      0
driving_acc                       0
gir                               0
scrambling                        0
prox_rgh                          0
prox_fw                           0
great_shots                       0
poor_shots                  

In [61]:
train_df_sorted = train_df_sorted.dropna(axis=0)

In [62]:
train_df_sorted.isna().sum()

tour                           0
year                           0
season                         0
event_completed                0
event_name                     0
event_id                       0
player_name                    0
dg_id                          0
round_num                      0
course_name                    0
course_num                     0
course_par                     0
start_hole                     0
round_score                    0
sg_putt                        0
sg_arg                         0
sg_app                         0
sg_ott                         0
sg_t2g                         0
sg_total                       0
driving_dist                   0
driving_acc                    0
gir                            0
scrambling                     0
prox_rgh                       0
prox_fw                        0
great_shots                    0
poor_shots                     0
round_completed                0
month                          0
day       

In [63]:
train_df_sorted.loc[train_df_sorted['player_name'] == 'Malnati, Peter',]

Unnamed: 0,tour,year,season,event_completed,event_name,event_id,player_name,dg_id,round_num,course_name,course_num,course_par,start_hole,round_score,sg_putt,sg_arg,sg_app,sg_ott,sg_t2g,sg_total,driving_dist,driving_acc,gir,scrambling,prox_rgh,prox_fw,great_shots,poor_shots,round_completed,month,day,fin_num,teetime_numeric,ohe_win,ohe_top_five,ohe_top_ten,ohe_top_twenty,ohe_make_cut,L20_moving_avg_sg_putt,L16_moving_avg_sg_putt,L12_moving_avg_sg_putt,L8_moving_avg_sg_putt,L4_moving_avg_sg_putt,L20_moving_avg_sg_arg,L16_moving_avg_sg_arg,L12_moving_avg_sg_arg,L8_moving_avg_sg_arg,L4_moving_avg_sg_arg,L20_moving_avg_sg_app,L16_moving_avg_sg_app,L12_moving_avg_sg_app,L8_moving_avg_sg_app,L4_moving_avg_sg_app,L20_moving_avg_sg_ott,L16_moving_avg_sg_ott,L12_moving_avg_sg_ott,L8_moving_avg_sg_ott,L4_moving_avg_sg_ott,L20_moving_avg_sg_t2g,L16_moving_avg_sg_t2g,L12_moving_avg_sg_t2g,L8_moving_avg_sg_t2g,L4_moving_avg_sg_t2g,L20_moving_avg_sg_total,L16_moving_avg_sg_total,L12_moving_avg_sg_total,L8_moving_avg_sg_total,L4_moving_avg_sg_total,L20_moving_avg_driving_dist,L16_moving_avg_driving_dist,L12_moving_avg_driving_dist,L8_moving_avg_driving_dist,L4_moving_avg_driving_dist,L20_moving_avg_driving_acc,L16_moving_avg_driving_acc,L12_moving_avg_driving_acc,L8_moving_avg_driving_acc,L4_moving_avg_driving_acc,L20_moving_avg_gir,L16_moving_avg_gir,L12_moving_avg_gir,L8_moving_avg_gir,L4_moving_avg_gir,L20_moving_avg_scrambling,L16_moving_avg_scrambling,L12_moving_avg_scrambling,L8_moving_avg_scrambling,L4_moving_avg_scrambling,career_avg,L20_moving_avg,L16_moving_avg,L12_moving_avg,L8_moving_avg,L4_moving_avg,career_min,L20_moving_min,L16_moving_min,L12_moving_min,L8_moving_min,L4_moving_min,career_max,L20_moving_max,L16_moving_max,L12_moving_max,L8_moving_max,L4_moving_max,L20_round_score_std_dev,L16_round_score_std_dev,L12_round_score_std_dev,L8_round_score_std_dev,L4_round_score_std_dev,L20_sg_putt_std_dev,L16__sg_putt_std_dev,L12_putt_std_dev,L8_putt_std_dev,L4_sg_putt_std_dev,L20_sg_arg_std_dev,L16_sg_arg_std_dev,L12_sg_arg_std_dev,L8_sg_arg_std_dev,L4_sg_arg_std_dev,L20_sg_app_std_dev,L16_app_std_dev,L12_app_std_dev,L8_sg_app_std_dev,L4_sg_app_std_dev,L20_sg_ott_std_dev,L16_sg_ott_std_dev,L12_sg_ott_std_dev,L8_sg_ott_std_dev,L4_sg_ott_std_dev,L20_sg_t2g_std_dev,L16_sg_t2g_std_dev,L12_sg_t2g_std_dev,L8_sg_t2g_std_dev,L4_sg_t2g_std_dev,L20_sg_total_std_dev,L16_sg_total_std_dev,L12_sg_total_std_dev,L8_total_std_dev,L4_sg_total_std_dev,L20_driving_dist_std_dev,L16_driving_dist_std_dev,L12_driving_dist_std_dev,L8_driving_dist_std_dev,L4_driving_dist_std_dev,L20_driving_acc_std_dev,L16_driving_acc_std_dev,L12_driving_acc_std_dev,L8_driving_acc_std_dev,L4_driving_acc_std_dev,L20_gir_std_dev,L16_gir_std_dev,L12_gir_std_dev,L8_gir_std_dev,L4_gir_std_dev,L20_scrambling_std_dev,L16_scrambling_std_dev,L12_scrambling_std_dev,L8_scrambling_std_dev,L4_scrambling_std_dev,Days_Since,Last_365_Days,Last_180_Days,Last_90_Days,Last_60_Days,Last_30_Days,Last_10_Days,Last_5_Days,lagged_year,lagged_season,lagged_event_id,lagged_round_num,lagged_course_num,lagged_course_par,lagged_start_hole,lagged_round_score,lagged_sg_putt,lagged_sg_arg,lagged_sg_app,lagged_sg_ott,lagged_sg_t2g,lagged_sg_total,lagged_driving_dist,lagged_driving_acc,lagged_gir,lagged_scrambling,lagged_prox_rgh,lagged_prox_fw,lagged_great_shots,lagged_poor_shots,lagged_month,lagged_day,lagged_fin_num,lagged_teetime_numeric,lagged_ohe_win,lagged_ohe_top_five,lagged_ohe_top_ten,lagged_ohe_top_twenty,lagged_ohe_make_cut
353,pga,2017,2017,2017-01-15,Sony Open in Hawaii,6,"Malnati, Peter",14926,2,Waialae CC,6,70,1,66.0,2.487,1.107,-1.333,0.106,-0.119,2.368,285.9,0.571,0.722,0.778,55.389,30.367,5.0,2.0,2017-01-13,1,13,27,7.5,0,0,0,0,0,2.259,2.259,2.259,2.259,2.259,0.416,0.416,0.416,0.416,0.416,-0.706,-0.706,-0.706,-0.706,-0.706,-1.1605,-1.1605,-1.1605,-1.1605,-1.1605,-1.4505,-1.4505,-1.4505,-1.4505,-1.4505,0.809,0.809,0.809,0.809,0.809,284.8,284.8,284.8,284.8,284.8,0.5,0.5,0.5,0.5,0.5,0.6665,0.6665,0.6665,0.6665,0.6665,0.764,0.764,0.764,0.764,0.764,67.5,67.5,67.5,67.5,67.5,67.5,66.0,66.0,66.0,66.0,66.0,66.0,69.0,69.0,69.0,69.0,69.0,69.0,2.12132,2.12132,2.12132,2.12132,2.12132,0.322441,0.322441,0.322441,0.322441,0.322441,0.977222,0.977222,0.977222,0.977222,0.977222,0.886712,0.886712,0.886712,0.886712,0.886712,1.791101,1.791101,1.791101,1.791101,1.791101,1.883025,1.883025,1.883025,1.883025,1.883025,2.204759,2.204759,2.204759,2.204759,2.204759,1.555635,1.555635,1.555635,1.555635,1.555635,0.100409,0.100409,0.100409,0.100409,0.100409,0.078489,0.078489,0.078489,0.078489,0.078489,0.019799,0.019799,0.019799,0.019799,0.019799,2632,0,0,0,0,0,0,0,2017.0,2017.0,6.0,1.0,6.0,70.0,10.0,69.0,2.031,-0.275,-0.079,-2.427,-2.782,-0.75,283.7,0.429,0.611,0.75,45.843,31.384,2.0,4.0,1.0,12.0,27.0,12.166667,0.0,0.0,0.0,0.0,0.0
339,pga,2017,2017,2017-01-15,Sony Open in Hawaii,6,"Malnati, Peter",14926,3,Waialae CC,6,70,10,69.0,0.397,0.102,-0.566,-0.628,-1.092,-0.695,285.3,0.357,0.5,0.6,52.898,30.282,1.0,3.0,2017-01-14,1,14,27,11.166667,0,0,0,0,0,1.638333,1.638333,1.638333,1.638333,1.638333,0.311333,0.311333,0.311333,0.311333,0.311333,-0.659333,-0.659333,-0.659333,-0.659333,-0.659333,-0.983,-0.983,-0.983,-0.983,-0.983,-1.331,-1.331,-1.331,-1.331,-1.331,0.307667,0.307667,0.307667,0.3076667,0.3076667,284.966667,284.966667,284.966667,284.966667,284.966667,0.452333,0.452333,0.452333,0.452333,0.452333,0.611,0.611,0.611,0.611,0.611,0.709333,0.709333,0.709333,0.709333,0.709333,68.0,68.0,68.0,68.0,68.0,68.0,66.0,66.0,66.0,66.0,66.0,66.0,69.0,69.0,69.0,69.0,69.0,69.0,1.732051,1.732051,1.732051,1.732051,1.732051,1.098938,1.098938,1.098938,1.098938,1.098938,0.714385,0.714385,0.714385,0.714385,0.714385,0.632189,0.632189,0.632189,0.632189,0.632189,1.303281,1.303281,1.303281,1.303281,1.303281,1.347491,1.347491,1.347491,1.347491,1.347491,1.784513,1.784513,1.784513,1.784513,1.784513,1.137248,1.137248,1.137248,1.137248,1.137248,0.108891,0.108891,0.108891,0.108891,0.108891,0.111,0.111,0.111,0.111,0.111,0.095715,0.095715,0.095715,0.095715,0.095715,2631,0,0,0,0,0,0,0,2017.0,2017.0,6.0,2.0,6.0,70.0,1.0,66.0,2.487,1.107,-1.333,0.106,-0.119,2.368,285.9,0.571,0.722,0.778,55.389,30.367,5.0,2.0,1.0,13.0,27.0,7.5,0.0,0.0,0.0,0.0,0.0
346,pga,2017,2017,2017-01-15,Sony Open in Hawaii,6,"Malnati, Peter",14926,4,Waialae CC,6,70,10,65.0,1.094,1.614,0.537,0.068,2.221,3.315,285.7,0.429,0.667,1.0,54.866,34.23,3.0,2.0,2017-01-15,1,15,27,11.5,0,0,0,0,0,1.50225,1.50225,1.50225,1.50225,1.50225,0.637,0.637,0.637,0.637,0.637,-0.36025,-0.36025,-0.36025,-0.36025,-0.36025,-0.72025,-0.72025,-0.72025,-0.72025,-0.72025,-0.443,-0.443,-0.443,-0.443,-0.443,1.0595,1.0595,1.0595,1.0595,1.0595,285.15,285.15,285.15,285.15,285.15,0.4465,0.4465,0.4465,0.4465,0.4465,0.625,0.625,0.625,0.625,0.625,0.782,0.782,0.782,0.782,0.782,67.25,67.25,67.25,67.25,67.25,67.25,65.0,65.0,65.0,65.0,65.0,65.0,69.0,69.0,69.0,69.0,69.0,69.0,2.061553,2.061553,2.061553,2.061553,2.061553,0.937649,0.937649,0.937649,0.937649,0.937649,0.874337,0.874337,0.874337,0.874337,0.874337,0.790092,0.790092,0.790092,0.790092,0.790092,1.186807,1.186807,1.186807,1.186807,1.186807,2.089178,2.089178,2.089178,2.089178,2.089178,2.093801,2.093801,2.093801,2.093801,2.093801,0.998332,0.998332,0.998332,0.998332,0.998332,0.089672,0.089672,0.089672,0.089672,0.089672,0.094858,0.094858,0.094858,0.094858,0.094858,0.165013,0.165013,0.165013,0.165013,0.165013,2630,0,0,0,0,0,0,0,2017.0,2017.0,6.0,3.0,6.0,70.0,10.0,69.0,0.397,0.102,-0.566,-0.628,-1.092,-0.695,285.3,0.357,0.5,0.6,52.898,30.282,1.0,3.0,1.0,14.0,27.0,11.166667,0.0,0.0,0.0,0.0,0.0
616,pga,2017,2017,2017-01-22,CareerBuilder Challenge,2,"Malnati, Peter",14926,3,Stadium Course,704,72,10,71.0,-0.394,0.468,0.825,0.695,1.99,1.596,264.8,0.714,0.722,1.0,46.852,27.541,2.0,4.0,2017-01-21,1,21,77,8.666667,0,0,0,0,0,1.123,1.123,1.123,1.123,0.896,0.6032,0.6032,0.6032,0.6032,0.82275,-0.1232,-0.1232,-0.1232,-0.1232,-0.13425,-0.4372,-0.4372,-0.4372,-0.4372,0.06025,0.0436,0.0436,0.0436,0.0436,0.75,1.1668,1.1668,1.1668,1.1668,1.646,281.08,281.08,281.08,281.08,280.425,0.5,0.5,0.5,0.5,0.51775,0.6444,0.6444,0.6444,0.6444,0.65275,0.8256,0.8256,0.8256,0.8256,0.8445,68.0,68.0,68.0,68.0,68.0,67.75,65.0,65.0,65.0,65.0,65.0,65.0,71.0,71.0,71.0,71.0,71.0,71.0,2.44949,2.44949,2.44949,2.44949,2.753785,1.174113,1.174113,1.174113,1.174113,1.222509,0.760961,0.760961,0.760961,0.760961,0.671367,0.865533,0.865533,0.865533,0.865533,0.999024,1.20705,1.20705,1.20705,1.20705,0.541217,2.111255,2.111255,2.111255,2.111255,1.617567,1.82909,1.82909,1.82909,1.82909,1.711688,9.141772,9.141772,9.141772,9.141772,10.419653,0.142625,0.142625,0.142625,0.142625,0.158184,0.092899,0.092899,0.092899,0.092899,0.105082,0.172994,0.172994,0.172994,0.172994,0.193703,2624,0,0,0,0,0,0,0,2017.0,2017.0,6.0,4.0,6.0,70.0,10.0,65.0,1.094,1.614,0.537,0.068,2.221,3.315,285.7,0.429,0.667,1.0,54.866,34.23,3.0,2.0,1.0,15.0,27.0,11.5,0.0,0.0,0.0,0.0,0.0
615,pga,2017,2017,2017-01-22,CareerBuilder Challenge,2,"Malnati, Peter",14926,4,Stadium Course,704,72,10,77.0,-2.902,-1.104,-2.323,0.629,-2.798,-5.701,266.2,0.714,0.5,0.778,91.984,26.478,4.0,9.0,2017-01-22,1,22,77,8.833333,0,0,0,0,0,0.452167,0.452167,0.452167,0.452167,-0.45125,0.318667,0.318667,0.318667,0.318667,0.27,-0.489833,-0.489833,-0.489833,-0.489833,-0.38175,-0.2595,-0.2595,-0.2595,-0.2595,0.191,-0.43,-0.43,-0.43,-0.43,0.08025,0.022167,0.022167,0.022167,0.02216667,-0.37125,278.6,278.6,278.6,278.6,275.5,0.535667,0.535667,0.535667,0.535667,0.5535,0.620333,0.620333,0.620333,0.620333,0.59725,0.817667,0.817667,0.817667,0.817667,0.8445,69.5,69.5,69.5,69.5,69.5,70.5,65.0,65.0,65.0,65.0,65.0,65.0,77.0,77.0,77.0,77.0,77.0,77.0,4.27785,4.27785,4.27785,4.27785,5.0,1.950112,1.950112,1.950112,1.950112,1.743252,0.974169,0.974169,0.974169,0.974169,1.119768,1.18568,1.18568,1.18568,1.18568,1.426271,1.164062,1.164062,1.164062,1.164062,0.614207,2.216235,2.216235,2.216235,2.216235,2.441888,3.246162,3.246162,3.246162,3.246162,3.914484,10.186265,10.186265,10.186265,10.186265,11.562295,0.154617,0.154617,0.154617,0.154617,0.187646,0.10188,0.10188,0.10188,0.10188,0.114517,0.155946,0.155946,0.155946,0.155946,0.193703,2623,0,0,0,0,0,0,0,2017.0,2017.0,2.0,3.0,704.0,72.0,10.0,71.0,-0.394,0.468,0.825,0.695,1.99,1.596,264.8,0.714,0.722,1.0,46.852,27.541,2.0,4.0,1.0,21.0,77.0,8.666667,0.0,0.0,0.0,0.0,0.0
1396,pga,2017,2017,2017-01-29,Farmers Insurance Open,4,"Malnati, Peter",14926,1,Torrey Pines GC (South),4,72,10,74.0,-0.138,-0.607,0.738,-0.377,-0.246,-0.385,270.2,0.643,0.778,0.333,30.077,24.727,3.0,5.0,2017-01-26,1,26,0,9.333333,0,1,1,1,1,0.367857,0.367857,0.367857,0.367857,-0.585,0.186429,0.186429,0.186429,0.186429,0.09275,-0.314429,-0.314429,-0.314429,-0.314429,-0.05575,-0.276286,-0.276286,-0.276286,-0.276286,0.25375,-0.403714,-0.403714,-0.403714,-0.403714,0.29175,-0.036,-0.036,-0.036,-0.036,-0.29375,277.4,277.4,277.4,277.4,271.725,0.551,0.551,0.551,0.551,0.625,0.642857,0.642857,0.642857,0.642857,0.66675,0.748429,0.748429,0.748429,0.748429,0.77775,70.142857,70.142857,70.142857,70.142857,70.142857,71.75,65.0,65.0,65.0,65.0,65.0,65.0,77.0,77.0,77.0,77.0,77.0,77.0,4.259443,4.259443,4.259443,4.259443,5.123475,1.794121,1.794121,1.794121,1.794121,1.675691,0.955639,0.955639,0.955639,0.955639,1.207873,1.177667,1.177667,1.177667,1.177667,1.516304,1.063566,1.063566,1.063566,1.063566,0.505919,2.024332,2.024332,2.024332,2.024332,2.341067,2.96732,2.96732,2.96732,2.96732,3.909003,9.825816,9.825816,9.825816,9.825816,9.593531,0.146859,0.146859,0.146859,0.146859,0.134885,0.110457,0.110457,0.110457,0.110457,0.120048,0.231998,0.231998,0.231998,0.231998,0.314427,2619,0,0,0,0,0,0,0,2017.0,2017.0,2.0,4.0,704.0,72.0,10.0,77.0,-2.902,-1.104,-2.323,0.629,-2.798,-5.701,266.2,0.714,0.5,0.778,91.984,26.478,4.0,9.0,1.0,22.0,77.0,8.833333,0.0,0.0,0.0,0.0,0.0
2320,pga,2017,2017,2017-02-12,AT&T Pebble Beach Pro-Am,5,"Malnati, Peter",14926,2,Pebble Beach GL,5,72,1,73.0,-0.735,0.842,0.066,-0.981,-0.072,-0.808,257.5,0.857,0.611,0.833,176.014,28.94,1.0,6.0,2017-02-10,2,10,0,11.6,0,1,1,1,1,0.23,0.23,0.23,0.23,-1.04225,0.268375,0.268375,0.268375,0.268375,-0.10025,-0.266875,-0.266875,-0.266875,-0.266875,-0.1735,-0.364375,-0.364375,-0.364375,-0.364375,-0.0085,-0.36225,-0.36225,-0.36225,-0.36225,-0.2815,-0.1325,-0.1325,-0.1325,-0.1325,-1.3245,274.9125,274.9125,274.9125,274.9125,264.675,0.58925,0.58925,0.58925,0.58925,0.732,0.638875,0.638875,0.638875,0.638875,0.65275,0.759,0.759,0.759,0.759,0.736,70.5,70.5,70.5,70.5,70.5,73.75,65.0,65.0,65.0,65.0,65.0,71.0,77.0,77.0,77.0,77.0,77.0,77.0,4.070802,4.070802,4.070802,4.070802,2.5,1.706186,1.706186,1.706186,1.706186,1.263721,0.914606,0.914606,0.914606,0.914606,0.908305,1.098573,1.098573,1.098573,1.098573,1.472588,1.015704,1.015704,1.015704,1.015704,0.812992,1.877833,1.877833,1.877833,1.877833,1.961091,2.76073,2.76073,2.76073,2.76073,3.100135,11.500241,11.500241,11.500241,11.500241,5.302437,0.173756,0.173756,0.173756,0.173756,0.089803,0.102882,0.102882,0.102882,0.102882,0.123233,0.21686,0.21686,0.21686,0.21686,0.284768,2604,0,0,0,0,0,0,0,2017.0,2017.0,4.0,1.0,4.0,72.0,10.0,74.0,-0.138,-0.607,0.738,-0.377,-0.246,-0.385,270.2,0.643,0.778,0.333,30.077,24.727,3.0,5.0,1.0,26.0,0.0,9.333333,0.0,1.0,1.0,1.0,1.0
2933,pga,2017,2017,2017-02-19,Genesis Open,7,"Malnati, Peter",14926,1,Riviera CC,500,71,1,71.0,-0.116,1.059,-1.193,0.243,0.11,-0.007,271.0,0.643,0.389,0.571,56.195,29.523,5.0,7.0,2017-02-16,2,16,64,9.133333,0,0,0,0,0,0.191556,0.191556,0.191556,-0.038375,-0.97275,0.356222,0.356222,0.356222,0.435125,0.0475,-0.369778,-0.369778,-0.369778,-0.406125,-0.678,-0.296889,-0.296889,-0.296889,-0.030625,-0.1215,-0.309778,-0.309778,-0.309778,-0.00075,-0.7515,-0.118556,-0.118556,-0.118556,-0.039625,-1.72525,274.477778,274.477778,274.477778,273.325,266.225,0.595222,0.595222,0.595222,0.616,0.71425,0.611111,0.611111,0.611111,0.611125,0.5695,0.738111,0.738111,0.738111,0.736625,0.62875,70.555556,70.555556,70.555556,70.555556,70.75,73.75,65.0,65.0,65.0,65.0,65.0,71.0,77.0,77.0,77.0,77.0,77.0,77.0,3.811532,3.811532,3.811532,4.026697,2.5,1.600152,1.600152,1.600152,1.54353,1.317745,0.895207,0.895207,0.895207,0.922954,1.06594,1.072989,1.072989,1.072989,1.141135,1.357675,0.971435,0.971435,0.971435,0.590997,0.707138,1.763591,1.763591,1.763591,1.603842,1.372054,2.582766,2.582766,2.582766,2.749464,2.670617,10.836256,10.836256,10.836256,10.978648,6.184052,0.163518,0.163518,0.163518,0.161609,0.100881,0.127276,0.127276,0.127276,0.136063,0.165937,0.212313,0.212313,0.212313,0.226922,0.227154,2598,0,0,0,0,0,0,0,2017.0,2017.0,5.0,2.0,5.0,72.0,1.0,73.0,-0.735,0.842,0.066,-0.981,-0.072,-0.808,257.5,0.857,0.611,0.833,176.014,28.94,1.0,6.0,2.0,10.0,0.0,11.6,0.0,1.0,1.0,1.0,1.0
2932,pga,2017,2017,2017-02-19,Genesis Open,7,"Malnati, Peter",14926,2,Riviera CC,500,71,10,70.0,0.451,1.357,0.387,-0.579,1.165,1.616,265.2,0.643,0.611,0.75,67.956,39.231,5.0,6.0,2017-02-17,2,17,64,10.033333,0,0,0,0,0,0.2175,0.2175,0.2175,-0.292875,-0.1345,0.4563,0.4563,0.4563,0.466375,0.66275,-0.2941,-0.2941,-0.2941,-0.191125,-0.0005,-0.3251,-0.3251,-0.3251,-0.11625,-0.4235,-0.1623,-0.1623,-0.1623,0.15975,0.23925,0.0549,0.0549,0.0549,-0.133625,0.104,273.55,273.55,273.55,270.7375,265.975,0.6,0.6,0.6,0.625,0.6965,0.6111,0.6111,0.6111,0.59725,0.59725,0.7393,0.7393,0.7393,0.733125,0.62175,70.5,70.5,70.5,70.5,71.25,72.0,65.0,65.0,65.0,65.0,65.0,70.0,77.0,77.0,77.0,77.0,77.0,74.0,3.597839,3.597839,3.597839,3.575712,1.825742,1.510867,1.510867,1.510867,1.196495,0.484343,0.901392,0.901392,0.901392,0.952706,0.872428,1.039545,1.039545,1.039545,1.102948,0.841034,0.920212,0.920212,0.920212,0.617407,0.51035,1.726895,1.726895,1.726895,1.653787,0.634051,2.496069,2.496069,2.496069,2.667011,1.059769,10.62944,10.62944,10.62944,9.985981,6.205575,0.154905,0.154905,0.154905,0.160747,0.107,0.119997,0.119997,0.119997,0.128597,0.1596,0.200206,0.200206,0.200206,0.226408,0.22138,2597,0,0,0,0,0,0,0,2017.0,2017.0,7.0,1.0,500.0,71.0,1.0,71.0,-0.116,1.059,-1.193,0.243,0.11,-0.007,271.0,0.643,0.389,0.571,56.195,29.523,5.0,7.0,2.0,16.0,64.0,9.133333,0.0,0.0,0.0,0.0,0.0
2931,pga,2017,2017,2017-02-19,Genesis Open,7,"Malnati, Peter",14926,3,Riviera CC,500,71,10,70.0,0.543,-1.038,-0.322,1.281,-0.079,0.465,274.1,0.929,0.611,0.778,64.856,31.567,1.0,3.0,2017-02-18,2,18,64,17.166667,0,0,0,0,0,0.247091,0.247091,0.247091,-0.274625,0.03575,0.320455,0.320455,0.320455,0.323875,0.555,-0.296636,-0.296636,-0.296636,-0.160625,-0.2655,-0.179091,-0.179091,-0.179091,0.122375,-0.009,-0.154727,-0.154727,-0.154727,0.286375,0.281,0.092182,0.092182,0.092182,0.011375,0.3165,273.6,273.6,273.6,269.3375,266.95,0.629909,0.629909,0.629909,0.6965,0.768,0.611091,0.611091,0.611091,0.611125,0.5555,0.742818,0.742818,0.742818,0.755375,0.733,70.454545,70.454545,70.454545,70.454545,71.375,71.0,65.0,65.0,65.0,65.0,65.0,70.0,77.0,77.0,77.0,77.0,77.0,73.0,3.416537,3.416537,3.416537,3.50255,1.414214,1.436691,1.436691,1.436691,1.209563,0.590711,0.966566,0.966566,0.966566,1.090312,1.08278,0.986235,0.986235,0.986235,1.094442,0.68291,0.998307,0.998307,0.998307,0.746729,0.999548,1.638469,1.638469,1.638469,1.581452,0.595792,2.371205,2.371205,2.371205,2.663661,1.01323,10.085336,10.085336,10.085336,8.294566,7.300457,0.177302,0.177302,0.177302,0.151456,0.1473,0.113839,0.113839,0.113839,0.122446,0.111,0.19029,0.19029,0.19029,0.220116,0.11337,2596,0,0,0,0,0,0,0,2017.0,2017.0,7.0,2.0,500.0,71.0,10.0,70.0,0.451,1.357,0.387,-0.579,1.165,1.616,265.2,0.643,0.611,0.75,67.956,39.231,5.0,6.0,2.0,17.0,64.0,10.033333,0.0,0.0,0.0,0.0,0.0


## Test Data Preprocessing

In [64]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4938 entries, 0 to 4937
Data columns (total 30 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   tour             4938 non-null   object 
 1   year             4938 non-null   int64  
 2   season           4938 non-null   int64  
 3   event_completed  4938 non-null   object 
 4   event_name       4938 non-null   object 
 5   event_id         4938 non-null   int64  
 6   player_name      4938 non-null   object 
 7   dg_id            4938 non-null   int64  
 8   fin_text         4938 non-null   object 
 9   round_num        4938 non-null   int64  
 10  course_name      4938 non-null   object 
 11  course_num       4938 non-null   int64  
 12  course_par       4938 non-null   int64  
 13  start_hole       4938 non-null   int64  
 14  teetime          4938 non-null   object 
 15  round_score      4938 non-null   int64  
 16  sg_putt          3932 non-null   float64
 17  sg_arg        

In [65]:
# convert dates
test_df['event_completed'] = test_df['event_completed'].astype('datetime64[ns]')

In [66]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4938 entries, 0 to 4937
Data columns (total 30 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   tour             4938 non-null   object        
 1   year             4938 non-null   int64         
 2   season           4938 non-null   int64         
 3   event_completed  4938 non-null   datetime64[ns]
 4   event_name       4938 non-null   object        
 5   event_id         4938 non-null   int64         
 6   player_name      4938 non-null   object        
 7   dg_id            4938 non-null   int64         
 8   fin_text         4938 non-null   object        
 9   round_num        4938 non-null   int64         
 10  course_name      4938 non-null   object        
 11  course_num       4938 non-null   int64         
 12  course_par       4938 non-null   int64         
 13  start_hole       4938 non-null   int64         
 14  teetime          4938 non-null   object 

In [67]:
# Remove nulls
test_df = test_df.dropna(axis=0)

In [68]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3836 entries, 0 to 4937
Data columns (total 30 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   tour             3836 non-null   object        
 1   year             3836 non-null   int64         
 2   season           3836 non-null   int64         
 3   event_completed  3836 non-null   datetime64[ns]
 4   event_name       3836 non-null   object        
 5   event_id         3836 non-null   int64         
 6   player_name      3836 non-null   object        
 7   dg_id            3836 non-null   int64         
 8   fin_text         3836 non-null   object        
 9   round_num        3836 non-null   int64         
 10  course_name      3836 non-null   object        
 11  course_num       3836 non-null   int64         
 12  course_par       3836 non-null   int64         
 13  start_hole       3836 non-null   int64         
 14  teetime          3836 non-null   object      

In [69]:
def round_date(test_df):
    if test_df['round_num'] == 1:
        return test_df['event_completed'] - pd.Timedelta(days=3)
    elif test_df['round_num'] == 2:
        return test_df['event_completed'] - pd.Timedelta(days=2)
    elif test_df['round_num'] == 3:
        return test_df['event_completed'] - pd.Timedelta(days=1)
    else:
        return test_df['event_completed']


test_df['round_completed'] = test_df.apply(round_date,axis=1)

In [70]:
test_df['month'] = test_df['round_completed'].dt.month
test_df['day'] = test_df['round_completed'].dt.day

In [71]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3836 entries, 0 to 4937
Data columns (total 33 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   tour             3836 non-null   object        
 1   year             3836 non-null   int64         
 2   season           3836 non-null   int64         
 3   event_completed  3836 non-null   datetime64[ns]
 4   event_name       3836 non-null   object        
 5   event_id         3836 non-null   int64         
 6   player_name      3836 non-null   object        
 7   dg_id            3836 non-null   int64         
 8   fin_text         3836 non-null   object        
 9   round_num        3836 non-null   int64         
 10  course_name      3836 non-null   object        
 11  course_num       3836 non-null   int64         
 12  course_par       3836 non-null   int64         
 13  start_hole       3836 non-null   int64         
 14  teetime          3836 non-null   object      

In [72]:
test_df['fin_num'] = test_df['fin_text'].str.extract('(\d+)')
test_df.loc[test_df['fin_num'].isna(), 'fin_num'] = '0'
test_df['fin_num'] = test_df['fin_num'].astype(int)
test_df = test_df.drop(['fin_text'], axis=1)

In [73]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3836 entries, 0 to 4937
Data columns (total 33 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   tour             3836 non-null   object        
 1   year             3836 non-null   int64         
 2   season           3836 non-null   int64         
 3   event_completed  3836 non-null   datetime64[ns]
 4   event_name       3836 non-null   object        
 5   event_id         3836 non-null   int64         
 6   player_name      3836 non-null   object        
 7   dg_id            3836 non-null   int64         
 8   round_num        3836 non-null   int64         
 9   course_name      3836 non-null   object        
 10  course_num       3836 non-null   int64         
 11  course_par       3836 non-null   int64         
 12  start_hole       3836 non-null   int64         
 13  teetime          3836 non-null   object        
 14  round_score      3836 non-null   int64       

In [74]:
#
test_df['teetime_cleaned'] = pd.to_datetime(test_df['teetime'], format='%I:%M%p').dt.time
test_df['teetime_numeric'] = test_df['teetime_cleaned'].apply(lambda x: x.hour + x.minute / 60)
test_df = test_df.drop(['teetime','teetime_cleaned'], axis=1)

In [75]:
test_df['ohe_win'] = np.where(test_df['fin_num'] == 1, 1, 0)
test_df['ohe_top_five'] = np.where(test_df['fin_num'] <= 5, 1, 0)
test_df['ohe_top_ten'] = np.where(test_df['fin_num'] <= 10, 1, 0)
test_df['ohe_top_twenty'] = np.where(test_df['fin_num'] <= 20, 1, 0)
test_df['ohe_make_cut'] = np.where(test_df['fin_num'] == 0, 1, 0)

In [76]:
test_df_sorted = test_df.sort_values(by=['year', 'dg_id','round_completed'], ascending=[True,True,True])

In [77]:
test_df_sorted['L20_moving_avg_sg_putt'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_sg_putt'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_sg_putt'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_sg_putt'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_sg_putt'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


test_df_sorted['L20_moving_avg_sg_arg'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_sg_arg'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_sg_arg'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_sg_arg'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_sg_arg'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


test_df_sorted['L20_moving_avg_sg_app'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_sg_app'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_sg_app'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_sg_app'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_sg_app'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


test_df_sorted['L20_moving_avg_sg_ott'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_sg_ott'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_sg_ott'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_sg_ott'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_sg_ott'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


test_df_sorted['L20_moving_avg_sg_t2g'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_sg_t2g'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_sg_t2g'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_sg_t2g'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_sg_t2g'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


test_df_sorted['L20_moving_avg_sg_total'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_sg_total'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_sg_total'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_sg_total'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_sg_total'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


test_df_sorted['L20_moving_avg_driving_dist'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_driving_dist'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_driving_dist'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_driving_dist'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_driving_dist'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


test_df_sorted['L20_moving_avg_driving_acc'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_driving_acc'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_driving_acc'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_driving_acc'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_driving_acc'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


test_df_sorted['L20_moving_avg_gir'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_gir'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_gir'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_gir'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_gir'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())


test_df_sorted['L20_moving_avg_scrambling'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg_scrambling'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg_scrambling'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg_scrambling'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg_scrambling'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())

In [78]:
test_df_sorted['career_avg'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).mean())
test_df_sorted['L20_moving_avg'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
test_df_sorted['L16_moving_avg'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=16, min_periods=1).mean())
test_df_sorted['L12_moving_avg'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())
test_df_sorted['L8_moving_avg'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=8, min_periods=1).mean())
test_df_sorted['L4_moving_avg'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=4, min_periods=1).mean())

test_df_sorted['career_min'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).min())
test_df_sorted['L20_moving_min'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=20, min_periods=1).min())
test_df_sorted['L16_moving_min'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=16, min_periods=1).min())
test_df_sorted['L12_moving_min'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=12, min_periods=1).min())
test_df_sorted['L8_moving_min'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=8, min_periods=1).min())
test_df_sorted['L4_moving_min'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=4, min_periods=1).min())

test_df_sorted['career_max'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).max())
test_df_sorted['L20_moving_max'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=20, min_periods=1).max())
test_df_sorted['L16_moving_max'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=16, min_periods=1).max())
test_df_sorted['L12_moving_max'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=12, min_periods=1).max())
test_df_sorted['L8_moving_max'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=8, min_periods=1).max())
test_df_sorted['L4_moving_max'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=4, min_periods=1).max())

In [79]:
# test_df_sorted['career_round_score_std_dev'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_round_score_std_dev'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_round_score_std_dev'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_round_score_std_dev'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_round_score_std_dev'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_round_score_std_dev'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_sg_putt_std_dev'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_sg_putt_std_dev'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16__sg_putt_std_dev'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_putt_std_dev'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_putt_std_dev'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_sg_putt_std_dev'] = test_df_sorted.groupby('player_name')['sg_putt'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_sg_arg_std_dev'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_sg_arg_std_dev'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_sg_arg_std_dev'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_sg_arg_std_dev'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_sg_arg_std_dev'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_sg_arg_std_dev'] = test_df_sorted.groupby('player_name')['sg_arg'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_sg_app_std_dev'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_sg_app_std_dev'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_app_std_dev'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_app_std_dev'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_sg_app_std_dev'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_sg_app_std_dev'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_sg_t2g_std_dev'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_sg_t2g_std_dev'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_sg_t2g_std_dev'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_sg_t2g_std_dev'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_sg_t2g_std_dev'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_sg_t2g_std_dev'] = test_df_sorted.groupby('player_name')['sg_t2g'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_sg_total_std_dev'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_sg_total_std_dev'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_sg_total_std_dev'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_sg_total_std_dev'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_total_std_dev'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_sg_total_std_dev'] = test_df_sorted.groupby('player_name')['sg_total'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_driving_dist_std_dev'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_driving_dist_std_dev'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_driving_dist_std_dev'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_driving_dist_std_dev'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_driving_dist_std_dev'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_driving_dist_std_dev'] = test_df_sorted.groupby('player_name')['driving_dist'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_driving_acc_std_dev'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_driving_acc_std_dev'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_driving_acc_std_dev'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_driving_acc_std_dev'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_driving_acc_std_dev'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_driving_acc_std_dev'] = test_df_sorted.groupby('player_name')['driving_acc'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_gir_std_dev'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_gir_std_dev'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_gir_std_dev'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_gir_std_dev'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_gir_std_dev'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_gir_std_dev'] = test_df_sorted.groupby('player_name')['gir'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

# test_df_sorted['career_scrambling_std_dev'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.expanding(min_periods=1).std())
test_df_sorted['L20_scrambling_std_dev'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
test_df_sorted['L16_scrambling_std_dev'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
test_df_sorted['L12_scrambling_std_dev'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
test_df_sorted['L8_scrambling_std_dev'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
test_df_sorted['L4_scrambling_std_dev'] = test_df_sorted.groupby('player_name')['scrambling'].transform(lambda x: x.rolling(window=4, min_periods=1).std())

  test_df_sorted['L12_app_std_dev'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
  test_df_sorted['L8_sg_app_std_dev'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=8, min_periods=1).std())
  test_df_sorted['L4_sg_app_std_dev'] = test_df_sorted.groupby('player_name')['sg_app'].transform(lambda x: x.rolling(window=4, min_periods=1).std())
  test_df_sorted['L20_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=20, min_periods=1).std())
  test_df_sorted['L16_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=16, min_periods=1).std())
  test_df_sorted['L12_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transform(lambda x: x.rolling(window=12, min_periods=1).std())
  test_df_sorted['L8_sg_ott_std_dev'] = test_df_sorted.groupby('player_name')['sg_ott'].transfo

In [80]:
# test_df_sorted['career_rounds_played'] = test_df_sorted.groupby('player_name')['round_score'].transform(lambda x: x.expanding(min_periods=1).count())

In [81]:
now = datetime.now()

# Calculate days since each round
test_df_sorted['Days_Since'] = (now - test_df_sorted['round_completed']).dt.days

test_df_sorted['Last_365_Days'] = np.where(test_df_sorted['Days_Since'] <= 365, 1, 0)
test_df_sorted['Last_180_Days'] = np.where(test_df_sorted['Days_Since'] <= 180, 1, 0)
test_df_sorted['Last_90_Days'] = np.where(test_df_sorted['Days_Since'] <= 90, 1, 0)
test_df_sorted['Last_60_Days'] = np.where(test_df_sorted['Days_Since'] <= 60, 1, 0)
test_df_sorted['Last_30_Days'] = np.where(test_df_sorted['Days_Since'] <= 30, 1, 0)
test_df_sorted['Last_10_Days'] = np.where(test_df_sorted['Days_Since'] <= 10, 1, 0)
test_df_sorted['Last_5_Days'] = np.where(test_df_sorted['Days_Since'] <= 5, 1, 0)

  test_df_sorted['Days_Since'] = (now - test_df_sorted['round_completed']).dt.days
  test_df_sorted['Last_365_Days'] = np.where(test_df_sorted['Days_Since'] <= 365, 1, 0)
  test_df_sorted['Last_180_Days'] = np.where(test_df_sorted['Days_Since'] <= 180, 1, 0)
  test_df_sorted['Last_90_Days'] = np.where(test_df_sorted['Days_Since'] <= 90, 1, 0)
  test_df_sorted['Last_60_Days'] = np.where(test_df_sorted['Days_Since'] <= 60, 1, 0)
  test_df_sorted['Last_30_Days'] = np.where(test_df_sorted['Days_Since'] <= 30, 1, 0)
  test_df_sorted['Last_10_Days'] = np.where(test_df_sorted['Days_Since'] <= 10, 1, 0)
  test_df_sorted['Last_5_Days'] = np.where(test_df_sorted['Days_Since'] <= 5, 1, 0)


In [82]:
test_df_sorted.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3836 entries, 2240 to 2824
Columns: 169 entries, tour to Last_5_Days
dtypes: datetime64[ns](2), float64(138), int32(2), int64(23), object(4)
memory usage: 4.9+ MB


In [83]:
test_df_sorted['lagged_year'] = test_df_sorted.groupby('dg_id')['year'].shift(1)
test_df_sorted['lagged_season'] = test_df_sorted.groupby('dg_id')['season'].shift(1)
test_df_sorted['lagged_event_id'] = test_df_sorted.groupby('dg_id')['event_id'].shift(1)
test_df_sorted['lagged_round_num'] = test_df_sorted.groupby('dg_id')['round_num'].shift(1)
test_df_sorted['lagged_course_num'] = test_df_sorted.groupby('dg_id')['course_num'].shift(1)
test_df_sorted['lagged_course_par'] = test_df_sorted.groupby('dg_id')['course_par'].shift(1)
test_df_sorted['lagged_start_hole'] = test_df_sorted.groupby('dg_id')['start_hole'].shift(1)
test_df_sorted['lagged_round_score'] = test_df_sorted.groupby('dg_id')['round_score'].shift(1)
test_df_sorted['lagged_sg_putt'] = test_df_sorted.groupby('dg_id')['sg_putt'].shift(1)
test_df_sorted['lagged_sg_arg'] = test_df_sorted.groupby('dg_id')['sg_arg'].shift(1)
test_df_sorted['lagged_sg_app'] = test_df_sorted.groupby('dg_id')['sg_app'].shift(1)
test_df_sorted['lagged_sg_ott'] = test_df_sorted.groupby('dg_id')['sg_ott'].shift(1)
test_df_sorted['lagged_sg_t2g'] = test_df_sorted.groupby('dg_id')['sg_t2g'].shift(1)
test_df_sorted['lagged_sg_total'] = test_df_sorted.groupby('dg_id')['sg_total'].shift(1)
test_df_sorted['lagged_driving_dist'] = test_df_sorted.groupby('dg_id')['driving_dist'].shift(1)
test_df_sorted['lagged_driving_acc'] = test_df_sorted.groupby('dg_id')['driving_acc'].shift(1)
test_df_sorted['lagged_gir'] = test_df_sorted.groupby('dg_id')['gir'].shift(1)
test_df_sorted['lagged_scrambling'] = test_df_sorted.groupby('dg_id')['scrambling'].shift(1)
test_df_sorted['lagged_prox_rgh'] = test_df_sorted.groupby('dg_id')['prox_rgh'].shift(1)
test_df_sorted['lagged_prox_fw'] = test_df_sorted.groupby('dg_id')['prox_fw'].shift(1)
test_df_sorted['lagged_great_shots'] = test_df_sorted.groupby('dg_id')['great_shots'].shift(1)
test_df_sorted['lagged_poor_shots'] = test_df_sorted.groupby('dg_id')['poor_shots'].shift(1)
test_df_sorted['lagged_month'] = test_df_sorted.groupby('dg_id')['month'].shift(1)
test_df_sorted['lagged_day'] = test_df_sorted.groupby('dg_id')['day'].shift(1)
test_df_sorted['lagged_fin_num'] = test_df_sorted.groupby('dg_id')['fin_num'].shift(1)
test_df_sorted['lagged_teetime_numeric'] = test_df_sorted.groupby('dg_id')['teetime_numeric'].shift(1)
test_df_sorted['lagged_ohe_win'] = test_df_sorted.groupby('dg_id')['ohe_win'].shift(1)
test_df_sorted['lagged_ohe_top_five'] = test_df_sorted.groupby('dg_id')['ohe_top_five'].shift(1)
test_df_sorted['lagged_ohe_top_ten'] = test_df_sorted.groupby('dg_id')['ohe_top_ten'].shift(1)
test_df_sorted['lagged_ohe_top_twenty'] = test_df_sorted.groupby('dg_id')['ohe_top_twenty'].shift(1)
test_df_sorted['lagged_ohe_make_cut'] = test_df_sorted.groupby('dg_id')['ohe_make_cut'].shift(1)

  test_df_sorted['lagged_year'] = test_df_sorted.groupby('dg_id')['year'].shift(1)
  test_df_sorted['lagged_season'] = test_df_sorted.groupby('dg_id')['season'].shift(1)
  test_df_sorted['lagged_event_id'] = test_df_sorted.groupby('dg_id')['event_id'].shift(1)
  test_df_sorted['lagged_round_num'] = test_df_sorted.groupby('dg_id')['round_num'].shift(1)
  test_df_sorted['lagged_course_num'] = test_df_sorted.groupby('dg_id')['course_num'].shift(1)
  test_df_sorted['lagged_course_par'] = test_df_sorted.groupby('dg_id')['course_par'].shift(1)
  test_df_sorted['lagged_start_hole'] = test_df_sorted.groupby('dg_id')['start_hole'].shift(1)
  test_df_sorted['lagged_round_score'] = test_df_sorted.groupby('dg_id')['round_score'].shift(1)
  test_df_sorted['lagged_sg_putt'] = test_df_sorted.groupby('dg_id')['sg_putt'].shift(1)
  test_df_sorted['lagged_sg_arg'] = test_df_sorted.groupby('dg_id')['sg_arg'].shift(1)
  test_df_sorted['lagged_sg_app'] = test_df_sorted.groupby('dg_id')['sg_app'].shift(1)
 

In [84]:
test_df_sorted.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3836 entries, 2240 to 2824
Columns: 200 entries, tour to lagged_ohe_make_cut
dtypes: datetime64[ns](2), float64(169), int32(2), int64(23), object(4)
memory usage: 5.9+ MB


In [85]:
test_df_sorted.isna().sum()

tour                             0
year                             0
season                           0
event_completed                  0
event_name                       0
event_id                         0
player_name                      0
dg_id                            0
round_num                        0
course_name                      0
course_num                       0
course_par                       0
start_hole                       0
round_score                      0
sg_putt                          0
sg_arg                           0
sg_app                           0
sg_ott                           0
sg_t2g                           0
sg_total                         0
driving_dist                     0
driving_acc                      0
gir                              0
scrambling                       0
prox_rgh                         0
prox_fw                          0
great_shots                      0
poor_shots                       0
round_completed     

In [86]:
test_df_sorted = test_df_sorted.dropna(axis=0)

In [87]:
test_df_sorted.isna().sum()

tour                           0
year                           0
season                         0
event_completed                0
event_name                     0
event_id                       0
player_name                    0
dg_id                          0
round_num                      0
course_name                    0
course_num                     0
course_par                     0
start_hole                     0
round_score                    0
sg_putt                        0
sg_arg                         0
sg_app                         0
sg_ott                         0
sg_t2g                         0
sg_total                       0
driving_dist                   0
driving_acc                    0
gir                            0
scrambling                     0
prox_rgh                       0
prox_fw                        0
great_shots                    0
poor_shots                     0
round_completed                0
month                          0
day       

In [88]:
test_df_sorted.loc[test_df_sorted['player_name'] == 'Scheffler, Scottie',]

Unnamed: 0,tour,year,season,event_completed,event_name,event_id,player_name,dg_id,round_num,course_name,course_num,course_par,start_hole,round_score,sg_putt,sg_arg,sg_app,sg_ott,sg_t2g,sg_total,driving_dist,driving_acc,gir,scrambling,prox_rgh,prox_fw,great_shots,poor_shots,round_completed,month,day,fin_num,teetime_numeric,ohe_win,ohe_top_five,ohe_top_ten,ohe_top_twenty,ohe_make_cut,L20_moving_avg_sg_putt,L16_moving_avg_sg_putt,L12_moving_avg_sg_putt,L8_moving_avg_sg_putt,L4_moving_avg_sg_putt,L20_moving_avg_sg_arg,L16_moving_avg_sg_arg,L12_moving_avg_sg_arg,L8_moving_avg_sg_arg,L4_moving_avg_sg_arg,L20_moving_avg_sg_app,L16_moving_avg_sg_app,L12_moving_avg_sg_app,L8_moving_avg_sg_app,L4_moving_avg_sg_app,L20_moving_avg_sg_ott,L16_moving_avg_sg_ott,L12_moving_avg_sg_ott,L8_moving_avg_sg_ott,L4_moving_avg_sg_ott,L20_moving_avg_sg_t2g,L16_moving_avg_sg_t2g,L12_moving_avg_sg_t2g,L8_moving_avg_sg_t2g,L4_moving_avg_sg_t2g,L20_moving_avg_sg_total,L16_moving_avg_sg_total,L12_moving_avg_sg_total,L8_moving_avg_sg_total,L4_moving_avg_sg_total,L20_moving_avg_driving_dist,L16_moving_avg_driving_dist,L12_moving_avg_driving_dist,L8_moving_avg_driving_dist,L4_moving_avg_driving_dist,L20_moving_avg_driving_acc,L16_moving_avg_driving_acc,L12_moving_avg_driving_acc,L8_moving_avg_driving_acc,L4_moving_avg_driving_acc,L20_moving_avg_gir,L16_moving_avg_gir,L12_moving_avg_gir,L8_moving_avg_gir,L4_moving_avg_gir,L20_moving_avg_scrambling,L16_moving_avg_scrambling,L12_moving_avg_scrambling,L8_moving_avg_scrambling,L4_moving_avg_scrambling,career_avg,L20_moving_avg,L16_moving_avg,L12_moving_avg,L8_moving_avg,L4_moving_avg,career_min,L20_moving_min,L16_moving_min,L12_moving_min,L8_moving_min,L4_moving_min,career_max,L20_moving_max,L16_moving_max,L12_moving_max,L8_moving_max,L4_moving_max,L20_round_score_std_dev,L16_round_score_std_dev,L12_round_score_std_dev,L8_round_score_std_dev,L4_round_score_std_dev,L20_sg_putt_std_dev,L16__sg_putt_std_dev,L12_putt_std_dev,L8_putt_std_dev,L4_sg_putt_std_dev,L20_sg_arg_std_dev,L16_sg_arg_std_dev,L12_sg_arg_std_dev,L8_sg_arg_std_dev,L4_sg_arg_std_dev,L20_sg_app_std_dev,L16_app_std_dev,L12_app_std_dev,L8_sg_app_std_dev,L4_sg_app_std_dev,L20_sg_ott_std_dev,L16_sg_ott_std_dev,L12_sg_ott_std_dev,L8_sg_ott_std_dev,L4_sg_ott_std_dev,L20_sg_t2g_std_dev,L16_sg_t2g_std_dev,L12_sg_t2g_std_dev,L8_sg_t2g_std_dev,L4_sg_t2g_std_dev,L20_sg_total_std_dev,L16_sg_total_std_dev,L12_sg_total_std_dev,L8_total_std_dev,L4_sg_total_std_dev,L20_driving_dist_std_dev,L16_driving_dist_std_dev,L12_driving_dist_std_dev,L8_driving_dist_std_dev,L4_driving_dist_std_dev,L20_driving_acc_std_dev,L16_driving_acc_std_dev,L12_driving_acc_std_dev,L8_driving_acc_std_dev,L4_driving_acc_std_dev,L20_gir_std_dev,L16_gir_std_dev,L12_gir_std_dev,L8_gir_std_dev,L4_gir_std_dev,L20_scrambling_std_dev,L16_scrambling_std_dev,L12_scrambling_std_dev,L8_scrambling_std_dev,L4_scrambling_std_dev,Days_Since,Last_365_Days,Last_180_Days,Last_90_Days,Last_60_Days,Last_30_Days,Last_10_Days,Last_5_Days,lagged_year,lagged_season,lagged_event_id,lagged_round_num,lagged_course_num,lagged_course_par,lagged_start_hole,lagged_round_score,lagged_sg_putt,lagged_sg_arg,lagged_sg_app,lagged_sg_ott,lagged_sg_t2g,lagged_sg_total,lagged_driving_dist,lagged_driving_acc,lagged_gir,lagged_scrambling,lagged_prox_rgh,lagged_prox_fw,lagged_great_shots,lagged_poor_shots,lagged_month,lagged_day,lagged_fin_num,lagged_teetime_numeric,lagged_ohe_win,lagged_ohe_top_five,lagged_ohe_top_ten,lagged_ohe_top_twenty,lagged_ohe_make_cut
61,pga,2024,2024,2024-01-07,The Sentry,16,"Scheffler, Scottie",18417,2,Plantation Course at Kapalua,656,73,1,64,0.937,1.591,0.45,0.497,2.537,3.475,325.3,0.533,0.889,0.833,48.887,19.153,3.0,1.0,2024-01-05,1,5,5,9.55,0,1,1,1,0,0.6245,0.6245,0.6245,0.6245,0.6245,0.8675,0.8675,0.8675,0.8675,0.8675,1.0775,1.0775,1.0775,1.0775,1.0775,0.583,0.583,0.583,0.583,0.583,2.5275,2.5275,2.5275,2.5275,2.5275,3.153,3.153,3.153,3.153,3.153,324.0,324.0,324.0,324.0,324.0,0.5,0.5,0.5,0.5,0.5,0.9165,0.9165,0.9165,0.9165,0.9165,0.6665,0.6665,0.6665,0.6665,0.6665,65.0,65.0,65.0,65.0,65.0,65.0,64.0,64.0,64.0,64.0,64.0,64.0,66.0,66.0,66.0,66.0,66.0,66.0,1.414214,1.414214,1.414214,1.414214,1.414214,0.441942,0.441942,0.441942,0.441942,0.441942,1.023184,1.023184,1.023184,1.023184,1.023184,0.887419,0.887419,0.887419,0.887419,0.887419,0.121622,0.121622,0.121622,0.121622,0.121622,0.013435,0.013435,0.013435,0.013435,0.013435,0.455377,0.455377,0.455377,0.455377,0.455377,1.838478,1.838478,1.838478,1.838478,1.838478,0.046669,0.046669,0.046669,0.046669,0.046669,0.038891,0.038891,0.038891,0.038891,0.038891,0.235467,0.235467,0.235467,0.235467,0.235467,84,1,1,1,0,0,0,0,2024.0,2024.0,16.0,1.0,656.0,73.0,1.0,66.0,0.312,0.144,1.705,0.669,2.518,2.831,322.7,0.467,0.944,0.5,33.187,30.55,4.0,2.0,1.0,4.0,5.0,11.65,0.0,1.0,1.0,1.0,0.0
60,pga,2024,2024,2024-01-07,The Sentry,16,"Scheffler, Scottie",18417,3,Plantation Course at Kapalua,656,73,1,71,-2.133,-0.247,0.596,1.275,1.624,-0.508,322.3,0.4,0.889,0.5,70.139,26.375,3.0,4.0,2024-01-06,1,6,5,9.75,0,1,1,1,0,-0.294667,-0.294667,-0.294667,-0.294667,-0.294667,0.496,0.496,0.496,0.496,0.496,0.917,0.917,0.917,0.917,0.917,0.813667,0.813667,0.813667,0.813667,0.813667,2.226333,2.226333,2.226333,2.226333,2.226333,1.932667,1.932667,1.932667,1.932667,1.932667,323.433333,323.433333,323.433333,323.433333,323.433333,0.466667,0.466667,0.466667,0.466667,0.466667,0.907333,0.907333,0.907333,0.907333,0.907333,0.611,0.611,0.611,0.611,0.611,67.0,67.0,67.0,67.0,67.0,67.0,64.0,64.0,64.0,64.0,64.0,64.0,71.0,71.0,71.0,71.0,71.0,71.0,3.605551,3.605551,3.605551,3.605551,3.605551,1.622424,1.622424,1.622424,1.622424,1.622424,0.96824,0.96824,0.96824,0.96824,0.96824,0.686321,0.686321,0.686321,0.686321,0.686321,0.408678,0.408678,0.408678,0.408678,0.408678,0.521722,0.521722,0.521722,0.521722,0.521722,2.138066,2.138066,2.138066,2.138066,2.138066,1.628906,1.628906,1.628906,1.628906,1.628906,0.066501,0.066501,0.066501,0.066501,0.066501,0.031754,0.031754,0.031754,0.031754,0.031754,0.192258,0.192258,0.192258,0.192258,0.192258,83,1,1,1,0,0,0,0,2024.0,2024.0,16.0,2.0,656.0,73.0,1.0,64.0,0.937,1.591,0.45,0.497,2.537,3.475,325.3,0.533,0.889,0.833,48.887,19.153,3.0,1.0,1.0,5.0,5.0,9.55,0.0,1.0,1.0,1.0,0.0
72,pga,2024,2024,2024-01-07,The Sentry,16,"Scheffler, Scottie",18417,4,Plantation Course at Kapalua,656,73,1,66,-1.669,-0.534,1.898,0.965,2.33,0.661,321.5,0.733,0.944,0.75,13.492,15.771,5.0,3.0,2024-01-07,1,7,5,9.35,0,1,1,1,0,-0.63825,-0.63825,-0.63825,-0.63825,-0.63825,0.2385,0.2385,0.2385,0.2385,0.2385,1.16225,1.16225,1.16225,1.16225,1.16225,0.8515,0.8515,0.8515,0.8515,0.8515,2.25225,2.25225,2.25225,2.25225,2.25225,1.61475,1.61475,1.61475,1.61475,1.61475,322.95,322.95,322.95,322.95,322.95,0.53325,0.53325,0.53325,0.53325,0.53325,0.9165,0.9165,0.9165,0.9165,0.9165,0.64575,0.64575,0.64575,0.64575,0.64575,66.75,66.75,66.75,66.75,66.75,66.75,64.0,64.0,64.0,64.0,64.0,64.0,71.0,71.0,71.0,71.0,71.0,71.0,2.986079,2.986079,2.986079,2.986079,2.986079,1.492326,1.492326,1.492326,1.492326,1.492326,0.943513,0.943513,0.943513,0.943513,0.943513,0.744725,0.744725,0.744725,0.744725,0.744725,0.342155,0.342155,0.342155,0.342155,0.342155,0.429127,0.429127,0.429127,0.429127,0.429127,1.857911,1.857911,1.857911,1.857911,1.857911,1.644182,1.644182,1.644182,1.644182,1.644182,0.143811,0.143811,0.143811,0.143811,0.143811,0.031754,0.031754,0.031754,0.031754,0.031754,0.171675,0.171675,0.171675,0.171675,0.171675,82,1,1,1,0,0,0,0,2024.0,2024.0,16.0,3.0,656.0,73.0,1.0,71.0,-2.133,-0.247,0.596,1.275,1.624,-0.508,322.3,0.4,0.889,0.5,70.139,26.375,3.0,4.0,1.0,6.0,5.0,9.75,0.0,1.0,1.0,1.0,0.0
812,pga,2024,2024,2024-01-21,The American Express,2,"Scheffler, Scottie",18417,3,Pete Dye Stadium Course,704,72,1,69,-1.345,1.315,0.141,-0.784,0.672,-0.673,289.5,0.429,0.667,0.889,41.407,25.7,0.0,3.0,2024-01-20,1,20,17,10.516667,0,0,0,1,0,-0.7796,-0.7796,-0.7796,-0.7796,-1.0525,0.4538,0.4538,0.4538,0.4538,0.53125,0.958,0.958,0.958,0.958,0.77125,0.5244,0.5244,0.5244,0.5244,0.48825,1.9362,1.9362,1.9362,1.9362,1.79075,1.1572,1.1572,1.1572,1.1572,0.73875,316.26,316.26,316.26,316.26,314.65,0.5124,0.5124,0.5124,0.5124,0.52375,0.8666,0.8666,0.8666,0.8666,0.84725,0.6944,0.6944,0.6944,0.6944,0.743,67.2,67.2,67.2,67.2,67.2,67.5,64.0,64.0,64.0,64.0,64.0,64.0,71.0,71.0,71.0,71.0,71.0,71.0,2.774887,2.774887,2.774887,2.774887,3.109126,1.33048,1.33048,1.33048,1.33048,1.365189,0.948385,0.948385,0.948385,0.948385,1.076687,0.790286,0.790286,0.790286,0.790286,0.774746,0.789161,0.789161,0.789161,0.789161,0.906451,0.798467,0.798467,0.798467,0.798467,0.842039,1.906734,1.906734,1.906734,1.906734,1.918379,15.026909,15.026909,15.026909,15.026909,16.846266,0.132984,0.132984,0.132984,0.132984,0.150735,0.114919,0.114919,0.114919,0.114919,0.122932,0.184224,0.184224,0.184224,0.184224,0.171769,69,1,1,1,0,0,0,0,2024.0,2024.0,16.0,4.0,656.0,73.0,1.0,66.0,-1.669,-0.534,1.898,0.965,2.33,0.661,321.5,0.733,0.944,0.75,13.492,15.771,5.0,3.0,1.0,7.0,5.0,9.35,0.0,1.0,1.0,1.0,0.0
760,pga,2024,2024,2024-01-21,The American Express,2,"Scheffler, Scottie",18417,4,Pete Dye Stadium Course,704,72,10,65,0.125,-0.248,3.192,1.215,4.159,4.284,298.0,0.786,1.0,0.5,34.909,22.99,2.0,2.0,2024-01-21,1,21,17,10.033333,0,0,0,1,0,-0.628833,-0.628833,-0.628833,-0.628833,-1.2555,0.336833,0.336833,0.336833,0.336833,0.0715,1.330333,1.330333,1.330333,1.330333,1.45675,0.6395,0.6395,0.6395,0.6395,0.66775,2.306667,2.306667,2.306667,2.306667,2.19625,1.678333,1.678333,1.678333,1.678333,0.941,313.216667,313.216667,313.216667,313.216667,307.825,0.558,0.558,0.558,0.558,0.587,0.888833,0.888833,0.888833,0.888833,0.875,0.662,0.662,0.662,0.662,0.65975,66.833333,66.833333,66.833333,66.833333,66.833333,67.75,64.0,64.0,64.0,64.0,64.0,65.0,71.0,71.0,71.0,71.0,71.0,71.0,2.639444,2.639444,2.639444,2.639444,2.753785,1.246004,1.246004,1.246004,1.246004,0.975496,0.89534,0.89534,0.89534,0.89534,0.83993,1.153878,1.153878,1.153878,1.153878,1.375728,0.760071,0.760071,0.760071,0.760071,0.9771,1.154779,1.154779,1.154779,1.154779,1.474346,2.130255,2.130255,2.130255,2.130255,2.306415,15.369374,15.369374,15.369374,15.369374,16.621948,0.163169,0.163169,0.163169,0.163169,0.200707,0.116323,0.116323,0.116323,0.116323,0.145884,0.182891,0.182891,0.182891,0.182891,0.192995,68,1,1,1,0,0,0,0,2024.0,2024.0,2.0,3.0,704.0,72.0,1.0,69.0,-1.345,1.315,0.141,-0.784,0.672,-0.673,289.5,0.429,0.667,0.889,41.407,25.7,0.0,3.0,1.0,20.0,17.0,10.516667,0.0,0.0,0.0,1.0,0.0
1750,pga,2024,2024,2024-02-04,AT&T Pebble Beach Pro-Am,5,"Scheffler, Scottie",18417,2,Pebble Beach Golf Links,5,72,1,64,2.601,0.287,2.136,0.351,2.774,5.375,284.0,0.571,0.944,0.8,44.078,25.793,4.0,1.0,2024-02-02,2,2,6,10.15,0,0,1,1,0,-0.167429,-0.167429,-0.167429,-0.167429,-0.072,0.329714,0.329714,0.329714,0.329714,0.205,1.445429,1.445429,1.445429,1.445429,1.84175,0.598286,0.598286,0.598286,0.598286,0.43675,2.373429,2.373429,2.373429,2.373429,2.48375,2.206429,2.206429,2.206429,2.206429,2.41175,309.042857,309.042857,309.042857,309.042857,298.25,0.559857,0.559857,0.559857,0.559857,0.62975,0.896714,0.896714,0.896714,0.896714,0.88875,0.681714,0.681714,0.681714,0.681714,0.73475,66.428571,66.428571,66.428571,66.428571,66.428571,66.0,64.0,64.0,64.0,64.0,64.0,64.0,71.0,71.0,71.0,71.0,71.0,69.0,2.636737,2.636737,2.636737,2.636737,2.160247,1.668542,1.668542,1.668542,1.668542,1.94548,0.817547,0.817547,0.817547,0.817547,0.814484,1.096475,1.096475,1.096475,1.096475,1.265632,0.702363,0.702363,0.702363,0.702363,0.891124,1.06886,1.06886,1.06886,1.06886,1.437218,2.394546,2.394546,2.394546,2.394546,2.879061,17.854771,17.854771,17.854771,17.854771,16.535316,0.149033,0.149033,0.149033,0.149033,0.162097,0.108215,0.108215,0.108215,0.108215,0.150172,0.174914,0.174914,0.174914,0.174914,0.166724,56,1,1,1,1,0,0,0,2024.0,2024.0,2.0,4.0,704.0,72.0,10.0,65.0,0.125,-0.248,3.192,1.215,4.159,4.284,298.0,0.786,1.0,0.5,34.909,22.99,2.0,2.0,1.0,21.0,17.0,10.033333,0.0,0.0,0.0,1.0,0.0
1762,pga,2024,2024,2024-02-04,AT&T Pebble Beach Pro-Am,5,"Scheffler, Scottie",18417,3,Pebble Beach Golf Links,5,72,1,70,-1.764,-0.497,2.274,-0.938,0.839,-0.925,284.6,0.571,0.833,0.4,15.914,23.958,4.0,4.0,2024-02-03,2,3,6,10.583333,0,0,1,1,0,-0.367,-0.367,-0.367,-0.367,-0.09575,0.226375,0.226375,0.226375,0.226375,0.21425,1.549,1.549,1.549,1.549,1.93575,0.40625,0.40625,0.40625,0.40625,-0.039,2.181625,2.181625,2.181625,2.181625,2.111,1.815,1.815,1.815,1.815,2.01525,305.9875,305.9875,305.9875,305.9875,289.025,0.56125,0.56125,0.56125,0.56125,0.58925,0.88875,0.88875,0.88875,0.88875,0.861,0.6465,0.6465,0.6465,0.6465,0.64725,66.875,66.875,66.875,66.875,66.875,67.0,64.0,64.0,64.0,64.0,64.0,64.0,71.0,71.0,71.0,71.0,71.0,70.0,2.748376,2.748376,2.748376,2.748376,2.94392,1.644671,1.644671,1.644671,1.644671,1.971875,0.811377,0.811377,0.811377,0.811377,0.803429,1.056561,1.056561,1.056561,1.056561,1.285017,0.847267,0.847267,0.847267,0.847267,1.014535,1.128522,1.128522,1.128522,1.128522,1.665591,2.477995,2.477995,2.477995,2.477995,3.281611,18.652954,18.652954,18.652954,18.652954,6.470639,0.138034,0.138034,0.138034,0.138034,0.14726,0.102689,0.102689,0.102689,0.102689,0.146776,0.190117,0.190117,0.190117,0.190117,0.23423,55,1,1,1,1,0,0,0,2024.0,2024.0,5.0,2.0,5.0,72.0,1.0,64.0,2.601,0.287,2.136,0.351,2.774,5.375,284.0,0.571,0.944,0.8,44.078,25.793,4.0,1.0,2.0,2.0,6.0,10.15,0.0,0.0,1.0,1.0,0.0
2285,pga,2024,2024,2024-02-11,WM Phoenix Open,3,"Scheffler, Scottie",18417,1,TPC Scottsdale (Stadium Course),510,71,1,68,0.612,-0.574,1.243,1.614,2.282,2.894,301.2,0.643,0.944,0.333,39.947,48.171,6.0,1.0,2024-02-08,2,8,3,16.1,0,1,1,1,0,-0.258222,-0.258222,-0.258222,-0.3295,0.3935,0.137444,0.137444,0.137444,0.136625,-0.258,1.515,1.515,1.515,1.49125,2.21125,0.540444,0.540444,0.540444,0.524375,0.5605,2.192778,2.192778,2.192778,2.152125,2.5135,1.934889,1.934889,1.934889,1.822875,2.907,305.455556,305.455556,305.455556,303.3,291.95,0.570333,0.570333,0.570333,0.58325,0.64275,0.894889,0.894889,0.894889,0.88875,0.93025,0.611667,0.611667,0.611667,0.625625,0.50825,67.0,67.0,67.0,67.0,67.125,66.75,64.0,64.0,64.0,64.0,64.0,64.0,71.0,71.0,71.0,71.0,71.0,70.0,2.598076,2.598076,2.598076,2.748376,2.753785,1.572679,1.572679,1.572679,1.665651,1.793293,0.804499,0.804499,0.804499,0.860041,0.38906,0.993572,0.993572,0.993572,1.059439,0.797701,0.888933,0.888933,0.888933,0.948911,1.129546,1.056166,1.056166,1.056166,1.121536,1.370289,2.34569,2.34569,2.34569,2.481782,2.749033,17.521067,17.521067,17.521067,17.408372,8.932898,0.131963,0.131963,0.131963,0.134855,0.101352,0.097806,0.097806,0.097806,0.102689,0.070002,0.206269,0.206269,0.206269,0.215919,0.20625,50,1,1,1,1,0,0,0,2024.0,2024.0,5.0,3.0,5.0,72.0,1.0,70.0,-1.764,-0.497,2.274,-0.938,0.839,-0.925,284.6,0.571,0.833,0.4,15.914,23.958,4.0,4.0,2.0,3.0,6.0,10.583333,0.0,0.0,1.0,1.0,0.0
2284,pga,2024,2024,2024-02-11,WM Phoenix Open,3,"Scheffler, Scottie",18417,2,TPC Scottsdale (Stadium Course),510,71,10,66,0.635,-0.273,2.053,0.868,2.647,3.282,304.4,0.571,0.722,1.0,19.133,21.508,3.0,2.0,2024-02-09,2,9,3,12.966667,0,1,1,1,0,-0.1689,-0.1689,-0.1689,-0.36725,0.521,0.0964,0.0964,0.0964,-0.096375,-0.26425,1.5688,1.5688,1.5688,1.691625,1.9265,0.5732,0.5732,0.5732,0.57075,0.47375,2.2382,2.2382,2.2382,2.165875,2.1355,2.0696,2.0696,2.0696,1.79875,2.6565,305.35,305.35,305.35,300.6875,293.55,0.5704,0.5704,0.5704,0.588,0.589,0.8776,0.8776,0.8776,0.867875,0.86075,0.6505,0.6505,0.6505,0.6465,0.63325,66.9,66.9,66.9,66.9,67.375,67.0,64.0,64.0,64.0,64.0,64.0,64.0,71.0,71.0,71.0,71.0,71.0,70.0,2.469818,2.469818,2.469818,2.503569,2.581989,1.5094,1.5094,1.5094,1.636005,1.785955,0.769514,0.769514,0.769514,0.632,0.389046,0.952073,0.952073,0.952073,0.983219,0.464694,0.844471,0.844471,0.844471,0.956418,1.074509,1.006069,1.006069,1.006069,1.127586,0.889136,2.252192,2.252192,2.252192,2.464304,2.624566,16.522392,16.522392,16.522392,15.042648,10.763364,0.124416,0.124416,0.124416,0.133495,0.036,0.107202,0.107202,0.107202,0.118403,0.106274,0.229999,0.229999,0.229999,0.244952,0.319826,49,1,1,1,1,0,0,0,2024.0,2024.0,3.0,1.0,510.0,71.0,1.0,68.0,0.612,-0.574,1.243,1.614,2.282,2.894,301.2,0.643,0.944,0.333,39.947,48.171,6.0,1.0,2.0,8.0,3.0,16.1,0.0,1.0,1.0,1.0,0.0
2283,pga,2024,2024,2024-02-11,WM Phoenix Open,3,"Scheffler, Scottie",18417,3,TPC Scottsdale (Stadium Course),510,71,1,66,0.949,0.422,1.563,0.943,2.928,3.877,299.4,0.571,0.833,0.714,60.692,22.41,2.0,4.0,2024-02-10,2,10,3,16.0,0,1,1,1,0,-0.067273,-0.067273,-0.067273,0.018,0.108,0.126,0.126,0.126,-0.01275,-0.2305,1.568273,1.568273,1.568273,1.8125,1.78325,0.606818,0.606818,0.606818,0.52925,0.62175,2.300909,2.300909,2.300909,2.328875,2.174,2.233909,2.233909,2.233909,2.346875,2.282,304.809091,304.809091,304.809091,297.825,297.4,0.570455,0.570455,0.570455,0.609375,0.589,0.873545,0.873545,0.873545,0.860875,0.833,0.656273,0.656273,0.656273,0.67325,0.61175,66.818182,66.818182,66.818182,66.818182,66.75,67.5,64.0,64.0,64.0,64.0,64.0,66.0,71.0,71.0,71.0,71.0,70.0,70.0,2.358736,2.358736,2.358736,2.052873,1.914854,1.471078,1.471078,1.471078,1.519534,1.257433,0.736596,0.736596,0.736596,0.65313,0.453349,0.903217,0.903217,0.903217,0.883685,0.466898,0.808857,0.808857,0.808857,0.928284,1.092584,0.976839,0.976839,0.976839,1.132305,0.928464,2.205018,2.205018,2.205018,2.363534,2.175884,15.776847,15.776847,15.776847,12.264787,8.780281,0.118032,0.118032,0.118032,0.110864,0.036,0.102586,0.102586,0.102586,0.118631,0.090631,0.219035,0.219035,0.219035,0.238262,0.307535,48,1,1,1,1,0,0,0,2024.0,2024.0,3.0,2.0,510.0,71.0,10.0,66.0,0.635,-0.273,2.053,0.868,2.647,3.282,304.4,0.571,0.722,1.0,19.133,21.508,3.0,2.0,2.0,9.0,3.0,12.966667,0.0,1.0,1.0,1.0,0.0


***

In [89]:
train_df_sorted.info()
test_df_sorted.info()

<class 'pandas.core.frame.DataFrame'>
Index: 97157 entries, 165 to 128293
Columns: 200 entries, tour to lagged_ohe_make_cut
dtypes: datetime64[ns](2), float64(170), int32(2), int64(22), object(4)
memory usage: 148.2+ MB
<class 'pandas.core.frame.DataFrame'>
Index: 3571 entries, 2241 to 2824
Columns: 200 entries, tour to lagged_ohe_make_cut
dtypes: datetime64[ns](2), float64(169), int32(2), int64(23), object(4)
memory usage: 5.4+ MB


In [90]:
current_df = test_df_sorted.copy()

***

***

# Linear Regression

In [91]:
train_df_sorted = train_df_sorted.drop(['tour','event_name','course_name','player_name','round_completed','event_completed','year'
                                        ,'season','event_completed','event_id','dg_id','round_num','course_num','course_par'
                                        ,'start_hole','sg_putt','sg_arg','sg_app','sg_ott','sg_t2g','sg_total','driving_dist'
                                        ,'driving_acc','gir','scrambling','prox_rgh','prox_fw','great_shots','poor_shots'
                                        ,'round_completed','month','day','fin_num','teetime_numeric','ohe_win','ohe_top_five'
                                        ,'ohe_top_ten','ohe_top_twenty','ohe_make_cut'], axis=1)

test_df_sorted = test_df_sorted.drop(['tour','event_name','course_name','player_name','round_completed','event_completed','year'
                                        ,'season','event_completed','event_id','dg_id','round_num','course_num','course_par'
                                        ,'start_hole','sg_putt','sg_arg','sg_app','sg_ott','sg_t2g','sg_total','driving_dist'
                                        ,'driving_acc','gir','scrambling','prox_rgh','prox_fw','great_shots','poor_shots'
                                        ,'round_completed','month','day','fin_num','teetime_numeric','ohe_win','ohe_top_five'
                                        ,'ohe_top_ten','ohe_top_twenty','ohe_make_cut'], axis=1)

In [92]:
train_df_sorted.head()

Unnamed: 0,round_score,L20_moving_avg_sg_putt,L16_moving_avg_sg_putt,L12_moving_avg_sg_putt,L8_moving_avg_sg_putt,L4_moving_avg_sg_putt,L20_moving_avg_sg_arg,L16_moving_avg_sg_arg,L12_moving_avg_sg_arg,L8_moving_avg_sg_arg,L4_moving_avg_sg_arg,L20_moving_avg_sg_app,L16_moving_avg_sg_app,L12_moving_avg_sg_app,L8_moving_avg_sg_app,L4_moving_avg_sg_app,L20_moving_avg_sg_ott,L16_moving_avg_sg_ott,L12_moving_avg_sg_ott,L8_moving_avg_sg_ott,L4_moving_avg_sg_ott,L20_moving_avg_sg_t2g,L16_moving_avg_sg_t2g,L12_moving_avg_sg_t2g,L8_moving_avg_sg_t2g,L4_moving_avg_sg_t2g,L20_moving_avg_sg_total,L16_moving_avg_sg_total,L12_moving_avg_sg_total,L8_moving_avg_sg_total,L4_moving_avg_sg_total,L20_moving_avg_driving_dist,L16_moving_avg_driving_dist,L12_moving_avg_driving_dist,L8_moving_avg_driving_dist,L4_moving_avg_driving_dist,L20_moving_avg_driving_acc,L16_moving_avg_driving_acc,L12_moving_avg_driving_acc,L8_moving_avg_driving_acc,L4_moving_avg_driving_acc,L20_moving_avg_gir,L16_moving_avg_gir,L12_moving_avg_gir,L8_moving_avg_gir,L4_moving_avg_gir,L20_moving_avg_scrambling,L16_moving_avg_scrambling,L12_moving_avg_scrambling,L8_moving_avg_scrambling,L4_moving_avg_scrambling,career_avg,L20_moving_avg,L16_moving_avg,L12_moving_avg,L8_moving_avg,L4_moving_avg,career_min,L20_moving_min,L16_moving_min,L12_moving_min,L8_moving_min,L4_moving_min,career_max,L20_moving_max,L16_moving_max,L12_moving_max,L8_moving_max,L4_moving_max,L20_round_score_std_dev,L16_round_score_std_dev,L12_round_score_std_dev,L8_round_score_std_dev,L4_round_score_std_dev,L20_sg_putt_std_dev,L16__sg_putt_std_dev,L12_putt_std_dev,L8_putt_std_dev,L4_sg_putt_std_dev,L20_sg_arg_std_dev,L16_sg_arg_std_dev,L12_sg_arg_std_dev,L8_sg_arg_std_dev,L4_sg_arg_std_dev,L20_sg_app_std_dev,L16_app_std_dev,L12_app_std_dev,L8_sg_app_std_dev,L4_sg_app_std_dev,L20_sg_ott_std_dev,L16_sg_ott_std_dev,L12_sg_ott_std_dev,L8_sg_ott_std_dev,L4_sg_ott_std_dev,L20_sg_t2g_std_dev,L16_sg_t2g_std_dev,L12_sg_t2g_std_dev,L8_sg_t2g_std_dev,L4_sg_t2g_std_dev,L20_sg_total_std_dev,L16_sg_total_std_dev,L12_sg_total_std_dev,L8_total_std_dev,L4_sg_total_std_dev,L20_driving_dist_std_dev,L16_driving_dist_std_dev,L12_driving_dist_std_dev,L8_driving_dist_std_dev,L4_driving_dist_std_dev,L20_driving_acc_std_dev,L16_driving_acc_std_dev,L12_driving_acc_std_dev,L8_driving_acc_std_dev,L4_driving_acc_std_dev,L20_gir_std_dev,L16_gir_std_dev,L12_gir_std_dev,L8_gir_std_dev,L4_gir_std_dev,L20_scrambling_std_dev,L16_scrambling_std_dev,L12_scrambling_std_dev,L8_scrambling_std_dev,L4_scrambling_std_dev,Days_Since,Last_365_Days,Last_180_Days,Last_90_Days,Last_60_Days,Last_30_Days,Last_10_Days,Last_5_Days,lagged_year,lagged_season,lagged_event_id,lagged_round_num,lagged_course_num,lagged_course_par,lagged_start_hole,lagged_round_score,lagged_sg_putt,lagged_sg_arg,lagged_sg_app,lagged_sg_ott,lagged_sg_t2g,lagged_sg_total,lagged_driving_dist,lagged_driving_acc,lagged_gir,lagged_scrambling,lagged_prox_rgh,lagged_prox_fw,lagged_great_shots,lagged_poor_shots,lagged_month,lagged_day,lagged_fin_num,lagged_teetime_numeric,lagged_ohe_win,lagged_ohe_top_five,lagged_ohe_top_ten,lagged_ohe_top_twenty,lagged_ohe_make_cut
165,70.0,-1.004,-1.004,-1.004,-1.004,-1.004,-0.247,-0.247,-0.247,-0.247,-0.247,2.057,2.057,2.057,2.057,2.057,-0.469,-0.469,-0.469,-0.469,-0.469,1.341,1.341,1.341,1.341,1.341,0.3365,0.3365,0.3365,0.3365,0.3365,271.7,271.7,271.7,271.7,271.7,0.75,0.75,0.75,0.75,0.75,0.861,0.861,0.861,0.861,0.861,0.875,0.875,0.875,0.875,0.875,68.0,68.0,68.0,68.0,68.0,68.0,66.0,66.0,66.0,66.0,66.0,66.0,70.0,70.0,70.0,70.0,70.0,70.0,2.828427,2.828427,2.828427,2.828427,2.828427,2.777515,2.777515,2.777515,2.777515,2.777515,0.178191,0.178191,0.178191,0.178191,0.178191,0.033941,0.033941,0.033941,0.033941,0.033941,0.11738,0.11738,0.11738,0.11738,0.11738,0.094752,0.094752,0.094752,0.094752,0.094752,2.872975,2.872975,2.872975,2.872975,2.872975,4.384062,4.384062,4.384062,4.384062,4.384062,0.050912,0.050912,0.050912,0.050912,0.050912,0.039598,0.039598,0.039598,0.039598,0.039598,0.176777,0.176777,0.176777,0.176777,0.176777,2631,0,0,0,0,0,0,0,2017.0,2017.0,6.0,2.0,6.0,70.0,1.0,66.0,0.96,-0.121,2.081,-0.552,1.408,2.368,268.6,0.714,0.889,1.0,33.856,23.69,2.0,0.0,1.0,13.0,74.0,8.333333,0.0,0.0,0.0,0.0,0.0
11941,68.0,-0.8235,-0.8235,-0.8235,-0.8235,-0.8235,-0.21,-0.21,-0.21,-0.21,-0.21,0.2075,0.2075,0.2075,0.2075,0.2075,0.024,0.024,0.024,0.024,0.024,0.0205,0.0205,0.0205,0.0205,0.0205,-0.803,-0.803,-0.803,-0.803,-0.803,268.8,268.8,268.8,268.8,268.8,0.857,0.857,0.857,0.857,0.857,0.8055,0.8055,0.8055,0.8055,0.8055,0.6335,0.6335,0.6335,0.6335,0.6335,70.5,70.5,70.5,70.5,70.5,70.5,68.0,68.0,68.0,68.0,68.0,68.0,73.0,73.0,73.0,73.0,73.0,73.0,3.535534,3.535534,3.535534,3.535534,3.535534,1.144806,1.144806,1.144806,1.144806,1.144806,0.178191,0.178191,0.178191,0.178191,0.178191,0.511238,0.511238,0.511238,0.511238,0.511238,0.702864,0.702864,0.702864,0.702864,0.702864,1.393707,1.393707,1.393707,1.393707,1.393707,2.538513,2.538513,2.538513,2.538513,2.538513,1.838478,1.838478,1.838478,1.838478,1.838478,0.0,0.0,0.0,0.0,0.0,0.038891,0.038891,0.038891,0.038891,0.038891,0.047376,0.047376,0.047376,0.047376,0.047376,2443,0,0,0,0,0,0,0,2017.0,2017.0,518.0,1.0,819.0,71.0,10.0,73.0,-1.633,-0.336,-0.154,-0.473,-0.965,-2.598,270.1,0.857,0.778,0.667,165.655,24.031,3.0,5.0,7.0,20.0,0.0,12.516667,0.0,1.0,1.0,1.0,1.0
7151,78.0,1.986,1.986,1.986,1.986,1.986,-1.498,-1.498,-1.498,-1.498,-1.498,-1.7535,-1.7535,-1.7535,-1.7535,-1.7535,-0.233,-0.233,-0.233,-0.233,-0.233,-3.4855,-3.4855,-3.4855,-3.4855,-3.4855,-1.4995,-1.4995,-1.4995,-1.4995,-1.4995,270.35,270.35,270.35,270.35,270.35,0.6425,0.6425,0.6425,0.6425,0.6425,0.5,0.5,0.5,0.5,0.5,0.656,0.656,0.656,0.656,0.656,74.5,74.5,74.5,74.5,74.5,74.5,71.0,71.0,71.0,71.0,71.0,71.0,78.0,78.0,78.0,78.0,78.0,78.0,4.949747,4.949747,4.949747,4.949747,4.949747,1.073388,1.073388,1.073388,1.073388,1.073388,1.756453,1.756453,1.756453,1.756453,1.756453,1.300369,1.300369,1.300369,1.300369,1.300369,0.562857,0.562857,0.562857,0.562857,0.562857,3.61968,3.61968,3.61968,3.61968,3.61968,4.693068,4.693068,4.693068,4.693068,4.693068,8.555992,8.555992,8.555992,8.555992,8.555992,0.101116,0.101116,0.101116,0.101116,0.101116,0.236174,0.236174,0.236174,0.236174,0.236174,0.132936,0.132936,0.132936,0.132936,0.132936,2513,0,0,0,0,0,0,0,2017.0,2017.0,11.0,1.0,11.0,72.0,10.0,71.0,2.745,-0.256,-0.834,0.165,-0.926,1.819,264.3,0.714,0.667,0.75,81.185,29.971,5.0,3.0,5.0,11.0,0.0,7.35,0.0,1.0,1.0,1.0,1.0
180,65.0,-0.812,-0.812,-0.812,-0.812,-0.812,1.2665,1.2665,1.2665,1.2665,1.2665,0.176,0.176,0.176,0.176,0.176,-0.322,-0.322,-0.322,-0.322,-0.322,1.121,1.121,1.121,1.121,1.121,0.309,0.309,0.309,0.309,0.309,287.6,287.6,287.6,287.6,287.6,0.5,0.5,0.5,0.5,0.5,0.639,0.639,0.639,0.639,0.639,0.75,0.75,0.75,0.75,0.75,68.0,68.0,68.0,68.0,68.0,68.0,65.0,65.0,65.0,65.0,65.0,65.0,71.0,71.0,71.0,71.0,71.0,71.0,4.242641,4.242641,4.242641,4.242641,4.242641,3.712311,3.712311,3.712311,3.712311,3.712311,1.2537,1.2537,1.2537,1.2537,1.2537,1.797465,1.797465,1.797465,1.797465,1.797465,0.069296,0.069296,0.069296,0.069296,0.069296,0.613769,0.613769,0.613769,0.613769,0.613769,4.326079,4.326079,4.326079,4.326079,4.326079,1.979899,1.979899,1.979899,1.979899,1.979899,0.100409,0.100409,0.100409,0.100409,0.100409,0.039598,0.039598,0.039598,0.039598,0.039598,0.353553,0.353553,0.353553,0.353553,0.353553,2632,0,0,0,0,0,0,0,2017.0,2017.0,6.0,1.0,6.0,70.0,1.0,71.0,-3.437,2.153,-1.095,-0.371,0.687,-2.75,286.2,0.429,0.611,0.5,59.736,22.715,2.0,4.0,1.0,12.0,73.0,12.0,0.0,0.0,0.0,0.0,0.0
179,70.0,-0.627,-0.627,-0.627,-0.627,-0.627,0.596667,0.596667,0.596667,0.596667,0.596667,0.512667,0.512667,0.512667,0.512667,0.512667,-0.841333,-0.841333,-0.841333,-0.841333,-0.841333,0.268333,0.268333,0.268333,0.268333,0.268333,-0.359,-0.359,-0.359,-0.359,-0.359,281.566667,281.566667,281.566667,281.566667,281.566667,0.452333,0.452333,0.452333,0.452333,0.452333,0.648333,0.648333,0.648333,0.648333,0.648333,0.722333,0.722333,0.722333,0.722333,0.722333,68.666667,68.666667,68.666667,68.666667,68.666667,68.666667,65.0,65.0,65.0,65.0,65.0,65.0,71.0,71.0,71.0,71.0,71.0,71.0,3.21455,3.21455,3.21455,3.21455,3.21455,2.644485,2.644485,2.644485,2.644485,2.644485,1.460107,1.460107,1.460107,1.460107,1.460107,1.398383,1.398383,1.398383,1.398383,1.398383,0.900845,0.900845,0.900845,0.900845,0.900845,1.539311,1.539311,1.539311,1.539311,1.539311,3.270497,3.270497,3.270497,3.270497,3.270497,10.543402,10.543402,10.543402,10.543402,10.543402,0.108891,0.108891,0.108891,0.108891,0.108891,0.032332,0.032332,0.032332,0.032332,0.032332,0.254551,0.254551,0.254551,0.254551,0.254551,2631,0,0,0,0,0,0,0,2017.0,2017.0,6.0,2.0,6.0,70.0,10.0,65.0,1.813,0.38,1.447,-0.273,1.555,3.368,289.0,0.571,0.667,1.0,33.141,23.608,2.0,1.0,1.0,13.0,73.0,7.333333,0.0,0.0,0.0,0.0,0.0


In [93]:
X_train = train_df_sorted.drop(['round_score'], axis=1)
y_train = train_df_sorted['round_score']


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

model = LinearRegression()
model.fit(X_train_scaled, y_train)

r2_train = model.score(X_train_scaled,y_train)

X_train_pred = model.predict(X_train_scaled)
mape_train = mean_absolute_percentage_error(X_train_pred,y_train)

print(f' Train: \n r2: {r2_train:.4f} \n mape: {mape_train:.4f}')

 Train: 
 r2: 0.4592 
 mape: 0.0270


# Lasso Regression

In [None]:
lasso_model = Lasso() 
lasso_model.fit(X_train_scaled, y_train)

l_r2_train = lasso_model.score(X_train_scaled,y_train)
l_r2_test = lasso_model.score(X_test_scaled,y_test)

l_X_train_pred = lasso_model.predict(X_train_scaled)
l_X_test_pred = lasso_model.predict(X_test_scaled)
l_mape_train = mean_absolute_percentage_error(l_X_train_pred,y_train)
l_mape_test = mean_absolute_percentage_error(l_X_test_pred,y_test)

print(f' Train: \n r2: {l_r2_train:.4f} \n mape: {l_mape_train:.4f}')
print(f' Test: \n r2: {l_r2_test:.4f} \n mape: {l_mape_test:.4f}')

# Ridge Regression

In [None]:
ridge_model = Ridge() 
ridge_model.fit(X_train_scaled, y_train)

r_r2_train = ridge_model.score(X_train_scaled,y_train)
r_r2_test = ridge_model.score(X_test_scaled,y_test)

r_X_train_pred = ridge_model.predict(X_train_scaled)
r_X_test_pred = ridge_model.predict(X_test_scaled)
r_mape_train = mean_absolute_percentage_error(r_X_train_pred,y_train)
r_mape_test = mean_absolute_percentage_error(r_X_test_pred,y_test)

print(f' Train: \n r2: {r_r2_train:.4f} \n mape: {r_mape_train:.4f}')
print(f' Test: \n r2: {r_r2_test:.4f} \n mape: {r_mape_test:.4f}')

# Linear Pipeline

In [None]:
estimators = [('scaler', StandardScaler()),
              ('reduce_dim', PCA(n_components=0.9)),
              ('model', LinearRegression())]


pipe = Pipeline(estimators)
param_grid = [{'scaler':[StandardScaler(), MinMaxScaler()],
               'reduce_dim':[None,PCA(n_components=0.9)],
               'model':[Lasso(max_iter=int(1e9))],
               'model__alpha':np.logspace(-3,3,50)},
              {'scaler':[StandardScaler(), MinMaxScaler()],
               'reduce_dim':[None,PCA(n_components=0.9)],
               'model':[Ridge(max_iter=int(1e9))],
               'model__alpha':np.logspace(0,10,50)},
             {'scaler':[StandardScaler(), MinMaxScaler()],
              'reduce_dim':[None,PCA(n_components=0.9)],
              'model':[LinearRegression()]}
             ]

grid = GridSearchCV(pipe, param_grid,n_jobs=-1,verbose=20,
                    scoring=["r2","neg_mean_absolute_percentage_error"],refit='r2')
                   
fittedgrid = grid.fit(X_train, y_train)

In [None]:
results = pd.DataFrame(fittedgrid.cv_results_)

In [None]:
str(results.loc[0,"param_model"])

In [None]:
results.param_model.value_counts()

In [None]:
lasso_sel = results["param_model"].apply(lambda x:str(x).startswith("Lasso"))

In [None]:
lasso_results = results.loc[lasso_sel,["param_model__alpha","mean_test_r2","mean_test_neg_mean_absolute_percentage_error"]]
plt.figure()
plt.plot(lasso_results["param_model__alpha"],lasso_results["mean_test_r2"])
plt.xscale("log")
plt.xlabel("alpha")
plt.ylabel("R2")
ax2 = plt.gca().twinx()
ax2.plot(lasso_results["param_model__alpha"],lasso_results["mean_test_neg_mean_absolute_percentage_error"],color='red')

In [None]:
ridge_sel = results["param_model"].apply(lambda x:str(x).startswith("Ridge"))

In [None]:
ridge_results = results.loc[ridge_sel,["param_model__alpha","mean_test_r2","mean_test_neg_mean_absolute_percentage_error"]]
plt.figure()
plt.plot(ridge_results["param_model__alpha"],ridge_results["mean_test_r2"])
plt.xscale("log")
plt.xlabel("alpha")
plt.ylabel("R2")
ax2 = plt.gca().twinx()
ax2.plot(ridge_results["param_model__alpha"],ridge_results["mean_test_neg_mean_absolute_percentage_error"],color='red')

In [None]:
fittedgrid.best_estimator_

In [None]:
fittedgrid.best_estimator_.get_params()

In [None]:
fittedgrid.best_estimator_.score(X_test,y_test)

In [None]:
field_df[['dg_id','player_name','current_round','event_name']].sort_values(by='player_name', ascending=True)

# Random Forest

In [None]:
estimators2 = [('scaler', StandardScaler()),
              ('reduce_dim', PCA(n_components=0.9)),
              ('model', RandomForestRegressor())]


pipe2 = Pipeline(estimators2)
param_grid2 = [{'scaler': [StandardScaler(), MinMaxScaler()],
                'reduce_dim': [None, PCA(n_components=0.9)],
                'model':[RandomForestRegressor()],
                'model__max_depth':[5,10,15,20,25]}
              ]


grid2 = GridSearchCV(pipe2, param_grid2, cv=5,verbose=20,n_jobs=-1,
                    scoring=["r2","neg_mean_absolute_percentage_error"],refit='r2')
                   
fittedgrid2 = grid2.fit(X_train, y_train)

In [None]:
results2 = pd.DataFrame(fittedgrid2.cv_results_)

In [None]:
results2[["param_scaler","param_reduce_dim","param_model__max_depth","mean_test_r2"]]

In [None]:
fittedgrid2.best_estimator_

In [None]:
fittedgrid2.best_estimator_.score(X_test,y_test)

***

***

# Model Projections

In [None]:
most_recent_scores = current_df.sort_values('round_completed').groupby('player_name').last().reset_index()

In [None]:
most_recent_scores.head()

In [None]:
most_recent_scores = most_recent_scores.drop(['tour','event_name','course_name','player_name','round_completed','event_completed','year'
                                        ,'season','event_completed','event_id','round_num','course_num','course_par'
                                        ,'start_hole','sg_putt','sg_arg','sg_app','sg_ott','sg_t2g','sg_total','driving_dist'
                                        ,'driving_acc','gir','scrambling','prox_rgh','prox_fw','great_shots','poor_shots'
                                        ,'round_completed','month','day','fin_num','teetime_numeric','ohe_win','ohe_top_five'
                                        ,'ohe_top_ten','ohe_top_twenty','ohe_make_cut'], axis=1)

In [None]:
most_recent_scores.head()

In [None]:
most_recent_scores = most_recent_scores.set_index('dg_id')

In [None]:
most_recent_scores.head(20)

In [None]:
most_recent_scores = most_recent_scores.drop(['round_score'], axis=1)

In [None]:
predicted_score = fittedgrid.best_estimator_.predict(most_recent_scores)
print(predicted_score)

In [None]:
most_recent_scores['predicted_score'] = predicted_score

In [None]:
most_recent_scores.head()

In [None]:
result_df = most_recent_scores.merge(player_df[['dg_id', 'player_name']], on='dg_id', how='left')

In [None]:
result_df.sort_values(by='predicted_score', ascending=True).head(10)

In [None]:
this_week_df = result_df.merge(field_df[['dg_id', 'event_name']], on='dg_id', how='inner')
this_week_df.sort_values(by='predicted_score', ascending=True)

In [None]:
tt_df.loc[tt_df['player_name'] == 'Finau, Tony'].tail()

# Neural Network

In [94]:
from sklearn.datasets import fetch_california_housing
from sklearn.neural_network import MLPRegressor
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

In [95]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

In [96]:
model = keras.Sequential()

regularizer = keras.regularizers.l1(0.001)

model.add(layers.Dense(40, activation="relu", kernel_regularizer=regularizer))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(40, activation="relu", kernel_regularizer=regularizer))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(40, activation="relu", kernel_regularizer=regularizer))
model.add(layers.Dropout(0.1))


model.add(layers.Dense(1))

model.compile(
    # Optimizer
    optimizer=keras.optimizers.Adam(),
    # Loss function to minimize
    loss=keras.losses.MeanSquaredError()
)


# model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss')
# early_stopping = EarlyStopping(monitor='val_loss', patience=50)

history = model.fit(X_train_scaled, y_train, epochs=150, verbose=1) #,validation_data=(X_val_scaled, y_val),callbacks=[early_stopping, model_checkpoint])

Epoch 1/150
   1/3037 [..............................] - ETA: 7:05 - loss: 4896.3379

2024-03-29 18:41:52.282420: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 7

In [97]:
train_loss = history.history["loss"][-1]

print(f"Train Loss: {train_loss:.4f}")

Train Loss: 4.5406


# Model Projections

In [98]:
most_recent_scores = current_df.sort_values('round_completed').groupby('player_name').last().reset_index()

In [99]:
most_recent_scores = most_recent_scores.drop(['tour','event_name','course_name','player_name','round_completed','event_completed','year'
                                        ,'season','event_completed','event_id','round_num','course_num','course_par'
                                        ,'start_hole','sg_putt','sg_arg','sg_app','sg_ott','sg_t2g','sg_total','driving_dist'
                                        ,'driving_acc','gir','scrambling','prox_rgh','prox_fw','great_shots','poor_shots'
                                        ,'round_completed','month','day','fin_num','teetime_numeric','ohe_win','ohe_top_five'
                                        ,'ohe_top_ten','ohe_top_twenty','ohe_make_cut'], axis=1)

In [111]:
most_recent_scores.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Columns: 164 entries, L20_moving_avg_sg_putt to predicted_score
dtypes: float32(1), float64(154), int64(9)
memory usage: 322.0 KB


In [101]:
player_ids = df['dg_id']
most_recent_scores = most_recent_scores.drop(['dg_id','round_score'], axis=1)

In [102]:
most_recent_scores.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Columns: 162 entries, L20_moving_avg_sg_putt to lagged_ohe_make_cut
dtypes: float64(154), int64(8)
memory usage: 319.1 KB


In [103]:
mr_scaled = scaler.transform(most_recent_scores)

In [104]:
predicted_score = model.predict(mr_scaled)
print(predicted_score)

[[69.79415 ]
 [72.66626 ]
 [72.14854 ]
 [75.479416]
 [72.51458 ]
 [71.29649 ]
 [69.74913 ]
 [74.78484 ]
 [76.43279 ]
 [73.62517 ]
 [69.2848  ]
 [69.44518 ]
 [72.02301 ]
 [72.09602 ]
 [73.44256 ]
 [71.73834 ]
 [72.04586 ]
 [72.97085 ]
 [71.53128 ]
 [76.12525 ]
 [73.687126]
 [73.12907 ]
 [73.15369 ]
 [71.07357 ]
 [73.94833 ]
 [70.68781 ]
 [73.007484]
 [73.87251 ]
 [73.94804 ]
 [70.047775]
 [69.56957 ]
 [66.999306]
 [72.456085]
 [72.18669 ]
 [71.452805]
 [73.27517 ]
 [73.505714]
 [73.09869 ]
 [72.0413  ]
 [71.89634 ]
 [73.059   ]
 [71.80169 ]
 [74.09641 ]
 [71.41261 ]
 [69.69115 ]
 [72.06065 ]
 [71.70514 ]
 [72.3754  ]
 [71.169205]
 [72.21366 ]
 [73.49814 ]
 [75.02986 ]
 [71.80736 ]
 [71.57457 ]
 [72.32824 ]
 [75.267075]
 [71.53668 ]
 [68.43272 ]
 [72.89747 ]
 [73.37674 ]
 [69.183365]
 [70.895874]
 [78.33924 ]
 [71.82052 ]
 [70.20273 ]
 [71.86388 ]
 [68.15844 ]
 [71.71887 ]
 [73.08701 ]
 [73.873955]
 [74.1388  ]
 [73.42521 ]
 [69.55583 ]
 [73.70073 ]
 [71.168976]
 [71.39688 ]
 [71.317825]

In [105]:
most_recent_scores['dg_id'] = player_ids
most_recent_scores['predicted_score'] = predicted_score

In [106]:
result_df = most_recent_scores.merge(player_df[['dg_id', 'player_name']], on='dg_id', how='left')

In [107]:
result_df.sort_values(by='predicted_score', ascending=True).head(10)

Unnamed: 0,L20_moving_avg_sg_putt,L16_moving_avg_sg_putt,L12_moving_avg_sg_putt,L8_moving_avg_sg_putt,L4_moving_avg_sg_putt,L20_moving_avg_sg_arg,L16_moving_avg_sg_arg,L12_moving_avg_sg_arg,L8_moving_avg_sg_arg,L4_moving_avg_sg_arg,L20_moving_avg_sg_app,L16_moving_avg_sg_app,L12_moving_avg_sg_app,L8_moving_avg_sg_app,L4_moving_avg_sg_app,L20_moving_avg_sg_ott,L16_moving_avg_sg_ott,L12_moving_avg_sg_ott,L8_moving_avg_sg_ott,L4_moving_avg_sg_ott,L20_moving_avg_sg_t2g,L16_moving_avg_sg_t2g,L12_moving_avg_sg_t2g,L8_moving_avg_sg_t2g,L4_moving_avg_sg_t2g,L20_moving_avg_sg_total,L16_moving_avg_sg_total,L12_moving_avg_sg_total,L8_moving_avg_sg_total,L4_moving_avg_sg_total,L20_moving_avg_driving_dist,L16_moving_avg_driving_dist,L12_moving_avg_driving_dist,L8_moving_avg_driving_dist,L4_moving_avg_driving_dist,L20_moving_avg_driving_acc,L16_moving_avg_driving_acc,L12_moving_avg_driving_acc,L8_moving_avg_driving_acc,L4_moving_avg_driving_acc,L20_moving_avg_gir,L16_moving_avg_gir,L12_moving_avg_gir,L8_moving_avg_gir,L4_moving_avg_gir,L20_moving_avg_scrambling,L16_moving_avg_scrambling,L12_moving_avg_scrambling,L8_moving_avg_scrambling,L4_moving_avg_scrambling,career_avg,L20_moving_avg,L16_moving_avg,L12_moving_avg,L8_moving_avg,L4_moving_avg,career_min,L20_moving_min,L16_moving_min,L12_moving_min,L8_moving_min,L4_moving_min,career_max,L20_moving_max,L16_moving_max,L12_moving_max,L8_moving_max,L4_moving_max,L20_round_score_std_dev,L16_round_score_std_dev,L12_round_score_std_dev,L8_round_score_std_dev,L4_round_score_std_dev,L20_sg_putt_std_dev,L16__sg_putt_std_dev,L12_putt_std_dev,L8_putt_std_dev,L4_sg_putt_std_dev,L20_sg_arg_std_dev,L16_sg_arg_std_dev,L12_sg_arg_std_dev,L8_sg_arg_std_dev,L4_sg_arg_std_dev,L20_sg_app_std_dev,L16_app_std_dev,L12_app_std_dev,L8_sg_app_std_dev,L4_sg_app_std_dev,L20_sg_ott_std_dev,L16_sg_ott_std_dev,L12_sg_ott_std_dev,L8_sg_ott_std_dev,L4_sg_ott_std_dev,L20_sg_t2g_std_dev,L16_sg_t2g_std_dev,L12_sg_t2g_std_dev,L8_sg_t2g_std_dev,L4_sg_t2g_std_dev,L20_sg_total_std_dev,L16_sg_total_std_dev,L12_sg_total_std_dev,L8_total_std_dev,L4_sg_total_std_dev,L20_driving_dist_std_dev,L16_driving_dist_std_dev,L12_driving_dist_std_dev,L8_driving_dist_std_dev,L4_driving_dist_std_dev,L20_driving_acc_std_dev,L16_driving_acc_std_dev,L12_driving_acc_std_dev,L8_driving_acc_std_dev,L4_driving_acc_std_dev,L20_gir_std_dev,L16_gir_std_dev,L12_gir_std_dev,L8_gir_std_dev,L4_gir_std_dev,L20_scrambling_std_dev,L16_scrambling_std_dev,L12_scrambling_std_dev,L8_scrambling_std_dev,L4_scrambling_std_dev,Days_Since,Last_365_Days,Last_180_Days,Last_90_Days,Last_60_Days,Last_30_Days,Last_10_Days,Last_5_Days,lagged_year,lagged_season,lagged_event_id,lagged_round_num,lagged_course_num,lagged_course_par,lagged_start_hole,lagged_round_score,lagged_sg_putt,lagged_sg_arg,lagged_sg_app,lagged_sg_ott,lagged_sg_t2g,lagged_sg_total,lagged_driving_dist,lagged_driving_acc,lagged_gir,lagged_scrambling,lagged_prox_rgh,lagged_prox_fw,lagged_great_shots,lagged_poor_shots,lagged_month,lagged_day,lagged_fin_num,lagged_teetime_numeric,lagged_ohe_win,lagged_ohe_top_five,lagged_ohe_top_ten,lagged_ohe_top_twenty,lagged_ohe_make_cut,dg_id,predicted_score,player_name
31,0.5999,0.970375,0.626083,1.375125,1.529,0.1447,0.110375,0.09125,0.234875,0.50725,0.81525,0.906,0.767167,1.12675,1.53875,0.4747,0.321125,0.373667,0.715375,0.45625,1.43445,1.337375,1.232083,2.077,2.50225,2.03625,2.310062,1.86125,3.45225,4.0315,300.92,297.65,297.025,293.275,289.675,0.54605,0.5445,0.5415,0.589125,0.64275,0.73065,0.694563,0.680667,0.71525,0.76375,0.6563,0.6945,0.671,0.668125,0.712,68.454545,68.35,68.5,69.333333,68.375,67.25,60.0,60.0,60.0,65.0,65.0,65.0,75.0,75.0,75.0,75.0,71.0,70.0,3.688888,3.847077,3.22866,2.615203,2.629956,2.627053,2.539818,2.621295,2.440596,2.953102,0.933105,0.975764,1.06014,0.994966,0.605083,1.424803,1.389924,1.205154,1.304902,1.231275,0.861376,0.87281,0.798561,0.747938,1.041338,1.758999,1.901414,1.841303,1.357541,1.693572,3.592467,3.906457,3.874887,2.819761,3.372273,11.534552,8.173208,7.859693,5.25187,3.336041,0.117918,0.130063,0.115869,0.091701,0.101352,0.127642,0.112786,0.100708,0.100383,0.123233,0.223049,0.171811,0.160895,0.139379,0.062711,12,1,1,1,1,1,0,0,2024.0,2024.0,11.0,3.0,11.0,72.0,1.0,70.0,0.134,1.312,-0.111,-1.048,0.153,0.288,288.4,0.643,0.611,0.714,24.242,31.802,3.0,5.0,3.0,16.0,2.0,14.666667,0.0,1.0,1.0,1.0,0.0,19477,66.999306,"Schenk, Adam"
192,0.0639,0.103813,0.102917,0.699125,0.3115,0.68635,0.804375,1.157833,0.860625,0.7745,1.3072,1.150063,0.763167,1.208,1.588,0.9038,1.1395,1.134667,1.434375,1.60725,2.89715,3.093687,3.0555,3.503,3.96975,2.96295,3.199875,3.161667,4.20225,4.2815,294.57,295.95625,293.566667,294.625,294.3,0.63925,0.65175,0.666667,0.6875,0.8035,0.75,0.72225,0.680667,0.72925,0.764,0.6458,0.645437,0.69,0.696875,0.6875,67.416667,67.55,67.6875,68.083333,67.625,67.0,64.0,64.0,64.0,64.0,64.0,64.0,71.0,70.0,70.0,70.0,70.0,69.0,2.012461,1.815443,1.880925,2.065879,2.160247,1.608883,1.578803,1.722138,1.613007,0.707725,1.002192,1.033844,0.922298,0.560032,0.528995,1.314777,1.314407,1.073055,0.939932,0.947876,0.91915,0.753644,0.862306,0.764763,0.911384,1.759473,1.777759,1.976222,1.652279,1.876716,2.331037,2.104651,2.44702,2.16796,2.08995,6.103243,5.344152,3.24691,2.696426,2.981051,0.139895,0.140068,0.159095,0.156992,0.068413,0.142283,0.131253,0.116156,0.095693,0.069671,0.236038,0.244126,0.143181,0.166603,0.216506,12,1,1,1,1,1,0,0,2024.0,2024.0,11.0,3.0,11.0,72.0,1.0,68.0,0.363,0.511,0.873,0.54,1.924,2.288,296.7,0.714,0.667,0.875,91.437,35.16,2.0,4.0,3.0,16.0,1.0,14.166667,1.0,1.0,1.0,1.0,0.0,11676,67.050835,"Finau, Tony"
113,0.052,0.052,0.052,0.052,0.052,-0.11425,-0.11425,-0.11425,-0.11425,-0.11425,-0.022,-0.022,-0.022,-0.022,-0.022,-0.97775,-0.97775,-0.97775,-0.97775,-0.97775,-1.1135,-1.1135,-1.1135,-1.1135,-1.1135,-1.06225,-1.06225,-1.06225,-1.06225,-1.06225,289.55,289.55,289.55,289.55,289.55,0.5535,0.5535,0.5535,0.5535,0.5535,0.68075,0.68075,0.68075,0.68075,0.68075,0.69475,0.69475,0.69475,0.69475,0.69475,69.75,69.75,69.75,69.75,69.75,69.75,67.0,67.0,67.0,67.0,67.0,67.0,76.0,76.0,76.0,76.0,76.0,76.0,4.193249,4.193249,4.193249,4.193249,4.193249,2.913946,2.913946,2.913946,2.913946,2.913946,1.088005,1.088005,1.088005,1.088005,1.088005,0.665328,0.665328,0.665328,0.665328,0.665328,2.640386,2.640386,2.640386,2.640386,2.640386,2.576828,2.576828,2.576828,2.576828,2.576828,4.328317,4.328317,4.328317,4.328317,4.328317,3.0271,3.0271,3.0271,3.0271,3.0271,0.135326,0.135326,0.135326,0.135326,0.135326,0.145788,0.145788,0.145788,0.145788,0.145788,0.301023,0.301023,0.301023,0.301023,0.301023,75,1,1,1,0,0,0,0,2024.0,2024.0,6.0,3.0,6.0,70.0,10.0,76.0,-3.081,0.311,0.206,-4.912,-4.394,-7.476,289.5,0.357,0.556,0.571,40.754,25.848,3.0,8.0,1.0,13.0,74.0,10.666667,0.0,0.0,0.0,0.0,0.0,22085,67.551315,"Morikawa, Collin"
112,-0.4625,-0.4625,-0.4625,-0.4625,-0.4625,-0.497,-0.497,-0.497,-0.497,-0.497,-0.2115,-0.2115,-0.2115,-0.2115,-0.2115,0.3345,0.3345,0.3345,0.3345,0.3345,-0.374,-0.374,-0.374,-0.374,-0.374,-0.837,-0.837,-0.837,-0.837,-0.837,280.55,280.55,280.55,280.55,280.55,0.6425,0.6425,0.6425,0.6425,0.6425,0.8055,0.8055,0.8055,0.8055,0.8055,0.6875,0.6875,0.6875,0.6875,0.6875,70.0,70.0,70.0,70.0,70.0,70.0,66.0,66.0,66.0,66.0,66.0,66.0,74.0,74.0,74.0,74.0,74.0,74.0,5.656854,5.656854,5.656854,5.656854,5.656854,1.744432,1.744432,1.744432,1.744432,1.744432,0.274357,0.274357,0.274357,0.274357,0.274357,1.836356,1.836356,1.836356,1.836356,1.836356,0.658316,0.658316,0.658316,0.658316,0.658316,2.76903,2.76903,2.76903,2.76903,2.76903,4.512755,4.512755,4.512755,4.512755,4.512755,8.980256,8.980256,8.980256,8.980256,8.980256,0.101116,0.101116,0.101116,0.101116,0.101116,0.118087,0.118087,0.118087,0.118087,0.118087,0.441942,0.441942,0.441942,0.441942,0.441942,77,1,1,1,0,0,0,0,2024.0,2024.0,6.0,1.0,6.0,70.0,10.0,74.0,-1.696,-0.691,-1.51,-0.131,-2.332,-4.028,274.2,0.571,0.722,0.375,65.542,40.143,1.0,3.0,1.0,11.0,0.0,13.5,0.0,1.0,1.0,1.0,1.0,22085,67.624863,"Morikawa, Collin"
88,0.1815,0.1815,0.1815,0.1815,0.023,0.720625,0.720625,0.720625,0.720625,0.955,0.501125,0.501125,0.501125,0.501125,0.642,-0.126875,-0.126875,-0.126875,-0.126875,0.06825,1.094875,1.094875,1.094875,1.094875,1.66525,1.27625,1.27625,1.27625,1.27625,1.68775,304.7125,304.7125,304.7125,304.7125,294.475,0.542625,0.542625,0.542625,0.542625,0.5355,0.72225,0.72225,0.72225,0.72225,0.625,0.711,0.711,0.711,0.711,0.70625,67.25,67.25,67.25,67.25,67.25,67.0,62.0,62.0,62.0,62.0,62.0,65.0,72.0,72.0,72.0,72.0,72.0,70.0,3.105295,3.105295,3.105295,3.105295,2.160247,1.504001,1.504001,1.504001,1.504001,1.165602,0.644088,0.644088,0.644088,0.644088,0.883265,1.052502,1.052502,1.052502,1.052502,1.424323,0.846257,0.846257,0.846257,0.846257,1.111653,1.468394,1.468394,1.468394,1.468394,1.582572,2.285018,2.285018,2.285018,2.285018,1.389542,12.884258,12.884258,12.884258,12.884258,8.530094,0.171136,0.171136,0.171136,0.171136,0.222142,0.167958,0.167958,0.167958,0.167958,0.183614,0.162793,0.162793,0.162793,0.162793,0.155958,75,1,1,1,0,0,0,0,2024.0,2024.0,6.0,3.0,6.0,70.0,1.0,67.0,1.467,1.474,-0.919,-0.496,0.058,1.524,296.3,0.571,0.5,0.875,72.674,29.94,5.0,3.0,1.0,13.0,13.0,10.666667,0.0,0.0,0.0,1.0,0.0,21554,67.900246,"Poston, J.T."
66,-0.441,-0.441,-0.441,-0.441,-0.441,-1.669,-1.669,-1.669,-1.669,-1.669,-0.241,-0.241,-0.241,-0.241,-0.241,0.319,0.319,0.319,0.319,0.319,-1.5905,-1.5905,-1.5905,-1.5905,-1.5905,-2.032,-2.032,-2.032,-2.032,-2.032,291.7,291.7,291.7,291.7,291.7,0.5385,0.5385,0.5385,0.5385,0.5385,0.5275,0.5275,0.5275,0.5275,0.5275,0.611,0.611,0.611,0.611,0.611,73.5,73.5,73.5,73.5,73.5,73.5,67.0,67.0,67.0,67.0,67.0,67.0,80.0,80.0,80.0,80.0,80.0,80.0,9.192388,9.192388,9.192388,9.192388,9.192388,4.440631,4.440631,4.440631,4.440631,4.440631,1.694228,1.694228,1.694228,1.694228,1.694228,2.450832,2.450832,2.450832,2.450832,2.450832,1.166726,1.166726,1.166726,1.166726,1.166726,5.311079,5.311079,5.311079,5.311079,5.311079,9.752417,9.752417,9.752417,9.752417,9.752417,5.23259,5.23259,5.23259,5.23259,5.23259,0.217082,0.217082,0.217082,0.217082,0.217082,0.118087,0.118087,0.118087,0.118087,0.118087,0.393151,0.393151,0.393151,0.393151,0.393151,7,1,1,1,1,1,1,0,2024.0,2024.0,475.0,1.0,665.0,71.0,10.0,80.0,-3.581,-2.867,-1.974,-0.506,-5.346,-8.928,295.4,0.385,0.444,0.333,43.245,30.884,0.0,10.0,3.0,21.0,0.0,14.95,0.0,1.0,1.0,1.0,1.0,17488,68.15844,"Im, Sungjae"
189,0.595471,0.762813,1.323667,1.933625,2.393,-0.119176,-0.14525,-0.25075,-0.3715,-0.68475,0.029059,0.201313,0.008833,0.047,-0.02425,-0.561882,-0.584187,-0.559333,-0.18575,-0.793,-0.651941,-0.528062,-0.801083,-0.510125,-1.50175,-0.053941,0.237437,0.526167,1.424375,0.89275,278.852941,278.7375,278.775,279.6125,275.775,0.619765,0.618312,0.610167,0.62075,0.59875,0.647118,0.652812,0.638917,0.66675,0.625,0.646294,0.638062,0.6425,0.6335,0.645,70.705882,70.705882,70.5,70.333333,69.625,70.5,65.0,65.0,65.0,65.0,65.0,65.0,78.0,78.0,78.0,78.0,78.0,78.0,3.137768,3.119829,3.550501,3.889087,5.446712,1.987772,1.925321,1.882411,1.847268,2.218054,0.797168,0.81579,0.740926,0.784248,0.974218,1.528342,1.397681,1.431987,1.624594,2.376387,1.13336,1.166669,1.143321,0.959043,0.780642,2.28858,2.304017,2.427321,2.673978,3.741293,3.432641,3.320995,3.67151,3.740581,5.490674,8.721032,8.993618,8.086816,8.784961,9.109107,0.119193,0.122946,0.132467,0.163203,0.224912,0.130105,0.132166,0.146874,0.139146,0.183412,0.115784,0.114328,0.120882,0.116804,0.139702,5,1,1,1,1,1,1,1,2024.0,2024.0,475.0,3.0,665.0,71.0,1.0,78.0,0.096,-1.735,-3.215,-1.886,-6.836,-6.74,265.6,0.615,0.389,0.455,53.212,35.6,3.0,6.0,3.0,23.0,33.0,11.366667,0.0,0.0,0.0,0.0,0.0,17576,68.224144,"Conners, Corey"
117,-0.34085,-0.26,-0.40075,-0.477125,1.1975,0.1511,0.230438,0.083917,0.474625,0.60875,0.53445,0.364625,0.464583,0.731625,0.259,0.6478,0.675187,0.594167,0.848,0.966,1.3331,1.27,1.1425,2.05425,1.83375,0.9942,1.012375,0.745,1.57725,3.0315,287.005,288.44375,286.991667,287.4625,287.025,0.7035,0.691875,0.672583,0.696375,0.71425,0.6917,0.663187,0.625,0.652875,0.639,0.67035,0.67125,0.648,0.64875,0.733,69.142857,69.6,69.875,70.5,70.25,68.25,64.0,64.0,64.0,64.0,64.0,64.0,75.0,75.0,75.0,75.0,75.0,71.0,2.85436,2.777889,2.938769,3.284161,3.095696,1.875219,2.009077,2.315535,2.825737,2.154776,1.412564,1.209722,1.357135,0.808734,1.169251,1.498194,1.614756,1.823195,2.152536,1.514353,0.581911,0.639729,0.692236,0.583488,0.797347,2.568826,2.616181,2.978916,2.739767,2.948178,2.671163,2.512742,2.841196,2.91047,2.747039,5.239071,4.02442,3.508291,2.684845,2.128184,0.109491,0.100097,0.083226,0.050458,0.05838,0.13547,0.12904,0.12563,0.125017,0.096225,0.154865,0.166122,0.184206,0.225537,0.097328,12,1,1,1,1,1,0,0,2024.0,2024.0,11.0,3.0,11.0,72.0,1.0,68.0,-1.459,0.806,0.808,2.132,3.746,2.288,288.1,0.714,0.611,0.7,27.656,36.372,3.0,3.0,3.0,16.0,6.0,10.916667,0.0,0.0,1.0,1.0,0.0,19477,68.358154,"Schenk, Adam"
57,0.24965,0.553813,0.356833,0.07125,-0.1115,0.42755,0.431062,0.434083,0.321875,0.23,-0.03665,0.137875,0.420417,0.599375,0.691,0.14175,0.2625,0.19725,0.084625,0.72175,0.53235,0.831187,1.051583,1.005875,1.64275,0.7839,1.387375,1.411667,1.07725,1.5315,289.24,290.125,289.075,287.225,288.825,0.61425,0.629375,0.607,0.57125,0.589,0.68895,0.708437,0.689917,0.68075,0.68075,0.7121,0.732875,0.71675,0.686875,0.67125,69.413793,70.2,69.5,69.833333,70.75,69.75,64.0,65.0,65.0,65.0,66.0,66.0,77.0,77.0,77.0,77.0,77.0,75.0,3.473206,3.32666,3.485902,3.807887,3.774917,1.63136,1.652641,1.650046,1.820425,2.481021,0.859144,0.953905,1.076299,1.062388,1.388399,1.675016,1.534011,1.658438,1.883846,1.626957,1.088877,0.939012,0.875175,0.804934,0.535908,2.06435,2.023283,2.174664,2.584058,2.827965,2.992899,2.969417,3.117677,3.743487,4.301421,5.240872,4.914943,5.063977,4.89015,5.426709,0.145425,0.114408,0.103295,0.076303,0.036,0.123971,0.096062,0.086733,0.105873,0.145788,0.186711,0.178814,0.185653,0.191942,0.174391,12,1,1,1,1,1,0,0,2024.0,2024.0,11.0,3.0,11.0,72.0,1.0,75.0,-3.014,-1.563,-0.258,0.122,-1.699,-4.712,291.6,0.571,0.556,0.462,85.523,34.762,1.0,8.0,3.0,16.0,19.0,13.416667,0.0,0.0,0.0,1.0,0.0,14577,68.432716,"English, Harris"
150,-0.166091,-0.166091,-0.166091,0.2715,0.465,0.195909,0.195909,0.195909,0.2275,0.10025,0.261091,0.261091,0.261091,0.43075,0.54975,-0.476091,-0.476091,-0.476091,-0.285,0.01775,-0.018909,-0.018909,-0.018909,0.373625,0.66825,-0.183818,-0.183818,-0.183818,0.64675,1.13675,283.045455,283.045455,283.045455,278.5875,276.975,0.507,0.507,0.507,0.526875,0.5,0.656545,0.656545,0.656545,0.666625,0.63875,0.590909,0.590909,0.590909,0.674,0.66675,70.909091,70.909091,70.909091,70.909091,70.0,70.25,67.0,67.0,67.0,67.0,67.0,67.0,74.0,74.0,74.0,74.0,73.0,73.0,2.586679,2.586679,2.586679,2.390457,3.201562,1.889377,1.889377,1.889377,1.96291,2.365745,0.959286,0.959286,0.959286,1.13818,0.960004,1.11269,1.11269,1.11269,1.157252,1.49904,0.794074,0.794074,0.794074,0.811188,0.591728,2.085878,2.085878,2.085878,2.21193,1.39193,2.78292,2.78292,2.78292,2.790012,3.43694,8.904983,8.904983,8.904983,5.07133,5.352492,0.111095,0.111095,0.111095,0.099641,0.098891,0.107788,0.107788,0.107788,0.125989,0.147217,0.20554,0.20554,0.20554,0.168431,0.068178,5,1,1,1,1,1,1,1,2024.0,2024.0,475.0,3.0,665.0,71.0,10.0,73.0,-2.117,1.483,-0.324,-0.782,0.377,-1.74,271.4,0.538,0.611,0.75,48.779,39.855,1.0,6.0,3.0,23.0,26.0,10.916667,0.0,0.0,0.0,0.0,0.0,22535,68.461197,"Norrman, Vincent"


In [110]:
this_week_df = result_df.merge(field_df[['dg_id', 'event_name']], on='dg_id', how='inner')
this_week_df.sort_values(by='predicted_score', ascending=True)

Unnamed: 0,L20_moving_avg_sg_putt,L16_moving_avg_sg_putt,L12_moving_avg_sg_putt,L8_moving_avg_sg_putt,L4_moving_avg_sg_putt,L20_moving_avg_sg_arg,L16_moving_avg_sg_arg,L12_moving_avg_sg_arg,L8_moving_avg_sg_arg,L4_moving_avg_sg_arg,L20_moving_avg_sg_app,L16_moving_avg_sg_app,L12_moving_avg_sg_app,L8_moving_avg_sg_app,L4_moving_avg_sg_app,L20_moving_avg_sg_ott,L16_moving_avg_sg_ott,L12_moving_avg_sg_ott,L8_moving_avg_sg_ott,L4_moving_avg_sg_ott,L20_moving_avg_sg_t2g,L16_moving_avg_sg_t2g,L12_moving_avg_sg_t2g,L8_moving_avg_sg_t2g,L4_moving_avg_sg_t2g,L20_moving_avg_sg_total,L16_moving_avg_sg_total,L12_moving_avg_sg_total,L8_moving_avg_sg_total,L4_moving_avg_sg_total,L20_moving_avg_driving_dist,L16_moving_avg_driving_dist,L12_moving_avg_driving_dist,L8_moving_avg_driving_dist,L4_moving_avg_driving_dist,L20_moving_avg_driving_acc,L16_moving_avg_driving_acc,L12_moving_avg_driving_acc,L8_moving_avg_driving_acc,L4_moving_avg_driving_acc,L20_moving_avg_gir,L16_moving_avg_gir,L12_moving_avg_gir,L8_moving_avg_gir,L4_moving_avg_gir,L20_moving_avg_scrambling,L16_moving_avg_scrambling,L12_moving_avg_scrambling,L8_moving_avg_scrambling,L4_moving_avg_scrambling,career_avg,L20_moving_avg,L16_moving_avg,L12_moving_avg,L8_moving_avg,L4_moving_avg,career_min,L20_moving_min,L16_moving_min,L12_moving_min,L8_moving_min,L4_moving_min,career_max,L20_moving_max,L16_moving_max,L12_moving_max,L8_moving_max,L4_moving_max,L20_round_score_std_dev,L16_round_score_std_dev,L12_round_score_std_dev,L8_round_score_std_dev,L4_round_score_std_dev,L20_sg_putt_std_dev,L16__sg_putt_std_dev,L12_putt_std_dev,L8_putt_std_dev,L4_sg_putt_std_dev,L20_sg_arg_std_dev,L16_sg_arg_std_dev,L12_sg_arg_std_dev,L8_sg_arg_std_dev,L4_sg_arg_std_dev,L20_sg_app_std_dev,L16_app_std_dev,L12_app_std_dev,L8_sg_app_std_dev,L4_sg_app_std_dev,L20_sg_ott_std_dev,L16_sg_ott_std_dev,L12_sg_ott_std_dev,L8_sg_ott_std_dev,L4_sg_ott_std_dev,L20_sg_t2g_std_dev,L16_sg_t2g_std_dev,L12_sg_t2g_std_dev,L8_sg_t2g_std_dev,L4_sg_t2g_std_dev,L20_sg_total_std_dev,L16_sg_total_std_dev,L12_sg_total_std_dev,L8_total_std_dev,L4_sg_total_std_dev,L20_driving_dist_std_dev,L16_driving_dist_std_dev,L12_driving_dist_std_dev,L8_driving_dist_std_dev,L4_driving_dist_std_dev,L20_driving_acc_std_dev,L16_driving_acc_std_dev,L12_driving_acc_std_dev,L8_driving_acc_std_dev,L4_driving_acc_std_dev,L20_gir_std_dev,L16_gir_std_dev,L12_gir_std_dev,L8_gir_std_dev,L4_gir_std_dev,L20_scrambling_std_dev,L16_scrambling_std_dev,L12_scrambling_std_dev,L8_scrambling_std_dev,L4_scrambling_std_dev,Days_Since,Last_365_Days,Last_180_Days,Last_90_Days,Last_60_Days,Last_30_Days,Last_10_Days,Last_5_Days,lagged_year,lagged_season,lagged_event_id,lagged_round_num,lagged_course_num,lagged_course_par,lagged_start_hole,lagged_round_score,lagged_sg_putt,lagged_sg_arg,lagged_sg_app,lagged_sg_ott,lagged_sg_t2g,lagged_sg_total,lagged_driving_dist,lagged_driving_acc,lagged_gir,lagged_scrambling,lagged_prox_rgh,lagged_prox_fw,lagged_great_shots,lagged_poor_shots,lagged_month,lagged_day,lagged_fin_num,lagged_teetime_numeric,lagged_ohe_win,lagged_ohe_top_five,lagged_ohe_top_ten,lagged_ohe_top_twenty,lagged_ohe_make_cut,dg_id,predicted_score,player_name,event_name
56,0.0639,0.103813,0.102917,0.699125,0.3115,0.68635,0.804375,1.157833,0.860625,0.7745,1.3072,1.150063,0.763167,1.208,1.588,0.9038,1.1395,1.134667,1.434375,1.60725,2.89715,3.093687,3.0555,3.503,3.96975,2.96295,3.199875,3.161667,4.20225,4.2815,294.57,295.95625,293.566667,294.625,294.3,0.63925,0.65175,0.666667,0.6875,0.8035,0.75,0.72225,0.680667,0.72925,0.764,0.6458,0.645437,0.69,0.696875,0.6875,67.416667,67.55,67.6875,68.083333,67.625,67.0,64.0,64.0,64.0,64.0,64.0,64.0,71.0,70.0,70.0,70.0,70.0,69.0,2.012461,1.815443,1.880925,2.065879,2.160247,1.608883,1.578803,1.722138,1.613007,0.707725,1.002192,1.033844,0.922298,0.560032,0.528995,1.314777,1.314407,1.073055,0.939932,0.947876,0.91915,0.753644,0.862306,0.764763,0.911384,1.759473,1.777759,1.976222,1.652279,1.876716,2.331037,2.104651,2.44702,2.16796,2.08995,6.103243,5.344152,3.24691,2.696426,2.981051,0.139895,0.140068,0.159095,0.156992,0.068413,0.142283,0.131253,0.116156,0.095693,0.069671,0.236038,0.244126,0.143181,0.166603,0.216506,12,1,1,1,1,1,0,0,2024.0,2024.0,11.0,3.0,11.0,72.0,1.0,68.0,0.363,0.511,0.873,0.54,1.924,2.288,296.7,0.714,0.667,0.875,91.437,35.16,2.0,4.0,3.0,16.0,1.0,14.166667,1.0,1.0,1.0,1.0,0.0,11676,67.050835,"Finau, Tony",Texas Children's Houston Open
44,-0.166091,-0.166091,-0.166091,0.2715,0.465,0.195909,0.195909,0.195909,0.2275,0.10025,0.261091,0.261091,0.261091,0.43075,0.54975,-0.476091,-0.476091,-0.476091,-0.285,0.01775,-0.018909,-0.018909,-0.018909,0.373625,0.66825,-0.183818,-0.183818,-0.183818,0.64675,1.13675,283.045455,283.045455,283.045455,278.5875,276.975,0.507,0.507,0.507,0.526875,0.5,0.656545,0.656545,0.656545,0.666625,0.63875,0.590909,0.590909,0.590909,0.674,0.66675,70.909091,70.909091,70.909091,70.909091,70.0,70.25,67.0,67.0,67.0,67.0,67.0,67.0,74.0,74.0,74.0,74.0,73.0,73.0,2.586679,2.586679,2.586679,2.390457,3.201562,1.889377,1.889377,1.889377,1.96291,2.365745,0.959286,0.959286,0.959286,1.13818,0.960004,1.11269,1.11269,1.11269,1.157252,1.49904,0.794074,0.794074,0.794074,0.811188,0.591728,2.085878,2.085878,2.085878,2.21193,1.39193,2.78292,2.78292,2.78292,2.790012,3.43694,8.904983,8.904983,8.904983,5.07133,5.352492,0.111095,0.111095,0.111095,0.099641,0.098891,0.107788,0.107788,0.107788,0.125989,0.147217,0.20554,0.20554,0.20554,0.168431,0.068178,5,1,1,1,1,1,1,1,2024.0,2024.0,475.0,3.0,665.0,71.0,10.0,73.0,-2.117,1.483,-0.324,-0.782,0.377,-1.74,271.4,0.538,0.611,0.75,48.779,39.855,1.0,6.0,3.0,23.0,26.0,10.916667,0.0,0.0,0.0,0.0,0.0,22535,68.461197,"Norrman, Vincent",Texas Children's Houston Open
31,2.035,2.035,2.035,2.035,2.035,-0.522,-0.522,-0.522,-0.522,-0.522,-0.254333,-0.254333,-0.254333,-0.254333,-0.254333,-0.241333,-0.241333,-0.241333,-0.241333,-0.241333,-1.018,-1.018,-1.018,-1.018,-1.018,1.017,1.017,1.017,1.017,1.017,291.133333,291.133333,291.133333,291.133333,291.133333,0.589667,0.589667,0.589667,0.589667,0.589667,0.666667,0.666667,0.666667,0.666667,0.666667,0.625,0.625,0.625,0.625,0.625,68.666667,68.666667,68.666667,68.666667,68.666667,68.666667,66.0,66.0,66.0,66.0,66.0,66.0,70.0,70.0,70.0,70.0,70.0,70.0,2.309401,2.309401,2.309401,2.309401,2.309401,1.149256,1.149256,1.149256,1.149256,1.149256,1.279784,1.279784,1.279784,1.279784,1.279784,0.751304,0.751304,0.751304,0.751304,0.751304,1.200333,1.200333,1.200333,1.200333,1.200333,1.475903,1.475903,1.475903,1.475903,1.475903,2.427451,2.427451,2.427451,2.427451,2.427451,2.084067,2.084067,2.084067,2.084067,2.084067,0.193249,0.193249,0.193249,0.193249,0.193249,0.09584,0.09584,0.09584,0.09584,0.09584,0.263391,0.263391,0.263391,0.263391,0.263391,33,1,1,1,1,0,0,0,2024.0,2024.0,540.0,3.0,978.0,71.0,1.0,70.0,0.783,-0.444,0.117,-1.425,-1.752,-0.969,290.7,0.385,0.722,0.6,37.051,30.677,2.0,4.0,2.0,24.0,33.0,8.316667,0.0,0.0,0.0,0.0,0.0,23014,68.742371,"Theegala, Sahith",Texas Children's Houston Open
24,0.0601,0.302438,0.437917,1.131125,1.267,-0.14495,-0.130562,-0.139833,-0.438375,-0.6575,0.0263,-0.098437,0.007667,0.155125,0.19625,0.2663,0.2495,0.280667,0.612625,1.219,0.14755,0.020437,0.148583,0.3295,0.75775,0.2094,0.325063,0.589417,1.4605,2.02475,289.465,288.9375,289.558333,289.7625,298.675,0.61195,0.620563,0.595333,0.60725,0.6785,0.68885,0.666687,0.652833,0.65275,0.63875,0.6565,0.68825,0.712667,0.723,0.67025,69.608696,69.95,70.125,70.083333,69.625,69.75,64.0,66.0,66.0,66.0,66.0,66.0,76.0,76.0,76.0,75.0,75.0,75.0,2.964261,3.263434,3.117643,3.292307,3.774917,1.800068,1.817733,1.860191,1.534234,1.600005,1.021129,1.035529,1.075936,1.127858,1.281428,1.173356,1.108409,1.047533,1.117342,1.543535,1.294863,1.401429,1.304974,1.238158,1.203571,1.913446,2.117018,1.987572,2.369861,2.610806,3.091159,3.336638,3.047942,3.02447,3.483263,12.817187,10.047943,9.488602,10.801579,4.349234,0.156566,0.142129,0.150176,0.174905,0.092436,0.125291,0.12823,0.138341,0.156441,0.132397,0.183849,0.180478,0.180002,0.176575,0.173513,12,1,1,1,1,1,0,0,2024.0,2024.0,11.0,2.0,11.0,72.0,10.0,69.0,0.663,0.887,-0.088,1.749,2.548,3.211,303.0,0.714,0.667,0.875,50.458,25.375,4.0,2.0,3.0,15.0,5.0,8.4,0.0,1.0,1.0,1.0,0.0,18417,69.183365,"Scheffler, Scottie",Texas Children's Houston Open
27,-0.30095,-0.251563,-0.208333,-0.2895,0.226,0.4693,0.467437,0.475417,0.628875,1.597,0.42475,0.302875,0.388083,0.45275,0.0675,-0.0652,-0.021375,0.136583,0.0825,-0.00775,0.8289,0.748938,1.00025,1.164375,1.65725,0.5305,0.500625,0.793,0.876625,1.88675,277.46,277.53125,277.775,277.025,271.475,0.66265,0.64075,0.652,0.620875,0.63475,0.6528,0.645875,0.662083,0.65975,0.62525,0.6645,0.634437,0.65025,0.6315,0.8035,70.04,70.35,70.6875,70.666667,71.0,69.5,66.0,66.0,67.0,67.0,68.0,68.0,75.0,75.0,75.0,75.0,75.0,72.0,2.476734,2.330057,2.498484,2.618615,1.732051,1.101164,1.131067,1.156546,1.385096,1.786916,1.008922,1.078737,1.185226,1.275361,0.800274,1.500061,1.327622,1.445947,1.199323,1.089542,0.858189,0.781582,0.648726,0.647902,0.300632,1.727193,1.686747,1.849267,2.0022,1.249178,2.021032,2.069996,2.183776,2.486908,1.777187,7.117392,7.027728,6.204123,7.174508,3.354971,0.131652,0.13555,0.142618,0.155688,0.201936,0.08411,0.09023,0.098739,0.086098,0.053181,0.193105,0.200002,0.227962,0.27604,0.243137,5,1,1,1,1,1,1,1,2024.0,2024.0,475.0,3.0,665.0,71.0,1.0,72.0,-1.462,0.72,-0.121,0.123,0.722,-0.74,269.4,0.462,0.611,0.5,58.89,33.356,3.0,4.0,3.0,23.0,11.0,12.466667,0.0,0.0,0.0,1.0,0.0,18417,69.555832,"Scheffler, Scottie",Texas Children's Houston Open
36,-0.72335,-0.657375,-0.556583,-0.681625,-0.182,0.12325,0.42175,0.418917,0.732,0.83175,0.11425,-0.08675,0.102083,-0.32375,-0.3655,0.55235,0.590625,0.722583,0.7255,0.997,0.78975,0.925687,1.243667,1.13375,1.46325,0.0683,0.270625,0.687083,0.45225,1.2815,287.975,286.7125,288.508333,291.0875,295.975,0.6213,0.63825,0.618833,0.598,0.6605,0.6556,0.638937,0.6575,0.60425,0.639,0.60305,0.60175,0.59325,0.619,0.6725,69.269231,70.3,70.5625,70.5,71.375,70.0,65.0,65.0,65.0,65.0,68.0,68.0,76.0,76.0,76.0,76.0,76.0,73.0,2.939746,2.988171,3.176619,2.825269,2.160247,1.651433,1.790078,1.928279,1.899014,1.974282,1.272527,1.165364,1.212746,1.19565,1.542457,1.473133,1.275244,1.329949,1.053766,0.566285,0.507039,0.474464,0.434204,0.495317,0.263577,1.602753,1.226249,1.126611,1.041656,1.278888,2.494563,2.514724,2.579985,2.54096,2.740783,8.093069,8.309101,6.993953,6.587529,4.154014,0.14858,0.153195,0.147123,0.174699,0.068413,0.123059,0.12509,0.122691,0.091109,0.071522,0.172188,0.180586,0.206238,0.242323,0.301603,12,1,1,1,1,1,0,0,2024.0,2024.0,11.0,3.0,11.0,72.0,1.0,73.0,-2.818,-0.03,-0.912,1.047,0.105,-2.712,296.3,0.571,0.667,0.273,15.038,38.727,4.0,7.0,3.0,16.0,26.0,13.416667,0.0,0.0,0.0,0.0,0.0,21891,69.560448,"Kitayama, Kurt",Texas Children's Houston Open
16,0.1495,0.1495,0.1495,0.1495,0.1495,0.7015,0.7015,0.7015,0.7015,0.7015,-0.70575,-0.70575,-0.70575,-0.70575,-0.70575,0.25425,0.25425,0.25425,0.25425,0.25425,0.24975,0.24975,0.24975,0.24975,0.24975,0.399,0.399,0.399,0.399,0.399,296.125,296.125,296.125,296.125,296.125,0.67275,0.67275,0.67275,0.67275,0.67275,0.7085,0.7085,0.7085,0.7085,0.7085,0.69025,0.69025,0.69025,0.69025,0.69025,69.5,69.5,69.5,69.5,69.5,69.5,67.0,67.0,67.0,67.0,67.0,67.0,72.0,72.0,72.0,72.0,72.0,72.0,2.081666,2.081666,2.081666,2.081666,2.081666,1.128403,1.128403,1.128403,1.128403,1.128403,0.598811,0.598811,0.598811,0.598811,0.598811,2.343569,2.343569,2.343569,2.343569,2.343569,0.403657,0.403657,0.403657,0.403657,0.403657,2.217548,2.217548,2.217548,2.217548,2.217548,2.738864,2.738864,2.738864,2.738864,2.738864,2.132878,2.132878,2.132878,2.132878,2.132878,0.09689,0.09689,0.09689,0.09689,0.09689,0.122894,0.122894,0.122894,0.122894,0.122894,0.096814,0.096814,0.096814,0.096814,0.096814,33,1,1,1,1,0,0,0,2024.0,2024.0,540.0,3.0,978.0,71.0,1.0,72.0,0.031,0.516,-4.054,0.538,-3.0,-2.969,298.7,0.692,0.556,0.636,71.347,52.236,0.0,4.0,2.0,24.0,46.0,11.0,0.0,0.0,0.0,0.0,0.0,15556,69.691147,"Hughes, Mackenzie",Texas Children's Houston Open
51,-0.1765,-0.1765,-0.1765,-0.1765,-0.1765,0.06,0.06,0.06,0.06,0.06,-0.209,-0.209,-0.209,-0.209,-0.209,0.114,0.114,0.114,0.114,0.114,-0.035,-0.035,-0.035,-0.035,-0.035,-0.212,-0.212,-0.212,-0.212,-0.212,287.0,287.0,287.0,287.0,287.0,0.7305,0.7305,0.7305,0.7305,0.7305,0.7225,0.7225,0.7225,0.7225,0.7225,0.5,0.5,0.5,0.5,0.5,70.0,70.0,70.0,70.0,70.0,70.0,67.0,67.0,67.0,67.0,67.0,67.0,73.0,73.0,73.0,73.0,73.0,73.0,4.242641,4.242641,4.242641,4.242641,4.242641,4.053843,4.053843,4.053843,4.053843,4.053843,0.847114,0.847114,0.847114,0.847114,0.847114,0.281428,0.281428,0.281428,0.281428,0.281428,0.131522,0.131522,0.131522,0.131522,0.131522,1.260064,1.260064,1.260064,1.260064,1.260064,5.3132,5.3132,5.3132,5.3132,5.3132,0.282843,0.282843,0.282843,0.282843,0.282843,0.054447,0.054447,0.054447,0.054447,0.054447,0.078489,0.078489,0.078489,0.078489,0.078489,0.236174,0.236174,0.236174,0.236174,0.236174,34,1,1,1,1,0,0,0,2024.0,2024.0,540.0,1.0,978.0,71.0,1.0,67.0,2.69,0.659,-0.01,0.207,0.856,3.545,286.8,0.769,0.778,0.667,23.275,47.221,3.0,3.0,2.0,22.0,13.0,13.9,0.0,0.0,0.0,1.0,0.0,19872,69.704132,"Riley, Davis",Texas Children's Houston Open
10,1.304389,1.27175,1.13275,0.908375,1.25525,0.543556,0.555188,0.498667,0.59725,0.86725,-0.341111,-0.214438,-0.153833,-0.27025,0.189,-0.945,-1.029,-1.026,-1.068,-0.92875,-0.742556,-0.688312,-0.681,-0.74075,0.128,0.5625,0.584188,0.45275,0.169375,1.38675,276.077778,275.8875,275.133333,272.9,269.35,0.5905,0.5795,0.594167,0.552125,0.53825,0.608111,0.625125,0.602,0.569625,0.5,0.762389,0.762375,0.76925,0.751125,0.79175,70.388889,70.388889,70.125,70.333333,71.125,70.0,67.0,67.0,67.0,67.0,67.0,67.0,76.0,76.0,76.0,76.0,76.0,74.0,2.523743,2.552776,2.870962,3.226564,2.94392,1.142644,1.211659,1.174222,1.122751,0.713934,1.271372,1.351582,1.539459,1.66253,2.013252,1.346678,1.37531,1.405932,1.68011,1.273855,0.975701,0.972742,0.738537,0.780707,0.655764,2.111561,2.23968,2.562348,2.833147,2.214358,2.069785,2.201702,2.377695,2.838507,2.909649,5.696496,6.028585,6.381127,6.761234,5.913544,0.139087,0.138644,0.108698,0.091163,0.062463,0.103525,0.096245,0.100114,0.106191,0.045724,0.114,0.112228,0.127044,0.133373,0.176046,5,1,1,1,1,1,1,1,2024.0,2024.0,475.0,3.0,665.0,71.0,1.0,74.0,0.196,-1.93,-0.782,-0.224,-2.936,-2.74,268.0,0.615,0.444,0.571,47.353,41.281,3.0,6.0,3.0,23.0,17.0,12.1,0.0,0.0,0.0,1.0,0.0,26096,69.74913,"Bhatia, Akshay",Texas Children's Houston Open
0,0.40745,0.462,0.29075,0.311875,-0.303,0.04315,0.129312,0.116083,0.1535,0.6305,0.56395,0.611688,0.316917,0.20025,1.44625,0.5741,0.531938,0.518167,0.9115,1.0075,1.181,1.27275,0.951,1.26525,3.08425,1.5904,1.737188,1.245,1.57725,2.7815,299.22,298.45625,299.8,299.4375,303.3,0.5927,0.598062,0.571333,0.607,0.625,0.68895,0.680625,0.643667,0.66675,0.72225,0.66105,0.675687,0.673083,0.671375,0.729,69.32,69.2,69.4375,70.0,70.25,68.5,63.0,65.0,65.0,67.0,67.0,67.0,77.0,74.0,74.0,74.0,74.0,73.0,2.764436,2.682505,2.662876,3.058945,3.0,1.733449,1.910128,1.990718,1.497965,1.95317,0.855199,0.918878,0.869267,0.909078,0.554302,1.47751,1.557038,1.421426,1.722972,0.360178,0.877588,0.94128,1.075991,0.799167,1.104664,2.160906,2.323271,2.582454,2.829624,0.578552,2.246929,2.325669,2.398074,2.62308,2.435411,7.407578,8.026703,5.023581,5.863187,3.495712,0.11837,0.122012,0.109866,0.085579,0.122183,0.115885,0.120778,0.109692,0.125988,0.045316,0.183202,0.169444,0.192885,0.209815,0.283733,12,1,1,1,1,1,0,0,2024.0,2024.0,11.0,3.0,11.0,72.0,1.0,67.0,0.361,0.136,1.595,1.195,2.926,3.288,305.8,0.643,0.667,0.75,47.536,26.208,4.0,2.0,3.0,16.0,8.0,12.0,0.0,0.0,1.0,1.0,0.0,16283,69.794151,"Rodgers, Patrick",Texas Children's Houston Open


# **^FIX THE DUPLICATE PLAYERS ABOVE^**

***

***

# Export

In [None]:
this_week_df.to_csv('this_week_df.csv', index=False)

In [None]:
np.savetxt("pds.csv", predicted_score, delimiter=",")

***

# Citations

- Documentation:
    - https://pandas.pydata.org/docs/reference/api/pandas.io.formats.style.Styler.set_sticky.html
    - https://towardsdatascience.com/10-useful-jupyter-notebook-extensions-for-a-data-scientist-bd4cb472c25e
- ChatGPT:
    - "Subtracting an integer from a date column in pandas based on conditions where we would use nested ifs"

***

# Archive Code

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['event_id', 'course_num', 'course_par', 'round_num', 'teetime_numeric', 'start_hole', 'fin_num', 'sg_putt', 'sg_arg', 'sg_app',
                                   'sg_ott', 'sg_t2g', 'sg_total', 'driving_dist', 'driving_acc', 'gir', 'scrambling', 'prox_rgh', 'prox_fw', 'great_shots', 'poor_shots',
                                   'L20_moving_avg', 'L10_moving_avg', 'L5_moving_avg']),
        ('binary', 'passthrough',['ohe_win', 'ohe_top_five', 'ohe_top_ten', 'ohe_top_twenty', 'ohe_make_cut'])
    ]
)


In [None]:
# data_features = df_sorted.drop(['dg_id','tour','year','event_name','course_name','player_name','round_completed','event_completed'], axis=1)
# X_train_processed = preprocessor.fit_transform(X_train)
# X_test_processed = preprocessor.transform(X_test)

In [None]:
# windows = [
#     ('last_30_days', 30, 1.0),  # (Name, Days in window, Weight)
#     ('last_90_days', 90, 0.3),
#     ('last_365_days', 365, 0.1),
#     ('older', float('inf'), 0)  # Use inf for older than 365 days
# ]

# # Initialize a column for weights
# df_sorted['weights'] = 0.25  # Default weight for older data

# for window_name, window_days, weight in windows[::-1]:
#     # Apply weights based on the days since
#     df_sorted.loc[df_sorted['Days_Since'] <= window_days, 'weights'] = weight


# ,'weights'
# weights = df_sorted['weights']
# , weights_train, weights_test, weights
# , sample_weight=weights_train)
# , sample_weight=weights_train))
# , sample_weight=weights_test))

In [None]:
# Add rolling total of rounds prior to the specific record in the dataframe