Lastly, we'll run our predictions. We want predictions for every punt type, since we don't know at the beginning of the draft which punt type we'll end up going with. After the first couple of picks, we'll have a better idea of which stats we want to tank, and we can then refer to the corresponding dataset.

In [1]:
import os
import utils
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb

from bayes_opt import BayesianOptimization
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook as tqdm
from xgboost import XGBRegressor

from constants import DATA_DIR
from constants import PUNT_TYPES
warnings.filterwarnings('ignore')

First, we'll refit the model on the ABT using the optimal parameter found earlier. Next, we'll compute predicted values from the past season's data and store them in a new folder. This is done for each punt type.

In [2]:
# For every punt type: refit Lasso on that punt's ABT, score the weighted
# data for `season`, and write the predictions to DATA_DIR/PRED/<punt>.csv.
weight = 'quad'
season = '2020-21'

# to_csv does not create missing directories, so make sure the output folder
# exists before the first write (a fresh checkout has no PRED folder yet).
pred_dir = os.path.join(DATA_DIR, 'PRED')
os.makedirs(pred_dir, exist_ok=True)

for punt in PUNT_TYPES:
    # An empty punt list is the no-punt baseline; otherwise the punted
    # categories are joined with '+' to form the file/folder name.
    punt_name = '+'.join(punt) if punt else 'Base'

    # fit the model: every ABT column except VALUE is a feature
    train = pd.read_csv(os.path.join(DATA_DIR, 'ABT', punt_name + '.csv'))
    X = train.drop(columns='VALUE')
    y = train['VALUE'].values.flatten()
    lasso = Lasso(alpha=1e-3)
    lasso.fit(X, y)

    # get prediction data
    merged = utils.csv_concatenate(os.path.join(DATA_DIR, punt_name, 'Value'))
    merged.sort_values(by=['SEASON'], inplace=True)
    pred_data = utils.weigh_data(weight, season, merged, True)
    pred_data.set_index('PLAYER', inplace=True)
    # Kept aside before VALUE is dropped from the features; not used by the
    # prediction itself.
    pred_y = pred_data['VALUE']
    pred_data.drop(columns=['TEAM', 'SEASON', 'VALUE'], inplace=True)
    pred_data = pd.get_dummies(pred_data)
    # NOTE(review): predict() assumes these columns line up with the training
    # columns in X (same names, same order) -- confirm against how the ABT
    # is built; get_dummies can emit a different set of dummy columns here.
    pred = lasso.predict(pred_data)

    # dataset created: features plus the PRED column, indexed by PLAYER
    pred_data['PRED'] = pred
    pred_data.to_csv(os.path.join(pred_dir, punt_name + '.csv'))

HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




After this, our work is essentially done! We've figured out how to optimize our drafting based on which category we want to punt, and we've trained our models to predict our scores to within an RMSE of about 1. Next, we can add some helper functions that will aid us on draft day.

In [3]:
def which_punt(player_list):
    """Rank every punt type by the summed predicted value of the given players.

    For each entry in PUNT_TYPES, the matching prediction file
    ``DATA_DIR/PRED/<punt_name>.csv`` is read and the ``PRED`` value of each
    requested player is looked up.

    Args:
        player_list: Player names as they appear in the ``PLAYER`` column of
            the prediction files.

    Returns:
        A DataFrame indexed by ``Punt`` (``'Base'`` for the empty punt,
        otherwise the punted categories joined with ``'+'``), with one column
        per player plus a ``Sum`` column, sorted by ``Sum`` in descending
        order.

    Raises:
        KeyError: If a player is not present in a prediction file.
        FileNotFoundError: If a prediction file is missing.
    """
    rows = []
    for punt in PUNT_TYPES:
        punt_name = '+'.join(punt) if punt else 'Base'
        pred_path = os.path.join(DATA_DIR, 'PRED', punt_name + '.csv')
        preds = pd.read_csv(pred_path, index_col='PLAYER')['PRED']
        rows.append([punt_name] + [preds.loc[player] for player in player_list])

    rank = pd.DataFrame(rows, columns=['Punt'] + list(player_list))
    # Move the punt label into the index *before* summing: a row-wise sum that
    # still includes the string column raises TypeError on pandas >= 2.0
    # (older versions silently skipped it).
    rank.set_index('Punt', inplace=True)
    rank['Sum'] = rank.sum(axis=1)
    rank.sort_values(by='Sum', ascending=False, inplace=True)
    return rank

# Sample trio of players to compare across all punt types; the bare call is
# left as the last expression so the notebook renders the resulting table.
player_list = [
    'James Harden',
    'Bradley Beal',
    'Fred VanVleet',
]
which_punt(player_list)

Unnamed: 0_level_0,James Harden,Bradley Beal,Fred VanVleet,Sum
Punt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Base,14.317128,10.650868,10.872908,35.840904
FG%,11.760281,7.718878,10.196662,29.675821
BLK+FG%,10.751292,7.658457,9.748664,28.158412
BLK,10.788879,7.943299,9.311119,28.043297
FG%+TRB,10.197947,7.278019,10.006377,27.482344
TRB,10.237596,7.571286,9.568914,27.377796
FT%,11.079537,7.223532,8.872134,27.175202
STL,9.954248,6.624912,7.203903,23.783064
PTS,8.671526,4.252741,8.097697,21.021964
AST,7.441505,6.214798,7.269583,20.925886
