<div class="alert alert-danger">
    <h4 style="font-weight: bold; font-size: 28px;">Support Vector Machines with Basic Feature Set</h4>
    <h5 style="font-weight: bold; font-size: 24px;">Hyperparameter Tuning using Expanding Window</h5>
    <p style="font-size: 20px;">NBA API Seasons 2021-22 to 2023-24</p>
</div>

<a name="Models"></a>

# Table of Contents

[Setup](#Setup)

[Data](#Data)

[Inspect Expanding Training Window](#Inspect-Expanding-Training-Window)

**[1. Target: Total Points (over / under)](#1.-Target:-Total-Points-(over-/-under))**
  
**[2. Target: Difference in Points (plus / minus)](#2.-Target:-Difference-in-Points-(plus-/-minus))**

**[3. Target: Game Winner (moneyline)](#3.-Target:-Game-Winner-(moneyline))**

# Setup

[Return to top](#Models)

In [1]:
# basic modules
import os
import time
import random as rn
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from functools import reduce
import itertools
import json

# plotting style: matplotlib's seaborn-like notebook sheet combined with
# seaborn's plain white background (swap 'white' for 'darkgrid' to get a grid)
plt.style.use('seaborn-v0_8-notebook')
sns.set_style('white')

# pandas display settings for wide feature frames and long result tables
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.precision', 3)

# preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, PolynomialFeatures
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin

# sampling
from imblearn.over_sampling import RandomOverSampler, SMOTE, SMOTENC, ADASYN, BorderlineSMOTE
from imblearn.under_sampling import RandomUnderSampler, ClusterCentroids
 
# models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LinearRegression, LogisticRegression, LogisticRegressionCV
from sklearn.svm import SVR, SVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import (
  BaggingClassifier, BaggingRegressor, RandomForestClassifier, RandomForestRegressor,
  AdaBoostClassifier, GradientBoostingClassifier
)
from xgboost import XGBClassifier, XGBRegressor
from sklearn.ensemble import StackingClassifier, StackingRegressor

# metrics & utilities 
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, GridSearchCV, StratifiedKFold, TimeSeriesSplit
from sklearn.metrics import (
  accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report, roc_auc_score,  
  roc_curve, RocCurveDisplay, auc, average_precision_score, precision_recall_curve, 
  PrecisionRecallDisplay, precision_score, recall_score, f1_score, mean_squared_error
)
from sklearn.utils import resample, class_weight

# variable importance
import shap
from shap.explainers import Tree
from lime.lime_tabular import LimeTabularExplainer
from sklearn.inspection import permutation_importance

# warnings
# NOTE(review): the filter below silences every warning category for the rest
# of the kernel session, so deprecation or numerical warnings raised by later
# cells are never shown; narrow it (category= / module=) if some should stay
# visible.
import warnings
warnings.filterwarnings("ignore")

# user defined functions
import utility_functions as utl

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


# Data

[Return to top](#Models)

Data splits:

- Define NBA Season 2021-22 as the TRAINING set: regular season is 2021-10-19 to 2022-04-10. 
- Define NBA Season 2022-23 as the VALIDATION set: regular season is 2022-10-18 to 2023-04-09.
- Define NBA Season 2023-24 as the TESTING set: regular season is 2023-10-24 to 2024-04-14.

In [2]:
# Load the rolling box-score matchups, keep the three seasons used in this
# notebook and request min-max scaling with 2021-22 named as the training
# season; the target columns are left unscaled (scale_target=False).
# The loader hands back three frames, unpacked below in the order
# total points / plus-minus / game result.
# NOTE(review): the head() previews further down show feature values on their
# raw scale (e.g. ROLL_HOME_PTS = 124.0) -- confirm the scaler is really applied.
scaled_frames = utl.load_and_scale_data(
    file_path='../data/processed/nba_team_matchups_rolling_box_scores_2022_2024_r05.csv',
    seasons_to_keep=['2021-22', '2022-23', '2023-24'],
    training_season='2021-22',
    scaler_type='minmax',
    scale_target=False
)
pts_scaled_df, pm_scaled_df, res_scaled_df = scaled_frames

Season 2021-22: 1186 games
Season 2022-23: 1181 games
Season 2023-24: 692 games
Total number of games across sampled seasons: 3059 games


In [3]:
# number of games per season, copied from the loader's printed summary
season_22_ngames = 1186  # season 2021-22
season_23_ngames = 1181  # season 2022-23

# These counts are hard-coded and season_22_ngames is used as the initial
# training-window size in the grid searches, so fail fast if the loaded data
# no longer holds at least that many rows (e.g. after the CSV or the season
# filter changes) instead of silently training on a misaligned window.
assert season_22_ngames + season_23_ngames <= len(pts_scaled_df), (
    "hard-coded season sizes exceed the number of loaded games"
)

In [4]:
# preview the first rows of the total-points frame (target column: TOTAL_PTS)
pts_scaled_df.head()

Unnamed: 0_level_0,ROLL_HOME_PTS,ROLL_HOME_FGM,ROLL_HOME_FGA,ROLL_HOME_FG_PCT,ROLL_HOME_FG3M,ROLL_HOME_FG3A,ROLL_HOME_FG3_PCT,ROLL_HOME_FTM,ROLL_HOME_FTA,ROLL_HOME_FT_PCT,ROLL_HOME_OREB,ROLL_HOME_DREB,ROLL_HOME_REB,ROLL_HOME_AST,ROLL_HOME_STL,ROLL_HOME_BLK,ROLL_HOME_TOV,ROLL_HOME_PF,ROLL_AWAY_PTS,ROLL_AWAY_FGM,ROLL_AWAY_FGA,ROLL_AWAY_FG_PCT,ROLL_AWAY_FG3M,ROLL_AWAY_FG3A,ROLL_AWAY_FG3_PCT,ROLL_AWAY_FTM,ROLL_AWAY_FTA,ROLL_AWAY_FT_PCT,ROLL_AWAY_OREB,ROLL_AWAY_DREB,ROLL_AWAY_REB,ROLL_AWAY_AST,ROLL_AWAY_STL,ROLL_AWAY_BLK,ROLL_AWAY_TOV,ROLL_AWAY_PF,TOTAL_PTS
GAME_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2021-10-23,124.0,42.0,87.0,0.483,16.0,38.0,0.421,24.0,31.0,0.774,13.0,33.0,46.0,26.0,18.0,12.0,18.0,22.0,112.0,42.0,87.0,0.483,15.0,29.0,0.517,13.0,16.0,0.813,9.0,33.0,42.0,26.0,7.0,5.0,16.0,20.0,185
2021-10-23,83.0,30.0,97.0,0.309,7.0,34.0,0.206,16.0,22.0,0.727,19.0,35.0,54.0,14.0,10.0,4.0,19.0,21.0,87.0,31.0,93.0,0.333,13.0,43.0,0.302,12.0,13.0,0.923,10.0,40.0,50.0,16.0,7.0,3.0,15.0,21.0,198
2021-10-23,121.0,45.0,93.0,0.484,12.0,35.0,0.343,19.0,22.0,0.864,9.0,40.0,49.0,25.0,5.0,5.0,12.0,22.0,115.0,42.0,86.0,0.488,10.0,32.0,0.313,21.0,28.0,0.75,7.0,40.0,47.0,31.0,8.0,2.0,11.0,22.0,239
2021-10-23,123.0,49.0,98.0,0.5,13.0,30.0,0.433,12.0,18.0,0.667,13.0,30.0,43.0,32.0,8.0,3.0,8.0,22.0,95.0,32.0,84.0,0.381,12.0,42.0,0.286,19.0,29.0,0.655,5.0,33.0,38.0,19.0,6.0,0.0,15.0,26.0,232
2021-10-24,124.0,48.0,95.0,0.505,17.0,38.0,0.447,11.0,14.0,0.786,10.0,44.0,54.0,29.0,12.0,10.0,17.0,18.0,134.0,48.0,117.0,0.41,21.0,57.0,0.368,17.0,23.0,0.739,15.0,41.0,56.0,34.0,13.0,9.0,18.0,24.0,204


In [5]:
# preview the first rows of the plus/minus frame (target column: PLUS_MINUS)
pm_scaled_df.head()

Unnamed: 0_level_0,ROLL_HOME_PTS,ROLL_HOME_FGM,ROLL_HOME_FGA,ROLL_HOME_FG_PCT,ROLL_HOME_FG3M,ROLL_HOME_FG3A,ROLL_HOME_FG3_PCT,ROLL_HOME_FTM,ROLL_HOME_FTA,ROLL_HOME_FT_PCT,ROLL_HOME_OREB,ROLL_HOME_DREB,ROLL_HOME_REB,ROLL_HOME_AST,ROLL_HOME_STL,ROLL_HOME_BLK,ROLL_HOME_TOV,ROLL_HOME_PF,ROLL_AWAY_PTS,ROLL_AWAY_FGM,ROLL_AWAY_FGA,ROLL_AWAY_FG_PCT,ROLL_AWAY_FG3M,ROLL_AWAY_FG3A,ROLL_AWAY_FG3_PCT,ROLL_AWAY_FTM,ROLL_AWAY_FTA,ROLL_AWAY_FT_PCT,ROLL_AWAY_OREB,ROLL_AWAY_DREB,ROLL_AWAY_REB,ROLL_AWAY_AST,ROLL_AWAY_STL,ROLL_AWAY_BLK,ROLL_AWAY_TOV,ROLL_AWAY_PF,PLUS_MINUS
GAME_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2021-10-23,124.0,42.0,87.0,0.483,16.0,38.0,0.421,24.0,31.0,0.774,13.0,33.0,46.0,26.0,18.0,12.0,18.0,22.0,112.0,42.0,87.0,0.483,15.0,29.0,0.517,13.0,16.0,0.813,9.0,33.0,42.0,26.0,7.0,5.0,16.0,20.0,7.0
2021-10-23,83.0,30.0,97.0,0.309,7.0,34.0,0.206,16.0,22.0,0.727,19.0,35.0,54.0,14.0,10.0,4.0,19.0,21.0,87.0,31.0,93.0,0.333,13.0,43.0,0.302,12.0,13.0,0.923,10.0,40.0,50.0,16.0,7.0,3.0,15.0,21.0,-8.0
2021-10-23,121.0,45.0,93.0,0.484,12.0,35.0,0.343,19.0,22.0,0.864,9.0,40.0,49.0,25.0,5.0,5.0,12.0,22.0,115.0,42.0,86.0,0.488,10.0,32.0,0.313,21.0,28.0,0.75,7.0,40.0,47.0,31.0,8.0,2.0,11.0,22.0,29.0
2021-10-23,123.0,49.0,98.0,0.5,13.0,30.0,0.433,12.0,18.0,0.667,13.0,30.0,43.0,32.0,8.0,3.0,8.0,22.0,95.0,32.0,84.0,0.381,12.0,42.0,0.286,19.0,29.0,0.655,5.0,33.0,38.0,19.0,6.0,0.0,15.0,26.0,-10.0
2021-10-24,124.0,48.0,95.0,0.505,17.0,38.0,0.447,11.0,14.0,0.786,10.0,44.0,54.0,29.0,12.0,10.0,17.0,18.0,134.0,48.0,117.0,0.41,21.0,57.0,0.368,17.0,23.0,0.739,15.0,41.0,56.0,34.0,13.0,9.0,18.0,24.0,-10.0


In [6]:
# preview the first rows of the game-result frame (target column: GAME_RESULT)
res_scaled_df.head()

Unnamed: 0_level_0,ROLL_HOME_PTS,ROLL_HOME_FGM,ROLL_HOME_FGA,ROLL_HOME_FG_PCT,ROLL_HOME_FG3M,ROLL_HOME_FG3A,ROLL_HOME_FG3_PCT,ROLL_HOME_FTM,ROLL_HOME_FTA,ROLL_HOME_FT_PCT,ROLL_HOME_OREB,ROLL_HOME_DREB,ROLL_HOME_REB,ROLL_HOME_AST,ROLL_HOME_STL,ROLL_HOME_BLK,ROLL_HOME_TOV,ROLL_HOME_PF,ROLL_AWAY_PTS,ROLL_AWAY_FGM,ROLL_AWAY_FGA,ROLL_AWAY_FG_PCT,ROLL_AWAY_FG3M,ROLL_AWAY_FG3A,ROLL_AWAY_FG3_PCT,ROLL_AWAY_FTM,ROLL_AWAY_FTA,ROLL_AWAY_FT_PCT,ROLL_AWAY_OREB,ROLL_AWAY_DREB,ROLL_AWAY_REB,ROLL_AWAY_AST,ROLL_AWAY_STL,ROLL_AWAY_BLK,ROLL_AWAY_TOV,ROLL_AWAY_PF,GAME_RESULT
GAME_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2021-10-23,124.0,42.0,87.0,0.483,16.0,38.0,0.421,24.0,31.0,0.774,13.0,33.0,46.0,26.0,18.0,12.0,18.0,22.0,112.0,42.0,87.0,0.483,15.0,29.0,0.517,13.0,16.0,0.813,9.0,33.0,42.0,26.0,7.0,5.0,16.0,20.0,1
2021-10-23,83.0,30.0,97.0,0.309,7.0,34.0,0.206,16.0,22.0,0.727,19.0,35.0,54.0,14.0,10.0,4.0,19.0,21.0,87.0,31.0,93.0,0.333,13.0,43.0,0.302,12.0,13.0,0.923,10.0,40.0,50.0,16.0,7.0,3.0,15.0,21.0,0
2021-10-23,121.0,45.0,93.0,0.484,12.0,35.0,0.343,19.0,22.0,0.864,9.0,40.0,49.0,25.0,5.0,5.0,12.0,22.0,115.0,42.0,86.0,0.488,10.0,32.0,0.313,21.0,28.0,0.75,7.0,40.0,47.0,31.0,8.0,2.0,11.0,22.0,1
2021-10-23,123.0,49.0,98.0,0.5,13.0,30.0,0.433,12.0,18.0,0.667,13.0,30.0,43.0,32.0,8.0,3.0,8.0,22.0,95.0,32.0,84.0,0.381,12.0,42.0,0.286,19.0,29.0,0.655,5.0,33.0,38.0,19.0,6.0,0.0,15.0,26.0,0
2021-10-24,124.0,48.0,95.0,0.505,17.0,38.0,0.447,11.0,14.0,0.786,10.0,44.0,54.0,29.0,12.0,10.0,17.0,18.0,134.0,48.0,117.0,0.41,21.0,57.0,0.368,17.0,23.0,0.739,15.0,41.0,56.0,34.0,13.0,9.0,18.0,24.0,0


# Inspect Expanding Training Window

[Return to top](#Models)

In [7]:
# expanding window configuration
initial_train_size = 10  # number of observations in the first training window
test_size = 1            # each split is tested on the single next observation
max_splits_to_show = 15  # how many splits to print

# print only the leading splits; islice stops pulling from the splitter once
# max_splits_to_show pairs have been shown
window_splits = utl.expanding_window_ts_split(pts_scaled_df, initial_train_size, test_size=test_size)
for train_indices, test_indices in itertools.islice(window_splits, max_splits_to_show):
    print("TRAIN:", train_indices, "TEST:", test_indices)

TRAIN: [0 1 2 3 4 5 6 7 8 9] TEST: [10]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10] TEST: [11]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11] TEST: [12]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12] TEST: [13]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13] TEST: [14]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14] TEST: [15]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15] TEST: [16]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16] TEST: [17]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17] TEST: [18]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18] TEST: [19]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19] TEST: [20]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20] TEST: [21]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21] TEST: [22]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22] TEST: [23]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9

<a name="1.-Target:-Total-Points-(over-/-under)"></a>
# 1. Target: Total Points (over / under)

[Return to top](#Models)

In [8]:
# Expanding-window grid search for the total-points regressor (SVR).

# hyperparameters held fixed for every run
pts_fixed_params = {
    'gamma': 'scale',
    'coef0': 0.0,
    'shrinking': True
}

# hyperparameters varied across runs (ranges tried so far are noted inline)
# NOTE(review): in the recorded run the linear kernel with C=100 needed about
# 416 s per setting, far more than the smaller C values.
pts_search_grid = {
    'kernel': ['linear', 'rbf'],       # tried: 'linear', 'rbf'
    'epsilon': [0.01, 0.1, 0.5, 1.0],  # tried: 0.01, 0.1, 0.5, 1.0
    'C': [0.1, 1.0, 10, 100]           # tried: 0.01, 0.1, 1.0, 10, 100, 1000
}

results = utl.train_models_over_grid(
    df=pts_scaled_df,                     # data set to use
    target_col='TOTAL_PTS',               # target column name
    model_class=SVR,                      # model class
    initial_train_size=season_22_ngames,  # first training window: size of season 2021-22
    expansion_limit=50,                   # maximum number of new training observations in expansion
    test_size=1,                          # one test observation per split
    constant_params=pts_fixed_params,
    explore_params=pts_search_grid
)

Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.01, 'C': 0.1}
Total time taken: 2.38 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.01, 'C': 1.0}
Total time taken: 6.86 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.01, 'C': 10}
Total time taken: 52.52 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.01, 'C': 100}
Total time taken: 416.66 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.1, 'C': 0.1}
Total time taken: 2.63 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.1, 'C': 1.0}
Total time taken: 7.62 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.1, 'C': 10}
Total time taken: 54.35 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.1, 'C': 100}
Total time taken: 416.33 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.5, 'C': 0.1}
Total time taken: 2.61 seconds
Parameters 

In [9]:
# gather the metrics of every hyperparameter combination into one table
results_df = utl.compile_results_to_dataframe(results)

# rank by average RMSE (lowest first) and show the five best settings
# NOTE(review): in the recorded output the top rows all use the largest C of
# the grid, so the optimum may lie beyond the explored range.
ranked_by_rmse = results_df.sort_values('average_rmse')
ranked_by_rmse.head(5)

Unnamed: 0,run_id,C,average_rmse,epsilon,kernel
15,run_15,100.0,16.999,1.0,linear
11,run_11,100.0,17.026,0.5,linear
7,run_7,100.0,17.131,0.1,linear
3,run_3,100.0,17.165,0.01,linear
1,run_1,1.0,17.179,0.01,linear


In [10]:
# get best parameters from validation as dictionary
best_params = utl.get_best_params(results_df, metric='average_rmse')

# Serialize before touching the file: if an entry cannot be encoded, the error
# is raised here and an existing parameter file is not truncated.
best_params_json = json.dumps(best_params, default=utl.handle_non_serializable, indent=4)

# Create the output directory if needed so a fresh checkout does not fail with
# FileNotFoundError only after the long grid search has finished.
best_params_path = '../hyperparameters/SVR_pts_best_params.json'
os.makedirs(os.path.dirname(best_params_path), exist_ok=True)

# save the dictionary to a file
with open(best_params_path, 'w') as json_file:
    json_file.write(best_params_json)

<a name="2.-Target:-Difference-in-Points-(plus-/-minus)"></a>
# 2. Target: Difference in Points (plus / minus)

[Return to top](#Models)

In [11]:
# Expanding-window grid search for the point-difference regressor (SVR).

# hyperparameters held fixed for every run
pm_fixed_params = {
    'gamma': 'scale',
    'coef0': 0.0,
    'shrinking': True
}

# hyperparameters varied across runs (ranges tried so far are noted inline)
# NOTE(review): the first four settings of the recorded run report roughly
# 900 to 22,500 s while comparable later settings take seconds to minutes --
# presumably a wall-clock artefact (e.g. a suspended machine); verify before
# reading anything into those timings.
pm_search_grid = {
    'kernel': ['linear', 'rbf'],       # tried: 'linear', 'rbf'
    'epsilon': [0.01, 0.1, 0.5, 1.0],  # tried: 0.01, 0.1, 0.5, 1.0
    'C': [0.1, 1.0, 10, 100]           # tried: 0.01, 0.1, 1.0, 10, 100, 1000
}

results = utl.train_models_over_grid(
    df=pm_scaled_df,                      # data set to use
    target_col='PLUS_MINUS',              # target column name
    model_class=SVR,                      # model class
    initial_train_size=season_22_ngames,  # first training window: size of season 2021-22
    expansion_limit=50,                   # maximum number of new training observations in expansion
    test_size=1,                          # one test observation per split
    constant_params=pm_fixed_params,
    explore_params=pm_search_grid
)

Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.01, 'C': 0.1}
Total time taken: 906.50 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.01, 'C': 1.0}
Total time taken: 911.83 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.01, 'C': 10}
Total time taken: 9135.86 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.01, 'C': 100}
Total time taken: 22557.18 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.1, 'C': 0.1}
Total time taken: 2.33 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.1, 'C': 1.0}
Total time taken: 7.27 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.1, 'C': 10}
Total time taken: 50.38 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.1, 'C': 100}
Total time taken: 445.94 seconds
Parameters currently explored: {'kernel': 'linear', 'epsilon': 0.5, 'C': 0.1}
Total time taken: 2.40 seconds
Par

In [12]:
# gather the metrics of every hyperparameter combination into one table
results_df = utl.compile_results_to_dataframe(results)

# rank by average RMSE (lowest first) and show the five best settings
ranked_by_rmse = results_df.sort_values('average_rmse')
ranked_by_rmse.head(5)

Unnamed: 0,run_id,C,average_rmse,epsilon,kernel
26,run_26,10.0,12.122,0.5,rbf
30,run_30,10.0,12.132,1.0,rbf
18,run_18,10.0,12.147,0.01,rbf
22,run_22,10.0,12.148,0.1,rbf
31,run_31,100.0,12.183,1.0,rbf


In [13]:
# get best parameters from validation as dictionary
best_params = utl.get_best_params(results_df, metric='average_rmse')

# Serialize before touching the file: if an entry cannot be encoded, the error
# is raised here and an existing parameter file is not truncated.
best_params_json = json.dumps(best_params, default=utl.handle_non_serializable, indent=4)

# Create the output directory if needed so a fresh checkout does not fail with
# FileNotFoundError only after the long grid search has finished.
best_params_path = '../hyperparameters/SVR_pm_best_params.json'
os.makedirs(os.path.dirname(best_params_path), exist_ok=True)

# save the dictionary to a file
with open(best_params_path, 'w') as json_file:
    json_file.write(best_params_json)

<a name="3.-Target:-Game-Winner-(moneyline)"></a>
# 3. Target: Game Winner (moneyline)

[Return to top](#Models)

In [None]:
# Expanding-window grid search for the game-winner classifier (SVC).

# hyperparameters held fixed for every run
# NOTE(review): probability=True makes every SVC fit noticeably slower
# (scikit-learn runs an extra internal cross-validation to calibrate
# probabilities). The runs are ranked by average accuracy afterwards, so this
# may be unnecessary -- confirm that utl.train_models_over_grid does not rely
# on predict_proba before turning it off.
res_fixed_params = {
    'random_state': 599,
    'gamma': 'scale',
    'coef0': 0.0,
    'shrinking': True,
    'probability': True,
    'class_weight': None
}

# hyperparameters varied across runs (ranges tried so far are noted inline)
res_search_grid = {
    'kernel': ['linear', 'rbf'],       # tried: 'linear', 'rbf'
    'C': [0.1, 1.0, 10, 100]           # tried: 0.01, 0.1, 1.0, 10, 100, 1000
}

results = utl.train_models_over_grid(
    df=res_scaled_df,                     # data set to use
    target_col='GAME_RESULT',             # target column name
    model_class=SVC,                      # model class
    initial_train_size=season_22_ngames,  # first training window: size of season 2021-22
    expansion_limit=50,                   # maximum number of new training observations in expansion
    test_size=1,                          # one test observation per split
    constant_params=res_fixed_params,
    explore_params=res_search_grid
)

Parameters currently explored: {'kernel': 'linear', 'C': 0.1}
Total time taken: 15.47 seconds
Parameters currently explored: {'kernel': 'linear', 'C': 1.0}
Total time taken: 80.09 seconds
Parameters currently explored: {'kernel': 'linear', 'C': 10}
Total time taken: 683.58 seconds
Parameters currently explored: {'kernel': 'linear', 'C': 100}


In [None]:
# gather the metrics of every hyperparameter combination into one table
results_df = utl.compile_results_to_dataframe(results)

# rank by average accuracy (highest first) and show the five best settings
ranked_by_accuracy = results_df.sort_values('average_accuracy', ascending=False)
ranked_by_accuracy.head(5)

In [None]:
# get best parameters from validation as dictionary
best_params = utl.get_best_params(results_df, metric='average_accuracy')

# Serialize before touching the file: if an entry cannot be encoded, the error
# is raised here and an existing parameter file is not truncated.
best_params_json = json.dumps(best_params, default=utl.handle_non_serializable, indent=4)

# Create the output directory if needed so a fresh checkout does not fail with
# FileNotFoundError only after the long grid search has finished.
best_params_path = '../hyperparameters/SVC_res_best_params.json'
os.makedirs(os.path.dirname(best_params_path), exist_ok=True)

# save the dictionary to a file
with open(best_params_path, 'w') as json_file:
    json_file.write(best_params_json)