In [1]:
import os
import numpy as np
import pandas as pd
import pickle

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import ShuffleSplit, GridSearchCV

pd.set_option("display.max_rows", None, "display.max_columns", None)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# Gradient Boosting
In this notebook, we will be using Gradient Boosting in order to try and predict individual games. We will be using the testing and training data that was generated from the `training-data` notebook to train our model.
## Getting the data ready

In [2]:
path = os.path.abspath(f'../../data/training_set.csv')
training_df = pd.read_csv(path)
training_df['Loss'] = 1 - training_df['Win']
training_df.head()

Unnamed: 0,FG_SMA,FG_CMA,FG_EMA,FGA_SMA,FGA_CMA,FGA_EMA,FG%_SMA,FG%_CMA,FG%_EMA,3P_SMA,3P_CMA,3P_EMA,3PA_SMA,3PA_CMA,3PA_EMA,3P%_SMA,3P%_CMA,3P%_EMA,FT_SMA,FT_CMA,FT_EMA,FTA_SMA,FTA_CMA,FTA_EMA,FT%_SMA,FT%_CMA,FT%_EMA,ORB_SMA,ORB_CMA,ORB_EMA,TRB_SMA,TRB_CMA,TRB_EMA,AST_SMA,AST_CMA,AST_EMA,STL_SMA,STL_CMA,STL_EMA,BLK_SMA,BLK_CMA,BLK_EMA,TOV_SMA,TOV_CMA,TOV_EMA,PF_SMA,PF_CMA,PF_EMA,ORtg_SMA,ORtg_CMA,ORtg_EMA,DRtg_SMA,DRtg_CMA,DRtg_EMA,Pace_SMA,Pace_CMA,Pace_EMA,FTr_SMA,FTr_CMA,FTr_EMA,3PAr_SMA,3PAr_CMA,3PAr_EMA,TS%_SMA,TS%_CMA,TS%_EMA,TRB%_SMA,TRB%_CMA,TRB%_EMA,AST%_SMA,AST%_CMA,AST%_EMA,STL%_SMA,STL%_CMA,STL%_EMA,BLK%_SMA,BLK%_CMA,BLK%_EMA,eFG%_SMA,eFG%_CMA,eFG%_EMA,TOV%_SMA,TOV%_CMA,TOV%_EMA,ORB%_SMA,ORB%_CMA,ORB%_EMA,FT/FGA_SMA,FT/FGA_CMA,FT/FGA_EMA,opp_FG_SMA,opp_FG_CMA,opp_FG_EMA,opp_FGA_SMA,opp_FGA_CMA,opp_FGA_EMA,opp_FG%_SMA,opp_FG%_CMA,opp_FG%_EMA,opp_3P_SMA,opp_3P_CMA,opp_3P_EMA,opp_3PA_SMA,opp_3PA_CMA,opp_3PA_EMA,opp_3P%_SMA,opp_3P%_CMA,opp_3P%_EMA,opp_FT_SMA,opp_FT_CMA,opp_FT_EMA,opp_FTA_SMA,opp_FTA_CMA,opp_FTA_EMA,opp_FT%_SMA,opp_FT%_CMA,opp_FT%_EMA,opp_ORB_SMA,opp_ORB_CMA,opp_ORB_EMA,opp_TRB_SMA,opp_TRB_CMA,opp_TRB_EMA,opp_AST_SMA,opp_AST_CMA,opp_AST_EMA,opp_STL_SMA,opp_STL_CMA,opp_STL_EMA,opp_BLK_SMA,opp_BLK_CMA,opp_BLK_EMA,opp_TOV_SMA,opp_TOV_CMA,opp_TOV_EMA,opp_PF_SMA,opp_PF_CMA,opp_PF_EMA,opp_ORtg_SMA,opp_ORtg_CMA,opp_ORtg_EMA,opp_DRtg_SMA,opp_DRtg_CMA,opp_DRtg_EMA,opp_Pace_SMA,opp_Pace_CMA,opp_Pace_EMA,opp_FTr_SMA,opp_FTr_CMA,opp_FTr_EMA,opp_3PAr_SMA,opp_3PAr_CMA,opp_3PAr_EMA,opp_TS%_SMA,opp_TS%_CMA,opp_TS%_EMA,opp_TRB%_SMA,opp_TRB%_CMA,opp_TRB%_EMA,opp_AST%_SMA,opp_AST%_CMA,opp_AST%_EMA,opp_STL%_SMA,opp_STL%_CMA,opp_STL%_EMA,opp_BLK%_SMA,opp_BLK%_CMA,opp_BLK%_EMA,opp_eFG%_SMA,opp_eFG%_CMA,opp_eFG%_EMA,opp_TOV%_SMA,opp_TOV%_CMA,opp_TOV%_EMA,opp_ORB%_SMA,opp_ORB%_CMA,opp_ORB%_EMA,opp_FT/FGA_SMA,opp_FT/FGA_CMA,opp_FT/FGA_EMA,Neutral,Win,Loss
0,22.8,24.653846,23.211004,54.4,56.038462,55.951135,0.4192,0.440962,0.414875,5.0,6.307692,4.723021,17.4,20.076923,17.688046,0.2916,0.318923,0.265432,15.4,13.615385,12.57172,20.6,18.807692,16.287396,0.7516,0.718615,0.756023,7.8,7.423077,7.630525,30.6,29.538462,29.642729,12.0,12.307692,11.694995,5.6,6.923077,6.909948,3.2,4.230769,3.675965,12.0,11.384615,12.212031,20.2,16.769231,19.323787,96.86,100.507692,93.371986,104.72,101.753846,101.552341,68.06,68.665385,67.74213,0.3852,0.342192,0.297102,0.3228,0.355769,0.318668,0.5124,0.5335,0.498947,48.3,47.223077,47.41648,53.1,50.046154,50.683134,8.2,10.0,10.137945,8.38,11.403846,10.344138,0.4658,0.497,0.457629,15.76,14.819231,16.077612,25.74,23.003846,24.421789,0.2866,0.249,0.228649,21.4,21.538462,21.057987,54.8,52.538462,54.855677,0.391,0.409423,0.384831,8.2,8.076923,8.884101,24.2,24.846154,25.275189,0.336,0.321192,0.350582,10.4,11.846154,11.14838,14.6,15.884615,14.613588,0.7082,0.739269,0.753357,6.8,6.115385,7.383697,31.4,30.653846,32.536725,11.8,11.653846,11.233467,3.4,4.192308,3.474109,2.8,2.346154,2.342764,11.6,11.346154,13.049714,15.0,14.653846,15.671923,92.98,96.769231,93.931371,102.02,104.4,99.376988,65.82,65.4,66.126001,0.2672,0.316538,0.267147,0.4408,0.468308,0.459948,0.4962,0.523346,0.502494,47.46,49.119231,49.264062,55.6,53.553846,53.411235,5.08,6.373077,5.167442,6.86,7.053846,6.029065,0.4656,0.484962,0.465742,15.52,15.811538,17.140137,22.4,20.638462,24.324966,0.19,0.236269,0.203534,0,1,0
1,26.0,25.363636,24.750136,58.6,57.0,56.395143,0.4496,0.446727,0.442387,4.0,5.727273,4.45734,15.0,18.681818,15.916814,0.2764,0.3105,0.28243,17.2,14.590909,17.632687,22.0,19.681818,21.997752,0.7602,0.735909,0.786252,11.4,8.272727,10.136214,34.2,32.5,32.159791,11.0,11.681818,10.272923,5.8,5.5,6.499974,3.0,2.909091,2.81615,10.6,10.863636,11.167679,16.4,15.454545,15.446714,108.5,103.040909,106.421923,105.94,101.304545,105.870044,67.44,68.240909,67.298025,0.3826,0.354,0.396063,0.259,0.328182,0.28408,0.536,0.537273,0.539165,53.86,50.554545,51.135159,42.5,45.390909,41.533877,8.56,7.936364,9.626756,10.6,8.509091,8.985101,0.4846,0.4975,0.482619,13.44,14.113636,14.382758,34.7,25.840909,31.393061,0.3014,0.264045,0.319346,27.8,26.869565,27.521328,60.0,58.217391,60.888683,0.4662,0.462348,0.454192,9.8,9.826087,7.595725,26.0,25.0,23.311761,0.3486,0.383913,0.288509,8.2,10.521739,7.798636,11.2,14.869565,11.644625,0.773,0.727348,0.72052,7.8,6.826087,8.042637,32.2,29.043478,32.869443,14.8,15.26087,13.426693,5.0,7.26087,5.431505,3.6,3.478261,3.540045,10.0,11.391304,10.48864,15.6,16.0,14.994876,108.82,106.2,102.699579,105.02,100.552174,103.770446,67.9,69.226087,68.827918,0.1854,0.254087,0.189748,0.4412,0.430783,0.38929,0.5694,0.568565,0.535933,50.22,47.143478,51.452466,52.96,57.156522,48.568758,7.34,10.417391,7.870382,7.2,9.23913,7.71239,0.5504,0.547261,0.518834,13.4,14.778261,13.688026,24.02,22.078261,24.345978,0.1374,0.180522,0.128987,0,1,0
2,26.4,28.25,25.671618,57.8,63.1,56.427805,0.4536,0.44545,0.45112,6.4,6.65,5.819351,20.6,20.4,20.03366,0.3054,0.32185,0.288661,9.0,9.15,9.090603,14.2,13.95,15.245384,0.5978,0.63775,0.577958,8.4,10.15,8.553029,28.0,32.75,28.593819,15.6,16.05,14.707068,7.4,7.35,7.326814,5.2,4.05,5.228792,13.0,12.85,13.141423,14.2,14.55,13.786065,97.5,99.43,95.954924,107.38,102.715,104.121329,69.56,71.955,68.71699,0.2554,0.2271,0.284286,0.3574,0.3214,0.358103,0.5236,0.51495,0.516993,46.38,50.19,46.864038,57.42,54.915,56.400907,10.56,10.125,10.660785,13.74,10.635,14.307917,0.508,0.49725,0.502925,16.86,15.71,17.228947,27.14,30.19,27.231699,0.1584,0.14765,0.165701,26.0,25.454545,25.679242,57.4,55.090909,57.564619,0.4572,0.464636,0.450118,8.2,8.545455,7.799799,21.4,22.818182,21.633975,0.3872,0.377591,0.365943,13.2,11.454545,11.706824,18.6,17.318182,17.203915,0.7188,0.644227,0.701938,8.8,9.272727,8.568962,34.8,34.318182,34.254804,15.8,16.181818,14.151697,5.0,5.954545,4.26975,5.8,5.772727,5.840287,11.2,13.681818,12.204707,17.8,14.727273,18.470674,105.8,104.645455,102.108583,100.98,95.209091,100.397883,67.8,67.559091,67.625642,0.3282,0.329273,0.302344,0.3732,0.417636,0.375464,0.5586,0.563136,0.54331,51.72,54.690909,51.240622,60.86,63.813636,55.153792,7.18,8.763636,6.169933,14.94,14.968182,14.708419,0.5294,0.544273,0.51876,14.5,17.772727,15.755138,26.5,30.259091,25.897915,0.2354,0.220864,0.207807,0,0,1
3,24.6,25.666667,24.534714,52.2,54.0,52.713938,0.4716,0.471333,0.464322,9.8,7.666667,9.373407,23.4,19.833333,24.219688,0.4112,0.379583,0.38074,12.8,13.25,13.450327,19.6,19.75,20.433324,0.6826,0.681417,0.683046,7.0,6.5,7.127945,27.0,29.666667,29.147917,14.6,13.083333,14.835013,6.0,5.583333,6.032555,1.4,2.416667,1.462046,13.8,14.583333,12.952757,19.0,19.583333,20.080306,105.38,100.925,105.583149,101.46,99.975,97.257504,68.54,70.741667,68.297726,0.3782,0.371417,0.38958,0.449,0.370167,0.461446,0.5872,0.568833,0.578359,46.9,48.141667,49.164642,58.1,49.541667,59.472853,8.8,7.758333,8.803756,4.1,6.333333,4.202775,0.5662,0.542833,0.554087,18.1,18.691667,16.936403,23.86,22.291667,24.287561,0.2466,0.249083,0.256014,26.2,25.928571,27.202928,56.6,58.857143,59.122634,0.4634,0.440357,0.460755,9.2,9.5,9.989433,25.8,24.785714,27.224526,0.3538,0.379286,0.365979,10.8,11.071429,10.378299,14.6,14.857143,13.676782,0.727,0.7465,0.767052,6.2,6.857143,6.908517,27.0,29.857143,28.080932,17.2,15.714286,18.015524,8.8,6.857143,8.948778,5.4,4.357143,5.081276,18.6,17.5,18.342378,19.4,17.357143,18.155262,96.16,94.407143,97.725044,103.5,108.471429,105.612118,75.26,75.585714,76.090106,0.2588,0.249857,0.229168,0.4548,0.4195,0.460251,0.5706,0.547571,0.569564,45.38,46.114286,46.860574,64.5,59.807143,65.736169,11.76,9.042857,11.850792,13.44,10.992857,12.783031,0.544,0.520357,0.544734,22.7,21.035714,21.910853,21.3,21.192857,23.197858,0.1908,0.185571,0.173572,0,0,1
4,26.8,27.0,26.409965,60.8,59.692308,60.45676,0.44,0.447385,0.435041,7.4,7.615385,7.106061,19.8,21.307692,19.692654,0.363,0.344846,0.351888,10.0,11.461538,10.878916,14.0,16.230769,15.448029,0.7066,0.699462,0.692208,8.2,8.076923,8.90836,29.6,30.769231,30.143741,12.4,14.076923,11.303985,3.8,4.615385,4.36007,1.6,1.538462,1.538016,10.0,11.769231,10.60494,15.0,16.769231,15.611678,103.9,102.5,103.150813,121.06,109.784615,119.034924,68.42,70.946154,68.618108,0.2294,0.277385,0.257188,0.325,0.358923,0.326383,0.5242,0.537846,0.520239,45.62,48.176923,47.158431,45.54,51.853846,42.40302,5.58,6.446154,6.386942,4.08,3.746154,4.02533,0.5006,0.510077,0.493547,12.92,14.976923,13.56662,24.86,25.507692,27.21401,0.164,0.196385,0.181446,24.4,24.0,24.395855,63.6,58.928571,61.510901,0.3854,0.410214,0.398335,6.8,7.785714,7.628808,22.4,23.714286,23.4773,0.3066,0.338214,0.328224,7.2,7.571429,6.257677,9.4,10.642857,8.428689,0.7242,0.700214,0.695112,10.6,7.857143,9.597987,32.2,28.571429,29.879917,10.6,12.142857,12.80856,7.0,7.214286,7.066411,3.0,3.785714,3.534328,11.0,11.642857,12.163889,14.2,13.714286,13.984295,92.18,93.292857,92.539726,103.42,106.842857,109.30528,67.88,67.714286,67.44857,0.1486,0.183643,0.137646,0.3524,0.404786,0.383154,0.4628,0.498,0.479864,47.6,46.442857,46.858256,43.14,50.171429,51.873574,10.3,10.628571,10.451685,8.86,11.35,10.819778,0.4392,0.477643,0.460859,13.92,15.478571,15.672046,28.72,23.4,27.554073,0.1138,0.129357,0.101445,0,1,0


In [3]:
path = os.path.abspath(f'../../data/testing_set.csv')
testing_df = pd.read_csv(path)
testing_df['Loss'] = 1 - testing_df['Win']
testing_df.head()

Unnamed: 0,FG_SMA,FG_CMA,FG_EMA,FGA_SMA,FGA_CMA,FGA_EMA,FG%_SMA,FG%_CMA,FG%_EMA,3P_SMA,3P_CMA,3P_EMA,3PA_SMA,3PA_CMA,3PA_EMA,3P%_SMA,3P%_CMA,3P%_EMA,FT_SMA,FT_CMA,FT_EMA,FTA_SMA,FTA_CMA,FTA_EMA,FT%_SMA,FT%_CMA,FT%_EMA,ORB_SMA,ORB_CMA,ORB_EMA,TRB_SMA,TRB_CMA,TRB_EMA,AST_SMA,AST_CMA,AST_EMA,STL_SMA,STL_CMA,STL_EMA,BLK_SMA,BLK_CMA,BLK_EMA,TOV_SMA,TOV_CMA,TOV_EMA,PF_SMA,PF_CMA,PF_EMA,ORtg_SMA,ORtg_CMA,ORtg_EMA,DRtg_SMA,DRtg_CMA,DRtg_EMA,Pace_SMA,Pace_CMA,Pace_EMA,FTr_SMA,FTr_CMA,FTr_EMA,3PAr_SMA,3PAr_CMA,3PAr_EMA,TS%_SMA,TS%_CMA,TS%_EMA,TRB%_SMA,TRB%_CMA,TRB%_EMA,AST%_SMA,AST%_CMA,AST%_EMA,STL%_SMA,STL%_CMA,STL%_EMA,BLK%_SMA,BLK%_CMA,BLK%_EMA,eFG%_SMA,eFG%_CMA,eFG%_EMA,TOV%_SMA,TOV%_CMA,TOV%_EMA,ORB%_SMA,ORB%_CMA,ORB%_EMA,FT/FGA_SMA,FT/FGA_CMA,FT/FGA_EMA,opp_FG_SMA,opp_FG_CMA,opp_FG_EMA,opp_FGA_SMA,opp_FGA_CMA,opp_FGA_EMA,opp_FG%_SMA,opp_FG%_CMA,opp_FG%_EMA,opp_3P_SMA,opp_3P_CMA,opp_3P_EMA,opp_3PA_SMA,opp_3PA_CMA,opp_3PA_EMA,opp_3P%_SMA,opp_3P%_CMA,opp_3P%_EMA,opp_FT_SMA,opp_FT_CMA,opp_FT_EMA,opp_FTA_SMA,opp_FTA_CMA,opp_FTA_EMA,opp_FT%_SMA,opp_FT%_CMA,opp_FT%_EMA,opp_ORB_SMA,opp_ORB_CMA,opp_ORB_EMA,opp_TRB_SMA,opp_TRB_CMA,opp_TRB_EMA,opp_AST_SMA,opp_AST_CMA,opp_AST_EMA,opp_STL_SMA,opp_STL_CMA,opp_STL_EMA,opp_BLK_SMA,opp_BLK_CMA,opp_BLK_EMA,opp_TOV_SMA,opp_TOV_CMA,opp_TOV_EMA,opp_PF_SMA,opp_PF_CMA,opp_PF_EMA,opp_ORtg_SMA,opp_ORtg_CMA,opp_ORtg_EMA,opp_DRtg_SMA,opp_DRtg_CMA,opp_DRtg_EMA,opp_Pace_SMA,opp_Pace_CMA,opp_Pace_EMA,opp_FTr_SMA,opp_FTr_CMA,opp_FTr_EMA,opp_3PAr_SMA,opp_3PAr_CMA,opp_3PAr_EMA,opp_TS%_SMA,opp_TS%_CMA,opp_TS%_EMA,opp_TRB%_SMA,opp_TRB%_CMA,opp_TRB%_EMA,opp_AST%_SMA,opp_AST%_CMA,opp_AST%_EMA,opp_STL%_SMA,opp_STL%_CMA,opp_STL%_EMA,opp_BLK%_SMA,opp_BLK%_CMA,opp_BLK%_EMA,opp_eFG%_SMA,opp_eFG%_CMA,opp_eFG%_EMA,opp_TOV%_SMA,opp_TOV%_CMA,opp_TOV%_EMA,opp_ORB%_SMA,opp_ORB%_CMA,opp_ORB%_EMA,opp_FT/FGA_SMA,opp_FT/FGA_CMA,opp_FT/FGA_EMA,Neutral,Win,Loss
0,24.6,25.0,26.295223,57.2,58.588235,59.950459,0.4262,0.427588,0.434373,7.4,6.588235,8.204161,23.6,22.176471,24.070312,0.3084,0.299059,0.337075,14.0,12.647059,13.172682,20.8,19.235294,19.369426,0.6766,0.650882,0.685984,8.0,8.705882,8.663763,33.0,32.0,33.494042,15.2,15.058824,16.393277,6.0,6.529412,7.253464,4.0,3.529412,3.867263,12.8,13.588235,12.652723,18.6,19.529412,18.925827,99.6,96.376471,102.177531,103.68,99.011765,100.365633,71.06,72.029412,72.268344,0.3742,0.336706,0.33305,0.4134,0.378235,0.402471,0.5246,0.512118,0.532588,51.98,51.194118,53.618324,61.28,59.847059,61.491816,8.3,8.976471,9.928151,12.04,10.111765,12.09883,0.4896,0.483588,0.501728,16.1,16.652941,15.522065,25.48,26.558824,27.283188,0.251,0.221882,0.22488,25.6,26.263158,24.161796,54.6,59.578947,54.535112,0.465,0.441,0.440586,7.6,7.368421,6.938694,19.8,20.789474,19.170009,0.364,0.353421,0.34689,13.8,12.157895,14.134105,21.6,19.684211,23.175522,0.6444,0.626,0.617173,9.4,10.947368,10.232045,34.4,36.736842,37.302457,16.4,16.052632,15.467661,11.8,9.263158,10.163377,4.6,5.315789,5.158201,19.2,16.263158,19.961327,19.2,19.315789,19.325415,97.32,97.121053,92.661265,81.94,84.326316,76.493108,74.54,74.047368,75.133121,0.4,0.335579,0.429435,0.3626,0.350158,0.351612,0.5588,0.523474,0.528957,56.72,55.089474,58.697837,65.74,61.347368,65.274972,15.62,12.384211,13.507466,12.94,15.321053,14.292547,0.534,0.503316,0.503701,22.7,19.210526,23.150352,32.98,32.4,33.972093,0.2552,0.206421,0.261465,0,0,1
1,25.4,25.25,26.894015,54.8,55.916667,55.720876,0.462,0.454417,0.480009,7.4,6.25,7.206811,22.2,19.0,20.849769,0.3374,0.326417,0.345893,14.2,14.5,15.147651,17.4,19.0,18.813979,0.8158,0.7615,0.804623,5.4,7.166667,5.983827,29.0,31.083333,31.540353,13.6,11.25,14.659644,8.0,7.166667,7.526371,5.8,6.166667,5.91345,12.8,13.0,12.254156,16.0,16.416667,14.752449,104.66,101.841667,108.833516,95.44,92.383333,91.727737,69.4,69.533333,69.843951,0.328,0.349167,0.350921,0.4048,0.34475,0.37465,0.5724,0.552167,0.58717,48.32,49.283333,50.794579,52.06,43.458333,51.963275,11.52,10.166667,10.706601,15.08,17.758333,15.824199,0.5294,0.511917,0.544585,16.82,16.691667,15.854449,20.8,24.325,22.854488,0.2676,0.267,0.28266,26.4,25.454545,24.685058,57.0,58.363636,53.919338,0.4636,0.436,0.459139,6.0,6.181818,6.227337,19.4,21.363636,20.01072,0.2982,0.291818,0.304787,16.0,15.636364,17.081051,23.2,23.181818,24.057105,0.6832,0.675364,0.709332,9.2,11.181818,9.276042,32.6,35.636364,31.52348,12.4,13.454545,13.094904,7.6,8.727273,7.716964,7.2,7.545455,6.531423,13.4,13.454545,15.302749,19.6,19.272727,20.626886,104.1,102.254545,102.612163,95.84,87.2,96.516332,72.1,71.272727,70.806744,0.4232,0.408545,0.460333,0.3428,0.370273,0.375382,0.5518,0.526,0.558821,52.18,53.881818,53.677695,48.3,53.681818,54.29924,10.64,12.181818,10.918891,17.34,18.818182,16.227338,0.5164,0.489273,0.517752,16.48,16.4,19.017409,32.02,35.209091,35.178476,0.293,0.277182,0.328098,0,0,1
2,22.6,23.625,23.893919,57.4,57.75,57.110197,0.3922,0.4085,0.418203,7.6,7.0,7.976223,23.8,22.375,22.977595,0.3246,0.315375,0.355482,15.0,15.125,13.397805,18.8,19.75,17.759031,0.7792,0.751,0.743282,10.8,11.25,10.604938,36.2,36.125,36.867398,12.2,11.875,13.618656,6.6,7.375,6.412894,3.4,3.375,3.427526,13.6,15.0,14.100594,13.8,15.0,14.414723,98.6,98.5125,100.778692,87.28,89.6125,85.721445,68.42,70.3125,68.383082,0.3328,0.345,0.31452,0.4158,0.386625,0.402249,0.5122,0.517625,0.528738,54.6,55.95,55.052309,55.24,51.325,57.850754,9.68,10.475,9.387151,8.18,8.675,7.962323,0.4598,0.46975,0.489303,17.04,18.25,17.70791,33.68,34.7625,33.959442,0.2662,0.26525,0.237552,20.6,20.166667,21.045267,57.0,55.5,57.646091,0.362,0.364167,0.366605,4.6,4.333333,4.946502,15.2,15.666667,15.786008,0.305,0.282,0.312971,12.2,11.166667,11.950617,18.0,16.333333,16.748971,0.6736,0.686333,0.719436,9.2,8.333333,9.197531,29.8,28.666667,28.954733,9.6,8.833333,9.444444,7.0,6.666667,7.148148,3.0,2.5,2.415638,17.8,17.833333,17.62963,18.2,18.833333,16.588477,78.24,76.233333,79.395473,110.28,111.5,114.295062,72.06,71.316667,71.867901,0.3118,0.287667,0.282848,0.269,0.286667,0.278337,0.4448,0.443,0.452086,46.94,45.1,45.559671,45.88,42.866667,44.186831,9.24,8.933333,9.470782,8.9,7.416667,7.004527,0.4028,0.403333,0.409926,21.58,22.283333,21.498765,26.84,24.75,26.860494,0.211,0.196667,0.20235,0,1,0
3,27.6,30.210526,27.649443,59.4,61.684211,62.042109,0.4672,0.487421,0.447264,9.2,8.842105,8.484374,26.6,23.736842,27.077445,0.3448,0.362474,0.312032,12.8,13.368421,15.358436,19.4,19.947368,21.963728,0.657,0.661421,0.680866,12.2,11.315789,13.258659,39.2,37.263158,40.132135,16.8,15.894737,16.683856,8.0,8.105263,8.039175,4.6,3.421053,4.673308,10.2,11.631579,9.55208,17.2,15.473684,16.718513,115.44,115.610526,115.290975,79.34,88.810526,84.296366,67.32,71.278947,69.146828,0.3318,0.328737,0.355954,0.4474,0.386737,0.435777,0.5668,0.578579,0.550669,59.5,56.963158,58.839409,59.84,52.747368,59.355313,11.86,11.331579,11.589912,14.14,10.215789,14.153236,0.5466,0.559684,0.51695,13.04,14.105263,11.757133,38.5,36.057895,39.413917,0.2168,0.218789,0.247359,27.8,28.764706,28.766712,61.8,64.470588,63.318843,0.45,0.443882,0.45303,7.8,7.117647,8.890309,20.4,20.470588,21.488604,0.3752,0.341353,0.400982,12.2,9.647059,12.555784,17.8,14.352941,17.703171,0.6736,0.660118,0.696609,8.6,10.588235,9.616473,31.2,33.764706,31.879139,16.0,16.647059,17.886353,7.6,7.529412,8.602996,5.2,3.882353,5.022172,13.4,12.764706,12.977303,17.0,14.823529,16.652968,101.36,100.941176,105.335369,106.26,101.882353,103.246985,74.66,72.794118,74.68234,0.2868,0.226765,0.281216,0.3292,0.313882,0.338097,0.5392,0.517706,0.549476,47.62,51.047059,48.916129,56.48,55.852941,60.053062,10.16,10.235294,11.44265,13.38,10.141176,12.989218,0.5128,0.498118,0.522372,16.08,15.305882,15.360195,26.08,31.029412,29.131986,0.1958,0.151941,0.198241,0,1,0
4,28.6,27.521739,28.995392,61.0,60.913043,60.658003,0.4704,0.452478,0.478355,7.8,8.130435,7.027787,20.4,22.173913,17.748878,0.3942,0.368261,0.400872,16.8,14.869565,15.4709,20.6,19.782609,18.814466,0.8202,0.754478,0.826359,8.2,8.869565,8.137672,30.2,31.130435,30.662777,15.4,13.956522,14.745217,7.2,7.478261,7.554715,2.6,1.608696,2.053649,10.0,11.913043,11.026134,14.2,17.217391,14.650023,113.02,106.7,111.726013,105.28,104.313043,100.707474,70.92,72.026087,71.245725,0.3398,0.329,0.31413,0.3338,0.362217,0.293597,0.5796,0.555913,0.579132,50.24,49.33913,51.616353,54.14,49.982609,50.929661,9.98,10.182609,10.5174,6.94,5.134783,5.521662,0.5346,0.519,0.536718,12.38,14.491304,13.639291,26.78,27.121739,27.529568,0.2778,0.247522,0.258858,26.0,28.130435,26.754705,57.0,62.217391,56.268072,0.4614,0.453478,0.478241,7.4,8.304348,8.26386,19.2,22.73913,19.162653,0.3888,0.363391,0.429127,22.4,15.956522,22.971298,28.0,20.608696,28.78548,0.775,0.775304,0.788219,10.6,10.173913,10.900114,32.6,33.26087,33.32873,15.8,16.086957,16.354421,8.8,9.478261,9.189601,1.4,1.608696,1.714244,12.0,12.869565,13.058401,20.6,19.26087,20.208762,115.12,108.373913,118.24257,108.16,99.195652,104.680081,71.16,73.834783,71.558889,0.505,0.339957,0.521826,0.3386,0.364609,0.34052,0.5854,0.558957,0.608281,53.46,51.517391,54.873584,60.72,56.469565,61.325672,12.4,12.743478,12.831033,4.02,5.095652,4.824586,0.5284,0.520739,0.552592,14.5,15.182609,15.63635,34.22,30.934783,36.304024,0.4064,0.264304,0.418083,0,0,1


In [4]:
train_win_true, test_win_true = training_df.pop('Win'), testing_df.pop('Win')
train_loss_true, test_loss_true = training_df.pop('Loss'), testing_df.pop('Loss')

print(f'{len(training_df)} train examples')
print(f'{len(testing_df)} test examples')

12847 train examples
5507 test examples


## Training the model

In [5]:
# Define the Random Forest model
gbc = GradientBoostingClassifier()

# Define the hyperparameter grid
param_grid = {
    'loss': ['log_loss', 'deviance', 'exponential'],  # Loss function
    'learning_rate': [0.01, 0.05, 0.1],  # Learning rate (eta)
    'max_depth': [3, 5, 8],  # Maximum depth of each tree
    'max_features': ['log2', 'sqrt'],  # Maximum number of features considered for splitting
    'n_estimators': [100, 200, 300]  # Number of trees
}

# Create a single split
cv_single_split = ShuffleSplit(n_splits=1)

### Using Win as label

In [6]:
# Create the grid search object
grid_search_win = GridSearchCV(gbc, param_grid, cv=cv_single_split, verbose=5)

# Fit the grid search to the data
grid_search_win.fit(training_df, train_win_true)

# Get the best hyperparameters
best_params = grid_search_win.best_params_
print("Best hyperparameters:", best_params)

Fitting 1 folds for each of 162 candidates, totalling 162 fits
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=100;, score=0.637 total time=   2.0s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=200;, score=0.665 total time=   3.9s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=300;, score=0.683 total time=   5.9s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.645 total time=   3.6s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=sqrt, n_estimators=200;, score=0.669 total time=   7.0s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=sqrt, n_estimators=300;, score=0.695 total time=  10.6s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100;, score=0.650 total time=   3.3s
[CV 1/1] END learning_rate=

54 fits failed out of a total of 162.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\mtayl\anaconda3\envs\mlmb-utils\Lib\site-packages\sklearn\model_selection\_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\mtayl\anaconda3\envs\mlmb-utils\Lib\site-packages\sklearn\base.py", line 1344, in wrapper
    estimator._validate_params()
  File "c:\Users\mtayl\anaconda3\envs\mlmb-utils\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\mtayl\anaconda3\envs\mlmb-utils\Lib\site-packages\sklearn\utils\_param_validation.py", line

Best hyperparameters: {'learning_rate': 0.05, 'loss': 'exponential', 'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 300}


In [7]:
# Acess the best model
clf = grid_search_win.best_estimator_

y_pred = clf.predict(testing_df)

accuracy = accuracy_score(test_win_true, y_pred)
print(f"Accuracy: {(accuracy*100):.2f}")

print("\nClassification Report:")
print(classification_report(test_win_true, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(test_win_true, y_pred))

Accuracy: 68.11

Classification Report:
              precision    recall  f1-score   support

           0       0.63      0.47      0.54      2180
           1       0.70      0.82      0.76      3327

    accuracy                           0.68      5507
   macro avg       0.67      0.65      0.65      5507
weighted avg       0.67      0.68      0.67      5507


Confusion Matrix:
[[1029 1151]
 [ 605 2722]]


### Using Loss as label

In [9]:
# Create the grid search object
grid_search_loss = GridSearchCV(gbc, param_grid, cv=cv_single_split, verbose=5)

# Fit the grid search to the data
grid_search_loss.fit(training_df, train_loss_true)

# Get the best hyperparameters
best_params = grid_search_loss.best_params_
print("Best hyperparameters:", best_params)

Fitting 1 folds for each of 162 candidates, totalling 162 fits
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=100;, score=0.613 total time=   2.0s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=200;, score=0.654 total time=   4.0s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=300;, score=0.665 total time=   5.9s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.627 total time=   3.5s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=sqrt, n_estimators=200;, score=0.654 total time=   7.0s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=sqrt, n_estimators=300;, score=0.668 total time=  10.5s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100;, score=0.640 total time=   3.3s
[CV 1/1] END learning_rate=

54 fits failed out of a total of 162.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\mtayl\anaconda3\envs\mlmb-utils\Lib\site-packages\sklearn\model_selection\_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\mtayl\anaconda3\envs\mlmb-utils\Lib\site-packages\sklearn\base.py", line 1344, in wrapper
    estimator._validate_params()
  File "c:\Users\mtayl\anaconda3\envs\mlmb-utils\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\mtayl\anaconda3\envs\mlmb-utils\Lib\site-packages\sklearn\utils\_param_validation.py", line

Best hyperparameters: {'learning_rate': 0.05, 'loss': 'exponential', 'max_depth': 3, 'max_features': 'log2', 'n_estimators': 300}


In [10]:
# Acess the best model
clf = grid_search_loss.best_estimator_

y_pred = clf.predict(testing_df)

accuracy = accuracy_score(test_loss_true, y_pred)
print(f"Accuracy: {(accuracy*100):.2f}")

print("\nClassification Report:")
print(classification_report(test_loss_true, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(test_loss_true, y_pred))

Accuracy: 68.60

Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.82      0.76      3327
           1       0.64      0.48      0.55      2180

    accuracy                           0.69      5507
   macro avg       0.67      0.65      0.65      5507
weighted avg       0.68      0.69      0.68      5507


Confusion Matrix:
[[2742  585]
 [1144 1036]]


# Saving the models

In [12]:
for pair in [('win', grid_search_win.best_estimator_), ('loss', grid_search_loss.best_estimator_)]:
    filename = f'gradient_boosting_{pair[0]}_model.pkl'
    path = os.path.abspath(f'../../machine-learning/model/{filename}')
    pickle.dump(pair[1], open(path, 'wb'))

: 