# This notebook will use a new machine learning library not used in class to make NBA game predictions

In [97]:
# Initial imports
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
import xgboost as xgb
from xgboost import XGBClassifier as xgbC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced
from sklearn.preprocessing import MinMaxScaler

In [98]:
# Reading in game data
nba_df = pd.read_csv(Path('Resources/nba_game_data.csv'),parse_dates = ['GAME_DATE_EST'], infer_datetime_format= True)
# Changing date column to an integer
nba_df['GAME_DATE_EST'] = nba_df['GAME_DATE_EST'].apply(lambda x: x.toordinal())
# Resetting the index
nba_df = nba_df.reset_index()
# Dropping null values
nba_df = nba_df.dropna()
# Dropping unnecessary columns
nba_df = nba_df.drop(columns= ['index', 'Unnamed: 0'])
# Checking dtypes
nba_df.dtypes
# Displaying dataframe
display(nba_df)

Unnamed: 0,GAME_DATE_EST,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,TEAM_ID_home,TEAM_ID_away,HOME_TEAM_WINS,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away
0,731496,10300011,1610612737,1610612739,2003,1610612737,1610612739,0,106.20,0.43210,0.75900,0.30090,22.25,46.85,108.85,0.44225,0.77540,0.36805,23.35,46.45
1,731603,20300611,1610612737,1610612752,2003,1610612737,1610612752,0,93.10,0.44685,0.79300,0.36380,20.25,43.20,89.60,0.42610,0.71285,0.29690,19.55,40.45
2,731607,20300642,1610612737,1610612756,2003,1610612737,1610612756,0,93.95,0.45340,0.77695,0.36315,20.40,42.90,90.35,0.42740,0.71715,0.31035,19.70,40.05
3,731609,20300658,1610612737,1610612757,2003,1610612737,1610612757,0,92.75,0.44780,0.76265,0.35945,19.90,43.30,90.15,0.42745,0.71715,0.29785,20.05,39.75
4,731611,20300672,1610612737,1610612749,2003,1610612737,1610612749,1,92.20,0.44615,0.75585,0.35620,20.15,43.55,89.05,0.42365,0.72145,0.28680,20.15,40.05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24570,738211,22100896,1610612766,1610612761,2021,1610612766,1610612761,1,106.05,0.44970,0.78575,0.35080,24.25,43.40,108.90,0.47170,0.79275,0.36455,23.45,44.65
24571,738213,22100915,1610612766,1610612765,2021,1610612766,1610612765,0,105.80,0.44800,0.77465,0.35155,24.50,42.65,108.15,0.46880,0.79010,0.36275,23.25,45.60
24572,738219,22100955,1610612766,1610612759,2021,1610612766,1610612759,1,105.00,0.44480,0.77300,0.35155,24.10,42.45,107.65,0.46500,0.79570,0.35785,22.85,45.80
24573,738222,22100975,1610612766,1610612751,2021,1610612766,1610612751,0,105.90,0.44815,0.76050,0.35850,24.45,42.15,109.35,0.46680,0.79430,0.36500,23.20,45.95


In [99]:
# Separate the y variable, the labels
y = nba_df[['HOME_TEAM_WINS']]

# Separate the X variable, the features
X = nba_df.drop(columns= ['HOME_TEAM_WINS'])
display(y.head())
display(X.head())

Unnamed: 0,HOME_TEAM_WINS
0,0
1,0
2,0
3,0
4,1


Unnamed: 0,GAME_DATE_EST,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,TEAM_ID_home,TEAM_ID_away,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away
0,731496,10300011,1610612737,1610612739,2003,1610612737,1610612739,106.2,0.4321,0.759,0.3009,22.25,46.85,108.85,0.44225,0.7754,0.36805,23.35,46.45
1,731603,20300611,1610612737,1610612752,2003,1610612737,1610612752,93.1,0.44685,0.793,0.3638,20.25,43.2,89.6,0.4261,0.71285,0.2969,19.55,40.45
2,731607,20300642,1610612737,1610612756,2003,1610612737,1610612756,93.95,0.4534,0.77695,0.36315,20.4,42.9,90.35,0.4274,0.71715,0.31035,19.7,40.05
3,731609,20300658,1610612737,1610612757,2003,1610612737,1610612757,92.75,0.4478,0.76265,0.35945,19.9,43.3,90.15,0.42745,0.71715,0.29785,20.05,39.75
4,731611,20300672,1610612737,1610612749,2003,1610612737,1610612749,92.2,0.44615,0.75585,0.3562,20.15,43.55,89.05,0.42365,0.72145,0.2868,20.15,40.05


In [100]:
# Scaling the data using MinMax scaler
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [107]:
# Setting XGB classifier instance
xgb_clf = xgb.XGBClassifier()
# Fitting the data to the model
xgb_clf = xgb_clf.fit(X_train, y_train)
# Making predictions
predictions = xgb_clf.predict(X_test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[19:41:02] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 122 extra nodes, 0 pruned nodes, max_depth=6
[19:41:02] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 124 extra nodes, 0 pruned nodes, max_depth=6
[19:41:02] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 100 extra nodes, 0 pruned nodes, max_depth=6
[19:41:02] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 122 extra nodes, 0 pruned nodes, max_depth=6
[19:41:02] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 120 extra nodes, 0 pruned nodes, max_depth=6
[19:41:02] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 118 extra nodes, 0 pruned nodes, max_depth=6
[19:41:02] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 98 extra nodes, 0 pruned nodes, max_depth=6
[19:41:03] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 106 extra nodes, 0 pruned nodes, max_depth=6
[19:41:03] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 118 extra nodes, 0 p

In [109]:
# Checking accuracy score
accuracy_score(y_test, predictions)

0.6023763020833334

In [110]:
# Viewing classification report
print(classification_report_imbalanced(y_test, predictions)) 

                   pre       rec       spe        f1       geo       iba       sup

          0       0.52      0.37      0.77      0.43      0.53      0.27      2530
          1       0.63      0.77      0.37      0.69      0.53      0.29      3614

avg / total       0.59      0.60      0.53      0.59      0.53      0.28      6144

