# This notebook will use a new machine learning library not used in class to make NBA game predictions

In [1]:
# Initial imports
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
import xgboost as xgb
from xgboost import XGBClassifier as xgbC
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, chi2
from imblearn.metrics import classification_report_imbalanced
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Reading in game data
nba_df = pd.read_csv(Path('final_data_xgb.csv'),parse_dates = ['GAME_DATE'], infer_datetime_format= True)
# Changing date and season column to an integer
nba_df['GAME_DATE'] = nba_df['GAME_DATE'].apply(lambda x: x.toordinal())
# Resetting the index
nba_df = nba_df.reset_index()
# Dropping null values
nba_df = nba_df.dropna()
# Dropping unnecessary columns
nba_df = nba_df.drop(columns= ['index','SEASON_YEAR'])
nba_df = nba_df[nba_df.HOME_WL != 2]
# Checking dtypes
display(nba_df.dtypes.value_counts())
# Displaying dataframe
display(nba_df)

float64    163
int64        2
dtype: int64

Unnamed: 0,GAME_ID,GAME_DATE,HOME_TEAM_ID,HOME_WL,AWAY_AST_PCT,AWAY_AST_PCT_RANK,AWAY_AST_RATIO,AWAY_AST_RATIO_RANK,AWAY_AST_TO,AWAY_AST_TO_RANK,...,HOME_PIE_RANK,HOME_POSS,HOME_REB_PCT,HOME_REB_PCT_RANK,HOME_TM_TOV_PCT,HOME_TM_TOV_PCT_RANK,HOME_TS_PCT,HOME_TS_PCT_RANK,HOME_W_PCT_RANK,HOME_W_RANK
0,20100398.0,730845,1610612746,0.0,0.63930,976.40,16.705,1246.40,1.6525,1107.00,...,862.45,93.25,0.51935,952.25,0.17775,1522.85,0.53330,1051.65,357.70,357.70
1,20100415.0,730848,1610612746,0.0,0.64190,952.40,16.700,1248.10,1.7305,1047.30,...,844.75,93.40,0.52390,890.45,0.17850,1540.20,0.53210,1066.95,357.70,357.70
2,20100433.0,730851,1610612746,1.0,0.63615,1001.15,16.765,1230.60,1.7410,1040.40,...,864.05,93.00,0.52030,948.30,0.17645,1496.45,0.53660,1036.95,357.70,357.70
3,20100467.0,730855,1610612746,1.0,0.63060,1043.20,16.670,1252.85,1.6935,1102.60,...,875.75,92.75,0.51370,1021.40,0.17580,1483.75,0.53460,1068.55,357.70,357.70
4,20100471.0,730856,1610612758,1.0,0.56615,1481.00,15.570,1466.45,1.4775,1382.95,...,570.00,96.05,0.51800,925.85,0.13650,860.90,0.54115,973.80,60.45,60.45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24741,22201226.0,738619,1610612760,1.0,0.66525,734.40,19.275,959.25,1.7115,1406.55,...,1028.05,102.45,0.50525,1140.50,0.12390,931.40,0.59770,1028.40,493.00,493.00
24742,22201227.0,738619,1610612743,1.0,0.61185,1162.40,18.140,1235.95,1.9240,1186.75,...,773.30,99.40,0.53280,798.60,0.13660,1140.00,0.60185,985.25,247.00,247.00
24743,22201228.0,738619,1610612747,1.0,0.58450,1389.10,17.745,1334.05,2.1730,889.30,...,983.55,100.80,0.50445,1163.10,0.13470,1124.45,0.58685,1206.95,493.00,493.00
24744,22201229.0,738619,1610612756,0.0,0.59475,1274.20,17.215,1476.65,1.7740,1386.60,...,920.60,99.70,0.50645,1168.20,0.12990,1057.75,0.57180,1353.15,370.00,370.00


In [3]:
# Separate the y variable, the labels
y = nba_df[['HOME_WL']]

# Separate the X variable, the features
X = nba_df.drop(columns= ['HOME_WL']).set_index(['GAME_ID'])
display(y.tail())
display(X.head())

Unnamed: 0,HOME_WL
24741,1.0
24742,1.0
24743,1.0
24744,0.0
24745,0.0


Unnamed: 0_level_0,GAME_DATE,HOME_TEAM_ID,AWAY_AST_PCT,AWAY_AST_PCT_RANK,AWAY_AST_RATIO,AWAY_AST_RATIO_RANK,AWAY_AST_TO,AWAY_AST_TO_RANK,AWAY_DEF_RATING,AWAY_DEF_RATING_RANK,...,HOME_PIE_RANK,HOME_POSS,HOME_REB_PCT,HOME_REB_PCT_RANK,HOME_TM_TOV_PCT,HOME_TM_TOV_PCT_RANK,HOME_TS_PCT,HOME_TS_PCT_RANK,HOME_W_PCT_RANK,HOME_W_RANK
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20100398.0,730845,1610612746,0.6393,976.4,16.705,1246.4,1.6525,1107.0,104.005,1239.55,...,862.45,93.25,0.51935,952.25,0.17775,1522.85,0.5333,1051.65,357.7,357.7
20100415.0,730848,1610612746,0.6419,952.4,16.7,1248.1,1.7305,1047.3,103.605,1203.95,...,844.75,93.4,0.5239,890.45,0.1785,1540.2,0.5321,1066.95,357.7,357.7
20100433.0,730851,1610612746,0.63615,1001.15,16.765,1230.6,1.741,1040.4,104.295,1223.15,...,864.05,93.0,0.5203,948.3,0.17645,1496.45,0.5366,1036.95,357.7,357.7
20100467.0,730855,1610612746,0.6306,1043.2,16.67,1252.85,1.6935,1102.6,103.37,1156.1,...,875.75,92.75,0.5137,1021.4,0.1758,1483.75,0.5346,1068.55,357.7,357.7
20100471.0,730856,1610612758,0.56615,1481.0,15.57,1466.45,1.4775,1382.95,109.52,1523.2,...,570.0,96.05,0.518,925.85,0.1365,860.9,0.54115,973.8,60.45,60.45


In [4]:
# Scaling the data using MinMax scaler
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Define search space for RandomizedCV search
search_space = [
  {
    'clf__n_estimators': [50, 100, 150, 200],
    'clf__learning_rate': [0.01, 0.1, 0.2, 0.3],
    'clf__max_depth': range(3, 10),
  }
] 

In [6]:
# Define cross validation
kfold = KFold(n_splits=10, random_state=42, shuffle=True)

In [7]:
# AUC and accuracy as score
scoring = {'AUC':'roc_auc', 'Accuracy':make_scorer(accuracy_score)}

In [8]:
clf_xgb = xgb.XGBClassifier()

In [9]:
# Define grid search
grid = RandomizedSearchCV(
  estimator=clf_xgb,
  param_distributions=search_space,
  cv=kfold,
  scoring=scoring,
  refit='AUC',
  verbose=1,
  n_jobs=-1
)
# Fit grid search
model = grid.fit(X_train, y_train)


Fitting 10 folds for each of 10 candidates, totalling 100 fits


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




In [10]:
predict = model.predict(X_test)
print('Best AUC Score: {}'.format(model.best_score_))
print('Accuracy: {}'.format(accuracy_score(y_test, predict)))
print(confusion_matrix(y_test,predict))

Best AUC Score: 0.6604497521608603
Accuracy: 0.635526103119444
[[1210 1323]
 [ 932 2722]]


In [11]:
print(model.best_params_)

{'clf__n_estimators': 100, 'clf__max_depth': 8, 'clf__learning_rate': 0.01}


In [19]:


clf_xgb = xgb.XGBClassifier(objective='binary:logistic', 
                            eval_metric="aucpr",
                            seed=42, 
                            use_label_encoder=False,
                            clf__learning_rate=0.01,
                            clf__max_depth = 8,
                            clf__n_estimators = 100)


In [20]:
clf_xgb.fit(X_train, 
            y_train,
            verbose=True, ## the next three arguments set up early stopping.
            early_stopping_rounds=10,
            eval_set=[(X_train, y_train),(X_test, y_test)])

Parameters: { "clf__learning_rate", "clf__max_depth", "clf__n_estimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	validation_0-aucpr:0.78187	validation_1-aucpr:0.75005
[1]	validation_0-aucpr:0.79007	validation_1-aucpr:0.75495
[2]	validation_0-aucpr:0.79627	validation_1-aucpr:0.75547
[3]	validation_0-aucpr:0.80142	validation_1-aucpr:0.75659
[4]	validation_0-aucpr:0.80797	validation_1-aucpr:0.75806
[5]	validation_0-aucpr:0.81305	validation_1-aucpr:0.75668
[6]	validation_0-aucpr:0.81602	validation_1-aucpr:0.75650
[7]	validation_0-aucpr:0.82368	validation_1-aucpr:0.75389
[8]	validation_0-aucpr:0.82599	validation_1-aucpr:0.75456
[9]	validation_0-aucpr:0.83005	validation_1-aucpr:0.75335
[10]	validation_0-aucpr:0.83324	validation_1-aucpr:0.753

XGBClassifier(base_score=0.5, booster='gbtree', clf__learning_rate=0.01,
              clf__max_depth=8, clf__n_estimators=100, colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              eval_metric='aucpr', gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=4,
              num_parallel_tree=1, predictor='auto', random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=42,
              subsample=1, ...)

In [21]:

# Making predictions
predictions = clf_xgb.predict(X_test)

In [22]:
# Checking accuracy score
accuracy_score(y_test, predictions)

0.650234362372717

In [23]:
# Viewing classification report
print(classification_report_imbalanced(y_test, predictions)) 

                   pre       rec       spe        f1       geo       iba       sup

        0.0       0.59      0.46      0.78      0.52      0.60      0.35      2533
        1.0       0.68      0.78      0.46      0.72      0.60      0.37      3654

avg / total       0.64      0.65      0.59      0.64      0.60      0.36      6187



In [24]:
print(confusion_matrix(y_test, predictions))

[[1177 1356]
 [ 808 2846]]


In [25]:
clf_xgb.save_model('xgb_nba.json')

In [26]:
import shap

In [29]:
explainer = shap.Explainer(clf_xgb)
shap_values = explainer.shap_values(clf_xgb)
shap.summary_plot(shap_values, X)

Unknown data type: <class 'xgboost.sklearn.XGBClassifier'>, trying to convert it to csr_matrix


TypeError: Not supported type for data.<class 'xgboost.sklearn.XGBClassifier'>