In [100]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from tqdm import tqdm_notebook

import xgboost as xgb
import lightgbm as lgb
from skopt import BayesSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier, GradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV, train_test_split, KFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler, LabelEncoder
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, classification_report
warnings.filterwarnings('ignore')
%matplotlib inline

In [176]:
games = pd.read_csv('Data/games.csv')
gps = pd.read_csv('Data/gps.csv', low_memory=False)
rpe = pd.read_csv('Data/rpe.csv')
wellness = pd.read_csv('Data/wellness.csv')

In [3]:
wellness.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5011 entries, 0 to 5010
Data columns (total 19 columns):
Date                   5011 non-null object
PlayerID               5011 non-null int64
Fatigue                5011 non-null int64
Soreness               5011 non-null int64
Desire                 5011 non-null int64
Irritability           5011 non-null int64
BedTime                5011 non-null object
WakeTime               5011 non-null object
SleepHours             5011 non-null float64
SleepQuality           5011 non-null int64
MonitoringScore        5011 non-null int64
Pain                   5011 non-null object
Illness                5011 non-null object
Menstruation           4995 non-null object
Nutrition              4174 non-null object
NutritionAdjustment    4266 non-null object
USGMeasurement         4843 non-null object
USG                    629 non-null float64
TrainingReadiness      5011 non-null object
dtypes: float64(2), int64(7), object(10)
memory usage: 743.9+ K

In [4]:
rpe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8860 entries, 0 to 8859
Data columns (total 14 columns):
Date                 8860 non-null object
PlayerID             8860 non-null int64
Training             8860 non-null object
SessionType          7621 non-null object
Duration             7621 non-null float64
RPE                  7621 non-null float64
SessionLoad          7621 non-null float64
DailyLoad            3149 non-null float64
AcuteLoad            4349 non-null float64
ChronicLoad          4383 non-null float64
AcuteChronicRatio    4349 non-null float64
ObjectiveRating      4724 non-null float64
FocusRating          4751 non-null float64
BestOutOfMyself      3019 non-null object
dtypes: float64(9), int64(1), object(4)
memory usage: 969.1+ KB


In [5]:
def fill_cat_data_gb_player(data, col):
    return data.apply(lambda x: data.groupby('PlayerID')[col].value_counts()[x['PlayerID']].index[0], axis=1)

def fill_quant_data_gb_player(data, col):
    return  data[col].fillna(data.groupby(['PlayerID'])[col].transform('median'))

In [184]:
wellness['Menstruation'].fillna(value='No', inplace=True)
wellness['USG'] = wellness.apply(lambda x: wellness.groupby('PlayerID')['USG'].median()[x['PlayerID']], axis=1)
wellness['Nutrition'] = wellness.apply(lambda x: wellness.groupby('PlayerID')['Nutrition'].value_counts()[x['PlayerID']].index[0], axis=1)
wellness['NutritionAdjustment'] = wellness.apply(lambda x: wellness.groupby('PlayerID')['NutritionAdjustment'].value_counts()[x['PlayerID']].index[0], axis=1)
wellness['NutritionAdjustment'] = wellness.apply(lambda x: wellness.groupby('PlayerID')['NutritionAdjustment'].value_counts()[x['PlayerID']].index[0], axis=1)
wellness['USGMeasurement'].fillna(value='Yes', inplace=True)

In [185]:
wellness['WakeTime'] = pd.to_datetime(wellness['WakeTime'])
wellness['BedTime'] = pd.to_datetime(wellness['BedTime'])

wellness['TotalTimeSleptInSecs'] = wellness.apply(lambda x: (x['WakeTime'] - x['BedTime']).seconds, axis=1)
wellness.drop(['WakeTime', 'BedTime'], axis=1, inplace=True)

In [186]:
wellness['TrainingReadiness'] = wellness['TrainingReadiness'].map(lambda x: x.rstrip('%'))
wellness['TrainingReadiness'] = wellness['TrainingReadiness'].astype('int64')

wellness_cat_cols = wellness.columns[wellness.dtypes == 'object']
for col in wellness_cat_cols:
    if wellness[col].isnull().values.any():
        wellness[col] = wellness[col].fillna(wellness.groupby('PlayerID')[col].value_counts()[]

wellness['USG'] = fill_quant_data_gb_player(wellness, 'USG')

In [177]:
rpe['SessionType'] = fill_cat_data_gb_player(rpe, 'SessionType')
rpe['BestOutOfMyself'] = fill_cat_data_gb_player(rpe, 'BestOutOfMyself')
rpe_float_cols = rpe.columns[rpe.dtypes == 'float64']

for col in rpe_float_cols:
    if rpe[col].isnull().values.any():
        rpe[col] = fill_quant_data_gb_player(rpe, col)

In [181]:
temp = rpe

rpe = rpe.groupby(['Date', 'PlayerID', 'SessionType']).max().reset_index().drop(['SessionType', 'Duration', 'SessionLoad', ], axis=1)

In [182]:
count = pd.DataFrame(temp.groupby(['Date', 'PlayerID', 'SessionType']).count().reset_index()['SessionLoad'])
temp = temp.groupby(['Date', 'PlayerID', 'SessionType']).sum().reset_index()
temp = pd.concat([temp, pd.get_dummies(temp['SessionType'])], axis=1).drop('SessionType', axis=1)
temp['SessionCount'] = count
temp['Mobility/Recovery'] = temp.apply(lambda x: x['Mobility/Recovery'] * x['SessionLoad'], axis=1)
temp['Skills'] = temp.apply(lambda x: x['Skills'] * x['SessionLoad'], axis=1)
temp['Strength'] = temp.apply(lambda x: x['Strength'] * x['SessionLoad'], axis=1)
temp = temp.drop('SessionLoad', axis=1)


rpe['SessionLoadTotal_Mobility/Recovery'] = temp['Mobility/Recovery']
rpe['SessionLoadTotal_Skills'] = temp['Skills']
rpe['SessionLoadTotal_Strength'] = temp['Strength']
rpe['SessionCount'] = temp['SessionCount']
rpe['DurationTotal'] = temp['Duration']

In [10]:
def printClassificationErrors(y_test, y_pred):
    print('Confusion Matrix:')
    print(confusion_matrix(y_test, y_pred))
    print('Classification Report:')
    print(classification_report(y_test, y_pred))
    print('Accuracy Score: {}'.format(accuracy_score(y_test, y_pred)))

In [53]:
def oof_predictions(model, X, y, n_folds=5, shuffle=True, random_state=42, predict='hard'):
    k_folds = StratifiedKFold(n_splits=n_folds, shuffle=shuffle, random_state=random_state)
    oof_pred_array = np.zeros(len(X))
    #oof_test_array = np.zeros(len(X_test_actual))
    
    for train_index, test_index in tqdm_notebook(k_folds.split(X, y), total=n_folds):
        X_train, X_test = X.loc[train_index, ], X.loc[test_index, ]
        y_train, y_test = y.loc[train_index, ], y.loc[test_index, ]

        #_, X_test_meta = X_test_actual.loc[train_index, ], X_test_actual.loc[test_index]
        
        
        #print(X_train)
        #X_train = X_train.reindex(index=train_index)
        #y_train = y_train.reindex(index=train_index)

        model.fit(X_train, y_train)

        if predict == 'soft':
            y_pred = model.predict_proba(X_test)
            y_pred = np.array([prob[1] for prob in y_pred])
            
            #y_pred_actual = model.predict_proba(X_test_meta)
            #y_pred_actual = np.array([prob[1] for prob in y_pred_actual])
        elif predict == 'hard':
            y_pred = model.predict(X_test)
            #y_pred_actual = model.predict(X_test_meta)
        
        oof_pred_array[test_index] = y_pred
        #oof_test_array[test_index] = y_pred_actual
    
    return oof_pred_array#, oof_test_array

In [189]:
data = rpe.merge(wellness, how='inner', left_on=['PlayerID', 'Date'], right_on=['PlayerID', 'Date'])
data = data.groupby(['Date', 'PlayerID']).max().reset_index()
data['Date'] = pd.to_datetime(data['Date'])
numerical_features = data.columns[data.dtypes != 'object']
categorical_features = data.columns[data.dtypes == 'object']

data = pd.get_dummies(data=data, columns=categorical_features)

array(['2017-08-01T00:00:00.000000000', '2017-08-02T00:00:00.000000000',
       '2017-08-03T00:00:00.000000000', '2017-08-04T00:00:00.000000000',
       '2017-08-05T00:00:00.000000000', '2017-08-06T00:00:00.000000000',
       '2017-08-07T00:00:00.000000000', '2017-08-08T00:00:00.000000000',
       '2017-08-09T00:00:00.000000000', '2017-08-10T00:00:00.000000000',
       '2017-08-11T00:00:00.000000000', '2017-08-12T00:00:00.000000000',
       '2017-08-13T00:00:00.000000000', '2017-08-14T00:00:00.000000000',
       '2017-08-15T00:00:00.000000000', '2017-08-16T00:00:00.000000000',
       '2017-08-17T00:00:00.000000000', '2017-08-18T00:00:00.000000000',
       '2017-08-19T00:00:00.000000000', '2017-08-20T00:00:00.000000000',
       '2017-08-21T00:00:00.000000000', '2017-08-22T00:00:00.000000000',
       '2017-08-23T00:00:00.000000000', '2017-08-24T00:00:00.000000000',
       '2017-08-25T00:00:00.000000000', '2017-08-26T00:00:00.000000000',
       '2017-08-27T00:00:00.000000000', '2017-08-28

In [190]:
data.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4085 entries, 0 to 4084
Data columns (total 42 columns):
Date                                  4085 non-null datetime64[ns]
PlayerID                              4085 non-null int64
RPE                                   4085 non-null float64
DailyLoad                             4085 non-null float64
AcuteLoad                             4085 non-null float64
ChronicLoad                           4085 non-null float64
AcuteChronicRatio                     4085 non-null float64
ObjectiveRating                       4085 non-null float64
FocusRating                           4085 non-null float64
SessionLoadTotal_Mobility/Recovery    4085 non-null float64
SessionLoadTotal_Skills               4085 non-null float64
SessionLoadTotal_Strength             4085 non-null float64
SessionCount                          4085 non-null int64
DurationTotal                         4085 non-null float64
Fatigue                               4085 non-nul

In [142]:
wellness.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5011 entries, 0 to 5010
Data columns (total 18 columns):
Date                    5011 non-null object
PlayerID                5011 non-null int64
Fatigue                 5011 non-null int64
Soreness                5011 non-null int64
Desire                  5011 non-null int64
Irritability            5011 non-null int64
SleepHours              5011 non-null float64
SleepQuality            5011 non-null int64
MonitoringScore         5011 non-null int64
Pain                    5011 non-null object
Illness                 5011 non-null object
Menstruation            5011 non-null object
Nutrition               5011 non-null object
NutritionAdjustment     5011 non-null object
USGMeasurement          5011 non-null object
USG                     5011 non-null float64
TrainingReadiness       5011 non-null int64
TotalTimeSleptInSecs    5011 non-null int64
dtypes: float64(2), int64(9), object(7)
memory usage: 704.8+ KB


In [174]:
pd.set_option('max_columns', 42)

temp = rpe

count = pd.DataFrame(rpe.groupby(['Date', 'PlayerID', 'SessionType']).count().reset_index()['SessionLoad'])
temp = rpe.groupby(['Date', 'PlayerID', 'SessionType']).sum().reset_index()
temp = pd.concat([temp, pd.get_dummies(temp['SessionType'])], axis=1).drop('SessionType', axis=1)
temp['SessionCount'] = count
temp['Mobility/Recovery'] = temp.apply(lambda x: x['Mobility/Recovery'] * x['SessionLoad'], axis=1)
temp['Skills'] = temp.apply(lambda x: x['Skills'] * x['SessionLoad'], axis=1)
temp['Strength'] = temp.apply(lambda x: x['Strength'] * x['SessionLoad'], axis=1)
temp = temp.drop('SessionLoad', axis=1)


rpe['SessionLoadTotal_Mobility/Recovery'] = temp['Mobility/Recovery']
rpe['SessionLoadTotal_Skills'] = temp['Skills']
rpe['SessionLoadTotal_Strength'] = temp['Strength']
rpe['SessionCount'] = temp['SessionCount']
rpe['DurationTotal'] = temp['Duration']

Unnamed: 0,Date,PlayerID,Duration,RPE,DailyLoad,AcuteLoad,ChronicLoad,AcuteChronicRatio,ObjectiveRating,FocusRating,Mobility/Recovery,Skills,Strength,SessionCount
0,2017-08-01,1,150.0,7.0,1105.0,368.90,332.330,4.925,0.0,0.0,0.0,540.0,0.0,2
1,2017-08-01,3,135.0,10.0,1215.0,452.10,435.805,4.840,14.0,14.0,0.0,0.0,675.0,2
2,2017-08-01,5,90.0,7.0,900.0,240.00,186.290,5.090,16.0,18.0,0.0,0.0,330.0,2
3,2017-08-01,8,135.0,5.0,1005.0,407.00,400.070,4.970,0.0,0.0,0.0,345.0,0.0,2
4,2017-08-01,9,45.0,9.0,405.0,57.90,14.460,4.000,0.0,0.0,0.0,0.0,405.0,1
5,2017-08-01,10,140.0,9.0,1160.5,352.40,308.570,4.920,0.0,16.0,0.0,0.0,640.0,2
6,2017-08-01,11,125.0,8.0,1170.0,452.80,446.470,4.880,16.0,18.0,0.0,500.0,0.0,2
7,2017-08-01,12,140.0,4.0,685.0,211.40,206.430,4.890,12.0,16.0,0.0,0.0,280.0,2
8,2017-08-01,13,150.0,11.0,1290.0,445.60,402.860,4.890,16.0,14.0,840.0,0.0,0.0,2
9,2017-08-01,15,90.0,6.0,780.0,190.90,168.750,5.015,12.0,14.0,0.0,300.0,0.0,2


In [31]:
X = data.drop(['Fatigue', 'Date'], axis=1)
y = data['Fatigue']

In [32]:
columns = X.columns
mm = MinMaxScaler()
X[columns] = mm.fit_transform(X[columns])

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify=y)

In [34]:
n_folds=10

In [39]:
def clean_dataset(df):
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    df.dropna(inplace=True)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(1)
    return df[indices_to_keep].astype(np.float64)

X_train = clean_dataset(X_train)

In [55]:
knn = KNeighborsClassifier(n_neighbors=8, p=1)
knn_oof_predictions = oof_predictions(knn, X, y, n_folds, predict='hard')

A Jupyter Widget

In [87]:
printClassificationErrors(y, knn_oof_predictions)

Confusion Matrix:
[[  40   71   45    5    1    0    0]
 [  34  200  252   66    0    0    0]
 [  10  113  969  362    6    0    0]
 [   2   24  475 1041   22    1    0]
 [   0    5   47  122   33    4    0]
 [   0    1    6   13   14   97    0]
 [   0    0    0    0    3    1    0]]
Classification Report:
             precision    recall  f1-score   support

          1       0.47      0.25      0.32       162
          2       0.48      0.36      0.41       552
          3       0.54      0.66      0.60      1460
          4       0.65      0.67      0.66      1565
          5       0.42      0.16      0.23       211
          6       0.94      0.74      0.83       131
          7       0.00      0.00      0.00         4

avg / total       0.58      0.58      0.57      4085

Accuracy Score: 0.5826193390452876


In [113]:
xgbc = xgb.XGBClassifier(objective='multi:softmax', 
                         colsample_bylevel=0.8, 
                         colsample_bytree=0.8, 
                         gamma=0.01, 
                         learning_rate=0.001, 
                         max_delta_step=20, 
                         max_depth=20, 
                         min_child_weight=1, 
                         n_estimators=100, 
                         reg_alpha=0, 
                         reg_lambda=1e-09, 
                         scale_pos_weight=0.5362970367111267, 
                         subsample=0.9719854748963996)

xgbc_oof_predictions = oof_predictions(xgbc, X, y, n_folds, predict='hard')
printClassificationErrors(y, xgbc_oof_predictions)

A Jupyter Widget

Confusion Matrix:
[[  62   89   11    0    0    0    0]
 [  37  332  175    8    0    0    0]
 [   4  111 1206  137    2    0    0]
 [   1    7  174 1340   40    3    0]
 [   0    0    6   96   97   12    0]
 [   0    0    1    5   24  101    0]
 [   0    0    0    0    1    3    0]]
Classification Report:
             precision    recall  f1-score   support

          1       0.60      0.38      0.47       162
          2       0.62      0.60      0.61       552
          3       0.77      0.83      0.80      1460
          4       0.84      0.86      0.85      1565
          5       0.59      0.46      0.52       211
          6       0.85      0.77      0.81       131
          7       0.00      0.00      0.00         4

avg / total       0.76      0.77      0.76      4085

Accuracy Score: 0.7681762545899633


In [57]:
rfc = RandomForestClassifier(n_estimators=140, 
                             max_features=4, 
                             max_depth=4500, 
                             min_samples_split=16, 
                             random_state=0)

rfc_oof_predictions = oof_predictions(rfc, X, y, n_folds)

A Jupyter Widget

In [88]:
printClassificationErrors(y, rfc_oof_predictions)

Confusion Matrix:
[[  29  103   29    1    0    0    0]
 [  15  271  253   13    0    0    0]
 [   1   90 1107  259    3    0    0]
 [   1    5  226 1311   21    1    0]
 [   0    0    9  127   72    3    0]
 [   0    0    1    7   26   97    0]
 [   0    0    0    0    3    1    0]]
Classification Report:
             precision    recall  f1-score   support

          1       0.63      0.18      0.28       162
          2       0.58      0.49      0.53       552
          3       0.68      0.76      0.72      1460
          4       0.76      0.84      0.80      1565
          5       0.58      0.34      0.43       211
          6       0.95      0.74      0.83       131
          7       0.00      0.00      0.00         4

avg / total       0.70      0.71      0.69      4085

Accuracy Score: 0.7067319461444308


In [89]:
logr = LogisticRegression()

logr_oof_predictions = oof_predictions(logr, X, y, n_folds)
printClassificationErrors(y, logr_oof_predictions)

A Jupyter Widget

Confusion Matrix:
[[  12   91   57    2    0    0    0]
 [  11  120  381   40    0    0    0]
 [   3   47 1055  352    3    0    0]
 [   1    3  399 1142   19    1    0]
 [   0    0    4  170   34    3    0]
 [   0    0    0   27   14   90    0]
 [   0    0    0    1    2    1    0]]
Classification Report:
             precision    recall  f1-score   support

          1       0.44      0.07      0.13       162
          2       0.46      0.22      0.30       552
          3       0.56      0.72      0.63      1460
          4       0.66      0.73      0.69      1565
          5       0.47      0.16      0.24       211
          6       0.95      0.69      0.80       131
          7       0.00      0.00      0.00         4

avg / total       0.59      0.60      0.57      4085

Accuracy Score: 0.6004895960832314


In [92]:
gbc = GradientBoostingClassifier(n_estimators=1000)

gbc_oof_predictions = oof_predictions(gbc, X, y, n_folds)
printClassificationErrors(y, gbc_oof_predictions)

A Jupyter Widget

Confusion Matrix:
[[  91   66    4    1    0    0    0]
 [  44  429   78    1    0    0    0]
 [   3   67 1325   61    4    0    0]
 [   1    0   64 1443   50    6    1]
 [   0    0    1   68  125   17    0]
 [   0    0    0    3   27  101    0]
 [   0    0    0    0    0    3    1]]
Classification Report:
             precision    recall  f1-score   support

          1       0.65      0.56      0.60       162
          2       0.76      0.78      0.77       552
          3       0.90      0.91      0.90      1460
          4       0.92      0.92      0.92      1565
          5       0.61      0.59      0.60       211
          6       0.80      0.77      0.78       131
          7       0.50      0.25      0.33         4

avg / total       0.86      0.86      0.86      4085

Accuracy Score: 0.8604651162790697


In [95]:
svc = SVC(C=1, kernel='rbf', gamma=0.8)

svc_oof_predictions = oof_predictions(gbc, X, y, n_folds)
printClassificationErrors(y, svc_oof_predictions)

A Jupyter Widget

Confusion Matrix:
[[  90   66    5    1    0    0    0]
 [  43  430   78    1    0    0    0]
 [   3   65 1328   61    3    0    0]
 [   1    0   65 1443   49    6    1]
 [   0    0    1   65  125   20    0]
 [   0    0    0    3   27  101    0]
 [   0    0    0    0    1    3    0]]
Classification Report:
             precision    recall  f1-score   support

          1       0.66      0.56      0.60       162
          2       0.77      0.78      0.77       552
          3       0.90      0.91      0.90      1460
          4       0.92      0.92      0.92      1565
          5       0.61      0.59      0.60       211
          6       0.78      0.77      0.77       131
          7       0.00      0.00      0.00         4

avg / total       0.86      0.86      0.86      4085

Accuracy Score: 0.8609547123623011


In [102]:
mlp = MLPClassifier()

mlp_oof_predictions = oof_predictions(mlp, X, y, n_folds)
printClassificationErrors(y, mlp_oof_predictions)

A Jupyter Widget

Confusion Matrix:
[[  58   89   15    0    0    0    0]
 [  31  250  260   11    0    0    0]
 [   6   88 1072  288    6    0    0]
 [   1    4  205 1314   40    1    0]
 [   0    0    2  126   75    8    0]
 [   0    0    0    9   26   96    0]
 [   0    0    0    0    1    3    0]]
Classification Report:
             precision    recall  f1-score   support

          1       0.60      0.36      0.45       162
          2       0.58      0.45      0.51       552
          3       0.69      0.73      0.71      1460
          4       0.75      0.84      0.79      1565
          5       0.51      0.36      0.42       211
          6       0.89      0.73      0.80       131
          7       0.00      0.00      0.00         4

avg / total       0.69      0.70      0.69      4085

Accuracy Score: 0.7013463892288861


In [114]:
oof_predictions_lvl_one = pd.DataFrame(data={'knn': knn_oof_predictions, 
                                             'xgbc': xgbc_oof_predictions, 
                                             'rfc': rfc_oof_predictions, 
                                             'logr': logr_oof_predictions, 
                                             'gbc': gbc_oof_predictions, 
                                             'svc': svc_oof_predictions, 
                                             'mlp': mlp_oof_predictions})

oof_predictions_lvl_one.head()

Unnamed: 0,gbc,knn,logr,mlp,rfc,svc,xgbc
0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
1,4.0,4.0,4.0,4.0,4.0,4.0,4.0
2,5.0,5.0,5.0,5.0,5.0,5.0,5.0
3,3.0,4.0,4.0,4.0,3.0,3.0,3.0
4,4.0,4.0,4.0,4.0,4.0,4.0,4.0


In [115]:
ITERATIONS = 100
METRIC = 'accuracy'

bayes_cv_tuner = BayesSearchCV(
    estimator = xgb.XGBClassifier(
        n_jobs = 2,
        objective = 'multi:softmax',
        eval_metric = 'mlogloss',
        silent=1,
        tree_method='approx'
    ),
    search_spaces = {
        'learning_rate': (0.01, 1.0, 'log-uniform'),
        'min_child_weight': (0, 10),
        'max_depth': (0, 50),
        'max_delta_step': (0, 20),
        'subsample': (0.01, 1.0, 'uniform'),
        'colsample_bytree': (0.01, 1.0, 'uniform'),
        'colsample_bylevel': (0.01, 1.0, 'uniform'),
        'reg_lambda': (1e-9, 1000, 'log-uniform'),
        'reg_alpha': (1e-9, 1.0, 'log-uniform'),
        'gamma': (1e-9, 0.5, 'log-uniform'),
        'min_child_weight': (0, 5),
        'n_estimators': (50, 100),
        'scale_pos_weight': (1e-6, 500, 'log-uniform')
    },    
    scoring = None,
    cv = StratifiedKFold(
        n_splits=5,
        shuffle=True,
        random_state=42
    ),
    n_jobs = 1,
    n_iter = ITERATIONS,
    verbose = 0,
    refit = True,####
    random_state = 42
)

def status_print(optim_result):
    """Status callback durring bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(bayes_cv_tuner.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(bayes_cv_tuner.best_params_)
    print('Model #{}\nBest {}: {}\nBest params: {}\n'.format(
        len(all_models),
        METRIC,
        np.round(bayes_cv_tuner.best_score_, 5),
        bayes_cv_tuner.best_params_
    ))

In [116]:
result = bayes_cv_tuner.fit(oof_predictions_lvl_one.values, y.values, callback=status_print)

Model #1
Best accuracy: 0.85973
Best params: {'max_depth': 21, 'reg_lambda': 0.059360706359120489, 'colsample_bylevel': 0.41600291926478072, 'n_estimators': 87, 'learning_rate': 0.042815319280763466, 'subsample': 0.13556548021189216, 'gamma': 0.13031389926541354, 'min_child_weight': 2, 'max_delta_step': 13, 'colsample_bytree': 0.73044848574555188, 'reg_alpha': 5.4975577392897861e-07, 'scale_pos_weight': 0.060830282487222144}

Model #2
Best accuracy: 0.86193
Best params: {'max_depth': 3, 'reg_lambda': 276.54244755742252, 'colsample_bylevel': 0.83901447199775159, 'n_estimators': 68, 'learning_rate': 0.79881794627812419, 'subsample': 0.99237105986371343, 'gamma': 4.3586846084807948e-07, 'min_child_weight': 1, 'max_delta_step': 17, 'colsample_bytree': 0.88448212460705367, 'reg_alpha': 0.00052669830037015467, 'scale_pos_weight': 0.30164107718431421}

Model #3
Best accuracy: 0.86193
Best params: {'max_depth': 3, 'reg_lambda': 276.54244755742252, 'colsample_bylevel': 0.83901447199775159, 'n_e