In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install catboost
!pip install optuna
!pip install shap

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.1.1-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 1.2 MB/s 
Installing collected packages: catboost
Successfully installed catboost-1.1.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-3.0.3-py3-none-any.whl (348 kB)
[K     |████████████████████████████████| 348 kB 7.4 MB/s 
[?25hCollecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting cliff
  Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 9.1 MB/s 
Collecting cmaes>=0.8.2
  Downloading cmaes-0.9.0-py3-none-any.whl (23 kB)
Collecting alembic>=1.5.0
  Downloading alembic-1.8.1-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 56.9 MB/s 
Collecting Ma

In [3]:
import numpy as np
import pandas as pd

import sklearn
import sklearn.metrics
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve, auc, precision_score, recall_score, f1_score, classification_report, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.calibration import calibration_curve

from scipy import stats as st
from random import randrange
from matplotlib import pyplot

import xgboost as xgb
import lightgbm as lgb
import catboost as cb
from sklearn.ensemble import RandomForestClassifier

import optuna

import shap

In [4]:
#Open csv file.

data = pd.read_csv("/content/drive/MyDrive/ACDF/acdf_final.csv", index_col = 0)
data.head()

Unnamed: 0,AGE,HEIGHT,WEIGHT,PRSODM,PRBUN,PRCREAT,PRWBC,PRHCT,PRPLATE,TOTHLOS,...,READMISSION1_Yes,RACE_Black or African American,RACE_Hispanic,RACE_Other,RACE_Unknown,RACE_White,LEVELS_Single,COMP_Yes,LOS_Yes,DISCHARGE_Yes
5090996,0.712329,0.74359,0.318681,0.5,0.085714,0.073253,0.201207,0.70334,0.270111,0.032258,...,0,0,0,0,0,1,1,0,1,0
5089868,0.575342,0.641026,0.263736,0.629032,0.095238,0.046371,0.158954,0.742633,0.194534,0.032258,...,0,0,0,0,0,1,0,0,1,0
5092013,0.39726,0.589744,0.230769,0.580645,0.060714,0.02957,0.191147,0.626719,0.300135,0.043011,...,0,0,0,0,0,1,0,0,1,1
5121528,0.506849,0.512821,0.178022,0.580645,0.086905,0.047715,0.092555,0.679764,0.188322,0.043011,...,0,0,0,0,0,1,0,0,1,0
5157828,0.616438,0.564103,0.252747,0.629032,0.089286,0.027554,0.094567,0.534381,0.202816,0.010753,...,0,0,1,0,0,0,1,0,0,0


In [5]:
#See potential variables.

print(list(data.columns))

['AGE', 'HEIGHT', 'WEIGHT', 'PRSODM', 'PRBUN', 'PRCREAT', 'PRWBC', 'PRHCT', 'PRPLATE', 'TOTHLOS', 'BMI', 'SEX_male', 'SEX_non-binary', 'INOUT_Outpatient', 'TRANST_Transferred', 'TRANST_Unknown', 'SURGSPEC_Orthopedics', 'DIABETES_Yes', 'SMOKE_Yes', 'DYSPNEA_Yes', 'FNSTATUS2_Partially Dependent', 'FNSTATUS2_Totally Dependent', 'FNSTATUS2_Unknown', 'VENTILAT_Yes', 'HXCOPD_Yes', 'ASCITES_Yes', 'HXCHF_Yes', 'HYPERMED_Yes', 'RENAFAIL_Yes', 'DIALYSIS_Yes', 'DISCANCR_Yes', 'WNDINF_Yes', 'STEROID_Yes', 'WTLOSS_Yes', 'BLEEDDIS_Yes', 'TRANSFUS_Yes', 'ASACLAS_2-Mild Disturb', 'ASACLAS_3-Severe Disturb', 'READMISSION1_Yes', 'RACE_Black or African American', 'RACE_Hispanic', 'RACE_Other', 'RACE_Unknown', 'RACE_White', 'LEVELS_Single', 'COMP_Yes', 'LOS_Yes', 'DISCHARGE_Yes']


In [6]:
#Define predictor variables and outcome of interest.

variables = ['AGE', 'HEIGHT', 'WEIGHT', 'PRSODM', 'PRBUN', 'PRCREAT', 'PRWBC', 'PRHCT', 'PRPLATE', 'BMI', 'SEX_male', 'SEX_non-binary', 'INOUT_Outpatient', 'TRANST_Transferred', 'TRANST_Unknown', 'SURGSPEC_Orthopedics', 'DIABETES_Yes', 'SMOKE_Yes', 'DYSPNEA_Yes', 'FNSTATUS2_Partially Dependent', 'FNSTATUS2_Totally Dependent', 'FNSTATUS2_Unknown', 'VENTILAT_Yes', 'HXCOPD_Yes', 'ASCITES_Yes', 'HXCHF_Yes', 'HYPERMED_Yes', 'RENAFAIL_Yes', 'DIALYSIS_Yes', 'DISCANCR_Yes', 'WNDINF_Yes', 'STEROID_Yes', 'WTLOSS_Yes', 'BLEEDDIS_Yes', 'TRANSFUS_Yes', 'ASACLAS_2-Mild Disturb', 'ASACLAS_3-Severe Disturb', 'RACE_Black or African American', 'RACE_Hispanic', 'RACE_Other', 'RACE_Unknown', 'RACE_White', 'LEVELS_Single', 'LOS_Yes']

In [7]:
#Redefine data.

data = data[variables]

In [8]:
#Define predictor variables (x) and outcome of interest (y).

x = data.drop(['LOS_Yes'], axis = 1)
y = data['LOS_Yes']

In [9]:
#Check data shapes.

print(y.shape)
print(x.shape)

(44903,)
(44903, 43)


In [10]:
#Split data into initial train set and test set in 80:20 ratio.

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
  
#Describe initial train set and test set.

print("Number patients x_train dataset: ", x_train.shape)
print("Number patients y_train dataset: ", y_train.shape)
print("Number patients x_test dataset: ", x_test.shape)
print("Number patients y_test dataset: ", y_test.shape)

Number patients x_train dataset:  (35922, 43)
Number patients y_train dataset:  (35922,)
Number patients x_test dataset:  (8981, 43)
Number patients y_test dataset:  (8981,)


In [11]:
#Split initial train set into final train set and validation set in 75:25 ratio.

train_x, valid_x, train_y, valid_y = train_test_split(x_train, y_train, test_size = 0.25, random_state = 0)
  
#Describe train and validation sets.

print("Number patients train_x dataset: ", train_x.shape)
print("Number patients train_y dataset: ", train_y.shape)
print("Number patients valid_x dataset: ", valid_x.shape)
print("Number patients valid_y dataset: ", valid_y.shape)

Number patients train_x dataset:  (26941, 43)
Number patients train_y dataset:  (26941,)
Number patients valid_x dataset:  (8981, 43)
Number patients valid_y dataset:  (8981,)


In [12]:
#Describe outcome of interest before resampling.

print("Before resampling, counts of label '1': {}".format(sum(train_y == 1)))
print("Before resampling, counts of label '0': {} \n".format(sum(train_y == 0)))

Before resampling, counts of label '1': 3010
Before resampling, counts of label '0': 23931 



In [13]:
#Apply SMOTE.

from imblearn.over_sampling import SMOTE

resampler = SMOTE()
train_x, train_y = resampler.fit_resample(train_x, train_y)

In [14]:
#Describe outcome of interest after resampling.

print("After resampling, counts of label '1': {}".format(sum(train_y == 1)))
print("After resampling, counts of label '0': {} \n".format(sum(train_y == 0)))

After resampling, counts of label '1': 23931
After resampling, counts of label '0': 23931 



# XGBoost

In [15]:
def objective(trial):

    dtrain = xgb.DMatrix(train_x, label=train_y)
    dvalid = xgb.DMatrix(valid_x, label=valid_y)

    param = {
        "verbosity": 0,
        "objective":  trial.suggest_categorical("objective", ["binary:logistic"]),
        "eval_metric": "auc",
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    }

    if param["booster"] == "gbtree" or param["booster"] == "dart":
        param["max_depth"] = trial.suggest_int("max_depth", 1, 9)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    # Add a callback for pruning.
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "validation-auc")
    
    bst = xgb.train(param, dtrain, evals=[(dvalid, "validation")], callbacks=[pruning_callback])
    preds = bst.predict(dvalid)
    pred_labels = np.rint(preds)
    auc = sklearn.metrics.roc_auc_score(valid_y, pred_labels)

    return auc


if __name__ == "__main__":
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=5), direction="maximize"
    )
    study.optimize(objective, n_trials=100)
    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))
        
    xgb_params = {}
    
    for key, value in trial.params.items():
        xgb_params[key] = value

[32m[I 2022-11-21 01:20:01,234][0m A new study created in memory with name: no-name-268e1226-33e9-4ec5-a8a6-16377f8871d6[0m


[0]	validation-auc:0.724456
[1]	validation-auc:0.724456
[2]	validation-auc:0.724411
[3]	validation-auc:0.724411
[4]	validation-auc:0.724411
[5]	validation-auc:0.724411
[6]	validation-auc:0.724411
[7]	validation-auc:0.724411
[8]	validation-auc:0.724411
[9]	validation-auc:0.724411


[32m[I 2022-11-21 01:20:03,703][0m Trial 0 finished with value: 0.6676791273098028 and parameters: {'objective': 'binary:logistic', 'booster': 'gbtree', 'lambda': 0.0002623532591987795, 'alpha': 0.07914480155020225, 'max_depth': 6, 'eta': 0.00030148766295521364, 'gamma': 3.907842215039409e-06, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.6676791273098028.[0m


[0]	validation-auc:0.748372
[1]	validation-auc:0.740806
[2]	validation-auc:0.733132
[3]	validation-auc:0.739011
[4]	validation-auc:0.731353
[5]	validation-auc:0.733257
[6]	validation-auc:0.73665
[7]	validation-auc:0.727524
[8]	validation-auc:0.723571
[9]	validation-auc:0.723329


[32m[I 2022-11-21 01:20:07,050][0m Trial 1 finished with value: 0.5942953184888287 and parameters: {'objective': 'binary:logistic', 'booster': 'dart', 'lambda': 0.015776869351741586, 'alpha': 3.5176712121942825e-05, 'max_depth': 4, 'eta': 0.5731734383796065, 'gamma': 0.0010530303721179338, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 1.2189432280570128e-08, 'skip_drop': 0.0032365042432882167}. Best is trial 0 with value: 0.6676791273098028.[0m


[0]	validation-auc:0.744454
[1]	validation-auc:0.747431
[2]	validation-auc:0.747454
[3]	validation-auc:0.747454
[4]	validation-auc:0.747454
[5]	validation-auc:0.747454
[6]	validation-auc:0.747454
[7]	validation-auc:0.747454
[8]	validation-auc:0.747454
[9]	validation-auc:0.747454


[32m[I 2022-11-21 01:20:10,163][0m Trial 2 finished with value: 0.7040063042210197 and parameters: {'objective': 'binary:logistic', 'booster': 'dart', 'lambda': 0.008723438150310719, 'alpha': 0.190264214312677, 'max_depth': 3, 'eta': 7.925291607586733e-07, 'gamma': 0.047647885795878316, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 0.0021818432591652654, 'skip_drop': 0.0008958233516658135}. Best is trial 2 with value: 0.7040063042210197.[0m


[0]	validation-auc:0.747454
[1]	validation-auc:0.747454
[2]	validation-auc:0.747454
[3]	validation-auc:0.747454
[4]	validation-auc:0.747454
[5]	validation-auc:0.747454
[6]	validation-auc:0.747454
[7]	validation-auc:0.747454
[8]	validation-auc:0.747454
[9]	validation-auc:0.747454


[32m[I 2022-11-21 01:20:12,616][0m Trial 3 finished with value: 0.7040063042210197 and parameters: {'objective': 'binary:logistic', 'booster': 'gbtree', 'lambda': 4.894752059393354e-05, 'alpha': 0.00038815958962666553, 'max_depth': 3, 'eta': 5.485290801655906e-06, 'gamma': 8.61355735790898e-07, 'grow_policy': 'lossguide'}. Best is trial 2 with value: 0.7040063042210197.[0m


[0]	validation-auc:0.688445
[1]	validation-auc:0.688445
[2]	validation-auc:0.688445
[3]	validation-auc:0.688445
[4]	validation-auc:0.688445
[5]	validation-auc:0.688445
[6]	validation-auc:0.688445
[7]	validation-auc:0.688445
[8]	validation-auc:0.688445
[9]	validation-auc:0.688445


[32m[I 2022-11-21 01:20:13,977][0m Trial 4 finished with value: 0.6884450639618196 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.0011682518947666934, 'alpha': 0.0789779486934525}. Best is trial 2 with value: 0.7040063042210197.[0m


[0]	validation-auc:0.764498
[1]	validation-auc:0.767515
[2]	validation-auc:0.768273
[3]	validation-auc:0.769035
[4]	validation-auc:0.769401
[5]	validation-auc:0.769762
[6]	validation-auc:0.770054
[7]	validation-auc:0.770273
[8]	validation-auc:0.77045
[9]	validation-auc:0.77066


[32m[I 2022-11-21 01:20:15,594][0m Trial 5 finished with value: 0.7096894391451372 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.03398885823018588, 'alpha': 3.8204040088256335e-06}. Best is trial 5 with value: 0.7096894391451372.[0m


[0]	validation-auc:0.688445
[1]	validation-auc:0.73582
[2]	validation-auc:0.73582
[3]	validation-auc:0.73582
[4]	validation-auc:0.73582


[32m[I 2022-11-21 01:20:16,693][0m Trial 6 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.76742
[1]	validation-auc:0.768495
[2]	validation-auc:0.76732
[3]	validation-auc:0.765624
[4]	validation-auc:0.764906
[5]	validation-auc:0.763501
[6]	validation-auc:0.762656
[7]	validation-auc:0.762626
[8]	validation-auc:0.762631
[9]	validation-auc:0.762775


[32m[I 2022-11-21 01:20:18,365][0m Trial 7 finished with value: 0.7045403263328549 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 2.7793365805604134e-05, 'alpha': 4.4293367481966895e-06}. Best is trial 5 with value: 0.7096894391451372.[0m


[0]	validation-auc:0.725078
[1]	validation-auc:0.728887
[2]	validation-auc:0.726661
[3]	validation-auc:0.727636
[4]	validation-auc:0.727044


[32m[I 2022-11-21 01:20:21,617][0m Trial 8 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.762221
[1]	validation-auc:0.760981
[2]	validation-auc:0.759793
[3]	validation-auc:0.758962
[4]	validation-auc:0.761333
[5]	validation-auc:0.763474
[6]	validation-auc:0.763474
[7]	validation-auc:0.763474
[8]	validation-auc:0.763474
[9]	validation-auc:0.763474


[32m[I 2022-11-21 01:20:23,252][0m Trial 9 finished with value: 0.6884450639618196 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 2.881401979454205e-06, 'alpha': 0.017221167825162805}. Best is trial 5 with value: 0.7096894391451372.[0m


[0]	validation-auc:0.755129
[1]	validation-auc:0.755134
[2]	validation-auc:0.755472
[3]	validation-auc:0.755785
[4]	validation-auc:0.756073
[5]	validation-auc:0.756275
[6]	validation-auc:0.756433
[7]	validation-auc:0.756546
[8]	validation-auc:0.75664
[9]	validation-auc:0.756698


[32m[I 2022-11-21 01:20:24,811][0m Trial 10 finished with value: 0.6908781493440501 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.9743490546382608, 'alpha': 4.0279385002557053e-08}. Best is trial 5 with value: 0.7096894391451372.[0m


[0]	validation-auc:0.757065
[1]	validation-auc:0.75728
[2]	validation-auc:0.757642
[3]	validation-auc:0.75816
[4]	validation-auc:0.758646
[5]	validation-auc:0.758933
[6]	validation-auc:0.759133
[7]	validation-auc:0.759319
[8]	validation-auc:0.759446
[9]	validation-auc:0.759531


[32m[I 2022-11-21 01:20:27,022][0m Trial 11 finished with value: 0.6898523333783598 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.509787373857539, 'alpha': 9.163796282157402e-07}. Best is trial 5 with value: 0.7096894391451372.[0m


[0]	validation-auc:0.766332
[1]	validation-auc:0.767139
[2]	validation-auc:0.766402
[3]	validation-auc:0.765034
[4]	validation-auc:0.763538
[5]	validation-auc:0.763443
[6]	validation-auc:0.7632
[7]	validation-auc:0.762615
[8]	validation-auc:0.76177
[9]	validation-auc:0.761652


[32m[I 2022-11-21 01:20:28,629][0m Trial 12 finished with value: 0.7018379722092272 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 7.425625547274426e-06, 'alpha': 3.2042190013106705e-06}. Best is trial 5 with value: 0.7096894391451372.[0m


[0]	validation-auc:0.765163
[1]	validation-auc:0.76805
[2]	validation-auc:0.769443
[3]	validation-auc:0.76994
[4]	validation-auc:0.770459
[5]	validation-auc:0.770534
[6]	validation-auc:0.770603
[7]	validation-auc:0.770655
[8]	validation-auc:0.770722
[9]	validation-auc:0.770803


[32m[I 2022-11-21 01:20:29,684][0m Trial 13 finished with value: 0.7102037462570119 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.025877037806891337, 'alpha': 3.502070951304498e-08}. Best is trial 13 with value: 0.7102037462570119.[0m


[0]	validation-auc:0.765749
[1]	validation-auc:0.767124
[2]	validation-auc:0.767994
[3]	validation-auc:0.76867
[4]	validation-auc:0.769394
[5]	validation-auc:0.769697
[6]	validation-auc:0.769914
[7]	validation-auc:0.7701
[8]	validation-auc:0.77025
[9]	validation-auc:0.770356


[32m[I 2022-11-21 01:20:30,171][0m Trial 14 finished with value: 0.7086238751956555 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.04708586930934241, 'alpha': 1.631862667996274e-08}. Best is trial 13 with value: 0.7102037462570119.[0m


[0]	validation-auc:0.716188
[1]	validation-auc:0.714991
[2]	validation-auc:0.710798
[3]	validation-auc:0.70904
[4]	validation-auc:0.70427


[32m[I 2022-11-21 01:20:31,463][0m Trial 15 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.765356
[1]	validation-auc:0.766419
[2]	validation-auc:0.766804
[3]	validation-auc:0.767071
[4]	validation-auc:0.767231
[5]	validation-auc:0.767317
[6]	validation-auc:0.767241
[7]	validation-auc:0.767292
[8]	validation-auc:0.76776
[9]	validation-auc:0.767709


[32m[I 2022-11-21 01:20:31,952][0m Trial 16 finished with value: 0.710257612724646 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.003224797063705759, 'alpha': 0.0005251202414900852}. Best is trial 16 with value: 0.710257612724646.[0m


[0]	validation-auc:0.768668
[1]	validation-auc:0.767922
[2]	validation-auc:0.767628
[3]	validation-auc:0.767561
[4]	validation-auc:0.767428
[5]	validation-auc:0.767245
[6]	validation-auc:0.767195
[7]	validation-auc:0.767239
[8]	validation-auc:0.767445
[9]	validation-auc:0.767433


[32m[I 2022-11-21 01:20:32,445][0m Trial 17 finished with value: 0.7075665788268024 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.002487772766084184, 'alpha': 0.0014726968459449887}. Best is trial 16 with value: 0.710257612724646.[0m


[0]	validation-auc:0.5
[1]	validation-auc:0.5
[2]	validation-auc:0.5
[3]	validation-auc:0.5
[4]	validation-auc:0.5


[32m[I 2022-11-21 01:20:32,833][0m Trial 18 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.5
[1]	validation-auc:0.5
[2]	validation-auc:0.5
[3]	validation-auc:0.5
[4]	validation-auc:0.5


[32m[I 2022-11-21 01:20:34,094][0m Trial 19 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.757762
[1]	validation-auc:0.760825
[2]	validation-auc:0.761841
[3]	validation-auc:0.762809
[4]	validation-auc:0.763413
[5]	validation-auc:0.763853
[6]	validation-auc:0.764162
[7]	validation-auc:0.764398
[8]	validation-auc:0.764565
[9]	validation-auc:0.76468


[32m[I 2022-11-21 01:20:34,605][0m Trial 20 finished with value: 0.6987749607798693 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.20552017866543607, 'alpha': 5.118401756763834e-05}. Best is trial 16 with value: 0.710257612724646.[0m


[0]	validation-auc:0.767063
[1]	validation-auc:0.768268
[2]	validation-auc:0.768983
[3]	validation-auc:0.769078
[4]	validation-auc:0.769432
[5]	validation-auc:0.769749
[6]	validation-auc:0.769994
[7]	validation-auc:0.770213
[8]	validation-auc:0.770415
[9]	validation-auc:0.770614


[32m[I 2022-11-21 01:20:35,086][0m Trial 21 finished with value: 0.7134317913150718 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.019544417722676014, 'alpha': 2.828198569634964e-07}. Best is trial 21 with value: 0.7134317913150718.[0m


[0]	validation-auc:0.763768
[1]	validation-auc:0.767436
[2]	validation-auc:0.768454
[3]	validation-auc:0.768641
[4]	validation-auc:0.768722
[5]	validation-auc:0.768795
[6]	validation-auc:0.768878
[7]	validation-auc:0.768994
[8]	validation-auc:0.769111
[9]	validation-auc:0.769264


[32m[I 2022-11-21 01:20:35,573][0m Trial 22 finished with value: 0.71681323179218 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.006584234200782501, 'alpha': 1.1123134031721667e-07}. Best is trial 22 with value: 0.71681323179218.[0m


[0]	validation-auc:0.767784
[1]	validation-auc:0.767513
[2]	validation-auc:0.768566
[3]	validation-auc:0.768585
[4]	validation-auc:0.768572
[5]	validation-auc:0.768522
[6]	validation-auc:0.76852
[7]	validation-auc:0.768517
[8]	validation-auc:0.768587
[9]	validation-auc:0.768691


[32m[I 2022-11-21 01:20:36,068][0m Trial 23 finished with value: 0.7129912564611143 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.004494142360443159, 'alpha': 4.0544451162757625e-07}. Best is trial 22 with value: 0.71681323179218.[0m


[0]	validation-auc:0.763668
[1]	validation-auc:0.767487
[2]	validation-auc:0.76792
[3]	validation-auc:0.766844
[4]	validation-auc:0.765922
[5]	validation-auc:0.765906
[6]	validation-auc:0.765375
[7]	validation-auc:0.765324
[8]	validation-auc:0.765034
[9]	validation-auc:0.764824


[32m[I 2022-11-21 01:20:36,586][0m Trial 24 finished with value: 0.7054325890730058 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.00021778570530417982, 'alpha': 3.3282084984546667e-07}. Best is trial 22 with value: 0.71681323179218.[0m


[0]	validation-auc:0.759874
[1]	validation-auc:0.76702
[2]	validation-auc:0.768171
[3]	validation-auc:0.768884
[4]	validation-auc:0.769311
[5]	validation-auc:0.769402
[6]	validation-auc:0.769495
[7]	validation-auc:0.769525
[8]	validation-auc:0.769542
[9]	validation-auc:0.769528


[32m[I 2022-11-21 01:20:38,019][0m Trial 25 finished with value: 0.7153753723273139 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.005279989397441149, 'alpha': 9.577638366025764e-08}. Best is trial 22 with value: 0.71681323179218.[0m


[0]	validation-auc:0.760005
[1]	validation-auc:0.76024
[2]	validation-auc:0.761305
[3]	validation-auc:0.762098
[4]	validation-auc:0.7627


[32m[I 2022-11-21 01:20:38,916][0m Trial 26 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.769466
[1]	validation-auc:0.76932
[2]	validation-auc:0.769758
[3]	validation-auc:0.769734
[4]	validation-auc:0.769216
[5]	validation-auc:0.768801
[6]	validation-auc:0.768235
[7]	validation-auc:0.768027
[8]	validation-auc:0.767795
[9]	validation-auc:0.767118


[32m[I 2022-11-21 01:20:40,241][0m Trial 27 finished with value: 0.7099478964349429 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.0006981536914689148, 'alpha': 1.3481975026444445e-08}. Best is trial 22 with value: 0.71681323179218.[0m


[0]	validation-auc:0.688445
[1]	validation-auc:0.688445
[2]	validation-auc:0.688445
[3]	validation-auc:0.688445
[4]	validation-auc:0.688445


[32m[I 2022-11-21 01:20:40,634][0m Trial 28 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.5
[1]	validation-auc:0.5
[2]	validation-auc:0.50179
[3]	validation-auc:0.501861
[4]	validation-auc:0.501674


[32m[I 2022-11-21 01:20:41,641][0m Trial 29 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.718404
[1]	validation-auc:0.718498
[2]	validation-auc:0.718731
[3]	validation-auc:0.718528
[4]	validation-auc:0.718652


[32m[I 2022-11-21 01:20:42,794][0m Trial 30 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.764517
[1]	validation-auc:0.767691
[2]	validation-auc:0.768406
[3]	validation-auc:0.768384
[4]	validation-auc:0.768226
[5]	validation-auc:0.768082
[6]	validation-auc:0.767998
[7]	validation-auc:0.767948
[8]	validation-auc:0.767982
[9]	validation-auc:0.768024


[32m[I 2022-11-21 01:20:43,288][0m Trial 31 finished with value: 0.7133208785411 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.0029118864105746687, 'alpha': 3.953343540636344e-07}. Best is trial 22 with value: 0.71681323179218.[0m


[0]	validation-auc:0.768103
[1]	validation-auc:0.769071
[2]	validation-auc:0.769315
[3]	validation-auc:0.769645
[4]	validation-auc:0.769595
[5]	validation-auc:0.769547
[6]	validation-auc:0.769577
[7]	validation-auc:0.769638
[8]	validation-auc:0.769729
[9]	validation-auc:0.769867


[32m[I 2022-11-21 01:20:43,790][0m Trial 32 finished with value: 0.7184130722405933 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.009145962994172004, 'alpha': 7.136294164902744e-07}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.764801
[1]	validation-auc:0.767665
[2]	validation-auc:0.768541
[3]	validation-auc:0.768824
[4]	validation-auc:0.769049
[5]	validation-auc:0.769278
[6]	validation-auc:0.769504
[7]	validation-auc:0.769702
[8]	validation-auc:0.769909
[9]	validation-auc:0.770103


[32m[I 2022-11-21 01:20:44,286][0m Trial 33 finished with value: 0.7165859369216668 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.01261872388140482, 'alpha': 1.197892201894841e-06}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.74014
[1]	validation-auc:0.743558
[2]	validation-auc:0.741029
[3]	validation-auc:0.744747
[4]	validation-auc:0.74334


[32m[I 2022-11-21 01:20:45,069][0m Trial 34 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.768698
[1]	validation-auc:0.768829
[2]	validation-auc:0.768484
[3]	validation-auc:0.768164
[4]	validation-auc:0.767583
[5]	validation-auc:0.767137
[6]	validation-auc:0.766858
[7]	validation-auc:0.766676
[8]	validation-auc:0.766598
[9]	validation-auc:0.766617


[32m[I 2022-11-21 01:20:45,560][0m Trial 35 finished with value: 0.7085015150023517 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.0008334394298004768, 'alpha': 1.3962761458320109e-06}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.762335
[1]	validation-auc:0.764914
[2]	validation-auc:0.764716
[3]	validation-auc:0.7639
[4]	validation-auc:0.763522


[32m[I 2022-11-21 01:20:45,906][0m Trial 36 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.768107
[1]	validation-auc:0.769093
[2]	validation-auc:0.769319
[3]	validation-auc:0.7693
[4]	validation-auc:0.769554
[5]	validation-auc:0.769668
[6]	validation-auc:0.769635
[7]	validation-auc:0.769682
[8]	validation-auc:0.769752
[9]	validation-auc:0.769858


[32m[I 2022-11-21 01:20:46,403][0m Trial 37 finished with value: 0.7182255253538715 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.008771003446041628, 'alpha': 9.971768167111477e-08}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.759694
[1]	validation-auc:0.762849
[2]	validation-auc:0.76496
[3]	validation-auc:0.766053
[4]	validation-auc:0.766765
[5]	validation-auc:0.767252
[6]	validation-auc:0.767583
[7]	validation-auc:0.76779
[8]	validation-auc:0.767935
[9]	validation-auc:0.768062


[32m[I 2022-11-21 01:20:46,889][0m Trial 38 finished with value: 0.7028952685780804 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.10690924204192767, 'alpha': 2.229430921083397e-06}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.735439
[1]	validation-auc:0.5
[2]	validation-auc:0.5
[3]	validation-auc:0.735439
[4]	validation-auc:0.735439


[32m[I 2022-11-21 01:20:47,372][0m Trial 39 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.730239
[1]	validation-auc:0.715955
[2]	validation-auc:0.732109
[3]	validation-auc:0.728334
[4]	validation-auc:0.735771


[32m[I 2022-11-21 01:20:48,156][0m Trial 40 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.764
[1]	validation-auc:0.766647
[2]	validation-auc:0.768065
[3]	validation-auc:0.768406
[4]	validation-auc:0.768551
[5]	validation-auc:0.768648
[6]	validation-auc:0.768778
[7]	validation-auc:0.768881
[8]	validation-auc:0.768989
[9]	validation-auc:0.76913


[32m[I 2022-11-21 01:20:48,722][0m Trial 41 finished with value: 0.715679428504125 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.006229350795302186, 'alpha': 9.85653123780547e-08}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.768314
[1]	validation-auc:0.768839
[2]	validation-auc:0.76873
[3]	validation-auc:0.76898
[4]	validation-auc:0.768734
[5]	validation-auc:0.768086
[6]	validation-auc:0.76774
[7]	validation-auc:0.767465
[8]	validation-auc:0.767322
[9]	validation-auc:0.767274


[32m[I 2022-11-21 01:20:49,226][0m Trial 42 finished with value: 0.7112862905451693 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.0014710806483266906, 'alpha': 9.184129681539122e-08}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.762904
[1]	validation-auc:0.765026
[2]	validation-auc:0.765654
[3]	validation-auc:0.764741
[4]	validation-auc:0.763886


[32m[I 2022-11-21 01:20:49,567][0m Trial 43 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.765817
[1]	validation-auc:0.766742
[2]	validation-auc:0.768165
[3]	validation-auc:0.768936
[4]	validation-auc:0.768939
[5]	validation-auc:0.768968
[6]	validation-auc:0.769058
[7]	validation-auc:0.769153
[8]	validation-auc:0.769313
[9]	validation-auc:0.769528


[32m[I 2022-11-21 01:20:50,076][0m Trial 44 finished with value: 0.7169212191145446 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.007331013046643719, 'alpha': 3.4824221723045034e-08}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.765631
[1]	validation-auc:0.767014
[2]	validation-auc:0.768105
[3]	validation-auc:0.76875
[4]	validation-auc:0.76925
[5]	validation-auc:0.769592
[6]	validation-auc:0.769809
[7]	validation-auc:0.770035
[8]	validation-auc:0.770219
[9]	validation-auc:0.770347


[32m[I 2022-11-21 01:20:50,566][0m Trial 45 finished with value: 0.7099707912736068 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.04639497650750565, 'alpha': 2.1624161610556523e-08}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.767135
[1]	validation-auc:0.767573
[2]	validation-auc:0.767296
[3]	validation-auc:0.767357
[4]	validation-auc:0.766474


[32m[I 2022-11-21 01:20:50,901][0m Trial 46 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.768876
[1]	validation-auc:0.769045
[2]	validation-auc:0.768787
[3]	validation-auc:0.76858
[4]	validation-auc:0.768082
[5]	validation-auc:0.767644
[6]	validation-auc:0.76735
[7]	validation-auc:0.767243
[8]	validation-auc:0.767113
[9]	validation-auc:0.767086


[32m[I 2022-11-21 01:20:51,414][0m Trial 47 finished with value: 0.7105275810305577 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.0012394408308560058, 'alpha': 1.0001264274560495e-08}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.763831
[1]	validation-auc:0.766144
[2]	validation-auc:0.767746
[3]	validation-auc:0.768836
[4]	validation-auc:0.769251
[5]	validation-auc:0.769567
[6]	validation-auc:0.769859
[7]	validation-auc:0.77011
[8]	validation-auc:0.770332
[9]	validation-auc:0.77053


[32m[I 2022-11-21 01:20:51,905][0m Trial 48 finished with value: 0.7114597189480483 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.022060016433222966, 'alpha': 3.921356072155453e-06}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.756559
[1]	validation-auc:0.758454
[2]	validation-auc:0.75962
[3]	validation-auc:0.760315
[4]	validation-auc:0.760864


[32m[I 2022-11-21 01:20:52,274][0m Trial 49 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.754539
[1]	validation-auc:0.754984
[2]	validation-auc:0.755619
[3]	validation-auc:0.75619
[4]	validation-auc:0.756567


[32m[I 2022-11-21 01:20:52,620][0m Trial 50 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.76753
[1]	validation-auc:0.768882
[2]	validation-auc:0.769182
[3]	validation-auc:0.769211
[4]	validation-auc:0.769463
[5]	validation-auc:0.769398
[6]	validation-auc:0.769416
[7]	validation-auc:0.769472
[8]	validation-auc:0.769571
[9]	validation-auc:0.769681


[32m[I 2022-11-21 01:20:53,130][0m Trial 51 finished with value: 0.7180465004348747 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.007993843140763101, 'alpha': 1.5115539118966002e-07}. Best is trial 32 with value: 0.7184130722405933.[0m


[0]	validation-auc:0.76365
[1]	validation-auc:0.767293
[2]	validation-auc:0.768403
[3]	validation-auc:0.768745
[4]	validation-auc:0.769008
[5]	validation-auc:0.769168
[6]	validation-auc:0.769314
[7]	validation-auc:0.769703
[8]	validation-auc:0.769733
[9]	validation-auc:0.769855


[32m[I 2022-11-21 01:20:53,625][0m Trial 52 finished with value: 0.7189131972718511 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.00970688070089502, 'alpha': 4.374331562903457e-08}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.759713
[1]	validation-auc:0.763644
[2]	validation-auc:0.765269
[3]	validation-auc:0.766045
[4]	validation-auc:0.766783


[32m[I 2022-11-21 01:20:53,967][0m Trial 53 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.768923
[1]	validation-auc:0.769879
[2]	validation-auc:0.769432
[3]	validation-auc:0.768784
[4]	validation-auc:0.768361
[5]	validation-auc:0.767919
[6]	validation-auc:0.767629
[7]	validation-auc:0.767456
[8]	validation-auc:0.767401
[9]	validation-auc:0.767401


[32m[I 2022-11-21 01:20:54,490][0m Trial 54 finished with value: 0.711803459511877 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.0017803934822023665, 'alpha': 1.91722780221373e-07}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.76169
[1]	validation-auc:0.76369
[2]	validation-auc:0.763289
[3]	validation-auc:0.762568
[4]	validation-auc:0.761895


[32m[I 2022-11-21 01:20:54,838][0m Trial 55 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.721415
[1]	validation-auc:0.728117
[2]	validation-auc:0.731161
[3]	validation-auc:0.734004
[4]	validation-auc:0.734998


[32m[I 2022-11-21 01:20:55,869][0m Trial 56 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.768298
[1]	validation-auc:0.769606
[2]	validation-auc:0.770108
[3]	validation-auc:0.769803
[4]	validation-auc:0.76955
[5]	validation-auc:0.769376
[6]	validation-auc:0.769489
[7]	validation-auc:0.769517
[8]	validation-auc:0.769476
[9]	validation-auc:0.769728


[32m[I 2022-11-21 01:20:56,385][0m Trial 57 finished with value: 0.7173133568234379 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.006815948919991419, 'alpha': 1.7036418432822557e-07}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.763203
[1]	validation-auc:0.764781
[2]	validation-auc:0.766635
[3]	validation-auc:0.767798
[4]	validation-auc:0.768476
[5]	validation-auc:0.768922
[6]	validation-auc:0.769246
[7]	validation-auc:0.769461
[8]	validation-auc:0.769612
[9]	validation-auc:0.769722


[32m[I 2022-11-21 01:20:56,886][0m Trial 58 finished with value: 0.7028810229006895 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.06321453863725865, 'alpha': 4.678998703172793e-07}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.747454
[1]	validation-auc:0.747454
[2]	validation-auc:0.747454
[3]	validation-auc:0.747454
[4]	validation-auc:0.747454


[32m[I 2022-11-21 01:20:57,455][0m Trial 59 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.768577
[1]	validation-auc:0.769275
[2]	validation-auc:0.769377
[3]	validation-auc:0.769147
[4]	validation-auc:0.768915
[5]	validation-auc:0.768749
[6]	validation-auc:0.768682
[7]	validation-auc:0.768661
[8]	validation-auc:0.768714
[9]	validation-auc:0.768806


[32m[I 2022-11-21 01:20:57,956][0m Trial 60 finished with value: 0.7134203438957398 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.0044138462447040035, 'alpha': 1.9031160239860876e-07}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.768091
[1]	validation-auc:0.76907
[2]	validation-auc:0.769458
[3]	validation-auc:0.769399
[4]	validation-auc:0.769444
[5]	validation-auc:0.769641
[6]	validation-auc:0.769733
[7]	validation-auc:0.769784
[8]	validation-auc:0.769835
[9]	validation-auc:0.769913


[32m[I 2022-11-21 01:20:58,464][0m Trial 61 finished with value: 0.7184755878695005 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.009021803243203178, 'alpha': 9.443438305317911e-08}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.763782
[1]	validation-auc:0.768383
[2]	validation-auc:0.769035
[3]	validation-auc:0.769176
[4]	validation-auc:0.769176
[5]	validation-auc:0.769168
[6]	validation-auc:0.769214
[7]	validation-auc:0.769297
[8]	validation-auc:0.769419
[9]	validation-auc:0.769719


[32m[I 2022-11-21 01:20:58,995][0m Trial 62 finished with value: 0.7175463754036169 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.008030170279175968, 'alpha': 2.36588462424308e-08}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.734587
[1]	validation-auc:0.734587
[2]	validation-auc:0.734587
[3]	validation-auc:0.753271
[4]	validation-auc:0.754662


[32m[I 2022-11-21 01:20:59,313][0m Trial 63 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.767298
[1]	validation-auc:0.768681
[2]	validation-auc:0.769106
[3]	validation-auc:0.769283
[4]	validation-auc:0.769584
[5]	validation-auc:0.769666
[6]	validation-auc:0.76979
[7]	validation-auc:0.769902
[8]	validation-auc:0.770032
[9]	validation-auc:0.770156


[32m[I 2022-11-21 01:20:59,822][0m Trial 64 finished with value: 0.7186205249175976 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.011595970787693106, 'alpha': 1.5972890900102597e-07}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.764235
[1]	validation-auc:0.767551
[2]	validation-auc:0.76789
[3]	validation-auc:0.767798
[4]	validation-auc:0.76783


[32m[I 2022-11-21 01:21:00,175][0m Trial 65 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.762503
[1]	validation-auc:0.763282
[2]	validation-auc:0.764636
[3]	validation-auc:0.765468
[4]	validation-auc:0.766121


[32m[I 2022-11-21 01:21:00,541][0m Trial 66 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.767361
[1]	validation-auc:0.768803
[2]	validation-auc:0.76931
[3]	validation-auc:0.769264
[4]	validation-auc:0.769453
[5]	validation-auc:0.769687
[6]	validation-auc:0.769896
[7]	validation-auc:0.770061
[8]	validation-auc:0.770208
[9]	validation-auc:0.770382


[32m[I 2022-11-21 01:21:01,052][0m Trial 67 finished with value: 0.7163643021640456 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.014400933099424928, 'alpha': 4.487415782527582e-07}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.764101
[1]	validation-auc:0.767136
[2]	validation-auc:0.768227
[3]	validation-auc:0.768787
[4]	validation-auc:0.769343
[5]	validation-auc:0.769714
[6]	validation-auc:0.770019
[7]	validation-auc:0.770266
[8]	validation-auc:0.770433
[9]	validation-auc:0.770589


[32m[I 2022-11-21 01:21:01,562][0m Trial 68 finished with value: 0.7094308546617835 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.033918707374366606, 'alpha': 1.995228650723783e-06}. Best is trial 52 with value: 0.7189131972718511.[0m


[0]	validation-auc:0.748372
[1]	validation-auc:0.748372
[2]	validation-auc:0.748372
[3]	validation-auc:0.748372
[4]	validation-auc:0.748372


[32m[I 2022-11-21 01:21:02,234][0m Trial 69 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.764841
[1]	validation-auc:0.76533
[2]	validation-auc:0.767897
[3]	validation-auc:0.766949
[4]	validation-auc:0.766424


[32m[I 2022-11-21 01:21:02,577][0m Trial 70 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.767913
[1]	validation-auc:0.768934
[2]	validation-auc:0.769407
[3]	validation-auc:0.769432
[4]	validation-auc:0.769488
[5]	validation-auc:0.769571
[6]	validation-auc:0.769784
[7]	validation-auc:0.769844
[8]	validation-auc:0.769991
[9]	validation-auc:0.770101


[32m[I 2022-11-21 01:21:03,087][0m Trial 71 finished with value: 0.719083700223123 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.010670271263133628, 'alpha': 1.6193422152528249e-07}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.5
[1]	validation-auc:0.5
[2]	validation-auc:0.5
[3]	validation-auc:0.5
[4]	validation-auc:0.5


[32m[I 2022-11-21 01:21:03,417][0m Trial 72 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.76791
[1]	validation-auc:0.767581
[2]	validation-auc:0.768689
[3]	validation-auc:0.768936
[4]	validation-auc:0.769213
[5]	validation-auc:0.769369
[6]	validation-auc:0.769533
[7]	validation-auc:0.769684
[8]	validation-auc:0.769848
[9]	validation-auc:0.770215


[32m[I 2022-11-21 01:21:03,930][0m Trial 73 finished with value: 0.7178163437095285 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.011145582645543222, 'alpha': 6.527287801615883e-08}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.7675
[1]	validation-auc:0.768592
[2]	validation-auc:0.769145
[3]	validation-auc:0.769328
[4]	validation-auc:0.769555
[5]	validation-auc:0.769788
[6]	validation-auc:0.769956
[7]	validation-auc:0.770135
[8]	validation-auc:0.770518
[9]	validation-auc:0.7706


[32m[I 2022-11-21 01:21:04,426][0m Trial 74 finished with value: 0.7157050580040737 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.01511283100159689, 'alpha': 5.451289612276004e-07}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.718313
[1]	validation-auc:0.719061
[2]	validation-auc:0.718863
[3]	validation-auc:0.71901
[4]	validation-auc:0.719117


[32m[I 2022-11-21 01:21:05,582][0m Trial 75 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.764856
[1]	validation-auc:0.766479
[2]	validation-auc:0.767435
[3]	validation-auc:0.76814
[4]	validation-auc:0.768676
[5]	validation-auc:0.76902
[6]	validation-auc:0.769305
[7]	validation-auc:0.769492
[8]	validation-auc:0.769635
[9]	validation-auc:0.769748


[32m[I 2022-11-21 01:21:06,093][0m Trial 76 finished with value: 0.7025599227884286 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.06289805389567545, 'alpha': 2.1760471118027546e-07}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.768075
[1]	validation-auc:0.769045
[2]	validation-auc:0.76931
[3]	validation-auc:0.769392
[4]	validation-auc:0.769443
[5]	validation-auc:0.76964
[6]	validation-auc:0.769632
[7]	validation-auc:0.769665
[8]	validation-auc:0.769783
[9]	validation-auc:0.770072


[32m[I 2022-11-21 01:21:06,618][0m Trial 77 finished with value: 0.7175918470970744 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.009147859812081927, 'alpha': 1.283383249492928e-07}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.765319
[1]	validation-auc:0.767217
[2]	validation-auc:0.768213
[3]	validation-auc:0.769022
[4]	validation-auc:0.769457
[5]	validation-auc:0.769819
[6]	validation-auc:0.770049
[7]	validation-auc:0.770255
[8]	validation-auc:0.770419
[9]	validation-auc:0.770562


[32m[I 2022-11-21 01:21:07,115][0m Trial 78 finished with value: 0.7090216730174406 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.03854454614498975, 'alpha': 3.146205244811002e-07}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.5
[1]	validation-auc:0.5
[2]	validation-auc:0.5
[3]	validation-auc:0.5
[4]	validation-auc:0.5


[32m[I 2022-11-21 01:21:07,458][0m Trial 79 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.76561
[1]	validation-auc:0.764801
[2]	validation-auc:0.763866
[3]	validation-auc:0.762857
[4]	validation-auc:0.761954


[32m[I 2022-11-21 01:21:07,819][0m Trial 80 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.767581
[1]	validation-auc:0.769069
[2]	validation-auc:0.769447
[3]	validation-auc:0.769891
[4]	validation-auc:0.76976
[5]	validation-auc:0.769726
[6]	validation-auc:0.76974
[7]	validation-auc:0.770019
[8]	validation-auc:0.770041
[9]	validation-auc:0.770114


[32m[I 2022-11-21 01:21:08,324][0m Trial 81 finished with value: 0.7187711220785868 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.010192795655429934, 'alpha': 1.0078249728032133e-07}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.768577
[1]	validation-auc:0.769268
[2]	validation-auc:0.769483
[3]	validation-auc:0.769394
[4]	validation-auc:0.769599
[5]	validation-auc:0.769545
[6]	validation-auc:0.769327
[7]	validation-auc:0.76916
[8]	validation-auc:0.769066
[9]	validation-auc:0.769


[32m[I 2022-11-21 01:21:08,853][0m Trial 82 finished with value: 0.714304084668166 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.004416983818665218, 'alpha': 2.9679831663771e-07}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.76769
[1]	validation-auc:0.768654
[2]	validation-auc:0.769636
[3]	validation-auc:0.769801
[4]	validation-auc:0.769772
[5]	validation-auc:0.770086
[6]	validation-auc:0.77009
[7]	validation-auc:0.77018
[8]	validation-auc:0.770254
[9]	validation-auc:0.770373


[32m[I 2022-11-21 01:21:09,384][0m Trial 83 finished with value: 0.717140055614107 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.013153838747752986, 'alpha': 7.940567940032073e-08}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.768821
[1]	validation-auc:0.768071
[2]	validation-auc:0.769414
[3]	validation-auc:0.770135
[4]	validation-auc:0.77014
[5]	validation-auc:0.769969
[6]	validation-auc:0.769293
[7]	validation-auc:0.769098
[8]	validation-auc:0.768646
[9]	validation-auc:0.768405


[32m[I 2022-11-21 01:21:09,910][0m Trial 84 finished with value: 0.7123831441074917 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.002559986979645646, 'alpha': 1.0336203550068813e-08}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.765216
[1]	validation-auc:0.766062
[2]	validation-auc:0.764333
[3]	validation-auc:0.763625
[4]	validation-auc:0.762447


[32m[I 2022-11-21 01:21:10,268][0m Trial 85 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.767993
[1]	validation-auc:0.769824
[2]	validation-auc:0.770111
[3]	validation-auc:0.770285
[4]	validation-auc:0.76999
[5]	validation-auc:0.769862
[6]	validation-auc:0.769892
[7]	validation-auc:0.769956
[8]	validation-auc:0.770176
[9]	validation-auc:0.770162


[32m[I 2022-11-21 01:21:10,783][0m Trial 86 finished with value: 0.7184585439340506 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.009980134405254401, 'alpha': 1.5361898825545013e-07}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.735439
[1]	validation-auc:0.735439
[2]	validation-auc:0.735439
[3]	validation-auc:0.735439
[4]	validation-auc:0.735439


[32m[I 2022-11-21 01:21:11,267][0m Trial 87 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.718412
[1]	validation-auc:0.723412
[2]	validation-auc:0.725767
[3]	validation-auc:0.725351
[4]	validation-auc:0.725116


[32m[I 2022-11-21 01:21:12,175][0m Trial 88 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.760827
[1]	validation-auc:0.763293
[2]	validation-auc:0.765112
[3]	validation-auc:0.765217
[4]	validation-auc:0.765707


[32m[I 2022-11-21 01:21:12,511][0m Trial 89 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.766873
[1]	validation-auc:0.768076
[2]	validation-auc:0.768294
[3]	validation-auc:0.769128
[4]	validation-auc:0.769551
[5]	validation-auc:0.769836
[6]	validation-auc:0.770398
[7]	validation-auc:0.770607
[8]	validation-auc:0.77069
[9]	validation-auc:0.770806


[32m[I 2022-11-21 01:21:13,042][0m Trial 90 finished with value: 0.7123178938172997 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.0234303029274127, 'alpha': 1.4414091096330925e-06}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.767841
[1]	validation-auc:0.768873
[2]	validation-auc:0.769217
[3]	validation-auc:0.769222
[4]	validation-auc:0.76957
[5]	validation-auc:0.769581
[6]	validation-auc:0.769747
[7]	validation-auc:0.769853
[8]	validation-auc:0.770118
[9]	validation-auc:0.77018


[32m[I 2022-11-21 01:21:13,548][0m Trial 91 finished with value: 0.7183079467730616 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.011338261816858418, 'alpha': 4.1861785060438675e-08}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.764065
[1]	validation-auc:0.766123
[2]	validation-auc:0.76883
[3]	validation-auc:0.769816
[4]	validation-auc:0.769665
[5]	validation-auc:0.769945
[6]	validation-auc:0.77011
[7]	validation-auc:0.769953
[8]	validation-auc:0.769894
[9]	validation-auc:0.769974


[32m[I 2022-11-21 01:21:14,073][0m Trial 92 finished with value: 0.7165376033733765 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.008794613532266328, 'alpha': 3.292184652508733e-08}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.768622
[1]	validation-auc:0.769272
[2]	validation-auc:0.76954
[3]	validation-auc:0.769667
[4]	validation-auc:0.769295
[5]	validation-auc:0.769298
[6]	validation-auc:0.769163
[7]	validation-auc:0.768924
[8]	validation-auc:0.768837


[32m[I 2022-11-21 01:21:14,568][0m Trial 93 pruned. Trial was pruned at iteration 9.[0m


[0]	validation-auc:0.764364
[1]	validation-auc:0.766139
[2]	validation-auc:0.767138
[3]	validation-auc:0.767875
[4]	validation-auc:0.768367


[32m[I 2022-11-21 01:21:14,935][0m Trial 94 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.767286
[1]	validation-auc:0.769001
[2]	validation-auc:0.769379
[3]	validation-auc:0.769374
[4]	validation-auc:0.769573
[5]	validation-auc:0.769779
[6]	validation-auc:0.769918
[7]	validation-auc:0.770138
[8]	validation-auc:0.770314
[9]	validation-auc:0.770467


[32m[I 2022-11-21 01:21:15,437][0m Trial 95 finished with value: 0.7134573572182465 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.017456717983270385, 'alpha': 2.4769328031545895e-07}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.768924
[1]	validation-auc:0.769156
[2]	validation-auc:0.769013
[3]	validation-auc:0.769511
[4]	validation-auc:0.768783
[5]	validation-auc:0.768233
[6]	validation-auc:0.767903
[7]	validation-auc:0.767936
[8]	validation-auc:0.767711


[32m[I 2022-11-21 01:21:15,963][0m Trial 96 pruned. Trial was pruned at iteration 9.[0m


[0]	validation-auc:0.762008
[1]	validation-auc:0.765984
[2]	validation-auc:0.767483
[3]	validation-auc:0.768564
[4]	validation-auc:0.769185
[5]	validation-auc:0.769573
[6]	validation-auc:0.769877
[7]	validation-auc:0.770117
[8]	validation-auc:0.770302
[9]	validation-auc:0.770438


[32m[I 2022-11-21 01:21:16,469][0m Trial 97 finished with value: 0.7094422384843413 and parameters: {'objective': 'binary:logistic', 'booster': 'gblinear', 'lambda': 0.04219669955805191, 'alpha': 4.199677393732778e-08}. Best is trial 71 with value: 0.719083700223123.[0m


[0]	validation-auc:0.763812
[1]	validation-auc:0.767552
[2]	validation-auc:0.768592
[3]	validation-auc:0.768712
[4]	validation-auc:0.768934


[32m[I 2022-11-21 01:21:16,819][0m Trial 98 pruned. Trial was pruned at iteration 5.[0m


[0]	validation-auc:0.768399
[1]	validation-auc:0.769213
[2]	validation-auc:0.769386
[3]	validation-auc:0.769642
[4]	validation-auc:0.769361
[5]	validation-auc:0.769183
[6]	validation-auc:0.76911
[7]	validation-auc:0.769117
[8]	validation-auc:0.76917


[32m[I 2022-11-21 01:21:17,316][0m Trial 99 pruned. Trial was pruned at iteration 9.[0m


Number of finished trials: 100
Best trial:
  Value: 0.719083700223123
  Params: 
    objective: binary:logistic
    booster: gblinear
    lambda: 0.010670271263133628
    alpha: 1.6193422152528249e-07


In [16]:
#Fit XGBoost.

from xgboost import XGBClassifier

xgb = XGBClassifier(**xgb_params)

xgb.fit(train_x, train_y)

XGBClassifier(alpha=1.6193422152528249e-07, booster='gblinear',
              lambda=0.010670271263133628)

In [17]:
#Make predictions on the test set based on the trained XGBoost model.

preds_xgb = xgb.predict(x_test)

probs_xgb = xgb.predict_proba(x_test)
probs_xgb = probs_xgb[:, 1]

In [18]:
#Evaluate XGBoost model.

xgb_precision = precision_score(preds_xgb,y_test)
xgb_recall = recall_score(preds_xgb,y_test)
xgb_f1 = f1_score(preds_xgb,y_test)
xgb_acc = accuracy_score(preds_xgb,y_test)   
xgb_mcc = matthews_corrcoef(y_test, preds_xgb)
xgb_auroc = roc_auc_score(y_test, probs_xgb)

print("Precision: %.3f" % (xgb_precision))
print("Recall: %.3f" % (xgb_recall))
print("F1 Score: %.3f" %(xgb_f1))
print('Accuracy: %.3f' % (xgb_acc))
print('MCC: %.3f' % (xgb_mcc))
print('AUROC: %.3f' % (xgb_auroc))

Precision: 0.672
Recall: 0.213
F1 Score: 0.323
Accuracy: 0.694
MCC: 0.242
AUROC: 0.756


In [19]:
#Evaluate XGBoost model (PRC and AUPRC).

xgb_precision, xgb_recall, _ = precision_recall_curve(y_test, probs_xgb)
xgb_auprc = auc(xgb_recall, xgb_precision)

print('AUPRC: %.3f' % (xgb_auprc))

AUPRC: 0.257


In [20]:
#Recalculate precision and recall for calculation purposes.

xgb_precision = precision_score(preds_xgb,y_test)
xgb_recall = recall_score(preds_xgb,y_test)

xgb_results = [xgb_precision, xgb_recall, xgb_f1, xgb_acc, xgb_mcc, xgb_auroc, xgb_auprc]

In [21]:
#Recalculate precision recall curve for plotting purposes.

xgb_precision, xgb_recall, _ = precision_recall_curve(y_test, probs_xgb)

# LightGBM

In [22]:
#Hyperparameter tuning for LightGBM.

def objective(trial):
    dtrain = lgb.Dataset(train_x, label=train_y)

    param = {
        "objective":  trial.suggest_categorical("objective", ["binary"]),
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type":  trial.suggest_categorical("boosting_type", ["gbdt"]),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    gbm = lgb.train(param, dtrain)
    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    auc = sklearn.metrics.roc_auc_score(valid_y, pred_labels)
    return auc


if __name__ == "__main__":
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))
        
    lgb_params = {}
    
    for key, value in trial.params.items():
        lgb_params[key] = value

[32m[I 2022-11-21 01:21:20,336][0m A new study created in memory with name: no-name-c814529b-47c5-4507-9747-0800cca0a680[0m
[32m[I 2022-11-21 01:21:23,487][0m Trial 0 finished with value: 0.5430843341557667 and parameters: {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 0.004071459103501498, 'lambda_l2': 0.0004727702953210512, 'num_leaves': 95, 'feature_fraction': 0.7412018375878764, 'bagging_fraction': 0.8776030189908115, 'bagging_freq': 6, 'min_child_samples': 75}. Best is trial 0 with value: 0.5430843341557667.[0m
[32m[I 2022-11-21 01:21:26,040][0m Trial 1 finished with value: 0.5364264514755341 and parameters: {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 0.0015133838972999627, 'lambda_l2': 0.37037887329047386, 'num_leaves': 78, 'feature_fraction': 0.8342954134394472, 'bagging_fraction': 0.7126752089317332, 'bagging_freq': 2, 'min_child_samples': 43}. Best is trial 0 with value: 0.5430843341557667.[0m
[32m[I 2022-11-21 01:21:28,021][0m Trial

Number of finished trials: 100
Best trial:
  Value: 0.7014286633713364
  Params: 
    objective: binary
    boosting_type: gbdt
    lambda_l1: 2.4979101276990208
    lambda_l2: 3.039019408265432e-08
    num_leaves: 2
    feature_fraction: 0.49911918163841645
    bagging_fraction: 0.6263675478864141
    bagging_freq: 7
    min_child_samples: 94


In [23]:
#Fit LightGBM.

import lightgbm as lgb

lgb = lgb.LGBMClassifier(**lgb_params)

lgb.fit(train_x, train_y)

LGBMClassifier(bagging_fraction=0.6263675478864141, bagging_freq=7,
               feature_fraction=0.49911918163841645,
               lambda_l1=2.4979101276990208, lambda_l2=3.039019408265432e-08,
               min_child_samples=94, num_leaves=2, objective='binary')

In [24]:
#Make predictions on the test set based on the trained model.

preds_lgb = lgb.predict(x_test)

probs_lgb = lgb.predict_proba(x_test)
probs_lgb = probs_lgb[:, 1]

In [25]:
#Evaluate LightGBM model.

lgb_precision = precision_score(preds_lgb,y_test)
lgb_recall = recall_score(preds_lgb,y_test)
lgb_f1 = f1_score(preds_lgb,y_test)
lgb_acc = accuracy_score(preds_lgb,y_test)   
lgb_mcc = matthews_corrcoef(y_test, preds_lgb)
lgb_auroc = roc_auc_score(y_test, probs_lgb)
                          
print("Precision: %.3f" % (lgb_precision))
print("Recall: %.3f" % (lgb_recall))
print("F1 Score: %.3f" %(lgb_f1))
print('Accuracy: %.3f' % (lgb_acc))
print('MCC: %.3f' % (lgb_mcc))
print('AUROC: %.3f' % (lgb_auroc))

Precision: 0.831
Recall: 0.193
F1 Score: 0.313
Accuracy: 0.605
MCC: 0.254
AUROC: 0.764


In [26]:
#Evaluate LightGBM model (PRC and AUPRC).

lgb_precision, lgb_recall, _ = precision_recall_curve(y_test, probs_lgb)
lgb_auprc = auc(lgb_recall, lgb_precision)

print('AUPRC: %.3f' % (lgb_auprc))

AUPRC: 0.252


In [27]:
#Recalculate precision and recall for calculation purposes.

lgb_precision = precision_score(preds_lgb,y_test)
lgb_recall = recall_score(preds_lgb,y_test)

lgb_results = [lgb_precision, lgb_recall, lgb_f1, lgb_acc, lgb_mcc, lgb_auroc, lgb_auprc]

In [28]:
#Recalculate precision recall curve for plotting purposes.

lgb_precision, lgb_recall, _ = precision_recall_curve(y_test, probs_lgb)

# CatBoost

In [29]:
#Hyperparameter tuning for CatBoost.

from optuna.integration import CatBoostPruningCallback

def objective(trial: optuna.Trial) -> float:

    param = {
        "objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 12),
        "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        "bootstrap_type": trial.suggest_categorical(
            "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
        ),
        "used_ram_limit": "3gb",
        "eval_metric": "AUC",
    }

    if param["bootstrap_type"] == "Bayesian":
        param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif param["bootstrap_type"] == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.1, 1, log=True)

    gbm = cb.CatBoostClassifier(**param)

    pruning_callback = CatBoostPruningCallback(trial, "AUC")
    gbm.fit(
        train_x,
        train_y,
        eval_set=[(valid_x, valid_y)],
        verbose=0,
        early_stopping_rounds=100,
        callbacks=[pruning_callback],
    )

    # evoke pruning manually.
    pruning_callback.check_pruned()

    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    auc = sklearn.metrics.roc_auc_score(valid_y, pred_labels)

    return auc

if __name__ == "__main__":
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=5), direction="maximize"
    )
    study.optimize(objective, n_trials=100, timeout=600)

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    cb_params = {}
    
    for key, value in trial.params.items():
        cb_params[key] = value

[32m[I 2022-11-21 01:24:51,413][0m A new study created in memory with name: no-name-419ae43a-2d91-4481-8dd8-dad869720be6[0m
CatBoostPruningCallback is experimental (supported from v3.0.0). The interface can change in the future.
[32m[I 2022-11-21 01:25:01,487][0m Trial 0 finished with value: 0.7093627425167583 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.02966096210335524, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 5.215985307677363}. Best is trial 0 with value: 0.7093627425167583.[0m
CatBoostPruningCallback is experimental (supported from v3.0.0). The interface can change in the future.
[32m[I 2022-11-21 01:25:05,674][0m Trial 1 finished with value: 0.7088255405662199 and parameters: {'objective': 'CrossEntropy', 'colsample_bylevel': 0.04461505488626063, 'depth': 11, 'boosting_type': 'Plain', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 9.992787568810996}. Best is trial 0 with value: 0.709362742516758

Number of finished trials: 100
Best trial:
  Value: 0.7161540512289822
  Params: 
    objective: CrossEntropy
    colsample_bylevel: 0.05888279602035283
    depth: 5
    boosting_type: Plain
    bootstrap_type: Bernoulli
    subsample: 0.14630926431770105


In [30]:
#Fit CatBoost.

from catboost import CatBoostClassifier

cb = CatBoostClassifier(**cb_params)

cb.fit(train_x, train_y)

0:	learn: 0.6917292	total: 8.5ms	remaining: 8.49s
1:	learn: 0.6819312	total: 34.1ms	remaining: 17s
2:	learn: 0.6795486	total: 63.2ms	remaining: 21s
3:	learn: 0.6700548	total: 86.5ms	remaining: 21.5s
4:	learn: 0.6681464	total: 110ms	remaining: 21.9s
5:	learn: 0.6671527	total: 132ms	remaining: 21.9s
6:	learn: 0.6660488	total: 156ms	remaining: 22.1s
7:	learn: 0.6641832	total: 179ms	remaining: 22.2s
8:	learn: 0.6639512	total: 188ms	remaining: 20.8s
9:	learn: 0.6633325	total: 203ms	remaining: 20s
10:	learn: 0.6551065	total: 225ms	remaining: 20.2s
11:	learn: 0.6542789	total: 252ms	remaining: 20.8s
12:	learn: 0.6537109	total: 277ms	remaining: 21.1s
13:	learn: 0.6460765	total: 307ms	remaining: 21.6s
14:	learn: 0.6385497	total: 338ms	remaining: 22.2s
15:	learn: 0.6380351	total: 360ms	remaining: 22.1s
16:	learn: 0.6370556	total: 384ms	remaining: 22.2s
17:	learn: 0.6370534	total: 398ms	remaining: 21.7s
18:	learn: 0.6309788	total: 419ms	remaining: 21.6s
19:	learn: 0.6304138	total: 444ms	remaining:

<catboost.core.CatBoostClassifier at 0x7ff8ea8d7d50>

In [31]:
#Make predictions on the test set based on the trained model.

preds_cb = cb.predict(x_test)

probs_cb = cb.predict_proba(x_test)
probs_cb = probs_cb[:, 1]

In [32]:
#Evaluate CatBoost model.

cb_precision = precision_score(preds_cb,y_test)
cb_recall = recall_score(preds_cb,y_test)
cb_f1 = f1_score(preds_cb,y_test)
cb_acc = accuracy_score(preds_cb,y_test)   
cb_mcc = matthews_corrcoef(y_test, preds_cb)
cb_auroc = roc_auc_score(y_test, probs_cb)
                          
print("Precision: %.3f" % (cb_precision))
print("Recall: %.3f" % (cb_recall))
print("F1 Score: %.3f" %(cb_f1))
print('Accuracy: %.3f' % (cb_acc))
print('MCC: %.3f' % (cb_mcc))
print('AUROC: %.3f' % (cb_auroc))

Precision: 0.578
Recall: 0.219
F1 Score: 0.318
Accuracy: 0.731
MCC: 0.226
AUROC: 0.749


In [33]:
#Evaluate XGBoost model (PRC and AUPRC).

cb_precision, cb_recall, _ = precision_recall_curve(y_test, probs_cb)
cb_auprc = auc(cb_recall, cb_precision)

print('AUPRC: %.3f' % (cb_auprc))

AUPRC: 0.242


In [34]:
#Recalculate precision and recall for calculation purposes.

cb_precision = precision_score(preds_cb,y_test)
cb_recall = recall_score(preds_cb,y_test)

cb_results = [cb_precision, cb_recall, cb_f1, cb_acc, cb_mcc, cb_auroc, cb_auprc]

In [35]:
#Recalculate precision recall curve for plotting purposes.

cb_precision, cb_recall, _ = precision_recall_curve(y_test, probs_cb)

# Random Forest

In [36]:
#Hyperparameter tuning for Random Forest.

def objective(trial):
    
    param = {
        "criterion": trial.suggest_categorical("criterion", ["gini", "entropy"]),
        "bootstrap": trial.suggest_categorical("bootstrap", ["auto", "sqrt"]),
        "max_features": trial.suggest_categorical("max_features", ["auto", "sqrt","log2", None]),
        "max_depth": trial.suggest_int("max_depth", 1, 100),
        "n_estimators": trial.suggest_int("n_estimators", 100, 2000, 100),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 4, 1),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 10, 1),
    }

    rf = RandomForestClassifier(**param)

    rf.fit(
        train_x,
        train_y,
    )

    preds = rf.predict(valid_x)
    pred_labels = np.rint(preds)
    auc = sklearn.metrics.roc_auc_score(valid_y, pred_labels)

    return auc


if __name__ == "__main__":
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=100, timeout=600)

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))
        
    rf_params = {}
    
    for key, value in trial.params.items():
        rf_params[key] = value

[32m[I 2022-11-21 01:26:48,849][0m A new study created in memory with name: no-name-61227c0d-9897-4515-ba14-d67c2aff035d[0m
[32m[I 2022-11-21 01:27:36,450][0m Trial 0 finished with value: 0.6376440244343895 and parameters: {'criterion': 'gini', 'bootstrap': 'auto', 'max_features': 'log2', 'max_depth': 35, 'n_estimators': 500, 'min_samples_leaf': 3, 'min_samples_split': 5}. Best is trial 0 with value: 0.6376440244343895.[0m
[32m[I 2022-11-21 01:49:23,202][0m Trial 1 finished with value: 0.517444531529627 and parameters: {'criterion': 'entropy', 'bootstrap': 'auto', 'max_features': None, 'max_depth': 63, 'n_estimators': 1600, 'min_samples_leaf': 4, 'min_samples_split': 7}. Best is trial 0 with value: 0.6376440244343895.[0m


Number of finished trials: 2
Best trial:
  Value: 0.6376440244343895
  Params: 
    criterion: gini
    bootstrap: auto
    max_features: log2
    max_depth: 35
    n_estimators: 500
    min_samples_leaf: 3
    min_samples_split: 5


In [37]:
#Fit Random Forest.

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(**rf_params)

rf.fit(train_x, train_y)

RandomForestClassifier(bootstrap='auto', max_depth=35, max_features='log2',
                       min_samples_leaf=3, min_samples_split=5,
                       n_estimators=500)

In [38]:
#Make predictions on the test set based on the trained model.

preds_rf = rf.predict(x_test)

probs_rf = rf.predict_proba(x_test)
probs_rf = probs_rf[:, 1]

In [39]:
#Evaluate Random Forest model.

rf_precision = precision_score(preds_rf,y_test)
rf_recall = recall_score(preds_rf,y_test)
rf_f1 = f1_score(preds_rf,y_test)
rf_acc = accuracy_score(preds_rf,y_test)   
rf_mcc = matthews_corrcoef(y_test, preds_rf)
rf_auroc = roc_auc_score(y_test, probs_rf)
                          
print("Precision: %.3f" % (rf_precision))
print("Recall: %.3f" % (rf_recall))
print("F1 Score: %.3f" %(rf_f1))
print('Accuracy: %.3f' % (rf_acc))
print('MCC: %.3f' % (rf_mcc))
print('AUROC: %.3f' % (rf_auroc))

Precision: 0.403
Recall: 0.252
F1 Score: 0.310
Accuracy: 0.806
MCC: 0.212
AUROC: 0.759


In [40]:
#Evaluate Random Forest model (PRC and AUPRC).

rf_precision, rf_recall, _ = precision_recall_curve(y_test, probs_rf)
rf_auprc = auc(rf_recall, rf_precision)

print('AUPRC: %.3f' % (rf_auprc))

AUPRC: 0.234


In [41]:
#Recalculate precision and recall for calculation purposes.

rf_precision = precision_score(preds_rf,y_test)
rf_recall = recall_score(preds_rf,y_test)

rf_results = [rf_precision, rf_recall, rf_f1, rf_acc, rf_mcc, rf_auroc, rf_auprc]

In [42]:
#Recalculate precision recall curve for plotting purposes.

rf_precision, rf_recall, _ = precision_recall_curve(y_test, probs_rf)

# ROC, PR, and Calibration Plots

In [None]:
f = pyplot.figure()
f.set_figwidth(12)
f.set_figheight(12)

xgb_fpr, xgb_tpr, _ = roc_curve(y_test, probs_xgb)
pyplot.plot(xgb_fpr, xgb_tpr, label='XGBoost AUROC: {:.3f}'.format(xgb_auroc), color='red')

lgb_fpr, lgb_tpr, _ = roc_curve(y_test, probs_lgb)
pyplot.plot(lgb_fpr, lgb_tpr, label='LightGBM AUROC: {:.3f}'.format(lgb_auroc), color='darkblue')

cb_fpr, cb_tpr, _ = roc_curve(y_test, probs_cb)
pyplot.plot(cb_fpr, cb_tpr, label='CatBoost AUROC: {:.3f}'.format(cb_auroc), color = 'darkgreen')

rf_fpr, rf_tpr, _ = roc_curve(y_test, probs_rf)
pyplot.plot(rf_fpr, rf_tpr, label='Random Forest AUROC: {:.3f}'.format(rf_auroc), color = 'orange')

pyplot.plot([0, 1], [0, 1], linestyle = '--')

pyplot.title('Receiver Operating Characteristic Curve', loc='center', fontsize = 20, fontweight = 'heavy', pad = 20)
pyplot.xlabel('False Positive Rate', fontsize = 16, labelpad = 10)
pyplot.ylabel('True Positive Rate', fontsize = 16, labelpad = 10)
pyplot.tick_params(axis="y",direction="out")
pyplot.tick_params(axis="x",direction="out")

leg = pyplot.legend(loc = 'lower right', fontsize = 12)

pyplot.savefig('/content/drive/MyDrive/ACDF/los_roc_us.png', dpi=300)
pyplot.show()

In [None]:
f = pyplot.figure()
f.set_figwidth(12)
f.set_figheight(12)

pyplot.plot(xgb_recall, xgb_precision, label='XGBoost AUPRC: {:.3f}'.format(xgb_auprc), color = 'red')
pyplot.plot(lgb_recall, lgb_precision, label='LightGBM AUPRC: {:.3f}'.format(lgb_auprc), color = 'darkblue')
pyplot.plot(cb_recall, cb_precision, label='CatBoost AUPRC: {:.3f}'.format(cb_auprc), color = 'darkgreen')
pyplot.plot(rf_recall, rf_precision, label='Random Forest AUPRC: {:.3f}'.format(rf_auprc), color = 'orange')


pyplot.title('Precision Recall Curve', loc='center', fontsize = 20, fontweight = 'heavy', pad = 20)
pyplot.xlabel('Recall', fontsize = 16, labelpad = 10)
pyplot.ylabel('Precision', fontsize = 16, labelpad = 10)
leg = pyplot.legend(loc = 'lower right', fontsize = 12)

pyplot.savefig('/content/drive/MyDrive/ACDF/los_prc_us.png', dpi=300)
pyplot.show()

In [None]:
f = pyplot.figure()
f.set_figwidth(12)
f.set_figheight(12)

x_cal_xgb, y_cal_xgb = calibration_curve(y_test, probs_xgb, n_bins = 10, normalize = True)
x_cal_lgb, y_cal_lgb = calibration_curve(y_test, probs_lgb, n_bins = 10, normalize = True)
x_cal_cb, y_cal_cb = calibration_curve(y_test, probs_cb, n_bins = 10, normalize = True)
x_cal_rf, y_cal_rf = calibration_curve(y_test, probs_rf, n_bins = 10, normalize = True)


pyplot.plot([0, 1], [0, 1], linestyle = '--', label = 'Ideally Calibrated')

pyplot.plot(y_cal_xgb, x_cal_xgb, label = 'XGBoost', color = 'red')
pyplot.plot(y_cal_lgb, x_cal_lgb, label = 'LightGBM', color = 'darkblue')
pyplot.plot(y_cal_cb, x_cal_cb, label = 'CatBoost', color = 'darkgreen')
pyplot.plot(y_cal_rf, x_cal_xgb, label = 'Random Forest', color = 'orange')

pyplot.title('Calibration', loc='center', fontsize = 20, fontweight = 'heavy', pad = 20)
leg = pyplot.legend(loc = 'lower right', fontsize = 12)
pyplot.xlabel('Average Predicted Probability in each bin', fontsize = 16, labelpad = 10)
pyplot.ylabel('Ratio of positives', fontsize = 16, labelpad = 10)

pyplot.savefig('/content/drive/MyDrive/ACDF/los_cal_us.png', dpi=300)
pyplot.show()

# Results Summary

In [None]:
results = {'XGBoost':xgb_results, 'LightGBM':lgb_results, 'CatBoost':cb_results, 'Random Forest':rf_results}

results = pd.DataFrame(results, columns = ['XGBoost', 'LightGBM', 'CatBoost', 'Random Forest'])

results

In [None]:
results = pd.DataFrame({'XGBoost':xgb_results, 'LightGBM':lgb_results, 'CatBoost':cb_results, 'Random Forest':rf_results})

results = results.T

results.columns = ['Precision', 'Recall', 'F1', 'Accuracy', 'MCC', 'AUROC', 'AUPRC']

results.to_csv('/content/drive/MyDrive/ACDF/los_results_us.csv')

results

# SHAP Plots

In [None]:
# Fits the explainer
xgb_explainer = shap.Explainer(xgb.predict, x_test)
lgb_explainer = shap.Explainer(lgb.predict, x_test)
cb_explainer = shap.Explainer(cb.predict, x_test)
rf_explainer = shap.Explainer(rf.predict, x_test)

# Calculates the SHAP values - It takes some time
xgb_shap_values = xgb_explainer(x_test)
lgb_shap_values = lgb_explainer(x_test)
cb_shap_values = cb_explainer(x_test)
rf_shap_values = rf_explainer(x_test)

In [None]:
from scipy.special import softmax

def print_feature_importances_shap_values(shap_values, features):
    '''
    Prints the feature importances based on SHAP values in an ordered way
    shap_values -> The SHAP values calculated from a shap.Explainer object
    features -> The name of the features, on the order presented to the explainer
    '''
    # Calculates the feature importance (mean absolute shap value) for each feature
    importances = []
    for i in range(shap_values.values.shape[1]):
        importances.append(np.mean(np.abs(shap_values.values[:, i])))
    # Calculates the normalized version
    importances_norm = softmax(importances)
    # Organize the importances and columns in a dictionary
    feature_importances = {fea: imp for imp, fea in zip(importances, features)}
    feature_importances_norm = {fea: imp for imp, fea in zip(importances_norm, features)}
    # Sorts the dictionary
    feature_importances = {k: v for k, v in sorted(feature_importances.items(), key=lambda item: item[1], reverse = True)}
    feature_importances_norm= {k: v for k, v in sorted(feature_importances_norm.items(), key=lambda item: item[1], reverse = True)}
    # Prints the feature importances
    for k, v in feature_importances.items():
        print(f"{k} -> {v:.4f} (softmax = {feature_importances_norm[k]:.4f})")

In [None]:
shap.plots.bar(xgb_shap_values, max_display = 10, show=False)
pyplot.savefig('/content/drive/MyDrive/ACDF/los_shap_xgb.png', dpi=300)

In [None]:
shap.plots.bar(lgb_shap_values, max_display = 10, show=False)
pyplot.savefig('/content/drive/MyDrive/ACDF/los_shap_lgb.png', dpi=300)

In [None]:
shap.plots.bar(cb_shap_values, max_display = 10, show=False)
pyplot.savefig('/content/drive/MyDrive/ACDF/los_shap_cb.png', dpi=300)

In [None]:
shap.plots.bar(rf_shap_values, max_display = 10, show=False)
pyplot.savefig('/content/drive/MyDrive/ACDF/los_shap_rf.png', dpi=300)