In [None]:
import pandas as pd
import numpy as np
import time
import random
import matplotlib.pyplot as plt
import pickle
import seaborn as sns
# Global seaborn look for every figure below: white background, reversed "rocket" palette
sns.set_theme(style="white", palette="rocket_r")

# For reproducibility: seeds NumPy's global RNG only
# (the stdlib `random` module imported above is not seeded here).
np.random.seed(42)

In [None]:
# 6 hour mortality prediction: read the train/test arrays of both cohorts
# from disk, then stack each cohort's feature splits row-wise.

# eICU
X_train_eICU, X_test_eICU = (
    np.load('Revised Experiments/X_train_static_6.npy'),
    np.load('Revised Experiments/X_test_static_6.npy'),
)
y_train_eICU, y_test_eICU = (
    np.load('Revised Experiments/y_train_static_6.npy'),
    np.load('Revised Experiments/y_test_static_6.npy'),
)

# MIMIC-IV
X_train_MIMICIV, X_test_MIMICIV = (
    np.load('MIMIC-IV/MIMICIV_data/X_train_static_6.npy'),
    np.load('MIMIC-IV/MIMICIV_data/X_test_static_6.npy'),
)
y_train_MIMICIV, y_test_MIMICIV = (
    np.load('MIMIC-IV/MIMICIV_data/y_train_static_6.npy'),
    np.load('MIMIC-IV/MIMICIV_data/y_test_static_6.npy'),
)

# One feature matrix per cohort: train rows first, test rows after
X_eICU = np.vstack([X_train_eICU, X_test_eICU])
X_MIMICIV = np.vstack([X_train_MIMICIV, X_test_MIMICIV])

In [None]:
# Merge each cohort's train/test labels into a single (n_samples, 1) column,
# using the same train-then-test order as the stacked feature matrices.
y_eICU = np.concatenate(
    [y_train_eICU.reshape(-1, 1), y_test_eICU.reshape(-1, 1)], axis=0)
y_MIMICIV = np.concatenate(
    [y_train_MIMICIV.reshape(-1, 1), y_test_MIMICIV.reshape(-1, 1)], axis=0)

In [None]:
# Attach the labels as the last column of each feature matrix, so the row
# filters applied further down remove features and labels together.
# Note: re-running this cell appends the label column a second time.
label_column_eICU = y_eICU.reshape(-1, 1)
label_column_MIMICIV = y_MIMICIV.reshape(-1, 1)
X_eICU = np.concatenate([X_eICU, label_column_eICU], axis=1)
X_MIMICIV = np.concatenate([X_MIMICIV, label_column_MIMICIV], axis=1)

In [None]:
# Sanity check: shapes of both matrices after the label column was appended
X_eICU.shape, X_MIMICIV.shape

In [None]:
# Age: restrict MIMIC-IV to ages 18..89 (column 2), then compare the histograms.
# Rows with a NaN age are kept, because both comparisons are False for NaN.
age_mimic = X_MIMICIV[:, 2]
age_out_of_range = (age_mimic < 18) | (age_mimic > 89)
X_MIMICIV = X_MIMICIV[~age_out_of_range]

# Left panel: eICU (red); right panel: MIMIC-IV (green)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 4], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 2], ax=ax_mimic, color="green")
plt.xlabel('Age', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# Lactate mean: eICU column 104 (red, left) vs MIMIC-IV column 52 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 104], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 52], ax=ax_mimic, color="green")
plt.xlabel('Lactate', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# SBP mean: eICU column 65 (red, left) vs MIMIC-IV column 115 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 65], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 115], ax=ax_mimic, color="green")
plt.xlabel('SBP', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# SBP std: eICU column 68 (red, left) vs MIMIC-IV column 152 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 68], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 152], ax=ax_mimic, color="green")
plt.xlabel('SBP_std', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# Glucose mean: first drop MIMIC-IV rows whose value (column 107) exceeds 500.
# NaN rows are kept, because NaN > 500 evaluates to False.
glucose_too_high = X_MIMICIV[:, 107] > 500
X_MIMICIV = X_MIMICIV[~glucose_too_high]

# Left panel: eICU column 103 (red); right panel: MIMIC-IV column 107 (green)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 103], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 107], ax=ax_mimic, color="green")
plt.xlabel('Glucose', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# Glucose std: eICU column 149 (red, left) vs MIMIC-IV column 144 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 149], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 144], ax=ax_mimic, color="green")
plt.xlabel('Glucose_std', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# WBC: eICU column 73 (red, left) vs MIMIC-IV column 67 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 73], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 67], ax=ax_mimic, color="green")
plt.xlabel('WBC', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# WBC std: eICU column 119 (red, left) vs MIMIC-IV column 97 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 119], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 97], ax=ax_mimic, color="green")
plt.xlabel('WBC_std', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# CANNOT USE THIS ONE, DIFFERENT UNITS
# The MIMIC-IV histogram below is deliberately disabled, so the right-hand panel
# stays empty. Neither column (eICU 75 / MIMIC-IV 64) is part of the feature
# subset selected for modelling later in this notebook.

# Plot and compare Neutrophils (eICU only)
f, axes = plt.subplots(1, 2, sharex=True)
sns.histplot(X_eICU[:, 75], color="red", ax=axes[0])
#sns.histplot(X_MIMICIV[:, 64], color="green", ax=axes[1])
plt.xlabel('Neutrophils', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# RDW: eICU column 93 (red, left) vs MIMIC-IV column 62 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 93], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 62], ax=ax_mimic, color="green")
plt.xlabel('RDW', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# RDW std: first drop eICU rows whose value (column 139) exceeds 5.
# NaN rows are kept, because NaN > 5 evaluates to False.
rdw_std_too_high = X_eICU[:, 139] > 5
X_eICU = X_eICU[~rdw_std_too_high]

# Plot and compare RDW_std: eICU column 139 (red, left) vs MIMIC-IV column 92 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 139], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 92], ax=ax_mimic, color="green")
plt.xlabel('RDW_std', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# Urea nitrogen: eICU column 78 (red, left) vs MIMIC-IV column 66 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 78], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 66], ax=ax_mimic, color="green")
plt.xlabel('Urea Nitrogen', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# Urea nitrogen std: eICU column 124 (red, left) vs MIMIC-IV column 96 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 124], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 96], ax=ax_mimic, color="green")
plt.xlabel('Urea Nitrogen_std', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# Disabled experiment: rescaling the MIMIC-IV AST column by a factor of 5.
#X_MIMICIV[:, 37] = X_MIMICIV[:, 37] * 5

#CANT USE
# NOTE(review): AST (eICU column 77 / MIMIC-IV column 37) is not part of the
# feature subset selected for modelling later on; the disabled rescaling above
# suggests the two cohorts are not on a comparable scale - confirm.

# Plot and compare AST (shown for inspection only)
f, axes = plt.subplots(1, 2, sharex=True)
sns.histplot(X_eICU[:, 77], color="red", ax=axes[0])
sns.histplot(X_MIMICIV[:, 37], color="green", ax=axes[1])
plt.xlabel('AST', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# Bicarbonate: eICU column 99 (red, left) vs MIMIC-IV column 39 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 99], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 39], ax=ax_mimic, color="green")
plt.xlabel('Bicarbonate', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# Bicarbonate std: eICU column 145 (red, left) vs MIMIC-IV column 72 (green, right)
fig, (ax_eicu, ax_mimic) = plt.subplots(nrows=1, ncols=2, sharex=True)
sns.histplot(X_eICU[:, 145], ax=ax_eicu, color="red")
sns.histplot(X_MIMICIV[:, 72], ax=ax_mimic, color="green")
plt.xlabel('Bicarbonate_std', loc='left')
plt.tight_layout()
plt.show()

In [None]:
# Summary statistics (mean, std) for every feature compared above.
# For each feature the eICU line is printed first and the MIMIC-IV line second.
# Age uses the plain mean/std; all other features use the NaN-ignoring variants.
summary_spec = [
    # (heading, eICU column, MIMIC-IV column, mean function, std function)
    ('Age: ', 4, 2, np.mean, np.std),
    ('Lactate: ', 104, 52, np.nanmean, np.nanstd),
    ('SBP: ', 65, 115, np.nanmean, np.nanstd),
    ('Glucose: ', 103, 107, np.nanmean, np.nanstd),
    ('WBC: ', 73, 67, np.nanmean, np.nanstd),
    ('RDW: ', 93, 62, np.nanmean, np.nanstd),
    ('Urea Nitrogen: ', 78, 66, np.nanmean, np.nanstd),
    ('Bicarbonate: ', 99, 39, np.nanmean, np.nanstd),
]

for heading, col_eicu, col_mimic, mean_fn, std_fn in summary_spec:
    print(heading)
    for values in (X_eICU[:, col_eicu], X_MIMICIV[:, col_mimic]):
        print(mean_fn(values), std_fn(values))
    print('################################################')

# Train on eICU and validate on MIMIC-IV

In [None]:
# Pull the labels back out of the last column of each (row-filtered) matrix
y_eICU, y_MIMICIV = X_eICU[:, -1], X_MIMICIV[:, -1]

# Keep a copy of the full MIMIC-IV matrix before the columns are reduced;
# the subgroup evaluation further down indexes into it.
X_MIMICIV_original = X_MIMICIV.copy()

# The 14 features present in both cohorts, listed in matching order
# (position i in one list corresponds to position i in the other).
shared_cols_eICU = [4, 104, 65, 68, 103, 149, 73, 119, 93, 139, 78, 124, 99, 145]
shared_cols_MIMICIV = [2, 52, 115, 152, 107, 144, 67, 97, 62, 92, 66, 96, 39, 72]
X_eICU = X_eICU[:, shared_cols_eICU]
X_MIMICIV = X_MIMICIV[:, shared_cols_MIMICIV]

In [None]:
# Sanity check: reduced feature matrices, eICU labels, and the untouched MIMIC-IV copy
X_eICU.shape, X_MIMICIV.shape, y_eICU.shape, X_MIMICIV_original.shape

In [None]:
from xgboost import XGBClassifier
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from skopt import BayesSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
from imblearn.pipeline import Pipeline
import shap
from sklearn.inspection import permutation_importance
# explicitly require this experimental feature
from sklearn.experimental import enable_iterative_imputer  # noqa
# now you can import normally from sklearn.impute
from sklearn.impute import IterativeImputer
from xgboost import plot_importance

In [None]:
# Validation folds: 5-fold stratified split, shuffled with a fixed seed
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=33,
)

In [None]:
# Iterative imputer (at most 10 rounds, fixed seed) used for the final train/test matrices
imp_mean = IterativeImputer(max_iter=10, random_state=0)

In [None]:
# Learn the imputation model on eICU only, then apply it to both cohorts
imp_mean.fit(X_eICU)
X_train_res, X_test_res = (
    imp_mean.transform(features) for features in (X_eICU, X_MIMICIV)
)

In [None]:
# Model used for tuning: an imputation step ('i') followed by an untuned
# XGBoost classifier ('m'), combined so both are fitted together inside the search.
imputer = IterativeImputer(max_iter=10, random_state=0)
XGBOOSTmodel = XGBClassifier(use_label_encoder=False)
pipeline = Pipeline(steps=[('i', imputer), ('m', XGBOOSTmodel)])

In [None]:
# Distinct label values in the eICU cohort and how often each occurs
np.unique(y_eICU, return_counts=True)

In [None]:
# Class ratio (presumably majority / minority counts from the cell above);
# evaluates to 7.353091140854048, the value used for scale_pos_weight below.
23074/3138

In [None]:
# Distinct label values in the MIMIC-IV cohort and how often each occurs
np.unique(y_MIMICIV, return_counts=True)

In [None]:
# Class ratio for MIMIC-IV (presumably from the counts in the cell above);
# shown for comparison only, it is not used as a model parameter in this notebook section.
1248/162

In [None]:
# Search space for tuning the XGBoost step of the pipeline (6 hour horizon).
# Every key is prefixed with 'm__' so that it is routed to the pipeline step
# named 'm' (the classifier); the imputer step is not tuned.

# Depth of tree: integers 1..12 (built with range so the values are plain ints)
max_depth = list(range(1, 13))
# Number of boosting rounds (trees): 50, 100, ..., 350
n_estimators = list(range(50, 400, 50))
# Learning rate
lr = [0.001, 0.01, 0.1, 0.3, 0.5, 0.7]
# Cap on each tree's weight update; non-zero values can help with imbalanced data
max_delta_step = [0, 1, 3, 5, 7, 10]
# Gamma: minimum loss reduction required to make a split (overfitting control)
min_split_loss = [0, 0.5]

# Class weighting: negatives / positives, derived from the training labels
# rather than copied by hand, so it cannot go stale when the data changes.
n_negative = int(np.sum(np.ravel(y_eICU) == 0))
n_positive = int(np.sum(np.ravel(y_eICU) == 1))
if n_positive == 0:
    raise ValueError("y_eICU contains no positive samples; cannot derive scale_pos_weight")
scale_pos_weight = [n_negative / n_positive]

# Create the grid
param_grid = {
    'm__max_depth': max_depth,
    'm__n_estimators': n_estimators,
    'm__learning_rate': lr,
    'm__max_delta_step': max_delta_step,
    'm__min_split_loss': min_split_loss,
    'm__scale_pos_weight': scale_pos_weight,
}

In [None]:
# Bayesian optimisation over the grid: 50 sampled configurations, each scored
# by AUROC on the stratified folds in `cv`. refit=False because only the best
# parameters are needed here; the final model is trained in a later cell.
# random_state pins the optimiser's sampling so the search no longer depends
# on how much of the global NumPy RNG earlier cells have consumed.
opt = BayesSearchCV(
    pipeline,
    param_grid,
    n_iter=50,
    cv=cv,
    scoring='roc_auc',
    refit=False,
    verbose=1,
    random_state=42,
)
opt.fit(X_eICU, np.ravel(y_eICU))

In [None]:
# Report the winning parameter combination and the score it achieved
best_combo, best_auroc = opt.best_params_, opt.best_score_
print("Best parameter combo:", best_combo)
print("Best AUROC:", best_auroc)

In [None]:
# Final 6 hour model to be tested on the standalone set.
# Hyperparameters are hard-coded here (presumably transcribed from the search output).
XGBOOSTmodel = XGBClassifier(
    max_depth=3,
    n_estimators=200,
    learning_rate=0.1,
    min_split_loss=0.0,
    max_delta_step=0,
    scale_pos_weight=7.353091140854048,
    use_label_encoder=False,
)

In [None]:
# Train on the imputed eICU features, passing the labels as a flat 1-D vector
train_labels = np.ravel(y_eICU)
XGBOOSTmodel.fit(X_train_res, train_labels)

In [None]:
# Evaluate on the imputed MIMIC-IV set; predictions are computed once and reused.
pred_labels = XGBOOSTmodel.predict(X_test_res)
pred_scores = XGBOOSTmodel.predict_proba(X_test_res)[:, 1]  # probability of class 1

print('Results:')
print('AUROC is:', metrics.roc_auc_score(y_MIMICIV, pred_scores))
print('Accuracy is:', metrics.accuracy_score(y_MIMICIV, pred_labels))
print('Average Precision is:', metrics.average_precision_score(y_MIMICIV, pred_scores))
print('Weighted F1 is:', metrics.f1_score(y_MIMICIV, pred_labels, average='weighted'))
print('Sensitivity is:', metrics.recall_score(y_MIMICIV, pred_labels))
print('Balanced accuracy is:', metrics.balanced_accuracy_score(y_MIMICIV, pred_labels))
# Specificity is the recall of the negative class
print('Specificity is:', metrics.recall_score(y_MIMICIV, pred_labels, pos_label=0))
print('#################################################')

In [None]:

# Evaluate only on specific subpopulations of the MIMIC-IV test set.
# Subgroups are selected via columns of X_MIMICIV_original, whose rows line up
# with X_test_res / y_MIMICIV. Column meanings below follow the printed
# headings of this cell (NOTE(review): confirm against the feature dictionary).


def _print_subgroup_metrics(title, y_true, X_sub):
    """Print the evaluation metrics of XGBOOSTmodel for one subgroup.

    title  -- heading printed before the metrics
    y_true -- labels of the subgroup rows
    X_sub  -- imputed feature rows of the subgroup

    Results are reported without undersampling or thresholding. Predictions
    are computed once and reused for every metric; each section ends with a
    separator line.
    """
    pred_labels = XGBOOSTmodel.predict(X_sub)
    pred_scores = XGBOOSTmodel.predict_proba(X_sub)[:, 1]
    print(title)
    print('AUROC is:', metrics.roc_auc_score(y_true, pred_scores))
    print('Accuracy is:', metrics.accuracy_score(y_true, pred_labels))
    print('Average Precision is:', metrics.average_precision_score(y_true, pred_scores))
    print('Weighted F1 is:', metrics.f1_score(y_true, pred_labels, average='weighted'))
    print('Sensitivity is:', metrics.recall_score(y_true, pred_labels))
    print('#################################################')


# Column 1 == 1 -> men, == 0 -> women
men_mask = X_MIMICIV_original[:, 1] == 1
women_mask = X_MIMICIV_original[:, 1] == 0
# Column 12 or column 13 set -> Caucasian
caucasian_mask = (X_MIMICIV_original[:, 12] == 1) | (X_MIMICIV_original[:, 13] == 1)
# Column 9 set -> Black/Hispanic
black_hispanic_mask = X_MIMICIV_original[:, 9] == 1

y_test_men, X_test_men = y_MIMICIV[men_mask], X_test_res[men_mask, :]
_print_subgroup_metrics('Results for (Men)', y_test_men, X_test_men)

y_test_women, X_test_women = y_MIMICIV[women_mask], X_test_res[women_mask, :]
_print_subgroup_metrics('Results for (Women)', y_test_women, X_test_women)

y_test_caucasian, X_test_caucasian = y_MIMICIV[caucasian_mask], X_test_res[caucasian_mask, :]
_print_subgroup_metrics('Results for Caucasian', y_test_caucasian, X_test_caucasian)

y_test_black_hispanic = y_MIMICIV[black_hispanic_mask]
X_test_black_hispanic = X_test_res[black_hispanic_mask, :]
_print_subgroup_metrics('Results for Black/Hispanic', y_test_black_hispanic, X_test_black_hispanic)

In [None]:
# For 12 hour prediction: load both cohorts, rebuild the combined matrices and
# apply the same row filters and feature selection as in the 6 hour experiment.
X_train_eICU, X_test_eICU = (
    np.load('Revised Experiments/X_train_static_12.npy'),
    np.load('Revised Experiments/X_test_static_12.npy'),
)
y_train_eICU, y_test_eICU = (
    np.load('Revised Experiments/y_train_static_12.npy'),
    np.load('Revised Experiments/y_test_static_12.npy'),
)
X_train_MIMICIV, X_test_MIMICIV = (
    np.load('MIMIC-IV/MIMICIV_data/X_train_static_12.npy'),
    np.load('MIMIC-IV/MIMICIV_data/X_test_static_12.npy'),
)
y_train_MIMICIV, y_test_MIMICIV = (
    np.load('MIMIC-IV/MIMICIV_data/y_train_static_12.npy'),
    np.load('MIMIC-IV/MIMICIV_data/y_test_static_12.npy'),
)

# Labels as one column per cohort (train rows first, then test rows)
y_eICU = np.concatenate(
    [y_train_eICU.reshape(-1, 1), y_test_eICU.reshape(-1, 1)], axis=0)
y_MIMICIV = np.concatenate(
    [y_train_MIMICIV.reshape(-1, 1), y_test_MIMICIV.reshape(-1, 1)], axis=0)

# Features stacked in the same order, with the label appended as the last column
X_eICU = np.hstack([np.vstack([X_train_eICU, X_test_eICU]), y_eICU])
X_MIMICIV = np.hstack([np.vstack([X_train_MIMICIV, X_test_MIMICIV]), y_MIMICIV])

# Process features (NaN rows survive every filter: comparisons with NaN are False)
# MIMIC-IV: keep ages 18..89 (column 2), then drop glucose mean (column 107) above 500
age_mimic = X_MIMICIV[:, 2]
X_MIMICIV = X_MIMICIV[~((age_mimic < 18) | (age_mimic > 89))]
X_MIMICIV = X_MIMICIV[~(X_MIMICIV[:, 107] > 500)]
# eICU: drop rows whose RDW std (column 139) exceeds 5
X_eICU = X_eICU[~(X_eICU[:, 139] > 5)]

# Split the labels back out, then keep the 14 features shared by both cohorts
y_eICU, y_MIMICIV = X_eICU[:, -1], X_MIMICIV[:, -1]
shared_cols_eICU = [4, 104, 65, 68, 103, 149, 73, 119, 93, 139, 78, 124, 99, 145]
shared_cols_MIMICIV = [2, 52, 115, 152, 107, 144, 67, 97, 62, 92, 66, 96, 39, 72]
X_eICU = X_eICU[:, shared_cols_eICU]
X_MIMICIV = X_MIMICIV[:, shared_cols_MIMICIV]

In [None]:
# Re-fit the imputer on the 12 hour eICU features, then impute both cohorts with it
imp_mean.fit(X_eICU)
X_train_res, X_test_res = (
    imp_mean.transform(features) for features in (X_eICU, X_MIMICIV)
)

In [None]:
# Fresh tuning pipeline for the 12 hour horizon: imputation step ('i')
# followed by an untuned XGBoost classifier ('m').
imputer = IterativeImputer(max_iter=10, random_state=0)
XGBOOSTmodel = XGBClassifier(use_label_encoder=False)
pipeline = Pipeline(steps=[('i', imputer), ('m', XGBOOSTmodel)])

In [None]:
# Distinct label values in the 12 hour eICU cohort and how often each occurs
np.unique(y_eICU, return_counts=True)

In [None]:
# Class ratio (presumably majority / minority counts from the cell above);
# evaluates to 7.712025316455696, the value used for scale_pos_weight below.
21933/2844

In [None]:
# Search space for tuning the XGBoost step of the pipeline (12 hour horizon).
# Every key is prefixed with 'm__' so that it is routed to the pipeline step
# named 'm' (the classifier); the imputer step is not tuned.

# Depth of tree: integers 1..12 (built with range so the values are plain ints)
max_depth = list(range(1, 13))
# Number of boosting rounds (trees): 50, 100, ..., 350
n_estimators = list(range(50, 400, 50))
# Learning rate
lr = [0.001, 0.01, 0.1, 0.3, 0.5, 0.7]
# Cap on each tree's weight update; non-zero values can help with imbalanced data
max_delta_step = [0, 1, 3, 5, 7, 10]
# Gamma: minimum loss reduction required to make a split (overfitting control)
min_split_loss = [0, 0.5]

# Class weighting: negatives / positives, derived from the training labels
# rather than copied by hand, so it cannot go stale when the data changes.
n_negative = int(np.sum(np.ravel(y_eICU) == 0))
n_positive = int(np.sum(np.ravel(y_eICU) == 1))
if n_positive == 0:
    raise ValueError("y_eICU contains no positive samples; cannot derive scale_pos_weight")
scale_pos_weight = [n_negative / n_positive]

# Create the grid
param_grid = {
    'm__max_depth': max_depth,
    'm__n_estimators': n_estimators,
    'm__learning_rate': lr,
    'm__max_delta_step': max_delta_step,
    'm__min_split_loss': min_split_loss,
    'm__scale_pos_weight': scale_pos_weight,
}

In [None]:
# Bayesian optimisation over the grid: 50 sampled configurations, each scored
# by AUROC on the stratified folds in `cv`. refit=False because only the best
# parameters are needed here; the final model is trained in a later cell.
# random_state pins the optimiser's sampling so the search no longer depends
# on how much of the global NumPy RNG earlier cells have consumed.
opt = BayesSearchCV(
    pipeline,
    param_grid,
    n_iter=50,
    cv=cv,
    scoring='roc_auc',
    refit=False,
    verbose=1,
    random_state=42,
)
opt.fit(X_eICU, np.ravel(y_eICU))

In [None]:
# Report the winning parameter combination and the score it achieved
best_combo, best_auroc = opt.best_params_, opt.best_score_
print("Best parameter combo:", best_combo)
print("Best AUROC:", best_auroc)

In [None]:
# Final 12 hour model to be tested on the standalone set.
# Hyperparameters are hard-coded here (presumably transcribed from the search output).
XGBOOSTmodel = XGBClassifier(
    max_depth=2,
    n_estimators=350,
    learning_rate=0.1,
    min_split_loss=0.0,
    max_delta_step=10,
    scale_pos_weight=7.712025316455696,
    use_label_encoder=False,
)

In [None]:
# Train on the imputed eICU features, passing the labels as a flat 1-D vector
train_labels = np.ravel(y_eICU)
XGBOOSTmodel.fit(X_train_res, train_labels)

In [None]:
# Evaluate on the imputed MIMIC-IV set; predictions are computed once and reused.
pred_labels = XGBOOSTmodel.predict(X_test_res)
pred_scores = XGBOOSTmodel.predict_proba(X_test_res)[:, 1]  # probability of class 1

print('Results:')
print('AUROC is:', metrics.roc_auc_score(y_MIMICIV, pred_scores))
print('Accuracy is:', metrics.accuracy_score(y_MIMICIV, pred_labels))
print('Average Precision is:', metrics.average_precision_score(y_MIMICIV, pred_scores))
print('Weighted F1 is:', metrics.f1_score(y_MIMICIV, pred_labels, average='weighted'))
print('Sensitivity is:', metrics.recall_score(y_MIMICIV, pred_labels))
print('Balanced accuracy is:', metrics.balanced_accuracy_score(y_MIMICIV, pred_labels))
# Specificity is the recall of the negative class
print('Specificity is:', metrics.recall_score(y_MIMICIV, pred_labels, pos_label=0))
print('#################################################')

In [None]:
# For 18 hour prediction: load both cohorts, rebuild the combined matrices and
# apply the same row filters and feature selection as in the 6 hour experiment.
X_train_eICU, X_test_eICU = (
    np.load('Revised Experiments/X_train_static_18.npy'),
    np.load('Revised Experiments/X_test_static_18.npy'),
)
y_train_eICU, y_test_eICU = (
    np.load('Revised Experiments/y_train_static_18.npy'),
    np.load('Revised Experiments/y_test_static_18.npy'),
)
X_train_MIMICIV, X_test_MIMICIV = (
    np.load('MIMIC-IV/MIMICIV_data/X_train_static_18.npy'),
    np.load('MIMIC-IV/MIMICIV_data/X_test_static_18.npy'),
)
y_train_MIMICIV, y_test_MIMICIV = (
    np.load('MIMIC-IV/MIMICIV_data/y_train_static_18.npy'),
    np.load('MIMIC-IV/MIMICIV_data/y_test_static_18.npy'),
)

# Labels as one column per cohort (train rows first, then test rows)
y_eICU = np.concatenate(
    [y_train_eICU.reshape(-1, 1), y_test_eICU.reshape(-1, 1)], axis=0)
y_MIMICIV = np.concatenate(
    [y_train_MIMICIV.reshape(-1, 1), y_test_MIMICIV.reshape(-1, 1)], axis=0)

# Features stacked in the same order, with the label appended as the last column
X_eICU = np.hstack([np.vstack([X_train_eICU, X_test_eICU]), y_eICU])
X_MIMICIV = np.hstack([np.vstack([X_train_MIMICIV, X_test_MIMICIV]), y_MIMICIV])

# Process features (NaN rows survive every filter: comparisons with NaN are False)
# MIMIC-IV: keep ages 18..89 (column 2), then drop glucose mean (column 107) above 500
age_mimic = X_MIMICIV[:, 2]
X_MIMICIV = X_MIMICIV[~((age_mimic < 18) | (age_mimic > 89))]
X_MIMICIV = X_MIMICIV[~(X_MIMICIV[:, 107] > 500)]
# eICU: drop rows whose RDW std (column 139) exceeds 5
X_eICU = X_eICU[~(X_eICU[:, 139] > 5)]

# Split the labels back out, then keep the 14 features shared by both cohorts
y_eICU, y_MIMICIV = X_eICU[:, -1], X_MIMICIV[:, -1]
shared_cols_eICU = [4, 104, 65, 68, 103, 149, 73, 119, 93, 139, 78, 124, 99, 145]
shared_cols_MIMICIV = [2, 52, 115, 152, 107, 144, 67, 97, 62, 92, 66, 96, 39, 72]
X_eICU = X_eICU[:, shared_cols_eICU]
X_MIMICIV = X_MIMICIV[:, shared_cols_MIMICIV]

In [None]:
# Re-fit the imputer on the 18 hour eICU features, then impute both cohorts with it
imp_mean.fit(X_eICU)
X_train_res, X_test_res = (
    imp_mean.transform(features) for features in (X_eICU, X_MIMICIV)
)

In [None]:
# Fresh tuning pipeline for the 18 hour horizon: imputation step ('i')
# followed by an untuned XGBoost classifier ('m').
imputer = IterativeImputer(max_iter=10, random_state=0)
XGBOOSTmodel = XGBClassifier(use_label_encoder=False)
pipeline = Pipeline(steps=[('i', imputer), ('m', XGBOOSTmodel)])

In [None]:
# Distinct label values in the 18 hour eICU cohort and how often each occurs
np.unique(y_eICU, return_counts=True)

In [None]:
# Class ratio (presumably majority / minority counts from the cell above);
# evaluates to 7.657241115781429, the value used for scale_pos_weight below.
20039/2617

In [None]:
# Search space for tuning the XGBoost step of the pipeline (18 hour horizon).
# Every key is prefixed with 'm__' so that it is routed to the pipeline step
# named 'm' (the classifier); the imputer step is not tuned.

# Depth of tree: integers 1..12 (built with range so the values are plain ints)
max_depth = list(range(1, 13))
# Number of boosting rounds (trees): 50, 100, ..., 350
n_estimators = list(range(50, 400, 50))
# Learning rate
lr = [0.001, 0.01, 0.1, 0.3, 0.5, 0.7]
# Cap on each tree's weight update; non-zero values can help with imbalanced data
max_delta_step = [0, 1, 3, 5, 7, 10]
# Gamma: minimum loss reduction required to make a split (overfitting control)
min_split_loss = [0, 0.5]

# Class weighting: negatives / positives, derived from the training labels
# rather than copied by hand, so it cannot go stale when the data changes.
n_negative = int(np.sum(np.ravel(y_eICU) == 0))
n_positive = int(np.sum(np.ravel(y_eICU) == 1))
if n_positive == 0:
    raise ValueError("y_eICU contains no positive samples; cannot derive scale_pos_weight")
scale_pos_weight = [n_negative / n_positive]

# Create the grid
param_grid = {
    'm__max_depth': max_depth,
    'm__n_estimators': n_estimators,
    'm__learning_rate': lr,
    'm__max_delta_step': max_delta_step,
    'm__min_split_loss': min_split_loss,
    'm__scale_pos_weight': scale_pos_weight,
}

In [None]:
# Bayesian optimisation over the grid: 50 sampled configurations, each scored
# by AUROC on the stratified folds in `cv`. refit=False because only the best
# parameters are needed here; the final model is trained in a later cell.
# random_state pins the optimiser's sampling so the search no longer depends
# on how much of the global NumPy RNG earlier cells have consumed.
opt = BayesSearchCV(
    pipeline,
    param_grid,
    n_iter=50,
    cv=cv,
    scoring='roc_auc',
    refit=False,
    verbose=1,
    random_state=42,
)
opt.fit(X_eICU, np.ravel(y_eICU))

In [None]:
# Report the winning parameter combination and the score it achieved
best_combo, best_auroc = opt.best_params_, opt.best_score_
print("Best parameter combo:", best_combo)
print("Best AUROC:", best_auroc)

In [None]:
# Final 18 hour model to be tested on the standalone set.
# Hyperparameters are hard-coded here (presumably transcribed from the search output).
XGBOOSTmodel = XGBClassifier(
    max_depth=2,
    n_estimators=300,
    learning_rate=0.1,
    min_split_loss=0.0,
    max_delta_step=0,
    scale_pos_weight=7.657241115781429,
    use_label_encoder=False,
)

In [None]:
# Train on the imputed eICU features, passing the labels as a flat 1-D vector
train_labels = np.ravel(y_eICU)
XGBOOSTmodel.fit(X_train_res, train_labels)

In [None]:
# Evaluate on the imputed MIMIC-IV set; predictions are computed once and reused.
pred_labels = XGBOOSTmodel.predict(X_test_res)
pred_scores = XGBOOSTmodel.predict_proba(X_test_res)[:, 1]  # probability of class 1

print('Results:')
print('AUROC is:', metrics.roc_auc_score(y_MIMICIV, pred_scores))
print('Accuracy is:', metrics.accuracy_score(y_MIMICIV, pred_labels))
print('Average Precision is:', metrics.average_precision_score(y_MIMICIV, pred_scores))
print('Weighted F1 is:', metrics.f1_score(y_MIMICIV, pred_labels, average='weighted'))
print('Sensitivity is:', metrics.recall_score(y_MIMICIV, pred_labels))
print('Balanced accuracy is:', metrics.balanced_accuracy_score(y_MIMICIV, pred_labels))
# Specificity is the recall of the negative class
print('Specificity is:', metrics.recall_score(y_MIMICIV, pred_labels, pos_label=0))
print('#################################################')

In [None]:
# For 24 hour prediction: load both cohorts, rebuild the combined matrices and
# apply the same row filters and feature selection as in the 6 hour experiment.
X_train_eICU, X_test_eICU = (
    np.load('Revised Experiments/X_train_static_24.npy'),
    np.load('Revised Experiments/X_test_static_24.npy'),
)
y_train_eICU, y_test_eICU = (
    np.load('Revised Experiments/y_train_static_24.npy'),
    np.load('Revised Experiments/y_test_static_24.npy'),
)
X_train_MIMICIV, X_test_MIMICIV = (
    np.load('MIMIC-IV/MIMICIV_data/X_train_static_24.npy'),
    np.load('MIMIC-IV/MIMICIV_data/X_test_static_24.npy'),
)
y_train_MIMICIV, y_test_MIMICIV = (
    np.load('MIMIC-IV/MIMICIV_data/y_train_static_24.npy'),
    np.load('MIMIC-IV/MIMICIV_data/y_test_static_24.npy'),
)

# Labels as one column per cohort (train rows first, then test rows)
y_eICU = np.concatenate(
    [y_train_eICU.reshape(-1, 1), y_test_eICU.reshape(-1, 1)], axis=0)
y_MIMICIV = np.concatenate(
    [y_train_MIMICIV.reshape(-1, 1), y_test_MIMICIV.reshape(-1, 1)], axis=0)

# Features stacked in the same order, with the label appended as the last column
X_eICU = np.hstack([np.vstack([X_train_eICU, X_test_eICU]), y_eICU])
X_MIMICIV = np.hstack([np.vstack([X_train_MIMICIV, X_test_MIMICIV]), y_MIMICIV])

# Process features (NaN rows survive every filter: comparisons with NaN are False)
# MIMIC-IV: keep ages 18..89 (column 2), then drop glucose mean (column 107) above 500
age_mimic = X_MIMICIV[:, 2]
X_MIMICIV = X_MIMICIV[~((age_mimic < 18) | (age_mimic > 89))]
X_MIMICIV = X_MIMICIV[~(X_MIMICIV[:, 107] > 500)]
# eICU: drop rows whose RDW std (column 139) exceeds 5
X_eICU = X_eICU[~(X_eICU[:, 139] > 5)]

# Split the labels back out, then keep the 14 features shared by both cohorts
y_eICU, y_MIMICIV = X_eICU[:, -1], X_MIMICIV[:, -1]
shared_cols_eICU = [4, 104, 65, 68, 103, 149, 73, 119, 93, 139, 78, 124, 99, 145]
shared_cols_MIMICIV = [2, 52, 115, 152, 107, 144, 67, 97, 62, 92, 66, 96, 39, 72]
X_eICU = X_eICU[:, shared_cols_eICU]
X_MIMICIV = X_MIMICIV[:, shared_cols_MIMICIV]

In [None]:
# Re-fit the imputer on the 24 hour eICU features, then impute both cohorts with it
imp_mean.fit(X_eICU)
X_train_res, X_test_res = (
    imp_mean.transform(features) for features in (X_eICU, X_MIMICIV)
)

In [None]:
# Fresh tuning pipeline for the 24 hour horizon: imputation step ('i')
# followed by an untuned XGBoost classifier ('m').
imputer = IterativeImputer(max_iter=10, random_state=0)
XGBOOSTmodel = XGBClassifier(use_label_encoder=False)
pipeline = Pipeline(steps=[('i', imputer), ('m', XGBOOSTmodel)])

In [None]:
# Distinct label values in the 24 hour eICU cohort and how often each occurs
np.unique(y_eICU, return_counts=True)

In [None]:
# Class ratio (presumably majority / minority counts from the cell above);
# evaluates to 7.089583333333334, the value used for scale_pos_weight below.
17015/2400

In [None]:
# Search space for tuning the XGBoost step of the pipeline (24 hour horizon).
# Every key is prefixed with 'm__' so that it is routed to the pipeline step
# named 'm' (the classifier); the imputer step is not tuned.

# Depth of tree: integers 1..12 (built with range so the values are plain ints)
max_depth = list(range(1, 13))
# Number of boosting rounds (trees): 50, 100, ..., 350
n_estimators = list(range(50, 400, 50))
# Learning rate
lr = [0.001, 0.01, 0.1, 0.3, 0.5, 0.7]
# Cap on each tree's weight update; non-zero values can help with imbalanced data
max_delta_step = [0, 1, 3, 5, 7, 10]
# Gamma: minimum loss reduction required to make a split (overfitting control)
min_split_loss = [0, 0.5]

# Class weighting: negatives / positives, derived from the training labels
# rather than copied by hand, so it cannot go stale when the data changes.
n_negative = int(np.sum(np.ravel(y_eICU) == 0))
n_positive = int(np.sum(np.ravel(y_eICU) == 1))
if n_positive == 0:
    raise ValueError("y_eICU contains no positive samples; cannot derive scale_pos_weight")
scale_pos_weight = [n_negative / n_positive]

# Create the grid
param_grid = {
    'm__max_depth': max_depth,
    'm__n_estimators': n_estimators,
    'm__learning_rate': lr,
    'm__max_delta_step': max_delta_step,
    'm__min_split_loss': min_split_loss,
    'm__scale_pos_weight': scale_pos_weight,
}

In [None]:
# Bayesian optimisation over the grid: 50 sampled configurations, each scored
# by AUROC on the stratified folds in `cv`. refit=False because only the best
# parameters are needed here; the final model is trained in a later cell.
# random_state pins the optimiser's sampling so the search no longer depends
# on how much of the global NumPy RNG earlier cells have consumed.
opt = BayesSearchCV(
    pipeline,
    param_grid,
    n_iter=50,
    cv=cv,
    scoring='roc_auc',
    refit=False,
    verbose=1,
    random_state=42,
)
opt.fit(X_eICU, np.ravel(y_eICU))

In [None]:
# Report the winning parameter combination and the score it achieved
best_combo, best_auroc = opt.best_params_, opt.best_score_
print("Best parameter combo:", best_combo)
print("Best AUROC:", best_auroc)

In [None]:
# Final 24 hour model to be tested on the standalone set.
# Hyperparameters are hard-coded here (presumably transcribed from the search output).
XGBOOSTmodel = XGBClassifier(
    max_depth=2,
    n_estimators=350,
    learning_rate=0.1,
    min_split_loss=0.0,
    max_delta_step=1,
    scale_pos_weight=7.089583333333334,
    use_label_encoder=False,
)

In [None]:
# Train on the imputed eICU features, passing the labels as a flat 1-D vector
train_labels = np.ravel(y_eICU)
XGBOOSTmodel.fit(X_train_res, train_labels)

In [None]:
# Evaluate on the imputed MIMIC-IV set; predictions are computed once and reused.
pred_labels = XGBOOSTmodel.predict(X_test_res)
pred_scores = XGBOOSTmodel.predict_proba(X_test_res)[:, 1]  # probability of class 1

print('Results:')
print('AUROC is:', metrics.roc_auc_score(y_MIMICIV, pred_scores))
print('Accuracy is:', metrics.accuracy_score(y_MIMICIV, pred_labels))
print('Average Precision is:', metrics.average_precision_score(y_MIMICIV, pred_scores))
print('Weighted F1 is:', metrics.f1_score(y_MIMICIV, pred_labels, average='weighted'))
print('Sensitivity is:', metrics.recall_score(y_MIMICIV, pred_labels))
print('Balanced accuracy is:', metrics.balanced_accuracy_score(y_MIMICIV, pred_labels))
# Specificity is the recall of the negative class
print('Specificity is:', metrics.recall_score(y_MIMICIV, pred_labels, pos_label=0))
print('#################################################')

# Tune and test on eICU

In [None]:
from xgboost import XGBClassifier
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from skopt import BayesSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
from imblearn.pipeline import Pipeline
import shap
from sklearn.inspection import permutation_importance
# explicitly require this experimental feature
from sklearn.experimental import enable_iterative_imputer  # noqa
# now you can import normally from sklearn.impute
from sklearn.impute import IterativeImputer
from xgboost import plot_importance

In [None]:
# Load the 6-hour train/test feature and label arrays from 'Revised Experiments'.
# Only these four files are read in this cell (no MIMIC-IV files are loaded here).
X_train_eICU, X_test_eICU, y_train, y_test = (
    np.load(npy_path)
    for npy_path in (
        'Revised Experiments/X_train_static_6.npy',
        'Revised Experiments/X_test_static_6.npy',
        'Revised Experiments/y_train_static_6.npy',
        'Revised Experiments/y_test_static_6.npy',
    )
)

In [None]:
# Keep the same 14 feature columns in both splits. Column 4 is plotted as age
# and column 104 as lactate_mean earlier in this notebook; the meaning of the
# remaining indices is not shown here.
X_train, X_test = (
    split[:, [4, 104, 65, 68, 103, 149, 73, 119, 93, 139, 78, 124, 99, 145]]
    for split in (X_train_eICU, X_test_eICU)
)

In [None]:
# Stratified 5-fold splitter (shuffled, fixed seed) used by the hyperparameter search
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=33)

# Fit the iterative imputer on the training split only, then apply that same
# fitted imputer to the training and test splits.
imp_mean = IterativeImputer(random_state=0, max_iter=10).fit(X_train)
X_train_res, X_test_res = (
    imp_mean.transform(split) for split in (X_train, X_test)
)

In [None]:
# Estimator handed to the hyperparameter search: an imputation step ('i')
# followed by an untuned XGBoost classifier ('m'). The step name 'm' is the
# prefix used by the search-space keys (e.g. 'm__max_depth').
imputer = IterativeImputer(random_state=0, max_iter=10)
XGBOOSTmodel = XGBClassifier(use_label_encoder=False)
pipeline = Pipeline(
    steps=[
        ('i', imputer),
        ('m', XGBOOSTmodel),
    ]
)

In [None]:
# Search space for tuning the XGBoost step with Bayesian optimisation.
# Candidate values are listed by hand. For a different estimator this is where
# its own hyperparameters would go (e.g. C for a regularised regression, or
# the kernel for an SVM).

# Tree depth: the integers 1..12
max_depth = [round(depth) for depth in np.linspace(1, 12, 12, endpoint=True)]
# Number of boosting rounds (trees): 50, 100, ..., 350
n_estimators = list(range(50, 400, 50))
# Learning rate
lr = [0.001, 0.01, 0.1, 0.3, 0.5, 0.7]
# Maximum delta step, intended here as regularisation for imbalanced data
max_delta_step = [0, 1, 3, 5, 7, 10]
# Minimum split loss (gamma), for overfitting control
min_split_loss = [0, 0.5]
# Positive-class weight for the imbalanced labels; a single fixed candidate
# (presumably the negative/positive ratio of the training labels - TODO confirm)
scale_pos_weight = [7.352847471127041]

# Keys are prefixed with 'm__' so each value is routed to the pipeline step named 'm'
param_grid = {
    'm__max_depth': max_depth,
    'm__n_estimators': n_estimators,
    'm__learning_rate': lr,
    'm__max_delta_step': max_delta_step,
    'm__min_split_loss': min_split_loss,
    'm__scale_pos_weight': scale_pos_weight,
}

In [None]:
# Bayesian hyperparameter search over the impute-then-classify pipeline,
# scored by AUROC on the folds produced by `cv`.
# refit=False: the search only scores candidates and does not refit a final
# estimator; the model that is tested is built in a separate cell.
opt = BayesSearchCV(
    estimator=pipeline,
    search_spaces=param_grid,
    n_iter=50,
    cv=cv,
    scoring='roc_auc',
    refit=False,
    verbose=1,
)
# The un-imputed feature matrix is passed in; the pipeline's own imputer step handles it.
opt.fit(X_train, np.ravel(y_train))

In [None]:
# Report the best configuration found by the search and its score
# (the search was configured with scoring='roc_auc').
for caption, best_value in (
    ("Best parameter combo:", opt.best_params_),
    ("Best AUROC:", opt.best_score_),
):
    print(caption, best_value)

In [None]:
# Model to be evaluated on the standalone test set.
# NOTE(review): the hyperparameters are hard-coded here rather than read from
# opt.best_params_ - confirm they match the search result printed by the notebook.
XGBOOSTmodel = XGBClassifier(
    use_label_encoder=False,
    scale_pos_weight=7.352847471127041,
    learning_rate=0.1,
    n_estimators=200,
    max_depth=10,
    min_split_loss=0.5,
    max_delta_step=0,
)

In [None]:
# Train on the imputed training matrix; np.ravel flattens the labels to 1-D.
XGBOOSTmodel.fit(X=X_train_res, y=np.ravel(y_train))

In [None]:
# Score the fitted model on the imputed test matrix against y_test.
# Inference is run once and reused; every metric below previously triggered
# its own predict()/predict_proba() pass over the same matrix.
pred_prob = XGBOOSTmodel.predict_proba(X_test_res)[:, 1]  # probability column for the second class
pred_label = XGBOOSTmodel.predict(X_test_res)             # hard class predictions

print('Results:')
# Ranking metrics use the probabilities
print('AUROC is:', metrics.roc_auc_score(y_test, pred_prob))
print('Accuracy is:', metrics.accuracy_score(y_test, pred_label))
print('Average Precision is:', metrics.average_precision_score(y_test, pred_prob))
print('Weighted F1 is:', metrics.f1_score(y_test, pred_label, average='weighted'))
# Sensitivity = recall of the default positive label
print('Sensitivity is:', metrics.recall_score(y_test, pred_label))
print('Balanced accuracy is:', metrics.balanced_accuracy_score(y_test, pred_label))
# Specificity = recall computed with label 0 treated as the positive class
print('Specificity is:', metrics.recall_score(y_test, pred_label, pos_label=0))
print('#################################################')

In [None]:
# Load the 12-hour train/test feature and label arrays from 'Revised Experiments'.
# Only these four files are read in this cell (no MIMIC-IV files are loaded here).
X_train_eICU, X_test_eICU, y_train, y_test = (
    np.load(npy_path)
    for npy_path in (
        'Revised Experiments/X_train_static_12.npy',
        'Revised Experiments/X_test_static_12.npy',
        'Revised Experiments/y_train_static_12.npy',
        'Revised Experiments/y_test_static_12.npy',
    )
)

In [None]:
# Keep the same 14 feature columns in both splits. Column 4 is plotted as age
# and column 104 as lactate_mean earlier in this notebook; the meaning of the
# remaining indices is not shown here.
X_train, X_test = (
    split[:, [4, 104, 65, 68, 103, 149, 73, 119, 93, 139, 78, 124, 99, 145]]
    for split in (X_train_eICU, X_test_eICU)
)

In [None]:
# Stratified 5-fold splitter (shuffled, fixed seed) used by the hyperparameter search
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=33)

# Fit the iterative imputer on the training split only, then apply that same
# fitted imputer to the training and test splits.
imp_mean = IterativeImputer(random_state=0, max_iter=10).fit(X_train)
X_train_res, X_test_res = (
    imp_mean.transform(split) for split in (X_train, X_test)
)

In [None]:
# Estimator handed to the hyperparameter search: an imputation step ('i')
# followed by an untuned XGBoost classifier ('m'). The step name 'm' is the
# prefix used by the search-space keys (e.g. 'm__max_depth').
imputer = IterativeImputer(random_state=0, max_iter=10)
XGBOOSTmodel = XGBClassifier(use_label_encoder=False)
pipeline = Pipeline(
    steps=[
        ('i', imputer),
        ('m', XGBOOSTmodel),
    ]
)

In [None]:
# Bayesian hyperparameter search over the impute-then-classify pipeline,
# scored by AUROC on the folds produced by `cv`. It reuses the `param_grid`
# search space defined earlier in the notebook.
# refit=False: the search only scores candidates and does not refit a final
# estimator; the model that is tested is built in a separate cell.
opt = BayesSearchCV(
    estimator=pipeline,
    search_spaces=param_grid,
    n_iter=50,
    cv=cv,
    scoring='roc_auc',
    refit=False,
    verbose=1,
)
# The un-imputed feature matrix is passed in; the pipeline's own imputer step handles it.
opt.fit(X_train, np.ravel(y_train))

In [None]:
# Report the best configuration found by the search and its score
# (the search was configured with scoring='roc_auc').
for caption, best_value in (
    ("Best parameter combo:", opt.best_params_),
    ("Best AUROC:", opt.best_score_),
):
    print(caption, best_value)

In [None]:
# Model to be evaluated on the standalone test set.
# NOTE(review): the hyperparameters are hard-coded rather than read from
# opt.best_params_, and are identical to the values used for the other
# prediction horizons in this notebook - confirm this is intended.
XGBOOSTmodel = XGBClassifier(
    use_label_encoder=False,
    scale_pos_weight=7.352847471127041,
    learning_rate=0.1,
    n_estimators=200,
    max_depth=10,
    min_split_loss=0.5,
    max_delta_step=0,
)

In [None]:
# Train on the imputed training matrix; np.ravel flattens the labels to 1-D.
XGBOOSTmodel.fit(X=X_train_res, y=np.ravel(y_train))

In [None]:
# Score the fitted model on the imputed test matrix against y_test.
# Inference is run once and reused; every metric below previously triggered
# its own predict()/predict_proba() pass over the same matrix.
pred_prob = XGBOOSTmodel.predict_proba(X_test_res)[:, 1]  # probability column for the second class
pred_label = XGBOOSTmodel.predict(X_test_res)             # hard class predictions

print('Results:')
# Ranking metrics use the probabilities
print('AUROC is:', metrics.roc_auc_score(y_test, pred_prob))
print('Accuracy is:', metrics.accuracy_score(y_test, pred_label))
print('Average Precision is:', metrics.average_precision_score(y_test, pred_prob))
print('Weighted F1 is:', metrics.f1_score(y_test, pred_label, average='weighted'))
# Sensitivity = recall of the default positive label
print('Sensitivity is:', metrics.recall_score(y_test, pred_label))
print('Balanced accuracy is:', metrics.balanced_accuracy_score(y_test, pred_label))
# Specificity = recall computed with label 0 treated as the positive class
print('Specificity is:', metrics.recall_score(y_test, pred_label, pos_label=0))
print('#################################################')

In [None]:
# Load the 18-hour train/test feature and label arrays from 'Revised Experiments'.
# Only these four files are read in this cell (no MIMIC-IV files are loaded here).
X_train_eICU, X_test_eICU, y_train, y_test = (
    np.load(npy_path)
    for npy_path in (
        'Revised Experiments/X_train_static_18.npy',
        'Revised Experiments/X_test_static_18.npy',
        'Revised Experiments/y_train_static_18.npy',
        'Revised Experiments/y_test_static_18.npy',
    )
)

In [None]:
# Keep the same 14 feature columns in both splits. Column 4 is plotted as age
# and column 104 as lactate_mean earlier in this notebook; the meaning of the
# remaining indices is not shown here.
X_train, X_test = (
    split[:, [4, 104, 65, 68, 103, 149, 73, 119, 93, 139, 78, 124, 99, 145]]
    for split in (X_train_eICU, X_test_eICU)
)

In [None]:
# Stratified 5-fold splitter (shuffled, fixed seed) used by the hyperparameter search
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=33)

# Fit the iterative imputer on the training split only, then apply that same
# fitted imputer to the training and test splits.
imp_mean = IterativeImputer(random_state=0, max_iter=10).fit(X_train)
X_train_res, X_test_res = (
    imp_mean.transform(split) for split in (X_train, X_test)
)

In [None]:
# Estimator handed to the hyperparameter search: an imputation step ('i')
# followed by an untuned XGBoost classifier ('m'). The step name 'm' is the
# prefix used by the search-space keys (e.g. 'm__max_depth').
imputer = IterativeImputer(random_state=0, max_iter=10)
XGBOOSTmodel = XGBClassifier(use_label_encoder=False)
pipeline = Pipeline(
    steps=[
        ('i', imputer),
        ('m', XGBOOSTmodel),
    ]
)

In [None]:
# Bayesian hyperparameter search over the impute-then-classify pipeline,
# scored by AUROC on the folds produced by `cv`. It reuses the `param_grid`
# search space defined earlier in the notebook.
# refit=False: the search only scores candidates and does not refit a final
# estimator; the model that is tested is built in a separate cell.
opt = BayesSearchCV(
    estimator=pipeline,
    search_spaces=param_grid,
    n_iter=50,
    cv=cv,
    scoring='roc_auc',
    refit=False,
    verbose=1,
)
# The un-imputed feature matrix is passed in; the pipeline's own imputer step handles it.
opt.fit(X_train, np.ravel(y_train))

In [None]:
# Report the best configuration found by the search and its score
# (the search was configured with scoring='roc_auc').
for caption, best_value in (
    ("Best parameter combo:", opt.best_params_),
    ("Best AUROC:", opt.best_score_),
):
    print(caption, best_value)

In [None]:

# Model to be evaluated on the test split.
# NOTE(review): the hyperparameters are hard-coded rather than read from
# opt.best_params_, and are identical to the values used for the other
# prediction horizons in this notebook - confirm this is intended.
XGBOOSTmodel = XGBClassifier(
    use_label_encoder=False,
    scale_pos_weight=7.352847471127041,
    learning_rate=0.1,
    n_estimators=200,
    max_depth=10,
    min_split_loss=0.5,
    max_delta_step=0,
)

In [None]:
# Train on the imputed training matrix; np.ravel flattens the labels to 1-D.
XGBOOSTmodel.fit(X=X_train_res, y=np.ravel(y_train))

In [None]:
# Score the fitted model on the imputed test matrix against y_test.
# Inference is run once and reused; every metric below previously triggered
# its own predict()/predict_proba() pass over the same matrix.
pred_prob = XGBOOSTmodel.predict_proba(X_test_res)[:, 1]  # probability column for the second class
pred_label = XGBOOSTmodel.predict(X_test_res)             # hard class predictions

print('Results:')
# Ranking metrics use the probabilities
print('AUROC is:', metrics.roc_auc_score(y_test, pred_prob))
print('Accuracy is:', metrics.accuracy_score(y_test, pred_label))
print('Average Precision is:', metrics.average_precision_score(y_test, pred_prob))
print('Weighted F1 is:', metrics.f1_score(y_test, pred_label, average='weighted'))
# Sensitivity = recall of the default positive label
print('Sensitivity is:', metrics.recall_score(y_test, pred_label))
print('Balanced accuracy is:', metrics.balanced_accuracy_score(y_test, pred_label))
# Specificity = recall computed with label 0 treated as the positive class
print('Specificity is:', metrics.recall_score(y_test, pred_label, pos_label=0))
print('#################################################')

In [None]:
# Load the 24-hour train/test feature and label arrays from 'Revised Experiments'.
# Only these four files are read in this cell (no MIMIC-IV files are loaded here).
X_train_eICU, X_test_eICU, y_train, y_test = (
    np.load(npy_path)
    for npy_path in (
        'Revised Experiments/X_train_static_24.npy',
        'Revised Experiments/X_test_static_24.npy',
        'Revised Experiments/y_train_static_24.npy',
        'Revised Experiments/y_test_static_24.npy',
    )
)

In [None]:
# Keep the same 14 feature columns in both splits. Column 4 is plotted as age
# and column 104 as lactate_mean earlier in this notebook; the meaning of the
# remaining indices is not shown here.
X_train, X_test = (
    split[:, [4, 104, 65, 68, 103, 149, 73, 119, 93, 139, 78, 124, 99, 145]]
    for split in (X_train_eICU, X_test_eICU)
)

In [None]:
# Stratified 5-fold splitter (shuffled, fixed seed) used by the hyperparameter search
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=33)

# Fit the iterative imputer on the training split only, then apply that same
# fitted imputer to the training and test splits.
imp_mean = IterativeImputer(random_state=0, max_iter=10).fit(X_train)
X_train_res, X_test_res = (
    imp_mean.transform(split) for split in (X_train, X_test)
)

In [None]:
# Estimator handed to the hyperparameter search: an imputation step ('i')
# followed by an untuned XGBoost classifier ('m'). The step name 'm' is the
# prefix used by the search-space keys (e.g. 'm__max_depth').
imputer = IterativeImputer(random_state=0, max_iter=10)
XGBOOSTmodel = XGBClassifier(use_label_encoder=False)
pipeline = Pipeline(
    steps=[
        ('i', imputer),
        ('m', XGBOOSTmodel),
    ]
)

In [None]:
# Bayesian hyperparameter search over the impute-then-classify pipeline,
# scored by AUROC on the folds produced by `cv`. It reuses the `param_grid`
# search space defined earlier in the notebook.
# refit=False: the search only scores candidates and does not refit a final
# estimator; the model that is tested is built in a separate cell.
opt = BayesSearchCV(
    estimator=pipeline,
    search_spaces=param_grid,
    n_iter=50,
    cv=cv,
    scoring='roc_auc',
    refit=False,
    verbose=1,
)
# The un-imputed feature matrix is passed in; the pipeline's own imputer step handles it.
opt.fit(X_train, np.ravel(y_train))

In [None]:
# Report the best configuration found by the search and its score
# (the search was configured with scoring='roc_auc').
for caption, best_value in (
    ("Best parameter combo:", opt.best_params_),
    ("Best AUROC:", opt.best_score_),
):
    print(caption, best_value)

In [None]:

# Model to be evaluated on the test split.
# NOTE(review): the hyperparameters are hard-coded rather than read from
# opt.best_params_, and are identical to the values used for the other
# prediction horizons in this notebook - confirm this is intended.
XGBOOSTmodel = XGBClassifier(
    use_label_encoder=False,
    scale_pos_weight=7.352847471127041,
    learning_rate=0.1,
    n_estimators=200,
    max_depth=10,
    min_split_loss=0.5,
    max_delta_step=0,
)

In [None]:
# Train on the imputed training matrix; np.ravel flattens the labels to 1-D.
XGBOOSTmodel.fit(X=X_train_res, y=np.ravel(y_train))

In [None]:
# Score the fitted model on the imputed test matrix against y_test.
# Inference is run once and reused; every metric below previously triggered
# its own predict()/predict_proba() pass over the same matrix.
pred_prob = XGBOOSTmodel.predict_proba(X_test_res)[:, 1]  # probability column for the second class
pred_label = XGBOOSTmodel.predict(X_test_res)             # hard class predictions

print('Results:')
# Ranking metrics use the probabilities
print('AUROC is:', metrics.roc_auc_score(y_test, pred_prob))
print('Accuracy is:', metrics.accuracy_score(y_test, pred_label))
print('Average Precision is:', metrics.average_precision_score(y_test, pred_prob))
print('Weighted F1 is:', metrics.f1_score(y_test, pred_label, average='weighted'))
# Sensitivity = recall of the default positive label
print('Sensitivity is:', metrics.recall_score(y_test, pred_label))
print('Balanced accuracy is:', metrics.balanced_accuracy_score(y_test, pred_label))
# Specificity = recall computed with label 0 treated as the positive class
print('Specificity is:', metrics.recall_score(y_test, pred_label, pos_label=0))
print('#################################################')