In [None]:
# Mount Google Drive at /content/drive so the paths used later in this notebook
# (the input CSV and the saved figures/results) resolve inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install catboost
!pip install optuna
!pip install shap

In [None]:
import numpy as np
import pandas as pd

import sklearn
import sklearn.metrics
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve, auc, precision_score, recall_score, f1_score, classification_report, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.calibration import calibration_curve

from scipy import stats as st
from random import randrange
from matplotlib import pyplot

import xgboost as xgb
import lightgbm as lgb
import catboost as cb
from sklearn.ensemble import RandomForestClassifier

import optuna

import shap

In [None]:
#Open csv file.
# The first CSV column is used as the row index (index_col = 0); the trailing
# data.head() displays the first rows as the cell output.

data = pd.read_csv("/content/drive/MyDrive/NSQIP-PLF/plf_final.csv", index_col = 0)
data.head()

In [None]:
#List every column name available in the loaded table.

print(data.columns.tolist())

In [None]:
#Columns kept for modelling: the predictors followed by the outcome column
#('LOS_Yes', last entry).

variables = [
    # Continuous demographics and pre-operative labs.
    'AGE',
    'HEIGHT',
    'WEIGHT',
    'PRSODM',
    'PRBUN',
    'PRCREAT',
    'PRWBC',
    'PRHCT',
    'PRPLATE',
    'BMI',
    # One-hot encoded categorical predictors.
    'SEX_male',
    'SEX_non-binary',
    'INOUT_Outpatient',
    'TRANST_Transferred',
    'TRANST_Unknown',
    'SURGSPEC_Orthopedics',
    'DIABETES_Yes',
    'SMOKE_Yes',
    'DYSPNEA_Yes',
    'FNSTATUS2_Partially Dependent',
    'FNSTATUS2_Totally Dependent',
    'FNSTATUS2_Unknown',
    'VENTILAT_Yes',
    'HXCOPD_Yes',
    'ASCITES_Yes',
    'HXCHF_Yes',
    'HYPERMED_Yes',
    'RENAFAIL_Yes',
    'DIALYSIS_Yes',
    'DISCANCR_Yes',
    'WNDINF_Yes',
    'STEROID_Yes',
    'WTLOSS_Yes',
    'BLEEDDIS_Yes',
    'TRANSFUS_Yes',
    'ASACLAS_2-Mild Disturb',
    'ASACLAS_3-Severe Disturb',
    'RACE_Black or African American',
    'RACE_Hispanic',
    'RACE_Other',
    'RACE_Unknown',
    'RACE_White',
    'LEVELS_Single',
    # Outcome of interest.
    'LOS_Yes',
]

In [None]:
#Keep only the modelling columns (predictors + outcome), in the listed order.

data = data.loc[:, variables]

In [None]:
#Separate the outcome column (y) from the predictor matrix (x).

y = data['LOS_Yes']
x = data.drop(columns = ['LOS_Yes'])

In [None]:
#Check data shapes.
# y and x are both taken from `data`, so their first dimensions should match.

print(y.shape)
print(x.shape)

In [None]:
#Split data into initial train set and test set in 80:20 ratio.
# random_state is fixed so the same rows land in the test set on every run.
# NOTE(review): the split is not stratified on y; confirm that is intended
# given that the training data is class-balanced by undersampling later on.

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

#Describe initial train set and test set.

print("Number patients x_train dataset: ", x_train.shape)
print("Number patients y_train dataset: ", y_train.shape)
print("Number patients x_test dataset: ", x_test.shape)
print("Number patients y_test dataset: ", y_test.shape)

In [None]:
#Split initial train set into final train set and validation set in 75:25 ratio.
# 25% of the 80% training portion is 20% of all rows, so the overall
# train/validation/test proportions are 60/20/20.

train_x, valid_x, train_y, valid_y = train_test_split(x_train, y_train, test_size = 0.25, random_state = 0)

#Describe train and validation sets.

print("Number patients train_x dataset: ", train_x.shape)
print("Number patients train_y dataset: ", train_y.shape)
print("Number patients valid_x dataset: ", valid_x.shape)
print("Number patients valid_y dataset: ", valid_y.shape)

In [None]:
#Class balance of the training labels before resampling.

n_positive = (train_y == 1).sum()
n_negative = (train_y == 0).sum()

print("Before resampling, counts of label '1': {}".format(n_positive))
print("Before resampling, counts of label '0': {} \n".format(n_negative))

In [None]:
#Apply random undersampling.
# Only the final training split is resampled; the validation and test sets
# keep their original class distribution.

from imblearn.under_sampling import RandomUnderSampler

# random_state is fixed (as it is for the train/test splits) so the retained
# majority-class rows, and therefore every downstream model, are reproducible.
resampler = RandomUnderSampler(sampling_strategy = 'majority', random_state = 0)
train_x, train_y = resampler.fit_resample(train_x, train_y)

In [None]:
#Class balance of the training labels after resampling.

n_positive = (train_y == 1).sum()
n_negative = (train_y == 0).sum()

print("After resampling, counts of label '1': {}".format(n_positive))
print("After resampling, counts of label '0': {} \n".format(n_negative))

# XGBoost

In [None]:
#Hyperparameter tuning for XGBoost.

def objective(trial):
    """Optuna objective for XGBoost.

    Samples one hyperparameter configuration, trains a booster on the
    (undersampled) training split and scores it on the validation split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Validation AUROC computed from the predicted probabilities.
    """

    dtrain = xgb.DMatrix(train_x, label=train_y)
    dvalid = xgb.DMatrix(valid_x, label=valid_y)

    param = {
        "verbosity": 0,
        # Single-choice categoricals are recorded in trial.params so they are
        # carried over to the final model together with the tuned values.
        "objective":  trial.suggest_categorical("objective", ["binary:logistic"]),
        "eval_metric": "auc",
        "booster": trial.suggest_categorical("booster", ["gbtree"]),
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        "max_depth" : trial.suggest_int("max_depth", 1, 9),
        "eta" : trial.suggest_float("eta", 1e-8, 1.0, log=True),
        "gamma" : trial.suggest_float("gamma", 1e-8, 1.0, log=True),
        "grow_policy" : trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
    }

    # Add a callback for pruning; it watches the "auc" metric reported for the
    # evaluation set named "validation" below.
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "validation-auc")

    bst = xgb.train(param, dtrain, evals=[(dvalid, "validation")], callbacks=[pruning_callback])

    # With the binary:logistic objective the booster predicts P(y = 1).
    # AUROC is a ranking metric, so it is computed on these scores directly;
    # rounding them to 0/1 first would collapse the ROC curve to one point.
    preds = bst.predict(dvalid)
    return sklearn.metrics.roc_auc_score(valid_y, preds)


if __name__ == "__main__":
    # Maximise the value returned by objective(); the median pruner only
    # starts stopping trials after 5 warm-up steps.
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=5), direction="maximize"
    )
    study.optimize(objective, n_trials=100)
    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Keep a copy of the best hyperparameters for fitting the final model.
    xgb_params = dict(trial.params)

In [None]:
# Show the tuned XGBoost hyperparameters that are passed to the final model.
print(xgb_params)

In [None]:
#Fit XGBoost.
# NOTE: assigning the classifier to `xgb` rebinds the name the import cell uses
# for the xgboost module, so the tuning cell (xgb.DMatrix / xgb.train) cannot be
# re-run after this cell without re-running the imports first.

from xgboost import XGBClassifier

xgb = XGBClassifier(**xgb_params)

xgb.fit(train_x, train_y)

In [None]:
#Test-set predictions from the fitted XGBoost model: hard labels and
#probabilities for the second predict_proba column (the positive class).

preds_xgb = xgb.predict(x_test)

probs_xgb = xgb.predict_proba(x_test)[:, 1]

In [None]:
#Evaluate XGBoost model.
# scikit-learn metrics take (y_true, y_pred) in that order. Precision and
# recall are not symmetric, so passing the predictions first would report
# recall as precision and vice versa.

xgb_precision = precision_score(y_test, preds_xgb)
xgb_recall = recall_score(y_test, preds_xgb)
xgb_f1 = f1_score(y_test, preds_xgb)
xgb_acc = accuracy_score(y_test, preds_xgb)
xgb_mcc = matthews_corrcoef(y_test, preds_xgb)
# AUROC is threshold-free and therefore uses the predicted probabilities.
xgb_auroc = roc_auc_score(y_test, probs_xgb)

print("Precision: %.3f" % (xgb_precision))
print("Recall: %.3f" % (xgb_recall))
print("F1 Score: %.3f" %(xgb_f1))
print('Accuracy: %.3f' % (xgb_acc))
print('MCC: %.3f' % (xgb_mcc))
print('AUROC: %.3f' % (xgb_auroc))

In [None]:
#Evaluate XGBoost model (PRC and AUPRC).
# NOTE: this rebinds xgb_precision and xgb_recall from the scalar scores to the
# arrays returned by precision_recall_curve.

xgb_precision, xgb_recall, _ = precision_recall_curve(y_test, probs_xgb)
# Area under the precision-recall curve (recall as x, precision as y).
xgb_auprc = auc(xgb_recall, xgb_precision)

print('AUPRC: %.3f' % (xgb_auprc))

In [None]:
#Recalculate precision and recall for calculation purposes.
# xgb_precision / xgb_recall currently hold precision-recall curve arrays, so
# the scalar scores are recomputed here. Arguments are (y_true, y_pred);
# reversing them would swap precision and recall in the results table.

xgb_precision = precision_score(y_test, preds_xgb)
xgb_recall = recall_score(y_test, preds_xgb)

# Order: precision, recall, F1, accuracy, MCC, AUROC, AUPRC.
xgb_results = [xgb_precision, xgb_recall, xgb_f1, xgb_acc, xgb_mcc, xgb_auroc, xgb_auprc]

In [None]:
#Recalculate precision recall curve for plotting purposes.
# Rebinds xgb_precision / xgb_recall to curve arrays again; the plotting cell
# further down reads these names.

xgb_precision, xgb_recall, _ = precision_recall_curve(y_test, probs_xgb)

# LightGBM

In [None]:
#Hyperparameter tuning for LightGBM.

def objective(trial):
    """Optuna objective for LightGBM.

    Samples one hyperparameter configuration, trains a booster on the
    (undersampled) training split and scores it on the validation split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Validation AUROC computed from the predicted probabilities.
    """
    dtrain = lgb.Dataset(train_x, label=train_y)

    param = {
        # Single-choice categoricals are recorded in trial.params so they are
        # carried over to the final model together with the tuned values.
        "objective":  trial.suggest_categorical("objective", ["binary"]),
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type":  trial.suggest_categorical("boosting_type", ["gbdt"]),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    gbm = lgb.train(param, dtrain)

    # With the "binary" objective the booster predicts P(y = 1). AUROC is a
    # ranking metric, so it is computed on these scores directly; rounding
    # them to 0/1 first would collapse the ROC curve to a single point.
    preds = gbm.predict(valid_x)
    return sklearn.metrics.roc_auc_score(valid_y, preds)


if __name__ == "__main__":
    # Run the LightGBM search, maximising the value returned by objective().
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)

    print("Number of finished trials: {}".format(len(study.trials)))

    # Report the best trial and its sampled hyperparameters.
    print("Best trial:")
    trial = study.best_trial
    print("  Value: {}".format(trial.value))
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Copy of the best hyperparameters, used to fit the final model.
    lgb_params = dict(trial.params)

In [None]:
# Show the tuned LightGBM hyperparameters that are passed to the final model.
print(lgb_params)

In [None]:
#Fit LightGBM.
# NOTE: `lgb` is rebound from the lightgbm module to the fitted classifier
# below; the import in this cell restores the module binding first, which is
# what lets this cell itself be re-run. The tuning cell (lgb.Dataset /
# lgb.train) cannot be re-run after this cell without re-importing.

import lightgbm as lgb

lgb = lgb.LGBMClassifier(**lgb_params)

lgb.fit(train_x, train_y)

In [None]:
#Test-set predictions from the fitted LightGBM model: hard labels and
#probabilities for the second predict_proba column (the positive class).

preds_lgb = lgb.predict(x_test)

probs_lgb = lgb.predict_proba(x_test)[:, 1]

In [None]:
#Evaluate LightGBM model.
# scikit-learn metrics take (y_true, y_pred) in that order. Precision and
# recall are not symmetric, so passing the predictions first would report
# recall as precision and vice versa.

lgb_precision = precision_score(y_test, preds_lgb)
lgb_recall = recall_score(y_test, preds_lgb)
lgb_f1 = f1_score(y_test, preds_lgb)
lgb_acc = accuracy_score(y_test, preds_lgb)
lgb_mcc = matthews_corrcoef(y_test, preds_lgb)
# AUROC is threshold-free and therefore uses the predicted probabilities.
lgb_auroc = roc_auc_score(y_test, probs_lgb)

print("Precision: %.3f" % (lgb_precision))
print("Recall: %.3f" % (lgb_recall))
print("F1 Score: %.3f" %(lgb_f1))
print('Accuracy: %.3f' % (lgb_acc))
print('MCC: %.3f' % (lgb_mcc))
print('AUROC: %.3f' % (lgb_auroc))

In [None]:
#Evaluate LightGBM model (PRC and AUPRC).
# NOTE: this rebinds lgb_precision and lgb_recall from the scalar scores to the
# arrays returned by precision_recall_curve.

lgb_precision, lgb_recall, _ = precision_recall_curve(y_test, probs_lgb)
# Area under the precision-recall curve (recall as x, precision as y).
lgb_auprc = auc(lgb_recall, lgb_precision)

print('AUPRC: %.3f' % (lgb_auprc))

In [None]:
#Recalculate precision and recall for calculation purposes.
# lgb_precision / lgb_recall currently hold precision-recall curve arrays, so
# the scalar scores are recomputed here. Arguments are (y_true, y_pred);
# reversing them would swap precision and recall in the results table.

lgb_precision = precision_score(y_test, preds_lgb)
lgb_recall = recall_score(y_test, preds_lgb)

# Order: precision, recall, F1, accuracy, MCC, AUROC, AUPRC.
lgb_results = [lgb_precision, lgb_recall, lgb_f1, lgb_acc, lgb_mcc, lgb_auroc, lgb_auprc]

In [None]:
#Recalculate precision recall curve for plotting purposes.
# Rebinds lgb_precision / lgb_recall to curve arrays again; the plotting cell
# further down reads these names.

lgb_precision, lgb_recall, _ = precision_recall_curve(y_test, probs_lgb)

# CatBoost

In [None]:
#Hyperparameter tuning for CatBoost.

from optuna.integration import CatBoostPruningCallback

def objective(trial: optuna.Trial) -> float:
    """Optuna objective for CatBoost.

    Samples one hyperparameter configuration, fits a CatBoostClassifier on the
    (undersampled) training split with the validation split as eval set, and
    scores it on the validation split.

    Parameters
    ----------
    trial : optuna.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Validation AUROC computed from the predicted probabilities.
    """

    param = {
        "objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 12),
        "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        "bootstrap_type": trial.suggest_categorical(
            "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
        ),
        "used_ram_limit": "3gb",
        "eval_metric": "AUC",
    }

    # These two parameters are only sampled for the bootstrap type they are
    # paired with here.
    if param["bootstrap_type"] == "Bayesian":
        param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif param["bootstrap_type"] == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.1, 1, log=True)

    gbm = cb.CatBoostClassifier(**param)

    pruning_callback = CatBoostPruningCallback(trial, "AUC")
    gbm.fit(
        train_x,
        train_y,
        eval_set=[(valid_x, valid_y)],
        verbose=0,
        early_stopping_rounds=100,
        callbacks=[pruning_callback],
    )

    # evoke pruning manually.
    pruning_callback.check_pruned()

    # predict() returns hard class labels; AUROC is a ranking metric, so the
    # positive-class probability is scored instead.
    preds = gbm.predict_proba(valid_x)[:, 1]
    return sklearn.metrics.roc_auc_score(valid_y, preds)

if __name__ == "__main__":
    # Run the CatBoost search (at most 100 trials or 600 seconds), maximising
    # the value returned by objective(); the median pruner starts after 5
    # warm-up steps.
    median_pruner = optuna.pruners.MedianPruner(n_warmup_steps=5)
    study = optuna.create_study(pruner=median_pruner, direction="maximize")
    study.optimize(objective, n_trials=100, timeout=600)

    print("Number of finished trials: {}".format(len(study.trials)))

    # Report the best trial and its sampled hyperparameters.
    print("Best trial:")
    trial = study.best_trial
    print("  Value: {}".format(trial.value))
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Copy of the best hyperparameters, used to fit the final model.
    cb_params = dict(trial.params)

In [None]:
# Show the tuned CatBoost hyperparameters that are passed to the final model.
print(cb_params)

In [None]:
#Fit CatBoost.
# NOTE: assigning the classifier to `cb` rebinds the name the import cell uses
# for the catboost module, so the tuning cell (cb.CatBoostClassifier) cannot be
# re-run after this cell without re-running the imports first.

from catboost import CatBoostClassifier

cb = CatBoostClassifier(**cb_params)

cb.fit(train_x, train_y)

In [None]:
#Test-set predictions from the fitted CatBoost model: hard labels and
#probabilities for the second predict_proba column (the positive class).

preds_cb = cb.predict(x_test)

probs_cb = cb.predict_proba(x_test)[:, 1]

In [None]:
#Evaluate CatBoost model.
# scikit-learn metrics take (y_true, y_pred) in that order. Precision and
# recall are not symmetric, so passing the predictions first would report
# recall as precision and vice versa.

cb_precision = precision_score(y_test, preds_cb)
cb_recall = recall_score(y_test, preds_cb)
cb_f1 = f1_score(y_test, preds_cb)
cb_acc = accuracy_score(y_test, preds_cb)
cb_mcc = matthews_corrcoef(y_test, preds_cb)
# AUROC is threshold-free and therefore uses the predicted probabilities.
cb_auroc = roc_auc_score(y_test, probs_cb)

print("Precision: %.3f" % (cb_precision))
print("Recall: %.3f" % (cb_recall))
print("F1 Score: %.3f" %(cb_f1))
print('Accuracy: %.3f' % (cb_acc))
print('MCC: %.3f' % (cb_mcc))
print('AUROC: %.3f' % (cb_auroc))

In [None]:
#Evaluate CatBoost model (PRC and AUPRC).
# NOTE: this rebinds cb_precision and cb_recall from the scalar scores to the
# arrays returned by precision_recall_curve.

cb_precision, cb_recall, _ = precision_recall_curve(y_test, probs_cb)
# Area under the precision-recall curve (recall as x, precision as y).
cb_auprc = auc(cb_recall, cb_precision)

print('AUPRC: %.3f' % (cb_auprc))

In [None]:
#Recalculate precision and recall for calculation purposes.
# cb_precision / cb_recall currently hold precision-recall curve arrays, so
# the scalar scores are recomputed here. Arguments are (y_true, y_pred);
# reversing them would swap precision and recall in the results table.

cb_precision = precision_score(y_test, preds_cb)
cb_recall = recall_score(y_test, preds_cb)

# Order: precision, recall, F1, accuracy, MCC, AUROC, AUPRC.
cb_results = [cb_precision, cb_recall, cb_f1, cb_acc, cb_mcc, cb_auroc, cb_auprc]

In [None]:
#Recalculate precision recall curve for plotting purposes.
# Rebinds cb_precision / cb_recall to curve arrays again; the plotting cell
# further down reads these names.

cb_precision, cb_recall, _ = precision_recall_curve(y_test, probs_cb)

# Random Forest

In [None]:
#Hyperparameter tuning for Random Forest.

def objective(trial):
    """Optuna objective for Random Forest.

    Samples one hyperparameter configuration, fits a RandomForestClassifier on
    the (undersampled) training split and scores it on the validation split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Validation AUROC computed from the predicted probabilities.
    """

    param = {
        "criterion": trial.suggest_categorical("criterion", ["gini", "entropy"]),
        # `bootstrap` is a boolean switch; string values are rejected by
        # scikit-learn's parameter validation (and previously were both
        # truthy, so the search never varied this setting).
        "bootstrap": trial.suggest_categorical("bootstrap", [True, False]),
        # "auto" meant "sqrt" for classifiers and is no longer an accepted
        # value, so it is dropped from the choices.
        "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2", None]),
        "max_depth": trial.suggest_int("max_depth", 1, 100),
        # `step` is passed by keyword; the default step is already 1.
        "n_estimators": trial.suggest_int("n_estimators", 100, 2000, step=100),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 4),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
    }

    rf = RandomForestClassifier(**param)
    rf.fit(train_x, train_y)

    # predict() returns hard class labels; AUROC is a ranking metric, so the
    # positive-class probability is scored instead.
    preds = rf.predict_proba(valid_x)[:, 1]
    return sklearn.metrics.roc_auc_score(valid_y, preds)


if __name__ == "__main__":
    # Run the Random Forest search (at most 100 trials or 600 seconds),
    # maximising the value returned by objective().
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=100, timeout=600)

    print("Number of finished trials: {}".format(len(study.trials)))

    # Report the best trial and its sampled hyperparameters.
    print("Best trial:")
    trial = study.best_trial
    print("  Value: {}".format(trial.value))
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Copy of the best hyperparameters, used to fit the final model.
    rf_params = dict(trial.params)

In [None]:
# Show the tuned Random Forest hyperparameters that are passed to the final model.
print(rf_params)

In [None]:
#Fit Random Forest.
# The final model is built from the best trial's hyperparameters and fitted on
# the undersampled training split.

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(**rf_params)

rf.fit(train_x, train_y)

In [None]:
#Test-set predictions from the fitted Random Forest model: hard labels and
#probabilities for the second predict_proba column (the positive class).

preds_rf = rf.predict(x_test)

probs_rf = rf.predict_proba(x_test)[:, 1]

In [None]:
#Evaluate Random Forest model.
# scikit-learn metrics take (y_true, y_pred) in that order. Precision and
# recall are not symmetric, so passing the predictions first would report
# recall as precision and vice versa.

rf_precision = precision_score(y_test, preds_rf)
rf_recall = recall_score(y_test, preds_rf)
rf_f1 = f1_score(y_test, preds_rf)
rf_acc = accuracy_score(y_test, preds_rf)
rf_mcc = matthews_corrcoef(y_test, preds_rf)
# AUROC is threshold-free and therefore uses the predicted probabilities.
rf_auroc = roc_auc_score(y_test, probs_rf)

print("Precision: %.3f" % (rf_precision))
print("Recall: %.3f" % (rf_recall))
print("F1 Score: %.3f" %(rf_f1))
print('Accuracy: %.3f' % (rf_acc))
print('MCC: %.3f' % (rf_mcc))
print('AUROC: %.3f' % (rf_auroc))

In [None]:
#Evaluate Random Forest model (PRC and AUPRC).
# NOTE: this rebinds rf_precision and rf_recall from the scalar scores to the
# arrays returned by precision_recall_curve.

rf_precision, rf_recall, _ = precision_recall_curve(y_test, probs_rf)
# Area under the precision-recall curve (recall as x, precision as y).
rf_auprc = auc(rf_recall, rf_precision)

print('AUPRC: %.3f' % (rf_auprc))

In [None]:
#Recalculate precision and recall for calculation purposes.
# rf_precision / rf_recall currently hold precision-recall curve arrays, so
# the scalar scores are recomputed here. Arguments are (y_true, y_pred);
# reversing them would swap precision and recall in the results table.

rf_precision = precision_score(y_test, preds_rf)
rf_recall = recall_score(y_test, preds_rf)

# Order: precision, recall, F1, accuracy, MCC, AUROC, AUPRC.
rf_results = [rf_precision, rf_recall, rf_f1, rf_acc, rf_mcc, rf_auroc, rf_auprc]

In [None]:
#Recalculate precision recall curve for plotting purposes.
# Rebinds rf_precision / rf_recall to curve arrays again; the plotting cell
# further down reads these names.

rf_precision, rf_recall, _ = precision_recall_curve(y_test, probs_rf)

# ROC, PR, and Calibration Plots

In [None]:
# ROC curves of the four fitted models on the test set, drawn on one 12x12 figure.
f = pyplot.figure(figsize = (12, 12))

# Compute every curve first, then draw them in the same order as the legend.
xgb_fpr, xgb_tpr, _ = roc_curve(y_test, probs_xgb)
lgb_fpr, lgb_tpr, _ = roc_curve(y_test, probs_lgb)
cb_fpr, cb_tpr, _ = roc_curve(y_test, probs_cb)
rf_fpr, rf_tpr, _ = roc_curve(y_test, probs_rf)

pyplot.plot(xgb_fpr, xgb_tpr, label='XGBoost AUROC: {:.3f}'.format(xgb_auroc), color='red')
pyplot.plot(lgb_fpr, lgb_tpr, label='LightGBM AUROC: {:.3f}'.format(lgb_auroc), color='darkblue')
pyplot.plot(cb_fpr, cb_tpr, label='CatBoost AUROC: {:.3f}'.format(cb_auroc), color = 'darkgreen')
pyplot.plot(rf_fpr, rf_tpr, label='Random Forest AUROC: {:.3f}'.format(rf_auroc), color = 'orange')

# Diagonal reference line (no legend entry).
pyplot.plot([0, 1], [0, 1], linestyle = '--')

pyplot.title('Receiver Operating Characteristic Curve', loc='center', fontsize = 20, fontweight = 'heavy', pad = 20)
pyplot.xlabel('False Positive Rate', fontsize = 16, labelpad = 10)
pyplot.ylabel('True Positive Rate', fontsize = 16, labelpad = 10)
pyplot.tick_params(axis="both", direction="out")

leg = pyplot.legend(loc = 'lower right', fontsize = 12)

# Save before show().
pyplot.savefig('/content/drive/MyDrive/NSQIP-PLF/los_roc.png', dpi=300)
pyplot.show()

In [None]:
# Precision-recall curves of the four fitted models on one 12x12 figure.
# The *_recall / *_precision names are expected to hold curve arrays here.
f = pyplot.figure(figsize = (12, 12))

pyplot.plot(xgb_recall, xgb_precision, color = 'red', label='XGBoost AUPRC: {:.3f}'.format(xgb_auprc))
pyplot.plot(lgb_recall, lgb_precision, color = 'darkblue', label='LightGBM AUPRC: {:.3f}'.format(lgb_auprc))
pyplot.plot(cb_recall, cb_precision, color = 'darkgreen', label='CatBoost AUPRC: {:.3f}'.format(cb_auprc))
pyplot.plot(rf_recall, rf_precision, color = 'orange', label='Random Forest AUPRC: {:.3f}'.format(rf_auprc))

pyplot.title('Precision Recall Curve', loc='center', fontsize = 20, fontweight = 'heavy', pad = 20)
pyplot.xlabel('Recall', fontsize = 16, labelpad = 10)
pyplot.ylabel('Precision', fontsize = 16, labelpad = 10)
leg = pyplot.legend(loc = 'lower right', fontsize = 12)

# Save before show().
pyplot.savefig('/content/drive/MyDrive/NSQIP-PLF/los_prc.png', dpi=300)
pyplot.show()

In [None]:
# Calibration (reliability) curves of the four fitted models on the test set.
f = pyplot.figure()
f.set_figwidth(12)
f.set_figheight(12)

# calibration_curve returns (fraction of positives, mean predicted probability)
# per bin, so x_cal_* holds the observed fraction and y_cal_* the mean
# prediction. `normalize` is not passed: the inputs are predict_proba outputs
# that already lie in [0, 1], min-max rescaling them would distort the curve,
# and the argument has been removed from scikit-learn.
x_cal_xgb, y_cal_xgb = calibration_curve(y_test, probs_xgb, n_bins = 10)
x_cal_lgb, y_cal_lgb = calibration_curve(y_test, probs_lgb, n_bins = 10)
x_cal_cb, y_cal_cb = calibration_curve(y_test, probs_cb, n_bins = 10)
x_cal_rf, y_cal_rf = calibration_curve(y_test, probs_rf, n_bins = 10)


pyplot.plot([0, 1], [0, 1], linestyle = '--', label = 'Ideally Calibrated')

# Mean predicted probability on the x-axis, observed positive fraction on the
# y-axis; each model is plotted against its own observed fractions.
pyplot.plot(y_cal_xgb, x_cal_xgb, label = 'XGBoost', color = 'red')
pyplot.plot(y_cal_lgb, x_cal_lgb, label = 'LightGBM', color = 'darkblue')
pyplot.plot(y_cal_cb, x_cal_cb, label = 'CatBoost', color = 'darkgreen')
pyplot.plot(y_cal_rf, x_cal_rf, label = 'Random Forest', color = 'orange')

pyplot.title('Calibration', loc='center', fontsize = 20, fontweight = 'heavy', pad = 20)
leg = pyplot.legend(loc = 'lower right', fontsize = 12)
pyplot.xlabel('Average Predicted Probability in each bin', fontsize = 16, labelpad = 10)
pyplot.ylabel('Ratio of positives', fontsize = 16, labelpad = 10)

pyplot.savefig('/content/drive/MyDrive/NSQIP-PLF/los_cal.png', dpi=300)
pyplot.show()

# Results Summary

In [None]:
# Metric table with one column per model (rows follow the order of the
# *_results lists).
results = pd.DataFrame(
    {
        'XGBoost': xgb_results,
        'LightGBM': lgb_results,
        'CatBoost': cb_results,
        'Random Forest': rf_results,
    },
    columns = ['XGBoost', 'LightGBM', 'CatBoost', 'Random Forest'],
)

results

In [None]:
# Final summary: one row per model, one named column per metric, written to
# Drive as CSV and displayed as the cell output.
results = pd.DataFrame(
    {
        'XGBoost': xgb_results,
        'LightGBM': lgb_results,
        'CatBoost': cb_results,
        'Random Forest': rf_results,
    }
).T

results.columns = ['Precision', 'Recall', 'F1', 'Accuracy', 'MCC', 'AUROC', 'AUPRC']

results.to_csv('/content/drive/MyDrive/NSQIP-PLF/los_results.csv')

results

# SHAP Plots

In [None]:
# SHAP explanation of the fitted XGBoost model's predict() output. x_test is
# passed both as the explainer's second argument and as the rows to explain.
# NOTE(review): predict() yields class labels rather than probabilities here;
# confirm that explaining the hard labels is the intent.
xgb_explainer = shap.Explainer(xgb.predict, x_test)
xgb_shap_values = xgb_explainer(x_test)

# Bar plot limited to 10 entries; show=False leaves the figure open so it can
# be laid out and saved below.
shap.plots.bar(xgb_shap_values, max_display = 10, show=False)
pyplot.tight_layout()
pyplot.savefig('/content/drive/MyDrive/NSQIP-PLF/los_shap_xgb.png', dpi=300)

In [None]:
# SHAP explanation of the fitted LightGBM model's predict() output. x_test is
# passed both as the explainer's second argument and as the rows to explain.
# NOTE(review): predict() yields class labels rather than probabilities here;
# confirm that explaining the hard labels is the intent.
lgb_explainer = shap.Explainer(lgb.predict, x_test)
lgb_shap_values = lgb_explainer(x_test)

# Bar plot limited to 10 entries; show=False leaves the figure open so it can
# be laid out and saved below.
shap.plots.bar(lgb_shap_values, max_display = 10, show=False)
pyplot.tight_layout()
pyplot.savefig('/content/drive/MyDrive/NSQIP-PLF/los_shap_lgb.png', dpi=300)

In [None]:
# SHAP explanation of the fitted CatBoost model's predict() output. x_test is
# passed both as the explainer's second argument and as the rows to explain.
# NOTE(review): predict() yields class labels rather than probabilities here;
# confirm that explaining the hard labels is the intent.
cb_explainer = shap.Explainer(cb.predict, x_test)
cb_shap_values = cb_explainer(x_test)

# Bar plot limited to 10 entries; show=False leaves the figure open so it can
# be laid out and saved below.
shap.plots.bar(cb_shap_values, max_display = 10, show=False)
pyplot.tight_layout()
pyplot.savefig('/content/drive/MyDrive/NSQIP-PLF/los_shap_cb.png', dpi=300)

In [None]:
# SHAP explanation of the fitted Random Forest model's predict() output. x_test
# is passed both as the explainer's second argument and as the rows to explain.
# NOTE(review): predict() yields class labels rather than probabilities here;
# confirm that explaining the hard labels is the intent.
rf_explainer = shap.Explainer(rf.predict, x_test)
rf_shap_values = rf_explainer(x_test)

# Bar plot limited to 10 entries; show=False leaves the figure open so it can
# be laid out and saved below.
shap.plots.bar(rf_shap_values, max_display = 10, show=False)
pyplot.tight_layout()
pyplot.savefig('/content/drive/MyDrive/NSQIP-PLF/los_shap_rf.png', dpi=300)