Reference : https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.StackingClassifier.html

In [25]:
# Import libraries
import joblib
import pickle
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
import shap

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the data and re-introduce the train-test split
df = pd.read_csv("../data/processed/nhanes_data_processed_label_encoded.csv")

with open("../data/results/split_data.pkl", "rb") as f:
    split_data = pickle.load(f)

with open("../data/results/metadata.pkl", "rb") as f:
    metadata = pickle.load(f)
feature_names = metadata["feature_names"]

RANDOM_STATE = 42
X_train = split_data["X_train"]
X_train_scaled = split_data["X_train_scaled"]
y_train = split_data["y_train"]
X_test = split_data["X_test"]
X_test_scaled = split_data["X_test_scaled"]
y_test = split_data["y_test"]

In [4]:
# Load models
dt_model = joblib.load('../models/decision_tree_model.pkl')
rf_model = joblib.load('../models/random_forest_model.pkl')
xgb_model = joblib.load('../models/xgboost_model.pkl')
adb_model = joblib.load('../models/adaboost_model.pkl')
lgr_model = joblib.load('../models/logistic_regression_model.pkl')
knn_model = joblib.load('../models/k-nearest_neighbors_model.pkl')
svc_model = joblib.load('../models/support_vector_machine_model.pkl')
nn_model = tf.keras.models.load_model("../models/neural_network_model.keras", compile=True)

In [6]:
# Build stacking ensemble (excluding neural network for scikit-learn StackingClassifier)
base_models = [
    ("rf", rf_model),
    ("xgb", xgb_model),
    ("lgr", lgr_model)
    ]

# Define meta-model
meta_model = LogisticRegression(class_weight="balanced", max_iter=100, multi_class="multinomial", n_jobs=-1, random_state=RANDOM_STATE)

# Create StratifiedKFold for cross validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [9]:
# Create and fit the stacking ensemble
stacking_clf = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=cv,
    n_jobs=-1,
    stack_method="predict_proba",
    passthrough=False
    )

# Define the parameter grid for optimising the meta-model
param_grid = {"final_estimator__penalty" : ["l2", "l1", "elasticnet"],
                "final_estimator__solver" : ["lbfgs", "newton-cholesky", "saga"],
                "final_estimator__C" : [0.01, 0.1, 1],
                "final_estimator__max_iter": [100, 200]
}

# Initialise and run GridSearchCV
grid_search = GridSearchCV(estimator=stacking_clf,
                            param_grid=param_grid,
                            scoring="f1_macro",
                            cv=cv,
                            n_jobs=-1,
                            verbose=2
                            )

# Build the grid search using the training set
grid_search.fit(X_train, y_train)

print(f"Best hyperparameters values: {grid_search.best_params_}")

Fitting 5 folds for each of 54 candidates, totalling 270 fits


180 fits failed out of a total of 270.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\ryans\AppData\Roaming\Python\Python312\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ryans\AppData\Roaming\Python\Python312\site-packages\sklearn\ensemble\_stacking.py", line 672, in fit
    return super().fit(X, y_encoded, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ryans\AppData\Roaming\Python\Python312\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^

Best hyperparameters values: {'final_estimator__C': 1, 'final_estimator__max_iter': 100, 'final_estimator__penalty': 'l2', 'final_estimator__solver': 'lbfgs'}




In [10]:
# Create the optimised meta-model and stacking ensemble
meta_model_optimised = LogisticRegression(class_weight="balanced", C=1, max_iter=100, penalty="l2", multi_class="multinomial", solver="lbfgs", n_jobs=-1, random_state=RANDOM_STATE)

stacking_clf_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model_optimised,
    cv=cv,
    n_jobs=-1,
    stack_method="predict_proba",
    passthrough=False
    )

# Fit the stacking ensemble with training
stacking_clf_model.fit(X_train, y_train)



In [11]:
# Predict using the stacking ensemble|
y_pred = stacking_clf_model.predict(X_test)
y_pred

array([1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 2, 0, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 2, 1,
       1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 2, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 2, 1, 0, 1, 1, 0,
       2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 0, 0, 1, 1, 1, 1, 2, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 2, 1, 1])

In [12]:
# Generate confusion matrix and display with each row and column labelled
sclf_cnf_matrix_test = confusion_matrix(y_test, y_pred, labels=stacking_clf_model.classes_)

cnf_matrix_test_df = pd.DataFrame(sclf_cnf_matrix_test, columns=["Diabetes_yes_pred", "Diabetes_no_pred", "Diabetes_borderline_pred"], 
                                    index=["Diabetes_yes_actual", "Diabetes_no_actual", "Diabetes_borderline_actual"])
cnf_matrix_test_df

Unnamed: 0,Diabetes_yes_pred,Diabetes_no_pred,Diabetes_borderline_pred
Diabetes_yes_actual,23,7,1
Diabetes_no_actual,13,163,10
Diabetes_borderline_actual,3,5,2


In [13]:
# Generate classification report
sclf_model_classfication_report = classification_report(y_test, y_pred, target_names=["Diabetes_yes", "Diabetes_no", "Diabetes_borderline"], output_dict=True)
pd.DataFrame.from_dict(sclf_model_classfication_report).T

Unnamed: 0,precision,recall,f1-score,support
Diabetes_yes,0.589744,0.741935,0.657143,31.0
Diabetes_no,0.931429,0.876344,0.903047,186.0
Diabetes_borderline,0.153846,0.2,0.173913,10.0
accuracy,0.828194,0.828194,0.828194,0.828194
macro avg,0.558339,0.606093,0.578034,227.0
weighted avg,0.850512,0.828194,0.837345,227.0


In [14]:
# Generate the ROC AUC score
from sklearn.metrics import roc_auc_score

y_pred_proba_sclf = stacking_clf_model.predict_proba(X_test)

sclf_model_roc_auc = roc_auc_score(y_test, y_pred_proba_sclf, average="macro", multi_class="ovr")
print(sclf_model_roc_auc)

0.7404936745219987


## Results comparison

In [15]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import pickle

In [16]:
# Load results
with open("../data/results/model_results.pkl", "rb") as f:
    results = pickle.load(f)

confusion_matrices = results["confusion_matrices"]
auc_scores = results["auc_scores"]
classification_reports = results["classification_reports"]
pred_proba = results["pred_proba"]

In [17]:
confusion_matrices["Stacked Ensemble"] = sclf_cnf_matrix_test
auc_scores["Stacked Ensemble"] = sclf_model_roc_auc
classification_reports["Stacked Ensemble"] = sclf_model_classfication_report
pred_proba["Stacked Ensemble"] = y_pred_proba_sclf

In [18]:
# Iterate through the models and combine the macro precision, recall, f1-score, and ROC AUC into a single DataFrame
results_comparison = {}

for model_name in classification_reports.keys():
    classification_report = classification_reports[model_name]
    auc_score = auc_scores[model_name]
    
    results_comparison[model_name] = {
        "Macro Precision" : classification_report["macro avg"]["precision"],
        "Macro Recall" : classification_report["macro avg"]["recall"],
        "Macro F1" : classification_report["macro avg"]["f1-score"],
        "ROC AUC" : auc_scores[model_name]
    }

results_comparison_df = pd.DataFrame.from_dict(results_comparison, orient="index")
results_comparison_df = results_comparison_df.round(4).sort_values(["ROC AUC", "Macro F1", "Macro Recall", "Macro Precision"], ascending=False)

print(f"Model results comparison:\n{results_comparison_df}")

Model results comparison:
                        Macro Precision  Macro Recall  Macro F1  ROC AUC
Logistic Regression              0.5569        0.5642    0.5227   0.7995
Random Forest                    0.5468        0.6305    0.5700   0.7896
XGBoost                          0.5596        0.6079    0.5794   0.7762
Support Vector Machine           0.4566        0.4688    0.4546   0.7540
Neural Network                   0.4891        0.5172    0.4904   0.7406
Stacked Ensemble                 0.5583        0.6061    0.5780   0.7405
AdaBoost                         0.5288        0.5692    0.5450   0.7312
Decision Tree                    0.4732        0.5534    0.4671   0.7112
k-Nearest Neighbors              0.4816        0.4968    0.4681   0.6906


In [19]:
# Iterate through the models and compare the recall scores of "Diabetes_yes"
yes_recall_comparison = {}

for model_name, classification_report in classification_reports.items():
    
    yes_recall_comparison[model_name] = {
        "Recall" : classification_report["Diabetes_yes"]["recall"]
    }
    
    yes_recall_comparison_df = pd.DataFrame.from_dict(yes_recall_comparison, orient="index")
    
yes_recall_comparison_df = yes_recall_comparison_df.round(4).sort_values("Recall", ascending=False)

print(f"[Diabetes_yes] Recall comparison:\n{yes_recall_comparison_df}")

[Diabetes_yes] Recall comparison:
                        Recall
Random Forest           0.7742
AdaBoost                0.7419
XGBoost                 0.7419
Stacked Ensemble        0.7419
Decision Tree           0.7097
Logistic Regression     0.7097
Neural Network          0.6774
k-Nearest Neighbors     0.5484
Support Vector Machine  0.5161


In [20]:
# Iterate through the models and compare the precision scores of "Diabetes_no"
no_precision_f1_comparison = {}

for model_name, classification_report in classification_reports.items():
    
    no_precision_f1_comparison[model_name] = {
        "Precision" : classification_report["Diabetes_no"]["precision"],
        "F1" : classification_report["Diabetes_no"]["f1-score"]
    }
    
    no_precision_comparison_df = pd.DataFrame.from_dict(no_precision_f1_comparison, orient="index")
    
no_precision_comparison_df = no_precision_comparison_df.round(4).sort_values("F1", ascending=False)

print(f"[Diabetes_no] Precision and F1 comparison:\n{no_precision_comparison_df}")

[Diabetes_no] Precision and F1 comparison:
                        Precision      F1
XGBoost                    0.9371  0.9086
Stacked Ensemble           0.9314  0.9030
AdaBoost                   0.9253  0.8944
Random Forest              0.9441  0.8761
Support Vector Machine     0.9130  0.8473
Neural Network             0.9351  0.8471
k-Nearest Neighbors        0.9388  0.8288
Logistic Regression        0.9549  0.7962
Decision Tree              0.9380  0.7683


In [21]:
# Iterate through the models and compare the recall scores of "Diabetes_borderline"
borderline_recall_comparison = {}

for model_name, classification_report in classification_reports.items():
    
    borderline_recall_comparison[model_name] = {
        "Recall" : classification_report["Diabetes_borderline"]["recall"]
    }
    
    borderline_recall_comparison_df = pd.DataFrame.from_dict(borderline_recall_comparison, orient="index")
    
borderline_recall_comparison_df = borderline_recall_comparison_df.round(4).sort_values("Recall", ascending=False)

print(f"[Diabetes_borderline] Recall comparison:\n{borderline_recall_comparison_df}")

[Diabetes_borderline] Recall comparison:
                        Recall
Decision Tree              0.3
Random Forest              0.3
Logistic Regression        0.3
XGBoost                    0.2
k-Nearest Neighbors        0.2
Stacked Ensemble           0.2
AdaBoost                   0.1
Support Vector Machine     0.1
Neural Network             0.1
