Reference : https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.StackingClassifier.html

In [260]:
# Import libraries
import joblib
import pickle
import pandas as pd
import numpy as np
import tensorflow as tf
from mlxtend.classifier import StackingCVClassifier
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
import itertools
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix

In [261]:
# Read the data and re-introduce the train-test split
df = pd.read_csv("../data/processed/nhanes_data_processed_label_encoded.csv")

with open("../data/results/split_data.pkl", "rb") as f:
    split_data = pickle.load(f)

with open("../data/results/metadata.pkl", "rb") as f:
    metadata = pickle.load(f)
feature_names = metadata["feature_names"]

RANDOM_STATE = 42
X_train = split_data["X_train"]
X_train_scaled = split_data["X_train_scaled"]
y_train = split_data["y_train"]
X_test = split_data["X_test"]
X_test_scaled = split_data["X_test_scaled"]
y_test = split_data["y_test"]

In [262]:
# Load models
dt_model = joblib.load('../models/decision_tree_model.pkl')
rf_model = joblib.load('../models/random_forest_model.pkl')
xgb_model = joblib.load('../models/xgboost_model.pkl')
adb_model = joblib.load('../models/adaboost_model.pkl')
lgr_model = joblib.load('../models/logistic_regression_model.pkl')
knn_model = joblib.load('../models/k-nearest_neighbors_model.pkl')
svc_model = joblib.load('../models/support_vector_machine_model.pkl')
nn_model = tf.keras.models.load_model("../models/neural_network_model.keras", compile=True)

In [263]:
# Wrap the neural network Keras model
# epochs=0 to skip training since it is already trained
nn_clf = KerasClassifier(model=nn_model, epochs=0) 


In [264]:
# Build stacking ensemble (excluding neural network for scikit-learn StackingClassifier)
base_estimators = [
    ("dt", dt_model),
    ("rf", rf_model),
    ("xgb", xgb_model),
    ("adb", adb_model),
    ("lgr", lgr_model),
    ("knn", knn_model),
    ("svc", svc_model),
    ("nn", nn_clf)
]

# Define meta-model
meta_model = LogisticRegression(class_weight="balanced", max_iter=100, multi_class="multinomial", n_jobs=-1, random_state=42)

# Create StratifiedKFold for cross validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [265]:
# Extract model names and create a lookup dictionary
model_names_list = [name for name, _ in base_estimators]
model_dict = {name: model for name, model in base_estimators}

# Define evaluation function for any subset
def evaluate_subset(model_names):
    classifiers = [model_dict[name] for name in model_names]
    stack = StackingCVClassifier(
        classifiers=classifiers,
        meta_classifier=meta_model,
        cv=cv,
        n_jobs=-1,
        use_probas=True,
        random_state=42
    )
    scores = cross_val_score(stack, X_train, y_train, cv=cv, scoring="f1_macro", n_jobs=-1)
    return scores.mean()

# Search all subsets (size 2 up to all models)
best_score = -float("inf")
best_subset = None

for subset_size in range(2, len(model_names_list) + 1):
    for subset in itertools.combinations(model_names_list, subset_size):
        score = evaluate_subset(subset)
        print(f"Subset {subset} - Macro F1: {score:.4f}")
        if score > best_score:
            best_score = score
            best_subset = subset

print(f"Best subset: {best_subset} with Macro F1: {best_score:.4f}")

Subset ('dt', 'rf') - Macro F1: 0.8877
Subset ('dt', 'xgb') - Macro F1: 0.9135
Subset ('dt', 'adb') - Macro F1: 0.8313
Subset ('dt', 'lgr') - Macro F1: 0.8288
Subset ('dt', 'knn') - Macro F1: 0.8875
Subset ('dt', 'svc') - Macro F1: 0.8704
Subset ('dt', 'nn') - Macro F1: 0.8213
Subset ('rf', 'xgb') - Macro F1: 0.9135
Subset ('rf', 'adb') - Macro F1: 0.8832
Subset ('rf', 'lgr') - Macro F1: 0.8859
Subset ('rf', 'knn') - Macro F1: 0.9053
Subset ('rf', 'svc') - Macro F1: 0.8802
Subset ('rf', 'nn') - Macro F1: 0.8846
Subset ('xgb', 'adb') - Macro F1: 0.9144
Subset ('xgb', 'lgr') - Macro F1: 0.9144
Subset ('xgb', 'knn') - Macro F1: 0.9225
Subset ('xgb', 'svc') - Macro F1: 0.9140
Subset ('xgb', 'nn') - Macro F1: 0.9149
Subset ('adb', 'lgr') - Macro F1: 0.8005
Subset ('adb', 'knn') - Macro F1: 0.8800
Subset ('adb', 'svc') - Macro F1: 0.8632
Subset ('adb', 'nn') - Macro F1: 0.7827
Subset ('lgr', 'knn') - Macro F1: 0.8774
Subset ('lgr', 'svc') - Macro F1: 0.8623
Subset ('lgr', 'nn') - Macro F1: 0

In [266]:
# Extract only the model objects for mlxtend (not tuples)
best_subset_models = [model_dict[model] for model in best_subset]

In [267]:
# Create and fit the stacking ensemble
stacking_clf = StackingCVClassifier(
    classifiers=best_subset_models,
    meta_classifier=meta_model,
    cv=cv,
    n_jobs=-1,
    use_probas=True,
    random_state=42
    )

# Fit the stacking ensemble with training
stacking_clf.fit(X_train, y_train)



In [268]:
# Predict using the stacking ensemble
y_pred = stacking_clf.predict(X_test_scaled)
y_pred



array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1])

In [269]:
# Generate confusion matrix and display with each row and column labelled
sclf_cnf_matrix_test = confusion_matrix(y_test, y_pred, labels=dt_model.classes_)

cnf_matrix_test_df = pd.DataFrame(sclf_cnf_matrix_test, columns=["Diabetes_yes_pred", "Diabetes_no_pred", "Diabetes_borderline_pred"], 
                                    index=["Diabetes_yes_actual", "Diabetes_no_actual", "Diabetes_borderline_actual"])
cnf_matrix_test_df

Unnamed: 0,Diabetes_yes_pred,Diabetes_no_pred,Diabetes_borderline_pred
Diabetes_yes_actual,5,26,0
Diabetes_no_actual,1,185,0
Diabetes_borderline_actual,1,9,0


In [270]:
# Generate classification report
sclf_model_classfication_report = classification_report(y_test, y_pred, target_names=["Diabetes_yes", "Diabetes_no", "Diabetes_borderline"], output_dict=True)
pd.DataFrame.from_dict(sclf_model_classfication_report).T

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,precision,recall,f1-score,support
Diabetes_yes,0.714286,0.16129,0.263158,31.0
Diabetes_no,0.840909,0.994624,0.91133,186.0
Diabetes_borderline,0.0,0.0,0.0,10.0
accuracy,0.837004,0.837004,0.837004,0.837004
macro avg,0.518398,0.385305,0.391496,227.0
weighted avg,0.786572,0.837004,0.782666,227.0


In [271]:
# Generate the ROC AUC score
from sklearn.metrics import roc_auc_score

y_pred_proba_sclf = stacking_clf.predict_proba(X_test)

sclf_model_roc_auc = roc_auc_score(y_test, y_pred_proba_sclf, average="macro", multi_class="ovr")
print(sclf_model_roc_auc)

0.760382490263349
