# Setup

In [1]:
# ============================================================
# File: tabpfn_optuna_ensemble_all.py
# Purpose: End-to-end TabPFN + Optuna-tuned models + Weighted Ensemble
# ============================================================

import os
import time
import random
import warnings
import numpy as np
import pandas as pd
import torch
import optuna
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
)
from sklearn.ensemble import (
    RandomForestClassifier,
    GradientBoostingClassifier,
)
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from tabpfn import TabPFNClassifier

# ============================================================
# 0Ô∏è‚É£ Environment setup (one seed to rule them all)
# ============================================================

warnings.filterwarnings("ignore", category=UserWarning)
os.environ["TABPFN_DEVICE"] = "cuda" if torch.cuda.is_available() else "cpu"
os.environ["HF_TOKEN"] = "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"

GLOBAL_SEED = 24  # üî∏ Change this ONE line to update all seeds globally

def set_global_seed(seed: int):
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_global_seed(GLOBAL_SEED)


DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"‚úÖ Using device: {DEVICE.upper()} (seed={GLOBAL_SEED})")

# ============================================================
# 1Ô∏è‚É£ Load & prepare data
# ============================================================

data_path = r"C:\Data.csv"
df = pd.read_csv(data_path)
df["RiskLevel"] = df["RiskLevel"].map({"low risk": 0, "mid risk": 1, "high risk": 2})

X = df.drop("RiskLevel", axis=1)
y = df["RiskLevel"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=GLOBAL_SEED
)




‚úÖ Using device: CUDA (seed=24)


# Baseline Models

In [30]:
import time

# ============================================================
# Baseline Models (No Tuning) + TabPFN
# ============================================================

print("\nüî∞ Training Baseline Models (No Tuning)...")

baseline_models = {
    "RandomForest": RandomForestClassifier(random_state=GLOBAL_SEED),
    "XGBoost": XGBClassifier(random_state=GLOBAL_SEED,
                            n_estimators=50,
                             max_depth=5
                        ),
    "LightGBM": LGBMClassifier(random_state=GLOBAL_SEED,
                              verbose=-1  # silences all LightGBM info/warning logs
                          ),
    "GradientBoosting": GradientBoostingClassifier(random_state=GLOBAL_SEED),
    "MLP": MLPClassifier(
        hidden_layer_sizes=(100, 100),
        max_iter=300,
        random_state=GLOBAL_SEED,
    ),
    "kNN": KNeighborsClassifier(),
    "TabPFN_GPU": TabPFNClassifier(device=DEVICE)
}

def baseline_evaluate(model_name, model, X, y, cv=5):
    """Evaluate a single baseline model using Stratified K-Fold with timing."""
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=GLOBAL_SEED)
    metrics = []

    print(f"\n=== Baseline: {model_name} ===")

    total_time = 0

    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), 1):
        X_train_fold, X_val_fold = X.iloc[train_idx], X.iloc[val_idx]
        y_train_fold, y_val_fold = y.iloc[train_idx], y.iloc[val_idx]

        pipeline = Pipeline([
            ("clf", model)
        ])

        t0 = time.time()
        pipeline.fit(X_train_fold, y_train_fold)
        preds = pipeline.predict(X_val_fold)
        elapsed = time.time() - t0
        total_time += elapsed

        metrics.append({
            "fold": fold,
            "accuracy": accuracy_score(y_val_fold, preds),
            "precision": precision_score(y_val_fold, preds, average="macro", zero_division=0),
            "recall": recall_score(y_val_fold, preds, average="macro", zero_division=0),
            "f1_macro": f1_score(y_val_fold, preds, average="macro", zero_division=0),
            "fold_time_sec": elapsed
        })

    df = pd.DataFrame(metrics)

    # Remove "fold" before averaging metrics
    summary = df.drop(columns=["fold"]).mean(numeric_only=True)
    summary["Model"] = model_name
    summary["total_time_sec"] = df["fold_time_sec"].sum()

    return summary


# ------------------------------------------------------------
# Run all baselines
# ------------------------------------------------------------
baseline_results = []

for model_name, model in baseline_models.items():
    result = baseline_evaluate(model_name, model, X_train, y_train)
    baseline_results.append(result)

baseline_summary = (
    pd.DataFrame(baseline_results)
    .sort_values("accuracy", ascending=False)
    [["Model", "accuracy", "precision", "recall", "f1_macro", "total_time_sec"]]
)

print("\n\nüèÅ === Baseline Model Performance Summary (with Time) ===")
print(baseline_summary.round(4))

baseline_summary.to_csv("Baseline_Model_Results_With_Time.csv", index=False)
print("\nüìÅ Saved: Baseline_Model_Results_With_Time.csv")



üî∞ Training Baseline Models (No Tuning)...

=== Baseline: RandomForest ===

=== Baseline: XGBoost ===

=== Baseline: LightGBM ===

=== Baseline: GradientBoosting ===

=== Baseline: MLP ===

=== Baseline: kNN ===

=== Baseline: TabPFN_GPU ===


üèÅ === Baseline Model Performance Summary (with Time) ===
              Model  accuracy  precision  recall  f1_macro  total_time_sec
6        TabPFN_GPU    0.8249     0.8287  0.8271    0.8263          8.6408
1           XGBoost    0.8101     0.8151  0.8128    0.8122          0.3495
0      RandomForest    0.8027     0.8110  0.8067    0.8067          1.2027
2          LightGBM    0.7965     0.8023  0.8012    0.8003          0.7340
3  GradientBoosting    0.7719     0.7845  0.7746    0.7756          2.5407
5               kNN    0.6497     0.6762  0.6468    0.6531          0.0349
4               MLP    0.6140     0.6826  0.6100    0.5956          2.3457

üìÅ Saved: Baseline_Model_Results_With_Time.csv


## Phase I: Hyperparameter Tuning

In [19]:
# ============================================================
# 2Ô∏è‚É£ Define hyperparameter spaces
# ============================================================

def rf_space(trial):
    return {
        "n_estimators": trial.suggest_int("n_estimators", 100, 800),
        "max_depth": trial.suggest_int("max_depth", 3, 40),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 15),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
        "random_state": GLOBAL_SEED,
    }

def xgb_space(trial):
    return {
        "n_estimators": trial.suggest_int("n_estimators", 100, 600),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "eval_metric": "mlogloss",
        "use_label_encoder": False,
        "random_state": GLOBAL_SEED,
    }

def lgbm_space(trial):
    return {
        "n_estimators": trial.suggest_int("n_estimators", 100, 600),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.3, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 15, 150),
        "random_state": GLOBAL_SEED,
        "verbose": -1,
    }

def gb_space(trial):
    return {
        "n_estimators": trial.suggest_int("n_estimators", 100, 800),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "random_state": GLOBAL_SEED,
    }

def mlp_space(trial):
    return {
        "hidden_layer_sizes": (
            trial.suggest_int("h1", 50, 300),
            trial.suggest_int("h2", 50, 300),
        ),
        "activation": trial.suggest_categorical("activation", ["relu", "tanh"]),
        "learning_rate_init": trial.suggest_float("lr", 1e-4, 1e-2, log=True),
        "max_iter": 400,
        "random_state": GLOBAL_SEED,
    }

def knn_space(trial):
    return {
        "n_neighbors": trial.suggest_int("n_neighbors", 3, 30),
        "weights": trial.suggest_categorical("weights", ["uniform", "distance"]),
        "p": trial.suggest_categorical("p", [1, 2]),
    }

# ============================================================
# 3Ô∏è‚É£ Evaluation helper
# ============================================================

def evaluate_model(model_name, model, X, y, cv=5):
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=GLOBAL_SEED)
    records = []
    print(f"\n=== Training {model_name} ===")
    for fold, (tr_idx, val_idx) in enumerate(skf.split(X, y), 1):
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[tr_idx], y.iloc[val_idx]
        t0 = time.time()
        model.fit(X_tr, y_tr)
        preds = model.predict(X_val)
        t1 = time.time() - t0
        records.append({
            "fold": fold,
            "accuracy": accuracy_score(y_val, preds),
            "precision": precision_score(y_val, preds, average="macro", zero_division=0),
            "recall": recall_score(y_val, preds, average="macro", zero_division=0),
            "f1_macro": f1_score(y_val, preds, average="macro", zero_division=0),
            "train_time_sec": t1
        })
    df = pd.DataFrame(records)
    summary = df.mean().round(4)
    summary["Model"] = model_name
    return summary

# ============================================================
# 4Ô∏è‚É£ Optuna tuning
# ============================================================

def optuna_tune(model_name, model_class, param_space, X, y, n_trials=30):
    """
    Tune model hyperparameters with Optuna and print:
      - best parameters
      - total optimization time (seconds)
      - accuracy and F1 score
    """
    start_time = time.time()  # measure tuning duration

    def objective(trial):
        params = param_space(trial)
        model = model_class(**params)
        pipe = Pipeline([("scaler", StandardScaler()), ("clf", model)])
        scores = cross_val_score(pipe, X, y, cv=5, scoring="accuracy")
        return np.mean(scores)

    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=GLOBAL_SEED)
    )
    study.optimize(objective, n_trials=n_trials, show_progress_bar=False)

    elapsed = round(time.time() - start_time, 2)
    best_params = study.best_params

    # Fix MLP special case
    if model_name == "MLP":
        best_params = {
            "hidden_layer_sizes": (best_params["h1"], best_params["h2"]),
            "activation": best_params["activation"],
            "learning_rate_init": best_params["lr"],
            "max_iter": 400,
            "random_state": GLOBAL_SEED,
        }

    # Save tuning curve
    pd.DataFrame({
        "trial": [t.number for t in study.trials],
        "accuracy": [t.value for t in study.trials],
    }).to_csv(f"{model_name}_optuna_curve.csv", index=False)

    # Train and evaluate with best params
    best_model = model_class(**best_params)
    final_pipeline = Pipeline([("scaler", StandardScaler()), ("clf", best_model)])
    summary = evaluate_model(model_name, final_pipeline, X, y)

    # Extract accuracy and F1 from your summary
    acc = summary.get("accuracy")

    print("\n==============================")
    print(f"Model: {model_name}")
    print(f"Best Parameters: {best_params}")
    print(f"Optuna Time: {elapsed} sec")
    print(f"Accuracy: {acc}")
    print("==============================\n")

    return summary, best_params

# ============================================================
# 5Ô∏è‚É£ Train TabPFN and tune all other models
# ============================================================

results, best_params_dict = [], {}
tabpfn_model = TabPFNClassifier(device=DEVICE)
tabpfn_summary = evaluate_model("TabPFN_GPU", tabpfn_model, X_train, y_train)
tabpfn_summary["optuna_time_min"] = 0.0
results.append(tabpfn_summary)
best_params_dict["TabPFN_GPU"] = {"device": DEVICE}
n_trials_tune = 100

tunable_models = {
    "RandomForest": (RandomForestClassifier, rf_space, n_trials_tune),
    "XGBoost": (XGBClassifier, xgb_space, n_trials_tune),
    "LightGBM": (LGBMClassifier, lgbm_space, n_trials_tune),
    "GradientBoosting": (GradientBoostingClassifier, gb_space, n_trials_tune),
    "MLP": (MLPClassifier, mlp_space, n_trials_tune),
    "kNN": (KNeighborsClassifier, knn_space, n_trials_tune),
}

for name, (cls, space, n_trials) in tunable_models.items():
    summary, params = optuna_tune(name, cls, space, X_train, y_train, n_trials)
    results.append(summary)
    best_params_dict[name] = params

final_summary = pd.DataFrame(results).sort_values(by="accuracy", ascending=False)
final_summary.to_csv("All_Model_Results.csv", index=False)

print("\nüèÅ === Model Summary ===")
print(final_summary[["Model", "accuracy", "f1_macro"]])



=== Training TabPFN_GPU ===

=== Training RandomForest ===

Model: RandomForest
Best Parameters: {'n_estimators': 606, 'max_depth': 14, 'min_samples_split': 2, 'min_samples_leaf': 1}
Optuna Time: 284.92 sec
Accuracy: 0.8076


=== Training XGBoost ===

Model: XGBoost
Best Parameters: {'n_estimators': 266, 'max_depth': 14, 'learning_rate': 0.03322780739006341, 'subsample': 0.8386648614390065, 'colsample_bytree': 0.794378381656277}
Optuna Time: 325.85 sec
Accuracy: 0.8101


=== Training LightGBM ===

Model: LightGBM
Best Parameters: {'n_estimators': 526, 'max_depth': 11, 'learning_rate': 0.1702355797771671, 'num_leaves': 46}
Optuna Time: 184.07 sec
Accuracy: 0.8027


=== Training GradientBoosting ===

Model: GradientBoosting
Best Parameters: {'n_estimators': 346, 'learning_rate': 0.11163786742578959, 'max_depth': 14}
Optuna Time: 3892.46 sec
Accuracy: 0.8014


=== Training MLP ===

Model: MLP
Best Parameters: {'hidden_layer_sizes': (265, 89), 'activation': 'tanh', 'learning_rate_init': 0

## Phase II: Weighted Soft-voting Ensembles

In [23]:
import time

# ============================================================
# 6Ô∏è‚É£ Weighted Ensemble: Multiple Configurations + Comparison
# ============================================================

print("\nü§ù Building Multiple Weighted Ensembles (Top-3 + TabPFN variations)‚Ä¶")

# ------------------------------------------------------------
# Helper: build a timed weighted ensemble
# ------------------------------------------------------------
def run_weighted_ensemble(model_entries, X_train, y_train, X_test, y_test):
    """
    model_entries = list of tuples:
       [ (name, model_instance, weight), ... ]
    """

    # -------------------------
    # TIMING ‚Äî TRAINING
    # -------------------------
    t0_train = time.time()

    trained = []
    weights = []

    for name, model, w in model_entries:
        model.fit(X_train, y_train)
        trained.append((name, model))
        weights.append(w)

    train_time = time.time() - t0_train

    # Normalize weights
    weights = np.array(weights)
    weights = weights / weights.sum()

    # -------------------------
    # TIMING ‚Äî TESTING
    # -------------------------
    t0_test = time.time()

    proba_sum = np.zeros((X_test.shape[0], len(np.unique(y_test))))
    for (name, model), w in zip(trained, weights):
        proba_sum += model.predict_proba(X_test) * w

    pred = np.argmax(proba_sum, axis=1)

    test_time = time.time() - t0_test

    return {
        "accuracy": accuracy_score(y_test, pred),
        "precision": precision_score(y_test, pred, average="macro"),
        "recall": recall_score(y_test, pred, average="macro"),
        "f1": f1_score(y_test, pred, average="macro"),
        "pred": pred,
        "train_time": train_time,
        "test_time": test_time,
        "total_time": train_time + test_time,
    }


# ------------------------------------------------------------
# Prep: Sorted tuned models & top-3 (EXCLUDING TabPFN)
# ------------------------------------------------------------

# Remove TabPFN from ranking so it never appears in "Top-3"
tuned_non_tabpfn = final_summary[final_summary["Model"] != "TabPFN_GPU"]

# Sort by accuracy
tuned_non_tabpfn = tuned_non_tabpfn.sort_values("accuracy", ascending=False)

# Select the top-3 non-TabPFN models
top3 = tuned_non_tabpfn.head(3)


model_map = {
    "RandomForest": RandomForestClassifier,
    "XGBoost": XGBClassifier,
    "LightGBM": LGBMClassifier,
    "GradientBoosting": GradientBoostingClassifier,
    "MLP": MLPClassifier,
    "kNN": KNeighborsClassifier,
}

def build_model_entry(row):
    name = row["Model"]

    # TabPFN should not be rebuilt ‚Äî use the pre-trained instance
    if name == "TabPFN_GPU":
        return ("TabPFN_GPU", tabpfn_model, row["accuracy"])

    params = best_params_dict[name]
    model = model_map[name](**params)
    return (name, model, row["accuracy"])



# ------------------------------------------------------------
# Ensemble 1: Top-3 + TabPFN
# ------------------------------------------------------------
tabpfn_acc = tuned_models.loc[tuned_models["Model"] == "TabPFN_GPU", "accuracy"].values[0]
top3_plus_tabpfn = [build_model_entry(row) for _, row in top3.iterrows()] + [
    ("TabPFN_GPU", tabpfn_model, tabpfn_acc)
]

res_top3_tabpfn = run_weighted_ensemble(top3_plus_tabpfn, X_train, y_train, X_test, y_test)


# ------------------------------------------------------------
# Ensemble 2: Top-3 only (no TabPFN)
# ------------------------------------------------------------
top3_only = [build_model_entry(row) for _, row in top3.iterrows()]

res_top3_only = run_weighted_ensemble(top3_only, X_train, y_train, X_test, y_test)


# ------------------------------------------------------------
# Ensembles 3‚Äì5: TabPFN + each of the top-3 individually
# ------------------------------------------------------------
res_tabpfn_best = run_weighted_ensemble(
    [build_model_entry(top3.iloc[0]), ("TabPFN_GPU", tabpfn_model, tabpfn_acc)],
    X_train, y_train, X_test, y_test,
)

res_tabpfn_second = run_weighted_ensemble(
    [build_model_entry(top3.iloc[1]), ("TabPFN_GPU", tabpfn_model, tabpfn_acc)],
    X_train, y_train, X_test, y_test,
)

res_tabpfn_third = run_weighted_ensemble(
    [build_model_entry(top3.iloc[2]), ("TabPFN_GPU", tabpfn_model, tabpfn_acc)],
    X_train, y_train, X_test, y_test,
)


# ------------------------------------------------------------
# üìä Comparison Table WITH MODEL NAMES + TIMING
# ------------------------------------------------------------

def extract_model_names(model_entries):
    """Return comma-separated model names from ensemble entries."""
    return ", ".join([name for name, _, _ in model_entries])


comparison = pd.DataFrame([
    [
        "Top-3 + TabPFN",
        extract_model_names(top3_plus_tabpfn),
        res_top3_tabpfn["accuracy"],
        res_top3_tabpfn["precision"],
        res_top3_tabpfn["recall"],
        res_top3_tabpfn["f1"],
        res_top3_tabpfn["train_time"],
        res_top3_tabpfn["test_time"],
        res_top3_tabpfn["total_time"]
    ],
    [
        "Top-3 only",
        extract_model_names(top3_only),
        res_top3_only["accuracy"],
        res_top3_only["precision"],
        res_top3_only["recall"],
        res_top3_only["f1"],
        res_top3_only["train_time"],
        res_top3_only["test_time"],
        res_top3_only["total_time"]
    ],
    [
        "TabPFN + Best",
        extract_model_names([build_model_entry(top3.iloc[0]), ("TabPFN_GPU", tabpfn_model, tabpfn_acc)]),
        res_tabpfn_best["accuracy"],
        res_tabpfn_best["precision"],
        res_tabpfn_best["recall"],
        res_tabpfn_best["f1"],
        res_tabpfn_best["train_time"],
        res_tabpfn_best["test_time"],
        res_tabpfn_best["total_time"]
    ],
    [
        "TabPFN + 2nd",
        extract_model_names([build_model_entry(top3.iloc[1]), ("TabPFN_GPU", tabpfn_model, tabpfn_acc)]),
        res_tabpfn_second["accuracy"],
        res_tabpfn_second["precision"],
        res_tabpfn_second["recall"],
        res_tabpfn_second["f1"],
        res_tabpfn_second["train_time"],
        res_tabpfn_second["test_time"],
        res_tabpfn_second["total_time"]
    ],
    [
        "TabPFN + 3rd",
        extract_model_names([build_model_entry(top3.iloc[2]), ("TabPFN_GPU", tabpfn_model, tabpfn_acc)]),
        res_tabpfn_third["accuracy"],
        res_tabpfn_third["precision"],
        res_tabpfn_third["recall"],
        res_tabpfn_third["f1"],
        res_tabpfn_third["train_time"],
        res_tabpfn_third["test_time"],
        res_tabpfn_third["total_time"]
    ],
], columns=[
    "Ensemble", "Models", "Accuracy", "Precision", "Recall", "F1",
    "Train_Time", "Test_Time", "Total_Time"
])

print("\n\nüèÜ === Ensemble Comparison (with model names + timing) ===")
print(comparison.round(4))

comparison.to_csv("Ensemble_Comparison.csv", index=False)
print("\nüìÅ Saved: Ensemble_Comparison.csv")

# Save main ensemble predictions (Top-3 + TabPFN)
pd.DataFrame({
    "y_true": y_test,
    "y_pred": res_top3_tabpfn["pred"]
}).to_csv("Ensemble_Top3_TabPFN_Predictions.csv", index=False)

print("\nüìÅ Saved: Ensemble_Top3_TabPFN_Predictions.csv")
print("‚úÖ All ensemble variations completed (with timing).\n")



ü§ù Building Multiple Weighted Ensembles (Top-3 + TabPFN variations)‚Ä¶


üèÜ === Ensemble Comparison (with model names + timing) ===
         Ensemble                                       Models  Accuracy  \
0  Top-3 + TabPFN  XGBoost, RandomForest, LightGBM, TabPFN_GPU    0.8867   
1      Top-3 only              XGBoost, RandomForest, LightGBM    0.8768   
2   TabPFN + Best                          XGBoost, TabPFN_GPU    0.8916   
3    TabPFN + 2nd                     RandomForest, TabPFN_GPU    0.8818   
4    TabPFN + 3rd                         LightGBM, TabPFN_GPU    0.8818   

   Precision  Recall      F1  Train_Time  Test_Time  Total_Time  
0     0.8858  0.8946  0.8889      2.3179     1.7089      4.0268  
1     0.8770  0.8863  0.8799      2.0206     0.0868      2.1074  
2     0.8922  0.8987  0.8947      0.8948     1.4271      2.3219  
3     0.8805  0.8887  0.8836      1.6542     1.4573      3.1115  
4     0.8821  0.8905  0.8841      0.8202     1.4272      2.2474  

üìÅ Save

## Phase II: Stacking Ensemble Models

In [29]:
# ============================================================
# 7Ô∏è‚É£ STACKING ENSEMBLES (With Timing + Model Listing)
# ============================================================

from sklearn.ensemble import StackingClassifier
import time

# ------------------------------------
# Ensure top-3 DOES NOT include TabPFN
# ------------------------------------
top3_no_tabpfn = tuned_models[tuned_models["Model"] != "TabPFN_GPU"].head(3)
top3_names = top3_no_tabpfn["Model"].tolist()

print("Top-3 models (excluding TabPFN):", top3_names)

def build_model_instance(name):
    if name == "TabPFN_GPU":
        return tabpfn_model
    return model_map[name](**best_params_dict[name])

stacking_results = {}

def train_stacking(meta_name):
    
    meta_model = build_model_instance(meta_name)

    # ------------------------------------
    # Base models = top-3 + TabPFN EXCEPT meta
    # ------------------------------------
    all_models = top3_names + ["TabPFN_GPU"]
    base_models = [m for m in all_models if m != meta_name]
    
    # Remove duplicates (safety)
    base_models = list(dict.fromkeys(base_models))

    # Unique (name, estimator) pairs for sklearn
    estimators = [(f"{m}_base", build_model_instance(m)) for m in base_models]

    # ------------------------------------
    # TIMING ‚Äî TRAINING
    # ------------------------------------
    t0_train = time.time()
    stack = StackingClassifier(
        estimators=estimators,
        final_estimator=meta_model,
        stack_method="predict_proba",
        passthrough=False
    )
    stack.fit(X_train, y_train)
    train_time = time.time() - t0_train

    # ------------------------------------
    # TIMING ‚Äî TESTING
    # ------------------------------------
    t0_test = time.time()
    preds = stack.predict(X_test)
    test_time = time.time() - t0_test

    # ------------------------------------
    # Return full record
    # ------------------------------------
    return {
        "pred": preds,
        "accuracy": accuracy_score(y_test, preds),
        "precision": precision_score(y_test, preds, average="macro"),
        "recall": recall_score(y_test, preds, average="macro"),
        "f1": f1_score(y_test, preds, average="macro"),
        "meta": meta_name,
        "base_models": base_models,
        "train_time": train_time,
        "test_time": test_time,
        "total_time": train_time + test_time
    }


# ------------------------------------
# Train stacking ensembles
# ------------------------------------
for meta in ["TabPFN_GPU"] + top3_names:
    stacking_results[meta] = train_stacking(meta)


# ------------------------------------
# Convert to DataFrame with clean ‚ÄúModels‚Äù column
# ------------------------------------
def join_models(meta, base):
    """
    Represent ensemble structure clearly:
    Example:
        RF_base, XGB_base, TabPFN_base ‚Üí META=LightGBM
    """
    base_list = [f"{b}_base" for b in base]
    base_str = ", ".join(base_list)
    return f"{base_str} ‚Üí META = {meta}"

stacking_df = pd.DataFrame([
    [
        f"Stacking (meta={meta})",
        stacking_results[meta]["accuracy"],
        stacking_results[meta]["precision"],
        stacking_results[meta]["recall"],
        stacking_results[meta]["f1"],
        stacking_results[meta]["train_time"],
        stacking_results[meta]["test_time"],
        stacking_results[meta]["total_time"],
        join_models(meta, stacking_results[meta]["base_models"])
    ]
    for meta in stacking_results
], columns=[
    "Ensemble",
    "Accuracy",
    "Precision",
    "Recall",
    "F1",
    "Train_Time",
    "Test_Time",
    "Total_Time",
    "Models"
])



# ============================================================
# 8Ô∏è‚É£ COMBINE ALL ENSEMBLES (Voting + Stacking)
# ============================================================

all_ensembles = pd.concat([comparison, stacking_df], ignore_index=True)

# Sort two ways
rank_accuracy = all_ensembles.sort_values("Accuracy", ascending=False)
rank_f1 = all_ensembles.sort_values("F1", ascending=False)

print("\n\nüèÜ === ALL ENSEMBLES RANKED BY ACCURACY ===")
print(rank_accuracy.round(4))

print("\n\nüèÜ === ALL ENSEMBLES RANKED BY F1 SCORE ===")
print(rank_f1.round(4))

rank_accuracy.to_csv("All_Ensembles_Sorted_by_Accuracy.csv", index=False)
rank_f1.to_csv("All_Ensembles_Sorted_by_F1.csv", index=False)




Top-3 models (excluding TabPFN): ['XGBoost', 'RandomForest', 'LightGBM']


üèÜ === ALL ENSEMBLES RANKED BY ACCURACY ===
                       Ensemble  \
2                 TabPFN + Best   
0                Top-3 + TabPFN   
3                  TabPFN + 2nd   
4                  TabPFN + 3rd   
1                    Top-3 only   
8      Stacking (meta=LightGBM)   
5    Stacking (meta=TabPFN_GPU)   
6       Stacking (meta=XGBoost)   
7  Stacking (meta=RandomForest)   

                                              Models  Accuracy  Precision  \
2                                XGBoost, TabPFN_GPU    0.8916     0.8922   
0        XGBoost, RandomForest, LightGBM, TabPFN_GPU    0.8867     0.8858   
3                           RandomForest, TabPFN_GPU    0.8818     0.8805   
4                               LightGBM, TabPFN_GPU    0.8818     0.8821   
1                    XGBoost, RandomForest, LightGBM    0.8768     0.8770   
8  XGBoost_base, RandomForest_base, TabPFN_GPU_ba...    0.8621    

InvalidParameterError: The 'y_pred' parameter of classification_report must be an array-like or a sparse matrix. Got None instead.

# Final Model
## Final Ensemble Model Evaluation

In [67]:
# ============================================================
# ‚≠ê FINAL MODEL: TabPFN + XGBoost Weighted Ensemble
# ============================================================

import time
from sklearn.metrics import classification_report

print("\nüöÄ Training FINAL MODEL: TabPFN + XGBoost (Weighted Ensemble)‚Ä¶")

# ------------------------------------------------------------
# Get accuracy scores for weighting
# ------------------------------------------------------------
tabpfn_acc = float(tuned_models.loc[tuned_models["Model"] == "TabPFN_GPU", "accuracy"].values[0])
xgb_acc = float(final_summary.loc[final_summary["Model"] == "XGBoost", "accuracy"].values[0])

# ------------------------------------------------------------
# Build XGBoost model
# ------------------------------------------------------------
xgb_model = XGBClassifier(**best_params_dict["XGBoost"])

# ------------------------------------------------------------
# Prepare weighted models
# ------------------------------------------------------------
final_models = [
    ("XGBoost", xgb_model, xgb_acc),
    ("TabPFN_GPU", tabpfn_model, tabpfn_acc),
]

# ------------------------------------------------------------
# Run final ensemble
# ------------------------------------------------------------
final_result = run_weighted_ensemble(
    final_models, X_train, y_train, X_test, y_test
)

y_pred_final = final_result["pred"]

# ------------------------------------------------------------
# Print classification report (rounded)
# ------------------------------------------------------------
report = classification_report(y_test, y_pred_final, output_dict=True)
report_df = pd.DataFrame(report).T.round(4)

print("\nüìä === FINAL MODEL: TabPFN + XGBoost (Weighted Ensemble) ===\n")
print(report_df)

# ------------------------------------------------------------
# Print timing (rounded)
# ------------------------------------------------------------
print("\n‚è±Ô∏è === TIMING (Final Model) ===")
print(f"Training Time : {final_result['train_time']:.4f} sec")
print(f"Testing Time  : {final_result['test_time']:.4f} sec")
print(f"Total Time    : {final_result['total_time']:.4f} sec")

# ------------------------------------------------------------
# Save predictions
# ------------------------------------------------------------
pd.DataFrame({
    "y_true": y_test,
    "y_pred": y_pred_final
}).to_csv("Final_Model_TabPFN_XGBoost_Predictions.csv", index=False)

print("\nüìÅ Saved predictions to: Final_Model_TabPFN_XGBoost_Predictions.csv")
print("üéâ Final model training completed.\n")



üöÄ Training FINAL MODEL: TabPFN + XGBoost (Weighted Ensemble)‚Ä¶

üìä === FINAL MODEL: TabPFN + XGBoost (Weighted Ensemble) ===

              precision  recall  f1-score   support
0                0.9200  0.8519    0.8846   81.0000
1                0.8429  0.8806    0.8613   67.0000
2                0.9138  0.9636    0.9381   55.0000
accuracy         0.8916  0.8916    0.8916    0.8916
macro avg        0.8922  0.8987    0.8947  203.0000
weighted avg     0.8929  0.8916    0.8914  203.0000

‚è±Ô∏è === TIMING (Final Model) ===
Training Time : 1.2174 sec
Testing Time  : 1.7763 sec
Total Time    : 2.9937 sec

üìÅ Saved predictions to: Final_Model_TabPFN_XGBoost_Predictions.csv
üéâ Final model training completed.



# Save Final Model for Gradio

## Save TabPFN and XGBoost Models

In [64]:
import joblib
from xgboost import XGBClassifier

# -----------------------------
# Already trained models
# -----------------------------
# xgb_model is trained with X_train, y_train
# tabpfn_model is already trained earlier

# Save XGBoost
joblib.dump(xgb_model, "final_xgb_model.pkl")
print("‚úÖ Saved XGBoost model: final_xgb_model.pkl")

# Save TabPFN
joblib.dump(tabpfn_model, "final_tabpfn_model.pkl")
print("‚úÖ Saved TabPFN model: final_tabpfn_model.pkl")

# Optional: save ensemble weights for reference
import json
weights = {
    "XGBoost": float(xgb_acc),
    "TabPFN_GPU": float(tabpfn_acc)
}
with open("final_ensemble_weights.json", "w") as f:
    json.dump(weights, f)
print("‚úÖ Saved ensemble weights: final_ensemble_weights.json")


‚úÖ Saved XGBoost model: final_xgb_model.pkl
‚úÖ Saved TabPFN model: final_tabpfn_model.pkl
‚úÖ Saved ensemble weights: final_ensemble_weights.json


# Dashboard Building with Gradio
## Pre-compute SHAP Values with k-means Clustering

## Dashboard

### V1: XGBoost

In [5]:
import time
import numpy as np
import joblib
import shap
import torch
import gradio as gr
import matplotlib.pyplot as plt

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

FEATURE_NAMES = [
    "Age",
    "Systolic BP",
    "Diastolic BP",
    "Blood Sugar (mmol/L)",
    "Temperature (¬∞F)",
    "Heart Rate",
]

RISK_MAP = {0: "Low Risk", 1: "Mid Risk", 2: "High Risk"}

def format_input(age, sbp, dbp, sugar, temp, hr):
    return np.array([[age, sbp, dbp, sugar, temp, hr]], dtype=float)

def timed(fn):
    start = time.perf_counter()
    out = fn()
    return out, (time.perf_counter() - start) * 1000

model = joblib.load("final_xgb_model.pkl")
explainer = shap.TreeExplainer(model)

def predict(age, sbp, dbp, sugar, temp, hr):
    X = format_input(age, sbp, dbp, sugar, temp, hr)
    risk, t_pred = timed(lambda: RISK_MAP[int(model.predict(X)[0])])

    metrics = (
        "Model Accuracy: 81.01%\n"
        "F1 Score: 81.26%\n"
        f"Inference Time: {t_pred:.2f} ms"
    )

    return risk, metrics, f"{t_pred:.2f} ms", None, "Calculating..."

def compute_shap(age, sbp, dbp, sugar, temp, hr):
    X = format_input(age, sbp, dbp, sugar, temp, hr)

    def _compute():
        shap_vals = explainer.shap_values(X)
        pred_class = int(model.predict(X)[0])

        if isinstance(shap_vals, list):
            values = shap_vals[pred_class][0]
        else:
            values = shap_vals[0, :, pred_class]

        return np.abs(values), pred_class

    (values, pred_class), t_shap = timed(_compute)

    fig, ax = plt.subplots(figsize=(6, 3))
    ax.bar(FEATURE_NAMES, values)
    ax.set_title(f"Variable Contributions to {RISK_MAP[pred_class]}")
    ax.set_ylabel("Risk Score")
    plt.xticks(rotation=30, ha="right")
    plt.tight_layout()

    return fig, f"{t_shap:.2f} ms"

with gr.Blocks() as demo:
    gr.Markdown("# Maternal Health Risk Prediction Dashboard - XGBoost")

    with gr.Row():
        with gr.Column(scale=1):
            age = gr.Number(label="Age", value=30)
            sbp = gr.Number(label="Systolic Blood Pressure", value=120)
            dbp = gr.Number(label="Diastolic Blood Pressure", value=80)
            sugar = gr.Number(label="Blood Sugar (mmol/L)", value=7.0)
            temp = gr.Number(label="Body Temperature (¬∞F)", value=98.6)
            hr = gr.Number(label="Heart Rate", value=75)
            btn = gr.Button("üîç Predict Risk")

        with gr.Column(scale=1):
            risk_out = gr.Label(label="Risk Level")

            metrics_out = gr.Textbox(
                label="Model Metrics",
                interactive=False,
                lines=4,
                max_lines=4,
            )

            shap_plot = gr.Plot(label="Risk Factor Graph")

            shap_time = gr.Textbox(
                label="SHAP Computation Time",
                interactive=False,
                lines=1,
                max_lines=1,
            )

    btn.click(
        predict,
        inputs=[age, sbp, dbp, sugar, temp, hr],
        outputs=[risk_out, metrics_out, shap_time, shap_plot, shap_time],
        queue=False,
    ).then(
        compute_shap,
        inputs=[age, sbp, dbp, sugar, temp, hr],
        outputs=[shap_plot, shap_time],
    )

if __name__ == "__main__":
    demo.launch(inbrowser=True)


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


### V2: TabPFN (Kernel SHAP)

In [21]:
# app.py
import time
import numpy as np
import joblib
import shap
import torch
import gradio as gr
import matplotlib.pyplot as plt
import multiprocessing as mp

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SHAP_TIMEOUT_SECONDS = 1000

FEATURE_NAMES = [
    "Age",
    "Systolic BP",
    "Diastolic BP",
    "Blood Sugar (mmol/L)",
    "Temperature (¬∞F)",
    "Heart Rate",
]

RISK_MAP = {0: "Low Risk", 1: "Mid Risk", 2: "High Risk"}


def format_input(age, sbp, dbp, sugar, temp, hr):
    return np.array([[age, sbp, dbp, sugar, temp, hr]], dtype=float)


def timed(fn):
    start = time.perf_counter()
    out = fn()
    return out, (time.perf_counter() - start) * 1000


# ------------------ MODEL ------------------
model = joblib.load("final_tabpfn_model.pkl")


def predict(age, sbp, dbp, sugar, temp, hr):
    X = format_input(age, sbp, dbp, sugar, temp, hr)

    def _predict():
        probs = model.predict_proba(X)[0]
        return RISK_MAP[int(np.argmax(probs))]

    risk, t_pred = timed(_predict)

    metrics = (
        "Model Accuracy: 82.24%\n"
        "F1 Score: 82.39%\n\n"
        f"Inference Time: {t_pred:.2f} ms"
    )

    return risk, metrics, "Calculating...", None, "Calculating..."


# ------------------ SHAP (Kernel + Timeout) ------------------
def _shap_worker(X, queue):
    try:
        background = np.zeros((20, X.shape[1]))
        explainer = shap.KernelExplainer(model.predict_proba, background)
        shap_vals = explainer.shap_values(X, nsamples=100)
        pred_class = int(np.argmax(model.predict_proba(X)))
        values = np.abs(shap_vals[pred_class][0])
        queue.put((values, pred_class))
    except Exception:
        queue.put(None)  # ensure queue always returns


def compute_shap(age, sbp, dbp, sugar, temp, hr):
    X = format_input(age, sbp, dbp, sugar, temp, hr)
    queue = mp.Queue()
    process = mp.Process(target=_shap_worker, args=(X, queue))

    start = time.perf_counter()
    process.start()
    process.join(timeout=SHAP_TIMEOUT_SECONDS)

    if process.is_alive():
        process.terminate()
        process.join()
        return None, f"Stopped (>{SHAP_TIMEOUT_SECONDS}s timeout)"

    # Check if worker returned results
    if queue.empty():
        return None, f"Stopped (>{SHAP_TIMEOUT_SECONDS}s timeout)"

    values_pred = queue.get()
    if values_pred is None:
        return None, f"Stopped (>{SHAP_TIMEOUT_SECONDS}s timeout)"

    values, pred_class = values_pred
    t_shap = (time.perf_counter() - start) * 1000

    fig, ax = plt.subplots(figsize=(6, 3))
    ax.bar(FEATURE_NAMES, values)
    ax.set_title(f"Variable Contributions to {RISK_MAP[pred_class]}")
    ax.set_ylabel("Impact")
    plt.xticks(rotation=30, ha="right")
    plt.tight_layout()

    return fig, f"{t_shap:.2f} ms"


# ------------------ UI ------------------
with gr.Blocks() as demo:
    gr.Markdown("# Maternal Health Risk Prediction Dashboard - TabPFN Kernel SHAP")

    with gr.Row():
        with gr.Column(scale=1):
            age = gr.Number(label="Age", value=30)
            sbp = gr.Number(label="Systolic Blood Pressure", value=120)
            dbp = gr.Number(label="Diastolic Blood Pressure", value=80)
            sugar = gr.Number(label="Blood Sugar (mmol/L)", value=7.0)
            temp = gr.Number(label="Body Temperature (¬∞F)", value=98.6)
            hr = gr.Number(label="Heart Rate", value=75)
            btn = gr.Button("üîç Predict Risk")

        with gr.Column(scale=1):
            risk_out = gr.Label(label="Risk Level")

            metrics_out = gr.Textbox(
                label="Model Metrics",
                interactive=False,
                lines=4,
                max_lines=4,
            )

            shap_plot = gr.Plot(label="Risk Factor Graph")

            shap_time = gr.Textbox(
                label="SHAP Computation Time",
                interactive=False,
                lines=1,
                max_lines=1,
            )

    btn.click(
        predict,
        inputs=[age, sbp, dbp, sugar, temp, hr],
        outputs=[risk_out, metrics_out, shap_time, shap_plot, shap_time],
        queue=False,
    ).then(
        compute_shap,
        inputs=[age, sbp, dbp, sugar, temp, hr],
        outputs=[shap_plot, shap_time],
    )

if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    demo.launch(inbrowser=True)


* Running on local URL:  http://127.0.0.1:7876
* To create a public link, set `share=True` in `launch()`.


### V3: XGBoost-TabPFN Weighted Soft-voting Ensemble Model (Kernel SHAP)

In [20]:
# app.py
import time
import numpy as np
import joblib
import shap
import gradio as gr
import matplotlib.pyplot as plt
import multiprocessing as mp

SHAP_TIMEOUT_SECONDS = 1000
MODEL_PATH = "final_weighted_ensemble.pkl"

FEATURE_NAMES = [
    "Age",
    "Systolic BP",
    "Diastolic BP",
    "Blood Sugar (mmol/L)",
    "Temperature (¬∞F)",
    "Heart Rate",
]

RISK_MAP = {0: "Low Risk", 1: "Mid Risk", 2: "High Risk"}


def format_input(age, sbp, dbp, sugar, temp, hr):
    return np.array([[age, sbp, dbp, sugar, temp, hr]], dtype=float)


def timed(fn):
    start = time.perf_counter()
    out = fn()
    return out, (time.perf_counter() - start) * 1000


# ------------------ MODEL ------------------
model = joblib.load(MODEL_PATH)


def predict(age, sbp, dbp, sugar, temp, hr):
    X = format_input(age, sbp, dbp, sugar, temp, hr)

    def _predict():
        probs = model.predict_proba(X)[0]
        return RISK_MAP[int(np.argmax(probs))]

    risk, t_pred = timed(_predict)

    metrics = (
        "Model: XGBoost‚ÄìTabPFN Weighted Soft-Voting Ensemble\n"
        "Accuracy: 89.16%\n"
        "F1 Score: 89.48%\n\n"
        f"Inference Time: {t_pred:.2f} ms"
    )

    return risk, metrics, "Calculating...", None, "Calculating..."


# ------------------ SHAP (Kernel + Timeout) ------------------
def _shap_worker(model_path, X, queue):
    try:
        model_local = joblib.load(model_path)
        background = np.zeros((20, X.shape[1]))

        explainer = shap.KernelExplainer(
            model_local.predict_proba,
            background
        )

        shap_vals = explainer.shap_values(X, nsamples=100)
        pred_class = int(np.argmax(model_local.predict_proba(X)))

        values = np.abs(shap_vals[pred_class][0])
        queue.put((values, pred_class))
    except Exception:
        queue.put(None)


def compute_shap(age, sbp, dbp, sugar, temp, hr):
    X = format_input(age, sbp, dbp, sugar, temp, hr)
    queue = mp.Queue()

    process = mp.Process(
        target=_shap_worker,
        args=(MODEL_PATH, X, queue)
    )

    start = time.perf_counter()
    process.start()
    process.join(timeout=SHAP_TIMEOUT_SECONDS)

    if process.is_alive() or queue.empty():
        process.terminate()
        process.join()
        return None, f"Stopped (>{SHAP_TIMEOUT_SECONDS}s timeout)"

    result = queue.get()
    if result is None:
        return None, f"Stopped (>{SHAP_TIMEOUT_SECONDS}s timeout)"

    values, pred_class = result
    t_shap = (time.perf_counter() - start) * 1000

    fig, ax = plt.subplots(figsize=(6, 3))
    ax.bar(FEATURE_NAMES, values)
    ax.set_title(f"Variable Contributions to {RISK_MAP[pred_class]}")
    ax.set_ylabel("Impact")
    plt.xticks(rotation=30, ha="right")
    plt.tight_layout()

    return fig, f"{t_shap:.2f} ms"


# ------------------ UI ------------------
with gr.Blocks() as demo:
    gr.Markdown(
        "# Maternal Health Risk Prediction Dashboard\n"
        "**Model:** XGBoost‚ÄìTabPFN Weighted Soft-Voting Ensemble (Kernel SHAP)"
    )

    with gr.Row():
        with gr.Column(scale=1):
            age = gr.Number(label="Age", value=30)
            sbp = gr.Number(label="Systolic Blood Pressure", value=120)
            dbp = gr.Number(label="Diastolic Blood Pressure", value=80)
            sugar = gr.Number(label="Blood Sugar (mmol/L)", value=7.0)
            temp = gr.Number(label="Body Temperature (¬∞F)", value=98.6)
            hr = gr.Number(label="Heart Rate", value=75)
            btn = gr.Button("üîç Predict Risk")

        with gr.Column(scale=1):
            risk_out = gr.Label(label="Risk Level")
            metrics_out = gr.Textbox(
                label="Model Metrics",
                interactive=False,
                lines=5,
                max_lines=6,
            )
            shap_plot = gr.Plot(label="Risk Factor Graph")
            shap_time = gr.Textbox(
                label="SHAP Computation Time",
                interactive=False,
                lines=1,
                max_lines=1,
            )

    btn.click(
        predict,
        inputs=[age, sbp, dbp, sugar, temp, hr],
        outputs=[risk_out, metrics_out, shap_time, shap_plot, shap_time],
        queue=False,
    ).then(
        compute_shap,
        inputs=[age, sbp, dbp, sugar, temp, hr],
        outputs=[shap_plot, shap_time],
    )


if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    demo.launch(inbrowser=True)


* Running on local URL:  http://127.0.0.1:7875
* To create a public link, set `share=True` in `launch()`.


### V4: Weighted Soft-voting Ensemble Model (k-means Lookup)

In [13]:
import time
import numpy as np
import pandas as pd
import joblib
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.metrics import pairwise_distances_argmin

# ============================================================
# RESTORED CLASS ‚Äî MUST MATCH SAVED VERSION EXACTLY
# ============================================================

class WeightedEnsembleModel:
    def __init__(self, models, weights):
        self.models = models
        self.weights = np.array(weights) / np.sum(weights)

    def predict_proba(self, X):
        n_classes = self.models[0][1].predict_proba(X).shape[1]
        proba_sum = np.zeros((X.shape[0], n_classes))
        for (_, model), w in zip(self.models, self.weights):
            proba_sum += model.predict_proba(X) * w
        return proba_sum

    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)

# ============================================================
# CONFIG
# ============================================================

CSV_FEATURE_COLUMNS = [
    "Age", "SystolicBP", "DiastolicBP",
    "BS", "BodyTemp", "HeartRate",
]

UI_FEATURE_NAMES = [
    "Age", "Systolic BP", "Diastolic BP",
    "Blood Sugar", "Temperature", "Heart Rate",
]

RISK_MAP = {0: "üü¢ Low Risk", 1: "üü° Mid Risk", 2: "üî¥ High Risk"}

MODEL_PATH = "final_weighted_ensemble.pkl"
SHAP_LIBRARY_PATH = "shap_library_ensemble.pkl"

# ============================================================
# LOAD MODEL + SHAP LIBRARY
# ============================================================

ensemble = joblib.load(MODEL_PATH)

shap_lib = joblib.load(SHAP_LIBRARY_PATH)
CLUSTER_CENTERS = shap_lib["centers"]
SHAP_VALUES = shap_lib["shap_values"]
PRED_CLASSES = shap_lib["pred_classes"]

# ============================================================
# HELPERS
# ============================================================

def format_input(age, sbp, dbp, sugar, temp, hr):
    return pd.DataFrame([[age, sbp, dbp, sugar, temp, hr]],
                         columns=CSV_FEATURE_COLUMNS)

def nearest_cluster(X):
    return pairwise_distances_argmin(X.values, CLUSTER_CENTERS)[0]

# ============================================================
# PREDICTION
# ============================================================

def predict(age, sbp, dbp, sugar, temp, hr):
    start = time.perf_counter()

    X = format_input(age, sbp, dbp, sugar, temp, hr)
    risk_idx = int(ensemble.predict(X)[0])

    inference_ms = (time.perf_counter() - start) * 1000

    metrics_md = f"""
### üìä Model Performance  
**Weighted Ensemble (XGBoost + TabPFN)**  

- **Accuracy:** `0.8916`  
- **F1 Score:** `0.8947`  
- **Inference Time:** `{inference_ms:.2f} ms`
"""

    return RISK_MAP[risk_idx], metrics_md, f"{inference_ms:.2f} ms", None, "‚Äî"

# ============================================================
# SHAP (PRECOMPUTED ‚Äî TRUE WALL TIME)
# ============================================================

def compute_shap(age, sbp, dbp, sugar, temp, hr):
    start_total = time.perf_counter()

    X = format_input(age, sbp, dbp, sugar, temp, hr)
    idx = nearest_cluster(X)

    pred_class = PRED_CLASSES[idx]
    arr = np.squeeze(SHAP_VALUES[idx])

    if arr.ndim == 2:
        arr = np.mean(np.abs(arr), axis=1)

    # ---------- PLOT ----------
    fig, ax = plt.subplots(figsize=(6, 2.8))
    bars = ax.bar(UI_FEATURE_NAMES, arr)

    max_val = float(np.max(arr))
    padding = max_val * 0.15
    ax.set_ylim(0, max_val + padding)

    for bar in bars:
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            height + padding * 0.05,
            f"{height:.3f}",
            ha="center",
            va="bottom",
            fontsize=9,
            clip_on=True,
        )

    ax.set_title(f"Risk Contribution")
    ax.set_ylabel("SHAP Score")
    plt.xticks(rotation=25, ha="right")
    plt.tight_layout()

    total_ms = (time.perf_counter() - start_total) * 1000

    return fig, f"{total_ms:.2f} ms"

# ============================================================
# UI
# ============================================================

with gr.Blocks() as demo:
    "# Maternal Health Risk Prediction Dashboard\n"
    "**Model:** XGBoost‚ÄìTabPFN Weighted Soft-Voting Ensemble (k-means Lookup)"
    
    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            gr.Markdown("### üßç Patient Inputs")
            age = gr.Number(label="Age", value=30)
            sbp = gr.Number(label="Systolic BP", value=120)
            dbp = gr.Number(label="Diastolic BP", value=80)
            sugar = gr.Number(label="Blood Sugar (mmol/L)", value=7.0)
            temp = gr.Number(label="Temperature (¬∞F)", value=98.6)
            hr = gr.Number(label="Heart Rate", value=75)
            btn = gr.Button("üîç Predict Risk", variant="primary")

        with gr.Column(scale=1):
            risk_out = gr.Label(label="Risk Level")
            metrics_out = gr.Markdown()
            shap_plot = gr.Plot(label="Risk Factor Contribution")
            shap_time = gr.Textbox(label="SHAP End-to-End Time", interactive=False)

    btn.click(
        predict,
        inputs=[age, sbp, dbp, sugar, temp, hr],
        outputs=[risk_out, metrics_out, shap_time, shap_plot, shap_time],
        queue=False,
    ).then(
        compute_shap,
        inputs=[age, sbp, dbp, sugar, temp, hr],
        outputs=[shap_plot, shap_time],
    )

if __name__ == "__main__":
    demo.launch(inbrowser=True)


* Running on local URL:  http://127.0.0.1:7868
* To create a public link, set `share=True` in `launch()`.
