<a href="https://colab.research.google.com/github/prksh830/Healthcare/blob/main/EE_WSN_MH.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder, label_binarize
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, ConfusionMatrixDisplay,
    roc_curve, auc, precision_recall_curve
)

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.svm import SVC

from xgboost import XGBClassifier
!pip install catboost
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier




In [2]:
# Figure settings
IMAGE_DIR = "results_figures"
os.makedirs(IMAGE_DIR, exist_ok=True)

plt.rcParams.update({
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "font.size": 12
})


In [3]:
df = pd.read_csv("WSN_Latency_Categorical_Dataset.csv")

le = LabelEncoder()
df["Latency_Category"] = le.fit_transform(df["Latency_Category"])

categorical_cols = ["Congestion_Status", "Traffic_Class", "Routing_Algorithm"]
numerical_cols = [c for c in df.columns if c not in categorical_cols + ["Latency_Category"]]

for col in categorical_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

scaler = StandardScaler()
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

X = df.drop("Latency_Category", axis=1).values
y = df["Latency_Category"].values
n_classes = len(np.unique(y))


In [4]:
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.30, random_state=42)
for tr_idx, tmp_idx in sss1.split(X, y):
    X_train, X_temp = X[tr_idx], X[tmp_idx]
    y_train, y_temp = y[tr_idx], y[tmp_idx]

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.50, random_state=42)
for v_idx, te_idx in sss2.split(X_temp, y_temp):
    X_val, X_test = X_temp[v_idx], X_temp[te_idx]
    y_val, y_test = y_temp[v_idx], y_temp[te_idx]


In [5]:
def stratified_kfold_metrics(model, X, y, k):
    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
    acc, prec, rec, f1 = [], [], [], []

    for tr, vl in skf.split(X, y):
        model.fit(X[tr], y[tr])
        preds = model.predict(X[vl])

        acc.append(accuracy_score(y[vl], preds))
        prec.append(precision_score(y[vl], preds, average="weighted"))
        rec.append(recall_score(y[vl], preds, average="weighted"))
        f1.append(f1_score(y[vl], preds, average="weighted"))

    return np.mean(acc), np.mean(prec), np.mean(rec), np.mean(f1)


In [6]:
models = {
    "Decision_Tree": DecisionTreeClassifier(),
    "Random_Forest": RandomForestClassifier(n_estimators=100),
    "AdaBoost": AdaBoostClassifier(),
    "Gradient_Boosting": GradientBoostingClassifier(),
    "SVM": SVC(probability=True),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="mlogloss"),
    "CatBoost": CatBoostClassifier(verbose=0),
    "LightGBM": LGBMClassifier()
}


In [7]:
def save_confusion_matrix(model, X_test, y_test, name):
    preds = model.predict(X_test)
    cm = confusion_matrix(y_test, preds)

    fig, ax = plt.subplots(figsize=(6,5))
    ConfusionMatrixDisplay(cm).plot(ax=ax, cmap="Blues", colorbar=False)
    ax.set_title(f"Confusion Matrix – {name}")
    plt.tight_layout()
    plt.savefig(f"{IMAGE_DIR}/CM_{name}.tiff", format="tiff")
    plt.close()


In [8]:
def save_roc_pr(model, X_test, y_test, name):
    y_bin = label_binarize(y_test, classes=range(n_classes))
    y_score = model.predict_proba(X_test)

    # ROC
    fig, ax = plt.subplots(figsize=(6,5))
    for i in range(n_classes):
        fpr, tpr, _ = roc_curve(y_bin[:, i], y_score[:, i])
        ax.plot(fpr, tpr, label=f"Class {i} (AUC={auc(fpr,tpr):.2f})")
    ax.set_title(f"ROC – {name}")
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    ax.legend()
    plt.tight_layout()
    plt.savefig(f"{IMAGE_DIR}/ROC_{name}.tiff", format="tiff")
    plt.close()

    # Precision–Recall
    fig, ax = plt.subplots(figsize=(6,5))
    for i in range(n_classes):
        p, r, _ = precision_recall_curve(y_bin[:, i], y_score[:, i])
        ax.plot(r, p, label=f"Class {i}")
    ax.set_title(f"Precision–Recall – {name}")
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.legend()
    plt.tight_layout()
    plt.savefig(f"{IMAGE_DIR}/PR_{name}.tiff", format="tiff")
    plt.close()


In [9]:
final_results = {}

for name, model in models.items():
    print(f"\n===== {name} =====")

    print("K=5 :", stratified_kfold_metrics(model, X_train, y_train, 5))
    print("K=10:", stratified_kfold_metrics(model, X_train, y_train, 10))

    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    final_results[name] = {
        "Accuracy": accuracy_score(y_test, preds),
        "Precision": precision_score(y_test, preds, average="weighted"),
        "Recall": recall_score(y_test, preds, average="weighted"),
        "F1": f1_score(y_test, preds, average="weighted")
    }

    save_confusion_matrix(model, X_test, y_test, name)
    save_roc_pr(model, X_test, y_test, name)



===== Decision_Tree =====
K=5 : (np.float64(0.8957142857142857), np.float64(0.8977632163762337), np.float64(0.8957142857142857), np.float64(0.895748830562124))
K=10: (np.float64(0.9057142857142859), np.float64(0.9084323477483643), np.float64(0.9057142857142859), np.float64(0.9055249960369475))

===== Random_Forest =====
K=5 : (np.float64(0.9085714285714287), np.float64(0.9110576368106049), np.float64(0.9085714285714287), np.float64(0.908910269771934))
K=10: (np.float64(0.9114285714285714), np.float64(0.9146137406997689), np.float64(0.9114285714285714), np.float64(0.9117489452127119))

===== AdaBoost =====
K=5 : (np.float64(0.8742857142857143), np.float64(0.8813279402561495), np.float64(0.8742857142857143), np.float64(0.8752208656246946))
K=10: (np.float64(0.8742857142857143), np.float64(0.8818632057528063), np.float64(0.8742857142857143), np.float64(0.8751157316503336))

===== Gradient_Boosting =====
K=5 : (np.float64(0.9385714285714286), np.float64(0.9398376202765102), np.float64(0.9

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


K=5 : (np.float64(0.9385714285714286), np.float64(0.939369136397403), np.float64(0.9385714285714286), np.float64(0.9385585519246818))


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


K=10: (np.float64(0.9514285714285714), np.float64(0.9528226963056821), np.float64(0.9514285714285714), np.float64(0.9514551013925026))

===== CatBoost =====
K=5 : (np.float64(0.9471428571428572), np.float64(0.9479960886369085), np.float64(0.9471428571428572), np.float64(0.9470510689948055))
K=10: (np.float64(0.9514285714285714), np.float64(0.9523566233960971), np.float64(0.9514285714285714), np.float64(0.9513809444914594))

===== LightGBM =====
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000341 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1334
[LightGBM] [Info] Number of data points in the train set: 560, number of used features: 12
[LightGBM] [Info] Start training from score -1.024632
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -0.938865




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000100 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1334
[LightGBM] [Info] Number of data points in the train set: 560, number of used features: 12
[LightGBM] [Info] Start training from score -1.024632
[LightGBM] [Info] Start training from score -1.379177
[LightGBM] [Info] Start training from score -0.943442
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000116 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1334
[LightGBM] [Info] Number of data points in the train set: 560, number of used features: 12
[LightGBM] [Info] Start training from score -1.024632
[LightGBM] [Info] Start training from score -1.379177
[LightGBM] [Info] Start training from score -0.943442
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000117 seconds







[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000123 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1334
[LightGBM] [Info] Number of data points in the train set: 560, number of used features: 12
[LightGBM] [Info] Start training from score -1.029619
[LightGBM] [Info] Start training from score -1.379177
[LightGBM] [Info] Start training from score -0.938865




K=5 : (np.float64(0.9357142857142857), np.float64(0.9377065604222622), np.float64(0.9357142857142857), np.float64(0.9357952606032451))
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000186 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1496
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000117 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1500
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Sta



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000155 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1500
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000119 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1497
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1497
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388








[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000121 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1496
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1497
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.376816
[LightGBM] [Info] Start training from score -0.944462
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1497
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.376816
[LightGBM] [Info] Start training from score -0.944462




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1498
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.376816
[LightGBM] [Info] Start training from score -0.944462




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000207 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1500
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.029619
[LightGBM] [Info] Start training from score -1.376816
[LightGBM] [Info] Start training from score -0.940388




K=10: (np.float64(0.9400000000000001), np.float64(0.9412729731219247), np.float64(0.9400000000000001), np.float64(0.9399539186709909))
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1661
[LightGBM] [Info] Number of data points in the train set: 700, number of used features: 12
[LightGBM] [Info] Start training from score -1.025627
[LightGBM] [Info] Start training from score -1.380596
[LightGBM] [Info] Start training from score -0.941609








In [10]:
!pip install lime
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import shap
from lime.lime_tabular import LimeTabularExplainer




In [11]:
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_test_t  = torch.tensor(X_test, dtype=torch.float32)
y_test_t  = torch.tensor(y_test, dtype=torch.long)

train_loader = DataLoader(
    TensorDataset(X_train_t, y_train_t),
    batch_size=32,
    shuffle=True
)

In [12]:
class AFG_TabNet(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.attn = nn.MultiheadAttention(input_dim, 4, batch_first=True)
        self.gate = nn.Linear(input_dim, input_dim)
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.GELU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = x.unsqueeze(1)
        attn_out, _ = self.attn(x, x, x)
        g = torch.sigmoid(self.gate(attn_out.squeeze(1)))
        x = attn_out.squeeze(1) * g
        return self.fc(x)


In [13]:
class GatedMLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.gate = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        h = torch.relu(self.fc1(x))
        g = torch.sigmoid(self.gate(x))
        return self.fc2(h * g)


In [14]:
class FT_MLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LayerNorm(256),
            nn.GELU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        return self.net(x)


In [15]:
def train_dl(model, train_loader, epochs=30):
    start = time.time()
    model.train()
    opt = optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.CrossEntropyLoss()

    for _ in range(epochs):
        for xb, yb in train_loader:
            opt.zero_grad()
            loss = loss_fn(model(xb), yb)
            loss.backward()
            opt.step()

    return time.time() - start


In [16]:
def evaluate_dl(model, name):
    model.eval()
    with torch.no_grad():
        logits = model(X_test_t)
        probs = torch.softmax(logits, dim=1).numpy()
        preds = np.argmax(probs, axis=1)

    # Metrics
    final_results[name] = {
        "Accuracy": accuracy_score(y_test, preds),
        "Precision": precision_score(y_test, preds, average="weighted"),
        "Recall": recall_score(y_test, preds, average="weighted"),
        "F1": f1_score(y_test, preds, average="weighted")
    }

    # Confusion Matrix
    save_confusion_matrix(
        lambda x: np.argmax(probs, axis=1),
        X_test, y_test, name
    )

    # ROC & PR
    y_bin = label_binarize(y_test, classes=range(n_classes))

    # ROC
    plt.figure(figsize=(6,5))
    for i in range(n_classes):
        fpr, tpr, _ = roc_curve(y_bin[:, i], probs[:, i])
        plt.plot(fpr, tpr, label=f"Class {i}")
    plt.legend()
    plt.title(f"ROC – {name}")
    plt.savefig(f"{IMAGE_DIR}/ROC_{name}.tiff", format="tiff")
    plt.close()

    # PR
    plt.figure(figsize=(6,5))
    for i in range(n_classes):
        p, r, _ = precision_recall_curve(y_bin[:, i], probs[:, i])
        plt.plot(r, p, label=f"Class {i}")
    plt.legend()
    plt.title(f"PR – {name}")
    plt.savefig(f"{IMAGE_DIR}/PR_{name}.tiff", format="tiff")
    plt.close()


In [17]:
def save_confusion_matrix_dl(y_true, y_pred, name):
    cm = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(6,5))
    disp = ConfusionMatrixDisplay(cm)
    disp.plot(ax=ax, cmap="Blues", colorbar=False)

    ax.set_title(f"Confusion Matrix – {name}")
    plt.tight_layout()
    plt.savefig(f"{IMAGE_DIR}/CM_{name}.tiff", format="tiff", dpi=300)
    plt.close()


In [18]:
def evaluate_dl(model, name):
    model.eval()
    with torch.no_grad():
        logits = model(X_test_t)
        probs = torch.softmax(logits, dim=1).cpu().numpy()
        preds = np.argmax(probs, axis=1)

    # ---- Metrics ----
    final_results[name] = {
        "Accuracy": accuracy_score(y_test, preds),
        "Precision": precision_score(y_test, preds, average="weighted"),
        "Recall": recall_score(y_test, preds, average="weighted"),
        "F1": f1_score(y_test, preds, average="weighted")
    }

    # ---- Confusion Matrix (DL-safe) ----
    save_confusion_matrix_dl(y_test, preds, name)

    # ---- ROC & Precision–Recall ----
    y_bin = label_binarize(y_test, classes=range(n_classes))

    # ROC
    plt.figure(figsize=(6,5))
    for i in range(n_classes):
        fpr, tpr, _ = roc_curve(y_bin[:, i], probs[:, i])
        plt.plot(fpr, tpr, label=f"Class {i} (AUC={auc(fpr,tpr):.2f})")
    plt.legend()
    plt.title(f"ROC – {name}")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.tight_layout()
    plt.savefig(f"{IMAGE_DIR}/ROC_{name}.tiff", format="tiff", dpi=300)
    plt.close()

    # Precision–Recall
    plt.figure(figsize=(6,5))
    for i in range(n_classes):
        p, r, _ = precision_recall_curve(y_bin[:, i], probs[:, i])
        plt.plot(r, p, label=f"Class {i}")
    plt.legend()
    plt.title(f"Precision–Recall – {name}")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.tight_layout()
    plt.savefig(f"{IMAGE_DIR}/PR_{name}.tiff", format="tiff", dpi=300)
    plt.close()


In [19]:
dl_models = {
    "AFG_TabNet": AFG_TabNet(X.shape[1], n_classes),
    "Gated_MLP": GatedMLP(X.shape[1], n_classes),
    "FT_MLP": FT_MLP(X.shape[1], n_classes)
}

time_results = {}

for name, model in dl_models.items():
    t = train_dl(model, train_loader)
    time_results[name] = t
    evaluate_dl(model, name)


In [20]:
for name, model in models.items():
    start = time.time()
    model.fit(X_train, y_train)
    time_results[name] = time.time() - start


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000225 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1661
[LightGBM] [Info] Number of data points in the train set: 700, number of used features: 12
[LightGBM] [Info] Start training from score -1.025627
[LightGBM] [Info] Start training from score -1.380596
[LightGBM] [Info] Start training from score -0.941609


In [21]:
time_df = pd.DataFrame.from_dict(
    time_results, orient="index", columns=["Training_Time_sec"]
)
print(time_df)


                   Training_Time_sec
AFG_TabNet                  7.174929
Gated_MLP                   2.743437
FT_MLP                      3.446426
Decision_Tree               0.025764
Random_Forest               0.718453
AdaBoost                    0.260251
Gradient_Boosting           1.536279
SVM                         0.087413
XGBoost                     0.320605
CatBoost                   14.690241
LightGBM                    0.405601


In [22]:
explainer = shap.TreeExplainer(models["XGBoost"])
shap_vals = explainer.shap_values(X_test)

plt.figure()
shap.summary_plot(shap_vals, X_test, show=False)
plt.savefig(f"{IMAGE_DIR}/SHAP_XGBoost.tiff", format="tiff", dpi=300)
plt.close()


<Figure size 1920x1440 with 0 Axes>

In [23]:
lime_exp = LimeTabularExplainer(
    X_train,
    feature_names=df.drop("Latency_Category", axis=1).columns,
    class_names=le.classes_,
    mode="classification"
)

exp = lime_exp.explain_instance(
    X_test[0],
    models["XGBoost"].predict_proba
)

fig = exp.as_pyplot_figure()
plt.tight_layout()
plt.savefig(f"{IMAGE_DIR}/LIME_XGBoost.tiff", format="tiff", dpi=300)
plt.close()


In [24]:
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import numpy as np
import torch

def generate_final_roc_comparison(models, dl_models):
    y_test_bin = label_binarize(y_test, classes=range(n_classes))

    fig, ax = plt.subplots(figsize=(8, 7))

    # ============================
    # Classical ML Models
    # ============================
    for name, model in models.items():
        y_score = model.predict_proba(X_test)

        # Macro-average ROC
        fpr, tpr, _ = roc_curve(
            y_test_bin.ravel(),
            y_score.ravel()
        )
        roc_auc = auc(fpr, tpr)

        ax.plot(
            fpr, tpr,
            linewidth=2,
            label=f"{name} (AUC={roc_auc:.3f})"
        )

    # ============================
    # Deep Learning Models
    # ============================
    for name, model in dl_models.items():
        model.eval()
        with torch.no_grad():
            logits = model(X_test_t)
            probs = torch.softmax(logits, dim=1).cpu().numpy()

        fpr, tpr, _ = roc_curve(
            y_test_bin.ravel(),
            probs.ravel()
        )
        roc_auc = auc(fpr, tpr)

        ax.plot(
            fpr, tpr,
            linestyle="--",
            linewidth=2,
            label=f"{name} (AUC={roc_auc:.3f})"
        )

    # ============================
    # Plot Formatting
    # ============================
    ax.plot([0, 1], [0, 1], 'k--', linewidth=1)
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("AU-ROC Comparison of reported models")
    ax.legend(loc="lower right", fontsize=9)
    ax.grid(alpha=0.3)

    plt.tight_layout()
    plt.savefig(
        f"{IMAGE_DIR}/AU-ROC Comparison of reported models.tiff",
        format="tiff",
        dpi=300
    )
    plt.close()


In [25]:
generate_final_roc_comparison(models, dl_models)




In [26]:
# Create comparison table
comparison_df = pd.DataFrame.from_dict(final_results, orient="index")

# Optional: add training time column
if 'time_results' in globals():
    comparison_df["Training_Time_sec"] = comparison_df.index.map(time_results)

# Reorder columns for paper
comparison_df = comparison_df[
    ["Accuracy", "Precision", "Recall", "F1", "Training_Time_sec"]
]

# Sort by F1-score (descending) – common journal practice
comparison_df = comparison_df.sort_values(by="F1", ascending=False)

# Display table
print(comparison_df)


                   Accuracy  Precision    Recall        F1  Training_Time_sec
AFG_TabNet         1.000000   1.000000  1.000000  1.000000           7.174929
Gated_MLP          0.980000   0.980046  0.980000  0.979952           2.743437
FT_MLP             0.973333   0.973820  0.973333  0.973395           3.446426
CatBoost           0.973333   0.975054  0.973333  0.973336          14.690241
LightGBM           0.946667   0.950916  0.946667  0.946711           0.405601
Gradient_Boosting  0.940000   0.941396  0.940000  0.940071           1.536279
XGBoost            0.940000   0.943704  0.940000  0.940066           0.320605
Decision_Tree      0.940000   0.940067  0.940000  0.939808           0.025764
Random_Forest      0.926667   0.929889  0.926667  0.926539           0.718453
SVM                0.920000   0.922908  0.920000  0.920564           0.087413
AdaBoost           0.866667   0.872530  0.866667  0.868000           0.260251


In [27]:
comparison_df.to_excel("Final_Model_Comparison_Table.xlsx")


In [28]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

def kfold_metrics(model, X, y, k):
    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

    acc, prec, rec, f1 = [], [], [], []

    for train_idx, val_idx in skf.split(X, y):
        model.fit(X[train_idx], y[train_idx])
        preds = model.predict(X[val_idx])

        acc.append(accuracy_score(y[val_idx], preds))
        prec.append(precision_score(y[val_idx], preds, average="weighted"))
        rec.append(recall_score(y[val_idx], preds, average="weighted"))
        f1.append(f1_score(y[val_idx], preds, average="weighted"))

    return {
        "Accuracy": np.mean(acc),
        "Precision": np.mean(prec),
        "Recall": np.mean(rec),
        "F1": np.mean(f1)
    }


In [29]:
kfold_results = {}

for name, model in models.items():
    kfold_results[name] = {
        "k=5": kfold_metrics(model, X_train, y_train, k=5),
        "k=10": kfold_metrics(model, X_train, y_train, k=10)
    }


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1334
[LightGBM] [Info] Number of data points in the train set: 560, number of used features: 12
[LightGBM] [Info] Start training from score -1.024632
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -0.938865
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1334
[LightGBM] [Info] Number of data points in the train set: 560, number of used features: 12
[LightGBM] [Info] Start training from score -1.024632
[LightGBM] [Info] Start training from score -1.379177
[LightGBM] [Info] Start training from score -0.943442




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000159 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1334
[LightGBM] [Info] Number of data points in the train set: 560, number of used features: 12
[LightGBM] [Info] Start training from score -1.024632
[LightGBM] [Info] Start training from score -1.379177
[LightGBM] [Info] Start training from score -0.943442




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000173 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1334
[LightGBM] [Info] Number of data points in the train set: 560, number of used features: 12
[LightGBM] [Info] Start training from score -1.024632
[LightGBM] [Info] Start training from score -1.379177
[LightGBM] [Info] Start training from score -0.943442








[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1334
[LightGBM] [Info] Number of data points in the train set: 560, number of used features: 12
[LightGBM] [Info] Start training from score -1.029619
[LightGBM] [Info] Start training from score -1.379177
[LightGBM] [Info] Start training from score -0.938865
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1496
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1500
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1500
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1497
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000122 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1497
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388








[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1496
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.383125
[LightGBM] [Info] Start training from score -0.940388




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1497
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.376816
[LightGBM] [Info] Start training from score -0.944462




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000117 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1497
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.376816
[LightGBM] [Info] Start training from score -0.944462




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1498
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.025185
[LightGBM] [Info] Start training from score -1.376816
[LightGBM] [Info] Start training from score -0.944462
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000212 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1500
[LightGBM] [Info] Number of data points in the train set: 630, number of used features: 12
[LightGBM] [Info] Start training from score -1.029619
[LightGBM] [Info] Start training from score -1.376816
[LightGBM] [Info] Start training from score -0.940388








In [30]:
rows = []

for model_name, folds in kfold_results.items():
    for k_value, metrics in folds.items():
        rows.append({
            "Model": model_name,
            "K-Fold": k_value,
            "Accuracy": metrics["Accuracy"],
            "Precision": metrics["Precision"],
            "Recall": metrics["Recall"],
            "F1-score": metrics["F1"]
        })

kfold_comparison_df = pd.DataFrame(rows)
print(kfold_comparison_df)


                Model K-Fold  Accuracy  Precision    Recall  F1-score
0       Decision_Tree    k=5  0.888571   0.891031  0.888571  0.888696
1       Decision_Tree   k=10  0.901429   0.903307  0.901429  0.901063
2       Random_Forest    k=5  0.905714   0.908748  0.905714  0.906078
3       Random_Forest   k=10  0.914286   0.918300  0.914286  0.914506
4            AdaBoost    k=5  0.874286   0.881328  0.874286  0.875221
5            AdaBoost   k=10  0.874286   0.881863  0.874286  0.875116
6   Gradient_Boosting    k=5  0.938571   0.939838  0.938571  0.938529
7   Gradient_Boosting   k=10  0.954286   0.955160  0.954286  0.954166
8                 SVM    k=5  0.917143   0.920881  0.917143  0.917223
9                 SVM   k=10  0.925714   0.931028  0.925714  0.925700
10            XGBoost    k=5  0.938571   0.939369  0.938571  0.938559
11            XGBoost   k=10  0.951429   0.952823  0.951429  0.951455
12           CatBoost    k=5  0.947143   0.947996  0.947143  0.947051
13           CatBoos

In [31]:
kfold_comparison_df = kfold_comparison_df.sort_values(
    by=["Model", "K-Fold"]
)

# Save for manuscript
kfold_comparison_df.to_csv("KFold_Comparison_Table.csv", index=False)
kfold_comparison_df.to_excel("KFold_Comparison_Table.xlsx", index=False)
