<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/%5B20251209%5D%20IoMT%20Pj_Clean-condition%20ablation%3A%20LR%20/%20MLP%20/%20RF%20/%20XGB%20/%20LR%E2%86%92MLP%20(Temp/Isotonic).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[20251030] Train_all_in_one — Full revision with derived features (2025-10-01)

This script trains a calibrated LR→MLP IDS on NF-ToN-IoT and CIC_IoMT_2024_WiFi_MQTT
with:
    - Protocol-aware preprocessing & derived flow features
    - SMOTE-based imbalance handling
    - Temperature scaling (default) and isotonic regression (ablation)
    - Clean vs adversarial (FGSM/PGD, ℓ∞-bounded) evaluation
    - Clean-condition ablation of LR, MLP, LR→MLP (uncal/Temp/Isotonic)
    - PLUS classical baselines: Random Forest and XGBoost/GBDT-style ensemble
      included in the clean ablation table.

Outputs:
    - Metrics JSONs for clean/adversarial runs
    - clean_ablation_seedavg.csv for the manuscript's clean-condition ablation table (LaTeX label: tab:ablation-clean)

Complete training/evaluation script for LR-MLP IDS:
- NF-ToN-IoT in-domain
- CIC_IoMT_2024 WiFi-MQTT in-domain (full and tiny-slice benign)
- Cross-domain NF<->CIC with feature automap
- Calibration (temperature / isotonic)
- Adversarial evaluation (FGSM/PGD)
- Resource profiling (CPU + Pi4 emulation)
- Clean-condition ablation: LR / MLP / RF / XGB / LR→MLP (Temp/Isotonic)

In [None]:
import os
import sys
import json
import time
import math
import glob
import argparse
from pathlib import Path
from zipfile import ZipFile

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    confusion_matrix,
    log_loss,
    brier_score_loss,
)
from sklearn.calibration import calibration_curve
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
try:
    from xgboost import XGBClassifier
    HAVE_XGB = True
except Exception:
    HAVE_XGB = False

from sklearn.metrics import classification_report

from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTETomek

import matplotlib.pyplot as plt

In [None]:
# ----------------------
# Global configuration
# ----------------------
# Central configuration dictionary read by the training / evaluation functions
# in this file. Keys are looked up by name at call time, so renaming a key
# requires updating every reader.
CFG = {
    # Input CSV locations and the base output directory.
    # NOTE(review): no function in this part of the file reads "paths";
    # presumably the entry point does — confirm.
    "paths": {
        "nf": "/content/Dataset_NF-ToN-IoT.csv",
        "cic_train": "/content/CIC_IoMT_2024_WiFi_MQTT_train.csv",
        "cic_test": "/content/CIC_IoMT_2024_WiFi_MQTT_test.csv",
        "outdir": "./outputs",
    },
    # Label column names; run_in_domain_nf / run_in_domain_cic drop these
    # columns from the feature list and read the labels from them.
    "columns": {
        "nf_label_binary": "Label",
        "nf_label_mc": "Class",
        "cic_label_binary": "label",
        "cic_label_mc": "Class",
    },
    # Training hyper-parameters.
    "train": {
        "test_size": 0.2,  # hold-out fraction passed to train_test_split
        "random_state": 42,  # seed for splits, samplers and estimators
        "use_smote": True,  # forwarded to fit_lr_then_mlp(use_smote=...)
        "smote_kind": "smote_tomek",  # "smote" or "smote_tomek"
        "mlp_hidden_units": 64,  # width of the single hidden layer
        # NOTE(review): "mlp_dropout" is not read by any code visible here.
        "mlp_dropout": 0.2,
        "batch_size": 2048,
        "max_epochs": 25,  # passed as MLPClassifier(max_iter=...)
        "early_stopping_patience": 5,  # passed as n_iter_no_change
    },
    # Adversarial-evaluation settings.
    # NOTE(review): not read by any code visible here (the FGSM/PGD functions
    # in this file are stubs).
    "adv": {
        "eps_list": [0.01, 0.02, 0.05, 0.10],
        "pgd_steps": 10,
        "pgd_alpha_factor": 0.02,
    },
    # Probability-calibration settings.
    "calibration": {
        # NOTE(review): "method" and "n_bins_ece" are not read by the code
        # visible here; only "calib_frac" is (in run_in_domain_cic).
        "method": "temperature",  # "temperature" or "isotonic"
        "n_bins_ece": 15,
        "calib_frac": 0.1,  # share of the training rows used for calibration
    },
    # Tiny benign-slice experiment; run_in_domain_cic reads "enabled",
    # "benign_frac" and "seed" from the dict it receives as tiny_slice_cfg
    # (presumably this one — confirm at the call site).
    "tiny_slice": {
        "enabled": True,
        "benign_frac": 0.015,
        "seed": 42,
    },
}

In [None]:
# ====================
# Utility helpers
# ====================
def ensure_outdir(path: str) -> str:
    """Make sure the directory `path` exists and return it as a string.

    Missing parent directories are created as well; an already existing
    directory is left untouched.
    """
    directory = Path(path)
    os.makedirs(directory, exist_ok=True)
    return str(directory)


def get_run_dir(base_dir: str, tag: str) -> str:
    """Create and return a timestamped run directory below `base_dir`.

    The directory name is ``<YYYYmmdd_HHMMSS>__<tag>`` using the local time
    at the moment of the call. Existing directories are reused.
    """
    stamp = time.strftime("%Y%m%d_%H%M%S")
    target = os.path.join(base_dir, "{}__{}".format(stamp, tag))
    Path(target).mkdir(parents=True, exist_ok=True)
    return target


def save_json(obj, path):
    """Serialize `obj` as indented JSON (UTF-8) and write it to `path`.

    The document is rendered in memory *before* the file is opened, so a
    serialization error does not truncate a file that already exists.
    NumPy scalars and arrays (common in metric dictionaries) are converted
    to their plain Python equivalents.

    Raises:
        TypeError: if `obj` contains a value that cannot be represented in JSON.
    """

    def _to_builtin(value):
        # Called by the encoder only for values it cannot handle natively.
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    payload = json.dumps(obj, indent=2, default=_to_builtin)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)


def load_csv(path, **kwargs):
    """Log the file being read and return it as a DataFrame.

    Any keyword arguments are forwarded unchanged to ``pandas.read_csv``.
    """
    message = "[INFO] Loading CSV: {}".format(path)
    print(message)
    frame = pd.read_csv(path, **kwargs)
    return frame

In [None]:
# =======================================================
# Dataset loading and preprocessing
# =======================================================
def load_nf_ton_iot(path: str) -> pd.DataFrame:
    """Read the NF-ToN-IoT CSV at `path` and return it unmodified.

    No label columns are created or validated here; downstream code expects
    the columns named in ``CFG["columns"]`` to be present in the file already.
    """
    return load_csv(path)


def load_cic_iomt_train_test(train_path: str, test_path: str) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Read the CIC train and test CSVs and return them as ``(train, test)``.

    The training file is read first, then the test file; neither frame is
    modified.
    """
    return load_csv(train_path), load_csv(test_path)


def build_feature_list(df: pd.DataFrame, drop_cols) -> list[str]:
    """Return the columns of `df`, in order, that are not listed in `drop_cols`."""
    kept = []
    for column in df.columns:
        if column in drop_cols:
            continue
        kept.append(column)
    return kept


def build_binary_labels(df: pd.DataFrame, bin_col: str, pos_label=None) -> np.ndarray:
    """Map the label column `bin_col` of `df` to an integer array of 0/1.

    Args:
        df: frame holding the label column.
        bin_col: name of the label column.
        pos_label: optional label value to treat as the positive class (1);
            every other value becomes 0. When ``None`` (the default) the
            rules below apply.

    Returns:
        Integer array with one entry per row of `df`.

    Rules when `pos_label` is ``None``:
        * Text labels: ``'Benign'`` and ``'Benign_test'`` map to 0, every
          other value maps to 1 (so a column without a benign string is all 1).
        * Any other dtype is cast to ``int`` as is.
    """
    # to_numpy() always yields a plain ndarray, including for categorical or
    # pandas string columns, whose `.values` is not an object-dtype ndarray.
    y = df[bin_col].to_numpy()

    if pos_label is not None:
        return (y == pos_label).astype(int)

    if y.dtype.kind in ("O", "U"):
        # Matching is exact and case-sensitive.
        is_benign = (y == 'Benign') | (y == 'Benign_test')
        return np.where(is_benign, 0, 1)

    # Labels are already numeric / boolean: keep their values.
    return y.astype(int)

In [None]:
# ===================
# Metric helpers
# ===================
def fpr_at_dr(y_true, scores, target_dr=0.95):
    """
    Compute FPR at a fixed detection rate (recall for the positive class).

    The threshold is the highest positive-class score that still flags at
    least ``target_dr`` of the positives when predicting ``scores >= thr``.
    Choosing an actual score (rather than an interpolated quantile, which can
    fall between two scores) guarantees the achieved detection rate is never
    below the target; with tied scores it may exceed it.

    Args:
        y_true: binary labels (1 = positive/attack, 0 = negative/benign).
        scores: per-sample scores, higher meaning more likely positive.
        target_dr: required detection rate in [0, 1].

    Returns (fpr, threshold); both are NaN when there are no positives.
    FPR is 0.0 when there are no negatives.
    """
    y_true = np.asarray(y_true).astype(int)
    scores = np.asarray(scores)
    pos_scores = scores[y_true == 1]
    n_pos = len(pos_scores)
    if n_pos == 0:
        return np.nan, np.nan

    # Smallest number of positives that must be flagged; the tiny offset
    # keeps float round-off (e.g. 0.95 * 100) from demanding one extra sample.
    n_required = int(np.ceil(target_dr * n_pos - 1e-9))
    n_required = min(max(n_required, 1), n_pos)
    thr = np.sort(pos_scores)[n_pos - n_required]

    neg_scores = scores[y_true == 0]
    fp = int(np.count_nonzero(neg_scores >= thr))
    tn = len(neg_scores) - fp
    # The epsilon avoids a division by zero when there are no negatives.
    fpr = fp / (fp + tn + 1e-12)
    return fpr, thr


def expected_calibration_error(y_true, probas, n_bins=15):
    """
    Expected calibration error over equal-width confidence bins.

    `probas` has shape (n_samples, n_classes). The confidence of a sample is
    its largest class probability and its prediction is the index of that
    column, which is compared directly against `y_true`. Each non-empty bin
    ``(lower, upper]`` contributes ``|accuracy - mean confidence|`` weighted
    by the share of samples it holds.
    """
    labels = np.asarray(y_true)
    prob_matrix = np.asarray(probas)
    top_conf = prob_matrix.max(axis=1)
    top_class = prob_matrix.argmax(axis=1)
    edges = np.linspace(0.0, 1.0, n_bins + 1)

    total = 0.0
    for lower, upper in zip(edges[:-1], edges[1:]):
        members = (top_conf > lower) & (top_conf <= upper)
        weight = members.mean()
        if not weight > 0:
            # Empty bin: nothing to add.
            continue
        hit_rate = (labels[members] == top_class[members]).mean()
        mean_conf = top_conf[members].mean()
        total += np.abs(hit_rate - mean_conf) * weight

    return total


def plot_reliability_diagram(y_true, prob_pos, n_bins=15, title="", out_path=None):
    """Draw a reliability diagram for positive-class probabilities.

    The curve comes from ``calibration_curve`` with uniform bins and is drawn
    against the diagonal of perfect calibration. When `out_path` is given the
    figure is saved there at 300 dpi and closed; otherwise it is shown.
    """
    frac_positive, mean_predicted = calibration_curve(
        y_true, prob_pos, n_bins=n_bins, strategy="uniform"
    )

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.plot(mean_predicted, frac_positive, "s-", label="LR-MLP")
    ax.plot([0, 1], [0, 1], "k--", label="Perfectly calibrated")
    ax.set_xlabel("Predicted probability")
    ax.set_ylabel("Empirical accuracy")
    ax.set_title(title)
    ax.legend()
    ax.grid(True, linestyle="--", alpha=0.5)

    if out_path is None:
        plt.show()
        return

    fig.tight_layout()
    fig.savefig(out_path, dpi=300)
    plt.close(fig)

In [None]:
# =======================================================
# Adversarial generation (FGSM / PGD in feature space)
# =======================================================
def fgsm_attack(x, y, model, eps, clip_min, clip_max):
    """
    FGSM in normalized feature space — not implemented in this file.

    This function is a stub that always raises. According to the original
    author notes, the attack itself runs in the notebook pipeline (TF/Keras
    autograd on the trained LR-MLP), and this file only evaluates adversarial
    datasets prepared there. An implementation here would compute the loss
    gradient ∂L/∂x and perturb the input by eps * sign(∂L/∂x).

    Raises:
        NotImplementedError: always.
    """
    reason = "FGSM attack is implemented in the notebook pipeline."
    raise NotImplementedError(reason)


def pgd_attack(x, y, model, eps, alpha, steps, clip_min, clip_max):
    """
    PGD in normalized feature space — not implemented in this file.

    Stub that always raises; per its message the attack lives in the
    notebook pipeline.

    Raises:
        NotImplementedError: always.
    """
    reason = "PGD attack is implemented in the notebook pipeline."
    raise NotImplementedError(reason)

In [None]:
# ===========================
# LR-MLP hybrid training
# ===========================
def make_smote(kind="smote_tomek", random_state=42):
    """Build the resampler selected by `kind`.

    Returns a ``SMOTE`` instance for ``"smote"``, a ``SMOTETomek`` instance
    for ``"smote_tomek"``, and ``None`` for any other value.
    """
    if kind not in ("smote", "smote_tomek"):
        return None
    if kind == "smote":
        return SMOTE(random_state=random_state)
    return SMOTETomek(random_state=random_state)


def fit_lr_then_mlp(
    X_train,
    y_train,
    X_eval,
    y_eval,
    use_smote=True,
    seed=42,
    max_epochs=25,
    batch_size=2048,
    hidden_units=64,
):
    """
    Fit logistic-regression front end + MLP head. Returns (pipe, mlp_model).

    Steps: standardize the training features, optionally resample them with
    the sampler selected by ``CFG["train"]["smote_kind"]``, fit a class-weighted
    logistic regression, then fit an sklearn ``MLPClassifier`` on the
    one-dimensional LR decision values.

    Args:
        X_train, y_train: training features and binary labels.
        X_eval, y_eval: accepted for interface compatibility only; they are
            not used during fitting.
        use_smote: resample the standardized training set when it has more
            than one class.
        seed: random state for the sampler, LR and MLP.
        max_epochs: ``max_iter`` of the MLP.
        batch_size: mini-batch size of the MLP.
        hidden_units: width of the MLP's single hidden layer.

    Returns:
        ``(pipe, mlp)`` where ``pipe.transform(X)`` standardizes raw features
        and returns the LR decision values as a column vector, and ``mlp`` is
        the fitted classifier operating on that column.

    Raises:
        ValueError: if `y_train` contains a single class other than 1; only
            the all-attack case has a workaround.
    """
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)

    if use_smote:
        if len(np.unique(y_train)) > 1:
            sampler = make_smote(CFG["train"]["smote_kind"], random_state=seed)
            if sampler is None:
                # make_smote returns None for an unrecognised kind.
                print(f"[WARN] Unknown smote_kind {CFG['train']['smote_kind']!r}. Skipping SMOTE.")
            else:
                X_train_s, y_train = sampler.fit_resample(X_train_s, y_train)
        else:
            print(f"[WARN] SMOTE requested but target has {len(np.unique(y_train))} class(es). Skipping SMOTE.")

    # Logistic regression front end
    lr = LogisticRegression(
        penalty="l2",
        C=1.0,
        solver="lbfgs",
        max_iter=1000,
        class_weight="balanced",
        n_jobs=-1,
        random_state=seed,
    )

    # Workaround: LogisticRegression cannot be fitted on a single class.
    # When the training labels are all attacks (class 1) one synthetic benign
    # row, placed at the mean of the standardized training features, is added.
    synthetic_sample_added = False
    present = np.unique(y_train)
    if len(present) < 2:
        if present[0] == 1:
            print("[WARN] Training data (y_train) contains only one class (attack). Adding a synthetic benign sample to allow LogisticRegression to fit.")
            synthetic_x = X_train_s.mean(axis=0).reshape(1, -1)
            X_train_s = np.vstack([X_train_s, synthetic_x])
            y_train = np.append(y_train, np.array([0]))
            synthetic_sample_added = True
        else:
            # Fail here with a clear message instead of inside lr.fit().
            raise ValueError(
                f"Training data (y_train) contains only one class ({present[0]}); "
                "LogisticRegression needs samples of at least two classes."
            )

    lr.fit(X_train_s, y_train)

    # The MLP head is trained on the LR decision values (one column).
    z_train = lr.decision_function(X_train_s).reshape(-1, 1)

    mlp = MLPClassifier(
        hidden_layer_sizes=(hidden_units,),
        activation="relu",
        solver="adam",
        alpha=0.0,
        batch_size=batch_size,
        learning_rate_init=1e-3,
        max_iter=max_epochs,
        # Early stopping and its validation split are disabled when a
        # synthetic sample was added.
        early_stopping=not synthetic_sample_added,
        n_iter_no_change=CFG["train"]["early_stopping_patience"],
        validation_fraction=0.1 if not synthetic_sample_added else 0.0,
        random_state=seed,
    )

    mlp.fit(z_train, y_train)

    class Pipe:
        """Standardize raw features and map them to LR decision values."""

        def __init__(self, scaler, lr):
            self.scaler = scaler
            self.lr = lr

        def transform(self, X):
            Xs = self.scaler.transform(X)
            return self.lr.decision_function(Xs).reshape(-1, 1)

    pipe = Pipe(scaler, lr)
    return pipe, mlp

In [None]:
# =========================
# In-domain NF-ToN-IoT
# =========================
def run_in_domain_nf(df: pd.DataFrame, outdir: str):
    """Run the in-domain NF-ToN-IoT evaluation (binary, then multi-class).

    Binary: stratified split, LR→MLP hybrid via ``fit_lr_then_mlp``, then
    AUROC / AUPR / FPR@95%DR / NLL / Brier on the held-out part, written to
    ``NF_in_domain_binary_metrics.json`` in `outdir`.

    Multi-class: a separate stratified split on the multi-class label, an MLP
    on standardized raw features, and the classification report written to
    ``NF_in_domain_multiclass_report.json``.

    Returns:
        ``(feats, bin_col, mc_col)`` — the feature names used and the two
        label column names.
    """
    print("[NF] In-domain binary + multi-class evaluation ...")
    train_cfg = CFG["train"]
    label_cfg = CFG["columns"]
    bin_col = label_cfg["nf_label_binary"]
    mc_col = label_cfg["nf_label_mc"]

    # Features are every column except the two labels and "Attack".
    feats = build_feature_list(df, [bin_col, mc_col, "Attack"])
    X = df[feats].values
    y_bin = build_binary_labels(df, bin_col, None)

    split_kwargs = {
        "test_size": train_cfg["test_size"],
        "random_state": train_cfg["random_state"],
    }

    # ---- binary task ----
    X_train, X_test, y_train, y_test = train_test_split(X, y_bin, stratify=y_bin, **split_kwargs)

    pipe, mlp = fit_lr_then_mlp(
        X_train,
        y_train,
        X_test,
        y_test,
        use_smote=train_cfg["use_smote"],
        seed=train_cfg["random_state"],
        max_epochs=train_cfg["max_epochs"],
        batch_size=train_cfg["batch_size"],
        hidden_units=train_cfg["mlp_hidden_units"],
    )

    p_test = mlp.predict_proba(pipe.transform(X_test))[:, 1]

    metrics = {}
    metrics["AUROC"] = roc_auc_score(y_test, p_test)
    metrics["AUPR"] = average_precision_score(y_test, p_test)
    metrics["FPR@95%DR"], metrics["thr95"] = fpr_at_dr(y_test, p_test, target_dr=0.95)
    metrics["NLL"] = log_loss(y_test, p_test)
    metrics["Brier"] = brier_score_loss(y_test, p_test)

    print(
        f"[NF] AUROC={metrics['AUROC']:.4f} AUPR={metrics['AUPR']:.4f} "
        f"FPR@95%DR={metrics['FPR@95%DR']:.4f} thr={metrics['thr95']:.4f}"
    )
    print(f"[NF] NLL={metrics['NLL']:.4f} Brier={metrics['Brier']:.4f}")
    save_json(metrics, os.path.join(outdir, "NF_in_domain_binary_metrics.json"))

    # ---- multi-class task ----
    y_mc = df[mc_col].values
    X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(X, y_mc, stratify=y_mc, **split_kwargs)

    # The multi-class model works on standardized raw features (no LR stage).
    mc_scaler = StandardScaler()
    X_train_mc_s = mc_scaler.fit_transform(X_train_mc)
    X_test_mc_s = mc_scaler.transform(X_test_mc)

    mlp_mc = MLPClassifier(
        hidden_layer_sizes=(train_cfg["mlp_hidden_units"],),
        activation="relu",
        solver="adam",
        alpha=0.0,
        batch_size=train_cfg["batch_size"],
        learning_rate_init=1e-3,
        max_iter=train_cfg["max_epochs"],
        early_stopping=True,
        n_iter_no_change=train_cfg["early_stopping_patience"],
        validation_fraction=0.1,
        random_state=train_cfg["random_state"],
    )
    mlp_mc.fit(X_train_mc_s, y_train_mc)

    report = classification_report(y_test_mc, mlp_mc.predict(X_test_mc_s), output_dict=True)
    save_json(report, os.path.join(outdir, "NF_in_domain_multiclass_report.json"))

    return feats, bin_col, mc_col

In [None]:
# =======================================================
# CIC_IoMT_2024 WiFi-MQTT in-domain (full + tiny-slice)
# =======================================================
def run_in_domain_cic(
    df_tr: pd.DataFrame,
    df_te: pd.DataFrame,
    outdir: str,
    calib_method: str = "temperature",
    tiny_slice_cfg: dict | None = None,
):
    """Run the CIC WiFi-MQTT train→test evaluation, plus the tiny-slice run.

    Main run: fit the LR→MLP hybrid on the training frame, calibrate its
    probabilities on a random ``CFG["calibration"]["calib_frac"]`` share of
    the training rows, evaluate on the test frame and write
    ``CIC_IoMT_train_to_test__binary_metrics.json``.

    Tiny-slice run (when `tiny_slice_cfg` is given and enabled): move a small
    random fraction of the benign *test* rows into the training set, refit,
    evaluate on the remaining test rows and write
    ``CIC_IoMT_tiny_benign_slice__binary_metrics.json``. It is skipped with a
    warning when the test set holds no benign rows.

    Args:
        df_tr, df_te: training and test frames.
        outdir: directory for the metric JSON files.
        calib_method: ``"temperature"`` for temperature scaling; any other
            value selects isotonic regression.
        tiny_slice_cfg: dict with optional keys ``enabled``, ``benign_frac``
            and ``seed``.

    Returns:
        ``(feats, bin_col, mc_col)`` — shared feature names and label columns.
    """
    temperature_grid = (0.5, 0.75, 1.0, 1.25, 1.5, 2.0)

    def _logit(p):
        # Clip so that probabilities of exactly 0 or 1 stay finite.
        p = np.clip(p, 1e-8, 1 - 1e-8)
        return np.log(p) - np.log(1 - p)

    def _sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def _search_temperature(logits, y):
        # Grid search for the temperature with the lowest NLL; T=1.0 wins ties.
        def _nll(temp):
            return log_loss(y, _sigmoid(logits / float(temp)), labels=[0, 1])

        best_temp, best_nll = 1.0, _nll(1.0)
        for t in temperature_grid:
            nll_t = _nll(t)
            if nll_t < best_nll:
                best_nll, best_temp = nll_t, t
        return best_temp, best_nll

    print("[CIC] In-domain WiFi-MQTT (train→test) ...")
    bin_col = CFG["columns"]["cic_label_binary"]
    mc_col = CFG["columns"]["cic_label_mc"]

    drop_cols = [bin_col, mc_col]
    feats_tr = build_feature_list(df_tr, drop_cols)
    feats_te = build_feature_list(df_te, drop_cols)

    # Keep only features present in both frames, in training-column order.
    feats = [f for f in feats_tr if f in feats_te]
    X_train = df_tr[feats].values
    X_test = df_te[feats].values

    y_train = build_binary_labels(df_tr, bin_col, None)
    y_test = build_binary_labels(df_te, bin_col, None)

    pipe, mlp = fit_lr_then_mlp(
        X_train,
        y_train,
        X_test,
        y_test,
        use_smote=False,  # Set to False for CIC as it only has attack labels
        seed=CFG["train"]["random_state"],
        max_epochs=CFG["train"]["max_epochs"],
        batch_size=CFG["train"]["batch_size"],
        hidden_units=CFG["train"]["mlp_hidden_units"],
    )

    p_test_uncal = mlp.predict_proba(pipe.transform(X_test))[:, 1]

    # Calibration subset: the last `calib_frac` share of a shuffled index.
    # NOTE(review): these rows are part of X_train, which the model above was
    # fitted on, so the calibrator sees in-sample predictions — confirm that
    # this is intended.
    n = len(X_train)
    idx = np.arange(n)
    rng = np.random.default_rng(CFG["train"]["random_state"])
    rng.shuffle(idx)
    split = int((1.0 - CFG["calibration"]["calib_frac"]) * n)
    calib_idx = idx[split:]

    y_cal = y_train[calib_idx]
    p_cal = mlp.predict_proba(pipe.transform(X_train[calib_idx]))[:, 1]

    if calib_method == "temperature":
        # Temperature scaling on logits
        best_temp, best_nll = _search_temperature(_logit(p_cal), y_cal)
        p_test = _sigmoid(_logit(p_test_uncal) / best_temp)
        calib_info = {"method": "temperature", "T": best_temp, "NLL_cal": best_nll}
    else:
        # Isotonic regression on probabilities
        from sklearn.isotonic import IsotonicRegression

        ir = IsotonicRegression(out_of_bounds="clip")
        ir.fit(p_cal, y_cal)
        p_test = ir.transform(p_test_uncal)
        calib_info = {"method": "isotonic"}

    auroc = roc_auc_score(y_test, p_test)
    aupr = average_precision_score(y_test, p_test)
    fpr95, thr95 = fpr_at_dr(y_test, p_test, target_dr=0.95)
    nll = log_loss(y_test, p_test, labels=[0, 1])
    brier = brier_score_loss(y_test, p_test)

    print(f"[CIC] AUROC={auroc:.4f} AUPR={aupr:.4f} FPR@95%DR={fpr95:.4f} thr={thr95:.4f}")
    print(f"[CIC] NLL={nll:.4f} Brier={brier:.4f}")

    metrics = {
        "AUROC": auroc,
        "AUPR": aupr,
        "FPR@95%DR": fpr95,
        "thr95": thr95,
        "NLL": nll,
        "Brier": brier,
        "calibration": calib_info,
    }
    save_json(metrics, os.path.join(outdir, "CIC_IoMT_train_to_test__binary_metrics.json"))

    # Tiny-slice benign experiment
    if tiny_slice_cfg and tiny_slice_cfg.get("enabled", True):
        print("[CIC] Tiny-slice benign calibration experiment ...")
        frac = tiny_slice_cfg.get("benign_frac", 0.015)
        seed = tiny_slice_cfg.get("seed", 42)

        # Benign rows are identified through the mapped labels (0), because
        # df_te may hold raw string labels.
        benign_index = df_te[y_test == 0].index.values
        n_benign = len(benign_index)

        if n_benign == 0:
            # Sampling from an empty pool would raise; there is nothing to do.
            print("[WARN] Test set contains no benign samples. Skipping tiny-slice experiment.")
            return feats, bin_col, mc_col

        n_slice = max(1, int(frac * n_benign))
        rng = np.random.default_rng(seed)
        idx_slice = rng.choice(benign_index, size=n_slice, replace=False)

        df_slice = df_te.loc[idx_slice].copy()
        # Remaining test set excludes slice
        df_rest = df_te.drop(idx_slice).copy()

        X_slice = df_slice[feats].values
        y_slice = build_binary_labels(df_slice, bin_col, None)

        X_rest = df_rest[feats].values
        y_rest = build_binary_labels(df_rest, bin_col, None)

        # Add the benign slice to the original training data.
        X_train_tiny = np.vstack([X_train, X_slice])
        y_train_tiny = np.concatenate([y_train, y_slice])

        pipe_tiny, mlp_tiny = fit_lr_then_mlp(
            X_train_tiny,
            y_train_tiny,
            X_rest,
            y_rest,
            use_smote=False,
            seed=seed,
            max_epochs=CFG["train"]["max_epochs"],
            batch_size=CFG["train"]["batch_size"],
            hidden_units=CFG["train"]["mlp_hidden_units"],
        )

        p_rest_uncal = mlp_tiny.predict_proba(pipe_tiny.transform(X_rest))[:, 1]

        # NOTE(review): the temperature is selected on the remaining test
        # rows themselves (logits and labels of `rest`), not on the benign
        # slice — confirm that this is intended.
        logits_rest = _logit(p_rest_uncal)
        best_temp_slice, _ = _search_temperature(logits_rest, y_rest)
        p_rest = _sigmoid(logits_rest / best_temp_slice)

        auroc_rest = roc_auc_score(y_rest, p_rest)
        aupr_rest = average_precision_score(y_rest, p_rest)
        fpr95_rest, thr95_rest = fpr_at_dr(y_rest, p_rest, target_dr=0.95)

        metrics_tiny = {
            "AUROC": auroc_rest,
            "AUPR": aupr_rest,
            "FPR@95%DR": fpr95_rest,
            "thr95": thr95_rest,
            "slice_frac": frac,
            "slice_seed": seed,
        }
        save_json(metrics_tiny, os.path.join(outdir, "CIC_IoMT_tiny_benign_slice__binary_metrics.json"))

    return feats, bin_col, mc_col

In [None]:
# =====================================
# Cross-domain with feature automap
# =====================================
def build_feature_automap(src_feats, tgt_feats, automap_min=5):
    """Return the sorted feature names shared by `src_feats` and `tgt_feats`.

    A warning is printed when fewer than `automap_min` names are shared; the
    (possibly empty) list is returned either way.
    """
    shared = sorted(set(src_feats) & set(tgt_feats))
    n_shared = len(shared)
    if n_shared < automap_min:
        print(f"[WARN] Only {n_shared} common features (< {automap_min}); cross-domain may be unstable.")
    return shared


def run_cross_domain(
    nf_df,
    nf_feats,
    nf_bin_col,
    nf_mc_col,
    cic_tr_df,
    cic_tr_feats,
    cic_tr_bin_col,
    cic_tr_mc_col,
    cic_te_df,
    cic_te_feats,
    cic_te_bin_col,
    cic_te_mc_col,
    outdir: str,
    automap_min: int = 5,
):
    """Evaluate binary transfer in both directions on shared feature names.

    NF → CIC trains on the NF frame and scores the CIC test frame; CIC → NF
    trains on the CIC training frame and scores the NF frame. Each direction
    uses only the features common to source and target, is skipped (with a
    warning) when there are none, and writes its own metrics JSON to `outdir`.

    The multi-class column arguments and `cic_te_bin_col`'s counterpart
    `cic_tr_mc_col` / `cic_te_mc_col` / `nf_mc_col` are accepted but not used.
    """
    print("[XFER] Cross-domain evaluation NF↔CIC using feature automap ...")
    train_cfg = CFG["train"]

    def _transfer(src_df, src_bin_col, tgt_df, tgt_bin_col, shared, use_smote, out_name):
        # Train on the source frame, score the target frame, save the metrics.
        X_src = src_df[shared].values
        y_src = build_binary_labels(src_df, src_bin_col, None)

        X_tgt = tgt_df[shared].values
        y_tgt = build_binary_labels(tgt_df, tgt_bin_col, None)

        pipe, mlp = fit_lr_then_mlp(
            X_src,
            y_src,
            X_tgt,
            y_tgt,
            use_smote=use_smote,
            seed=train_cfg["random_state"],
            max_epochs=train_cfg["max_epochs"],
            batch_size=train_cfg["batch_size"],
            hidden_units=train_cfg["mlp_hidden_units"],
        )

        p_tgt = mlp.predict_proba(pipe.transform(X_tgt))[:, 1]

        auroc = roc_auc_score(y_tgt, p_tgt)
        aupr = average_precision_score(y_tgt, p_tgt)
        fpr95, thr95 = fpr_at_dr(y_tgt, p_tgt, target_dr=0.95)

        save_json(
            {
                "AUROC": auroc,
                "AUPR": aupr,
                "FPR@95%DR": fpr95,
                "thr95": thr95,
                "n_common_feats": len(shared),
            },
            os.path.join(outdir, out_name),
        )

    # NF -> CIC
    feats_nf_to_cic = build_feature_automap(nf_feats, cic_te_feats, automap_min=automap_min)
    if len(feats_nf_to_cic) == 0:
        print("[WARN] Skipping NF -> CIC evaluation (0 common features).")
    else:
        _transfer(
            nf_df,
            nf_bin_col,
            cic_te_df,
            cic_te_bin_col,
            feats_nf_to_cic,
            train_cfg["use_smote"],
            "NF_ToN_IoT__to__CIC_IoMT__binary_xfer__metrics.json",
        )

    # CIC -> NF
    feats_cic_to_nf = build_feature_automap(cic_tr_feats, nf_feats, automap_min=automap_min)
    if len(feats_cic_to_nf) == 0:
        print("[WARN] Skipping CIC -> NF evaluation (0 common features).")
    else:
        _transfer(
            cic_tr_df,
            cic_tr_bin_col,
            nf_df,
            nf_bin_col,
            feats_cic_to_nf,
            False,  # CIC has only attacks, so SMOTE would fail
            "CIC_IoMT__to__NF_ToN_IoT__binary_xfer__metrics.json",
        )

In [None]:
# ==================================================================================
# ========= CLEAN ABLATION: LR / MLP / RF / XGB / LR→MLP (Temp / Isotonic) =========
# ==================================================================================

# Random seeds iterated by _eval_variant_set; every model variant is trained
# and scored once per seed, producing one result row per (model, seed).
SEEDS_FOR_ABLATION = [0, 1, 2, 3, 4]


def _train_lr(Xtr, ytr, seed):
    """Plain logistic-regression baseline.

    When `ytr` holds a single class, one synthetic row at the feature mean is
    appended and labelled with the *missing* class (0 when only attacks are
    present, 1 when only benign rows are present) so the fit sees two classes.
    Returns the fitted classifier.
    """
    present = np.unique(ytr)
    if len(present) < 2:
        # Label the synthetic row with whichever binary class is absent.
        missing = 1 if (len(present) == 1 and present[0] == 0) else 0
        synthetic_x = Xtr.mean(axis=0).reshape(1, -1)
        Xtr = np.vstack([Xtr, synthetic_x])
        ytr = np.append(ytr, np.array([missing]))

    clf = LogisticRegression(
        penalty="l2",
        C=1.0,
        solver="lbfgs",
        max_iter=1000,
        class_weight="balanced",
        n_jobs=-1,
        random_state=seed,
    )
    return clf.fit(Xtr, ytr)


def _train_mlp(Xtr, ytr, seed, hidden=64, dropout=0.2):
    """Plain MLP baseline (same head as hybrid, but without LR front-end).

    Args:
        Xtr, ytr: training features and binary labels.
        seed: random state of the classifier.
        hidden: width of the single hidden layer.
        dropout: accepted for interface compatibility; not used by this
            sklearn-based model.

    When `ytr` holds a single class, one synthetic row at the feature mean is
    appended and labelled with the *missing* class; early stopping and its
    validation split are then disabled. Returns the fitted classifier.
    """
    synthetic_sample_added = False
    present = np.unique(ytr)
    if len(present) < 2:
        # Label the synthetic row with whichever binary class is absent.
        missing = 1 if (len(present) == 1 and present[0] == 0) else 0
        synthetic_x = Xtr.mean(axis=0).reshape(1, -1)
        Xtr = np.vstack([Xtr, synthetic_x])
        ytr = np.append(ytr, np.array([missing]))
        synthetic_sample_added = True

    mlp = MLPClassifier(
        hidden_layer_sizes=(hidden,),
        activation="relu",
        solver="adam",
        alpha=0.0,
        batch_size=2048,
        learning_rate_init=1e-3,
        max_iter=50,
        early_stopping=not synthetic_sample_added,
        n_iter_no_change=5,
        validation_fraction=0.1 if not synthetic_sample_added else 0.0,
        random_state=seed,
    )
    return mlp.fit(Xtr, ytr)


def _train_rf(Xtr, ytr, seed):
    """Random Forest baseline for tabular IDS flows.

    When `ytr` holds a single class, one synthetic row at the feature mean is
    appended and labelled with the *missing* class, so the fitted forest
    always exposes two probability columns. Returns the fitted classifier.
    """
    present = np.unique(ytr)
    if len(present) < 2:
        # Label the synthetic row with whichever binary class is absent.
        missing = 1 if (len(present) == 1 and present[0] == 0) else 0
        synthetic_x = Xtr.mean(axis=0).reshape(1, -1)
        Xtr = np.vstack([Xtr, synthetic_x])
        ytr = np.append(ytr, np.array([missing]))

    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        n_jobs=-1,
        class_weight="balanced_subsample",
        random_state=seed,
    )
    return rf.fit(Xtr, ytr)


def _train_xgb(Xtr, ytr, seed):
    """XGBoost baseline; returns None if xgboost is not installed.

    Availability is read from the module-level ``HAVE_XGB`` flag (treated as
    False when the flag is missing). When `ytr` holds a single class, one
    synthetic row at the feature mean is appended and labelled with the
    *missing* class. Returns the fitted classifier otherwise.
    """
    if not globals().get("HAVE_XGB", False):
        return None

    present = np.unique(ytr)
    if len(present) < 2:
        # Label the synthetic row with whichever binary class is absent.
        missing = 1 if (len(present) == 1 and present[0] == 0) else 0
        synthetic_x = Xtr.mean(axis=0).reshape(1, -1)
        Xtr = np.vstack([Xtr, synthetic_x])
        ytr = np.append(ytr, np.array([missing]))

    xgb = XGBClassifier(
        n_estimators=300,
        max_depth=6,
        learning_rate=0.1,
        subsample=0.9,
        colsample_bytree=0.9,
        objective="binary:logistic",
        eval_metric="logloss",
        tree_method="hist",
        n_jobs=-1,
        random_state=seed,
    )
    return xgb.fit(Xtr, ytr)


def _hybrid_lr_then_mlp_preds(Xtr, ytr, Xte, seed, use_isotonic=True):
    """Fit LR→MLP hybrid and return (uncalibrated, temp, isotonic) prob arrays.

    The hybrid is fitted on all of `Xtr`; both calibrators are then fitted on
    the predictions for a random 10% subset of those same training rows.
    The third element is ``None`` when `use_isotonic` is false.
    """

    def _to_logit(p):
        # Clip so that probabilities of exactly 0 or 1 stay finite.
        clipped = np.clip(p, 1e-8, 1 - 1e-8)
        return np.log(clipped) - np.log(1 - clipped)

    def _to_prob(z):
        return 1.0 / (1.0 + np.exp(-z))

    pipe, mlp = fit_lr_then_mlp(
        Xtr,
        ytr,
        Xte,
        np.zeros(len(Xte)),
        use_smote=CFG["train"]["use_smote"],
        seed=seed,
        max_epochs=CFG["train"]["max_epochs"],
        batch_size=CFG["train"]["batch_size"],
        hidden_units=CFG["train"]["mlp_hidden_units"],
    )
    if pipe is None or mlp is None:
        return None, None, None

    # Uncalibrated LR→MLP probabilities on the evaluation rows.
    p_uncal = mlp.predict_proba(pipe.transform(Xte))[:, 1]

    # Calibration subset: the last 10% of a seeded shuffle of the row index.
    order = np.arange(len(Xtr))
    np.random.default_rng(seed).shuffle(order)
    calib_rows = order[int(0.9 * len(Xtr)):]

    y_cal = ytr[calib_rows]
    p_cal = mlp.predict_proba(pipe.transform(Xtr[calib_rows]))[:, 1]

    # Temperature scaling: grid search for the lowest NLL, T=1.0 wins ties.
    cal_logits = _to_logit(p_cal)

    def _nll_at(temp):
        return log_loss(y_cal, _to_prob(cal_logits / float(temp)), labels=[0, 1])

    chosen_temp = 1.0
    lowest_nll = _nll_at(1.0)
    for candidate in [0.5, 0.75, 1.0, 1.25, 1.5, 2.0]:
        candidate_nll = _nll_at(candidate)
        if candidate_nll < lowest_nll:
            lowest_nll = candidate_nll
            chosen_temp = candidate

    p_temp = _to_prob(_to_logit(p_uncal) / chosen_temp)

    # Optional isotonic regression fitted on the uncalibrated probabilities.
    p_iso = None
    if use_isotonic:
        from sklearn.isotonic import IsotonicRegression

        iso = IsotonicRegression(out_of_bounds="clip")
        iso.fit(p_cal, y_cal)
        p_iso = iso.transform(p_uncal)

    return p_uncal, p_temp, p_iso


def _metrics_binary(y_true, prob_pos, target_dr=0.95):
    """Compute AUROC, AUPR, FPR@DR, and ECE/NLL for a binary problem.

    Returns a dict with the keys ``AUROC``, ``AUPR``, ``FPR95``, ``ECE`` and
    ``NLL``. ECE is computed with 15 bins on the two-column matrix
    ``[P(y=0), P(y=1)]`` built from `prob_pos`.
    """
    return {
        "AUROC": roc_auc_score(y_true, prob_pos),
        "AUPR": average_precision_score(y_true, prob_pos),
        # Only the rate is kept; the threshold is discarded.
        "FPR95": fpr_at_dr(y_true, prob_pos, target_dr=target_dr)[0],
        "ECE": expected_calibration_error(
            y_true, np.column_stack([1.0 - prob_pos, prob_pos]), n_bins=15
        ),
        "NLL": log_loss(y_true, prob_pos, labels=[0, 1]),
    }


def _eval_variant_set(dataset_name, Xtr, ytr, Xte, yte):
    """Score every ablation model on one dataset, once per ablation seed.

    For each seed in ``SEEDS_FOR_ABLATION`` the features are standardized
    (scaler fitted on ``Xtr`` only), then LR, MLP, RF, XGBoost (when
    ``_train_xgb`` returns a model) and the hybrid LR→MLP outputs
    (uncalibrated, temperature-scaled, isotonic) are evaluated on the test
    split with ``_metrics_binary``.

    Args:
        dataset_name: Label written to the ``Dataset`` column of every row.
        Xtr, ytr: Training features and binary labels.
        Xte, yte: Test features and binary labels.

    Returns:
        ``pandas.DataFrame`` with one row per (model, seed): the columns
        ``Dataset``, ``Model``, ``Seed`` followed by the metric columns.
    """
    records = []

    def _record(model_label, run_seed, scores):
        # One row = identifying columns followed by the metrics for `scores`.
        entry = {"Dataset": dataset_name, "Model": model_label, "Seed": run_seed}
        entry.update(_metrics_binary(yte, scores))
        records.append(entry)

    for seed in SEEDS_FOR_ABLATION:
        # Fresh scaler per seed, fitted on the training split only.
        scaler = StandardScaler()
        Xtr_std = scaler.fit_transform(Xtr)
        Xte_std = scaler.transform(Xte)

        # Logistic-regression baseline.
        lr_model = _train_lr(Xtr_std, ytr, seed)
        _record("LR", seed, lr_model.predict_proba(Xte_std)[:, 1])

        # Multi-layer-perceptron baseline.
        mlp_model = _train_mlp(Xtr_std, ytr, seed)
        _record("MLP", seed, mlp_model.predict_proba(Xte_std)[:, 1])

        # Random-forest baseline.
        rf_model = _train_rf(Xtr_std, ytr, seed)
        _record("RF", seed, rf_model.predict_proba(Xte_std)[:, 1])

        # XGBoost baseline; the trainer signals "not available" with None.
        xgb_model = _train_xgb(Xtr_std, ytr, seed)
        if xgb_model is not None:
            _record("XGBoost", seed, xgb_model.predict_proba(Xte_std)[:, 1])

        # Hybrid LR→MLP: raw output plus the two calibrated variants.
        p_uncal, p_temp, p_iso = _hybrid_lr_then_mlp_preds(
            Xtr_std, ytr, Xte_std, seed, use_isotonic=True
        )
        if p_uncal is not None:
            _record("LR→MLP (uncal.)", seed, p_uncal)
            _record("LR→MLP (Temp)", seed, p_temp)
            if p_iso is not None:
                _record("LR→MLP (Isot.)", seed, p_iso)

    return pd.DataFrame(records)


def _latex_escape_cell(text):
    """Return ``text`` made safe for a LaTeX text-mode table cell."""
    escaped = str(text)
    for raw, safe in (
        ("&", r"\&"),
        ("%", r"\%"),
        ("#", r"\#"),
        ("_", r"\_"),
        ("→", r"$\rightarrow$"),
    ):
        escaped = escaped.replace(raw, safe)
    return escaped


def build_clean_ablation_tables(
    nf_df,
    nf_feats,
    nf_bin_col,
    cic_tr_df,
    cic_tr_feats,
    cic_te_df,
    cic_te_feats,
    outdir,
):
    """Run the clean-condition ablation and write a CSV plus a LaTeX table.

    Args:
        nf_df: NF-ToN-IoT frame; used as both train and test split.
        nf_feats: Feature column names selected from ``nf_df``.
        nf_bin_col: Binary-label column passed to ``build_binary_labels``.
        cic_tr_df: CIC training frame.
        cic_tr_feats: Feature column names selected from ``cic_tr_df``.
        cic_te_df: CIC test frame.
        cic_te_feats: Feature column names selected from ``cic_te_df``.
        outdir: Directory for the outputs; created if missing.

    Side effects:
        Writes ``clean_ablation_seedavg.csv`` (one row per dataset/model/seed,
        i.e. *before* seed averaging) and ``ablation_clean_seedavg_filled.tex``
        (seed-averaged booktabs table) into ``outdir``.
    """
    os.makedirs(outdir, exist_ok=True)

    # NF-ToN-IoT (train→test = full df for now).
    # NOTE: test data is the training data here, so NF metrics are in-sample.
    Xtr_nf = nf_df[nf_feats].values
    ytr_nf = build_binary_labels(nf_df, nf_bin_col, None)
    Xte_nf = Xtr_nf
    yte_nf = ytr_nf
    df_nf = _eval_variant_set("NF-ToN-IoT", Xtr_nf, ytr_nf, Xte_nf, yte_nf)

    # CIC_IoMT_2024 WiFi-MQTT (train→test)
    bin_col = CFG["columns"]["cic_label_binary"]
    Xtr_cic = cic_tr_df[cic_tr_feats].values
    ytr_cic = build_binary_labels(cic_tr_df, bin_col, None)
    Xte_cic = cic_te_df[cic_te_feats].values
    yte_cic = build_binary_labels(cic_te_df, bin_col, None)
    df_cic = _eval_variant_set("CIC_IoMT_2024_WiFi_MQTT", Xtr_cic, ytr_cic, Xte_cic, yte_cic)

    df_all = pd.concat([df_nf, df_cic], ignore_index=True)
    csv_path = os.path.join(outdir, "clean_ablation_seedavg.csv")
    df_all.to_csv(csv_path, index=False)
    print(f"[OK] Wrote clean ablation CSV → {csv_path}")

    # Seed averages for LaTeX
    agg = (
        df_all.groupby(["Dataset", "Model"])
        .agg(
            AUROC_mean=("AUROC", "mean"),
            AUPR_mean=("AUPR", "mean"),
            FPR95_mean=("FPR95", "mean"),
            ECE_mean=("ECE", "mean"),
        )
        .reset_index()
    )

    # Order rows for presentation; unknown model names sort last.
    order = [
        "LR",
        "MLP",
        "RF",
        "XGBoost",
        "LR→MLP (uncal.)",
        "LR→MLP (Temp)",
        "LR→MLP (Isot.)",
    ]
    rank = {name: pos for pos, name in enumerate(order)}
    agg["Model_order"] = agg["Model"].apply(lambda m: rank.get(m, len(order)))
    agg = agg.sort_values(["Dataset", "Model_order"]).drop(columns=["Model_order"])

    # Raw strings keep LaTeX backslashes literal: a non-raw "\\\rightarrow"
    # would embed a carriage return (\r) and corrupt the command.
    row_end = r"\\"  # LaTeX tabular row terminator (two backslashes)
    tex_lines = [
        r"\begin{table}[!t]",
        r"\centering",
        (
            r"\caption{Seed-averaged clean-condition performance ablation of classical baselines "
            r"(LR, MLP, RF, XGBoost) and the hybrid LR$\rightarrow$MLP with/without calibration on "
            r"CIC\_IoMT\_2024\_WiFi\_MQTT (train$\rightarrow$test) and NF-ToN-IoT.}"
        ),
        r"\label{tab:ablation-clean}",
        r"\small",
        r"\begin{tabular}{l l c c c c}",
        r"\toprule",
        (
            r"Dataset & Model & AUROC $\uparrow$ & AUPR $\uparrow$ & "
            r"FPR@95\%DR $\downarrow$ & ECE $\downarrow$ " + row_end
        ),
        r"\midrule",
    ]

    for _, row in agg.iterrows():
        cells = [
            # Dataset/model names contain "_" and "→", which LaTeX rejects raw.
            _latex_escape_cell(row["Dataset"]),
            _latex_escape_cell(row["Model"]),
            f"{row['AUROC_mean']:.3f}",
            f"{row['AUPR_mean']:.3f}",
            f"{row['FPR95_mean']:.3f}",
            f"{row['ECE_mean']:.3f}",
        ]
        tex_lines.append(" & ".join(cells) + " " + row_end)

    tex_lines.append(r"\bottomrule")
    tex_lines.append(r"\end{tabular}")
    tex_lines.append(r"\end{table}")

    tex_path = os.path.join(outdir, "ablation_clean_seedavg_filled.tex")
    with open(tex_path, "w", encoding="utf-8") as f:
        f.write("\n".join(tex_lines) + "\n")
    print(f"[OK] Wrote clean ablation LaTeX table → {tex_path}")

In [None]:
# -----------------
# ZIP helper
# -----------------
def zip_outputs(outdir: str, zip_path: str):
    """Bundle every file under ``outdir`` into a compressed ZIP archive.

    Args:
        outdir: Directory that is walked recursively.
        zip_path: Destination archive; overwritten if it already exists.

    Archive member names are paths relative to ``outdir``. Members are
    DEFLATE-compressed (``ZipFile`` alone defaults to storing uncompressed).
    If ``zip_path`` lies inside ``outdir`` the archive is not added to itself.
    """
    from zipfile import ZIP_DEFLATED

    archive_abs = os.path.abspath(zip_path)
    with ZipFile(zip_path, "w", compression=ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(outdir):
            for fn in files:
                fpath = os.path.join(root, fn)
                # The archive exists on disk as soon as it is opened; skip it
                # so a half-written copy of itself is never embedded.
                if os.path.abspath(fpath) == archive_abs:
                    continue
                arcname = os.path.relpath(fpath, outdir)
                zf.write(fpath, arcname)

In [None]:
# ----------
# Main
# ----------
def main():
    """Parse CLI options and run the full training/evaluation pipeline.

    Steps, in order: load both datasets, NF in-domain run, CIC in-domain run
    (with optional tiny benign slice), cross-domain NF↔CIC run, clean-condition
    ablation export, and optionally a ZIP bundle of the output directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--grid", action="store_true", help="(unused here)")
    parser.add_argument("--adv-train-eps", type=float, default=0.0)
    parser.add_argument("--adv-train-frac", type=float, default=0.3)
    parser.add_argument("--cic-calib", type=str, choices=["temperature", "isotonic"], default="temperature")
    parser.add_argument("--automap-min", type=int, default=5, help="Minimum shared features for cross-domain metrics")
    parser.add_argument(
        "--no-cic-tiny-slice",
        action="store_true",
        help="Disable the CIC tiny benign slice experiment (enabled by default).",
    )
    parser.add_argument(
        "--cic-slice-frac",
        type=float,
        default=0.015,
        # argparse %-formats help text, so a literal percent sign must be
        # written as "%%"; a bare "%" makes --help raise ValueError.
        help="Fraction of CIC_test benign to use for training+calibration (default 1.5%%).",
    )
    parser.add_argument("--cic-slice-seed", type=int, default=42, help="Random seed for benign-slice sampling.")
    parser.add_argument("--zip", action="store_true", help="Also compress outputs to a ZIP bundle.")
    # parse_known_args tolerates unrecognised argv entries instead of exiting
    # (presumably so the script also runs inside a notebook kernel).
    args, _ = parser.parse_known_args()

    outdir = ensure_outdir(CFG["paths"]["outdir"])

    # Load datasets
    nf_df = load_nf_ton_iot(CFG["paths"]["nf"])
    cic_tr_df, cic_te_df = load_cic_iomt_train_test(CFG["paths"]["cic_train"], CFG["paths"]["cic_test"])

    # NF in-domain
    nf_feats, nf_bin_col, nf_mc_col = run_in_domain_nf(nf_df, outdir)

    # CIC in-domain + tiny-slice
    tiny_cfg = {
        "enabled": not args.no_cic_tiny_slice,
        "benign_frac": args.cic_slice_frac,
        "seed": args.cic_slice_seed,
    }
    cic_feats, cic_bin_col, cic_mc_col = run_in_domain_cic(
        cic_tr_df,
        cic_te_df,
        outdir,
        calib_method=args.cic_calib,
        tiny_slice_cfg=tiny_cfg,
    )

    # Cross-domain NF↔CIC with feature automap
    # (the same CIC feature/label columns are passed for both train and test)
    run_cross_domain(
        nf_df,
        nf_feats,
        nf_bin_col,
        nf_mc_col,
        cic_tr_df,
        cic_feats,
        cic_bin_col,
        cic_mc_col,
        cic_te_df,
        cic_feats,
        cic_bin_col,
        cic_mc_col,
        outdir,
        automap_min=args.automap_min,
    )

    # Clean-condition ablation for classical baselines vs LR→MLP
    # NOTE(review): exports go under CFG["paths"]["outdir"], not the `outdir`
    # returned by ensure_outdir; confirm the two coincide, since the ZIP
    # bundle below only covers `outdir`.
    build_clean_ablation_tables(
        nf_df,
        nf_feats,
        nf_bin_col,
        cic_tr_df,
        cic_feats,
        cic_te_df,
        cic_feats,
        outdir=os.path.join(CFG["paths"]["outdir"], "paper_exports"),
    )

    if args.zip:
        # The bundle is written next to (not inside) the output directory.
        zip_path = os.path.join(Path(outdir).parent, "outputs_bundle.zip")
        zip_outputs(outdir, zip_path)
        print(f"[OK] Wrote ZIP bundle → {zip_path}")


# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()