In [None]:
##### ANOMALY DETECTION WITH AUTOENCODER #####
#
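# Windowed MLP autoencoder trained on active-power readings: time points whose
# reconstruction error exceeds a threshold learned from training data are labeled anomalies.
# (The previous header text described DBSCAN, which this notebook does not use.)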
#
# *cluster is the same as *interval

In [None]:
# Maybe have to do this
!pip uninstall -y sympy
!pip install sympy==1.12
import sympy, importlib; importlib.reload(sympy)

In [None]:
import os, glob, time, tracemalloc, warnings, gc
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from numpy.lib.stride_tricks import sliding_window_view

warnings.filterwarnings("ignore")

# -----------------------------
# CONFIG - PATHS and PARAMETERS
# -----------------------------
# Residences processed one after another by the main loop; each name is
# substituted into the two path patterns below.
RESIDENCES = [
    "REFIT_House01",
    "REFIT_House02","REFIT_House03","REFIT_House05",
    "REFIT_House07","REFIT_House09","REFIT_House15",
    "UKDALE_House01","UKDALE_House02","UKDALE_House05",
    "AMPds2_House01",
    "GREEND_House00","GREEND_House01","GREEND_House03"
]

# Single file used to train the model for a residence.
INPUT_TRAIN_PATTERN = "/content/drive/MyDrive/Paper02_14Datasets/MERGED/{residence}_Fridge_15minutes_StepChange_MERGED.csv"
# Glob for every file scored at inference time. It lives in the same folder and
# starts with the residence name, so it also matches the training file.
INPUT_ALL_PATTERN   = "/content/drive/MyDrive/Paper02_14Datasets/MERGED/{residence}*.csv"

OUTPUT_DIR_PRED     = "/content/drive/MyDrive/Paper02_14Datasets/ANOMALY_AE"   # keep same folder for continuity
OUTPUT_DIR_SUMMARY  = os.path.join(OUTPUT_DIR_PRED, "Percentiles_Summary")
MODEL_NAME          = "AE"  # suffix used in output file names and in log lines

# ---- Windowing / training ----
WIN                 = 96   # window length in samples (presumably 24 h of 15-minute data - confirm)
FAST_WINDOW_STRIDE  = 4    # stride between training windows
INFER_WINDOW_STRIDE = 1    # stride between inference windows

# ---- AE (MLP) hyperparams ----
# Layer widths: WIN -> AE_HIDDEN1 -> AE_HIDDEN2 -> AE_LATENT and mirrored back.
AE_HIDDEN1          = 128
AE_HIDDEN2          = 64
AE_LATENT           = 32
DROPOUT             = 0.1

LR                  = 1e-3
EPOCHS              = 20
BATCH               = 256
# Early stopping monitors the training loss: stop after PATIENCE consecutive
# epochs that fail to improve the best loss by more than MIN_DELTA.
EARLY_STOP          = True
PATIENCE            = 4
MIN_DELTA           = 1e-5

# ---- Thresholding & post-processing ----
THR_MODE            = "mad"        # "mad" or "percentile"
THR_PCT             = 40           # only read when THR_MODE is not "mad"
K_MAD               = 2.0          # threshold = median + K_MAD * MAD
USE_EMA             = True         # smooth errors before learning AND before applying the threshold
EMA_ALPHA           = 0.2
DILATE_STEPS        = 2            # each flagged point also flags this many neighbours on both sides

USE_STANDARDIZE     = True         # z-score with statistics of the training split
SEED                = 42

# Simpler stable defaults
# DataLoader settings; pin_memory is only enabled when DEVICE is "cuda".
NUM_WORKERS         = 0
PIN_MEMORY          = True
PERSISTENT_WORKERS  = False
USE_AMP             = True         # autocast / GradScaler, only active on CUDA
TRY_TORCH_COMPILE   = False        # read by maybe_compile()

# Inference batching (#windows per batch)
# INFER_WIN_BATCH is the starting size; on a CUDA/OOM RuntimeError the batch is
# halved, never going below INFER_WIN_BATCH_MIN.
INFER_WIN_BATCH     = 100_000
INFER_WIN_BATCH_MIN = 20_000

# Ensure output dirs exist
os.makedirs(OUTPUT_DIR_PRED, exist_ok=True)
os.makedirs(OUTPUT_DIR_SUMMARY, exist_ok=True)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE(review): `rng` is not referenced anywhere else in the visible code.
rng = np.random.default_rng(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    # NOTE(review): cudnn.benchmark lets cuDNN pick kernels at runtime, which
    # presumably trades bit-exact reproducibility for speed - confirm this is intended.
    torch.backends.cudnn.benchmark = True
    try:
        torch.set_float32_matmul_precision("medium")
    except Exception:
        # Best effort; presumably guards torch builds that lack this setting.
        pass

# -----------------------------
# Helpers
# -----------------------------
# Ensure correct formatting for timestamp, active_power, and ground_truth_anomaly
def read_csv_safe(path: str) -> pd.DataFrame:
    """Load a CSV and normalise the three columns the pipeline relies on.

    Each column is only touched when it exists:

    * ``timestamp`` is parsed (unparseable values coerced) and re-rendered as
      ``YYYY-MM-DD HH:MM:SS`` text; if that fails the column is left as read.
    * ``active_power`` is coerced to numeric, invalid entries becoming NaN.
    * ``ground_truth_anomaly`` is stringified and stripped; every value other
      than exactly ``"Anomaly"`` is replaced by ``"Normal"``.

    Args:
        path: Location of the CSV file.

    Returns:
        The loaded frame with the normalised columns.
    """
    frame = pd.read_csv(path)
    present = frame.columns

    if "timestamp" in present:
        try:
            parsed = pd.to_datetime(frame["timestamp"], errors="coerce")
            frame["timestamp"] = parsed.dt.strftime("%Y-%m-%d %H:%M:%S")
        except Exception:
            # Deliberate best effort: keep the raw timestamp text on failure.
            pass

    if "active_power" in present:
        frame["active_power"] = pd.to_numeric(frame["active_power"], errors="coerce")

    if "ground_truth_anomaly" in present:
        cleaned = frame["ground_truth_anomaly"].astype(str).str.strip()
        # Keep exact "Anomaly" labels, collapse everything else to "Normal".
        frame["ground_truth_anomaly"] = cleaned.where(cleaned.eq("Anomaly"), "Normal")

    return frame

# Get megabyte value from bytes
def mb(bytes_val: int) -> float:
    """Convert a byte count to mebibytes, rounded to three decimals."""
    bytes_per_mb = 1024 * 1024
    return round(bytes_val / bytes_per_mb, 3)

# Overwrite the CSV file
def save_csv_overwrite(df: pd.DataFrame, path: str):
    """Write ``df`` to ``path`` as CSV without the index, replacing any existing file.

    The parent directory is created when missing. A bare filename (no
    directory component) is written to the current working directory.

    Args:
        df: Frame to persist.
        path: Destination file path.
    """
    parent = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so only create a real parent.
    if parent:
        os.makedirs(parent, exist_ok=True)
    df.to_csv(path, index=False)

# Calculate the metrics: Total/TP/TN/FP/FN/ActualNormal/ActualAnomaly/Accuracy/Precision/Recall/F1-Score/Normal_%/Anomaly_%
def safe_metrics(y_true: pd.Series, y_pred: pd.Series):
    """Compute confusion counts and derived scores for "Anomaly"/"Normal" labels.

    Missing labels on either side are treated as ``"Normal"``. Rows whose label
    is neither ``"Anomaly"`` nor ``"Normal"`` contribute to ``Total`` only.
    Every ratio falls back to ``0.0`` when its denominator is zero.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        Dict with Total, TP, TN, FP, FN, ActualNormal, ActualAnomaly, Accuracy,
        Precision, Recall, F1-Score, Normal_% (TN over actual normals, in
        percent) and Anomaly_% (TP over actual anomalies, in percent).
    """
    truth = y_true.fillna("Normal").astype(str)
    guess = y_pred.fillna("Normal").astype(str)

    truth_is_anom = truth.eq("Anomaly")
    truth_is_norm = truth.eq("Normal")
    guess_is_anom = guess.eq("Anomaly")
    guess_is_norm = guess.eq("Normal")

    def _count(mask) -> int:
        return int(mask.sum())

    tp = _count(truth_is_anom & guess_is_anom)
    tn = _count(truth_is_norm & guess_is_norm)
    fp = _count(truth_is_norm & guess_is_anom)
    fn = _count(truth_is_anom & guess_is_norm)

    n_rows = int(len(truth))
    n_anom = _count(truth_is_anom)
    n_norm = _count(truth_is_norm)

    predicted_pos = tp + fp
    actual_pos = tp + fn

    accuracy = 0.0
    if n_rows:
        accuracy = (tp + tn) / n_rows
    precision = 0.0
    if predicted_pos:
        precision = tp / predicted_pos
    recall = 0.0
    if actual_pos:
        recall = tp / actual_pos
    f1 = 0.0
    if precision + recall:
        f1 = 2 * precision * recall / (precision + recall)

    normal_pct = 0.0
    if n_norm:
        normal_pct = tn / n_norm * 100.0
    anomaly_pct = 0.0
    if n_anom:
        anomaly_pct = tp / n_anom * 100.0

    return {
        "Total": n_rows, "TP": tp, "TN": tn, "FP": fp, "FN": fn,
        "ActualNormal": n_norm, "ActualAnomaly": n_anom,
        "Accuracy": accuracy, "Precision": precision, "Recall": recall, "F1-Score": f1,
        "Normal_%": normal_pct, "Anomaly_%": anomaly_pct,
    }

# -----------------------------
# Autoencoder Architecture + Processes
# -----------------------------
class AE_MLP(nn.Module):
    """Fully connected autoencoder over fixed-length windows.

    The encoder maps ``win -> h1 -> h2 -> latent`` and the decoder mirrors it
    back to ``win``. Dropout follows the first encoder activation and the last
    decoder activation.

    Args:
        win: Number of samples per window (input and output width).
        h1: Width of the outer hidden layer.
        h2: Width of the inner hidden layer.
        latent: Width of the bottleneck.
        dropout: Dropout probability.
    """

    def __init__(self, win: int, h1=128, h2=64, latent=32, dropout=0.1):
        super().__init__()
        self.win = win

        # Layer order is kept fixed so parameter names and seeded
        # initialisation are unaffected.
        encoder_layers = [
            nn.Linear(win, h1),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(h1, h2),
            nn.ReLU(inplace=True),
            nn.Linear(h2, latent),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.Linear(latent, h2),
            nn.ReLU(inplace=True),
            nn.Linear(h2, h1),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(h1, win),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x_winB_T1: torch.Tensor) -> torch.Tensor:
        """Reconstruct a batch of windows.

        Args:
            x_winB_T1: Batch shaped ``[B, WIN, 1]``.

        Returns:
            Reconstruction with the same ``[B, WIN, 1]`` shape.
        """
        flat = x_winB_T1.squeeze(-1)          # [B, WIN]
        restored = self.decoder(self.encoder(flat))
        return restored.unsqueeze(-1)         # back to [B, WIN, 1]

# Compile if allowed.
def maybe_compile(model: nn.Module) -> nn.Module:
    """Return ``torch.compile(model)`` when enabled, otherwise ``model`` unchanged.

    Compilation is attempted only when the module-level ``TRY_TORCH_COMPILE``
    flag is truthy. It stays best effort: any failure falls back to the eager
    model, but the reason is printed instead of being silently discarded.

    Args:
        model: Module to (optionally) compile.

    Returns:
        The compiled module, or the original one if compilation is disabled or fails.
    """
    if not TRY_TORCH_COMPILE:
        return model
    try:
        return torch.compile(model)
    except Exception as exc:
        print(f"[WARN] torch.compile failed, using eager model: {exc}")
        return model


# Windows your signal and also keeps a map back to original timestamps/rows.
def build_windows_fast(series: np.ndarray, win: int, stride: int):
    """Slice a 1-D signal into overlapping windows and record the rows each covers.

    Args:
        series: 1-D signal; converted to float32.
        win: Window length in samples.
        stride: Step between consecutive window starts.

    Returns:
        Tuple ``(X, covers, n)``:
        ``X`` is a ``[Nwin, win, 1]`` float32 view onto the signal,
        ``covers`` is a ``[Nwin, win]`` int32 array of the original indices
        each window position maps to, and ``n`` is the signal length.
        When the signal is shorter than ``win`` both arrays have zero rows.
    """
    values = series.astype(np.float32, copy=False)
    total = len(values)

    if total < win:
        no_windows = np.empty((0, win, 1), dtype=np.float32)
        no_covers = np.empty((0, win), dtype=np.int32)
        return no_windows, no_covers, total

    # All stride-1 windows as a zero-copy view, thinned out if requested.
    windows = sliding_window_view(values, window_shape=win)
    if stride > 1:
        windows = windows[::stride]

    within = np.arange(win, dtype=np.int32)
    first_idx = np.arange(0, total - win + 1, stride, dtype=np.int32)
    covers = first_idx.reshape(-1, 1) + within.reshape(1, -1)

    return np.expand_dims(windows, axis=-1), covers, total

# It runs the model on overlapping windows and averages MSE reconstruction error
# Computes per-timestep reconstruction error from overlapping windows safely.
@torch.inference_mode()
def pointwise_mse_batched(model: nn.Module,
                          X_view: np.ndarray,
                          covers: np.ndarray,
                          length: int,
                          device: str,
                          use_amp: bool = True,
                          win_batch: int = 100_000,
                          win_batch_min: int = 20_000) -> np.ndarray:
    """Per-timestep mean squared reconstruction error from overlapping windows.

    Windows are pushed through ``model`` in batches. The squared error of each
    window position is accumulated onto the original index given by ``covers``
    and finally divided by how many windows covered that index. Indices that no
    window covers get an error of 0.

    If a batch raises a ``RuntimeError`` whose message mentions "out of memory"
    or "cuda", the batch size is halved (never below ``win_batch_min``) and the
    same batch is retried; once at the floor the error propagates.

    Args:
        model: Reconstruction model taking and returning ``[B, WIN, 1]`` tensors.
            It is used as passed in; this function does not switch it to eval mode.
        X_view: Windows shaped ``[Nwin, WIN, 1]``.
        covers: ``[Nwin, WIN]`` integer indices into the original series.
        length: Length of the original series (size of the result).
        device: Torch device string, e.g. ``"cpu"``, ``"cuda"`` or ``"cuda:0"``.
        use_amp: Run the forward pass under CUDA autocast (ignored off-GPU).
        win_batch: Initial number of windows per batch.
        win_batch_min: Smallest batch size the retry logic may fall back to.

    Returns:
        float64 array of shape ``[length]``.

    Raises:
        RuntimeError: When a batch fails and cannot be shrunk any further, or
            fails for a reason unrelated to CUDA / memory.
    """
    sums = np.zeros(length, dtype=np.float64)
    cnts = np.zeros(length, dtype=np.int64)

    # Accept indexed devices such as "cuda:0" as well as plain "cuda".
    on_cuda = str(device).startswith("cuda")

    Nwin = X_view.shape[0]
    i = 0
    # A non-positive batch size would never advance `i`.
    cur_bs = max(1, int(win_batch))

    while i < Nwin:
        j = min(i + cur_bs, Nwin)

        xt = rt = None
        try:
            # The transfer is inside the try so that an OOM while copying the
            # batch to the device also triggers the shrink-and-retry path.
            xt = torch.from_numpy(X_view[i:j]).to(device, non_blocking=True)
            if on_cuda and use_amp:
                with torch.cuda.amp.autocast():
                    rt = model(xt)
            else:
                rt = model(xt)

            ### point-wise squared error within each window
            err = (xt - rt).pow(2).squeeze(-1).float().cpu().numpy()
        except RuntimeError as e:
            msg = str(e).lower()
            if ("out of memory" in msg or "cuda" in msg) and cur_bs > win_batch_min:
                # Drop device tensors first, otherwise empty_cache() cannot
                # return their memory.
                xt = rt = None
                cur_bs = max(1, win_batch_min, cur_bs // 2)
                if on_cuda:
                    torch.cuda.empty_cache()
                gc.collect()
                print(f"[OOM-avoid] reduce window-batch to {cur_bs}")
                continue
            raise

        idx = covers[i:j].ravel()
        # ufunc.at accumulates correctly even though indices repeat.
        np.add.at(sums, idx, err.ravel())
        np.add.at(cnts, idx, 1)

        i = j
        del xt, rt, err, idx
        if on_cuda:
            torch.cuda.empty_cache()

        if (i % max(win_batch_min, 1)) == 0:
            gc.collect()

    # Avoid division by zero for indices no window touched.
    cnts = np.maximum(cnts, 1)

    # point-wise (per-timestep) mean squared error
    return (sums / cnts).astype(np.float64)

# Performs z-score standardization on training data
def standardize_train_then_apply(train_vals: np.ndarray, test_vals: np.ndarray):
    """Z-score both arrays using the mean and std of the training values only.

    NaNs are ignored when estimating the statistics. A zero or non-finite
    standard deviation is replaced by ``1e-9`` to keep the division defined.

    Args:
        train_vals: Values the statistics are estimated from.
        test_vals: Values scaled with those same statistics.

    Returns:
        ``(train_scaled, test_scaled, (mean, std))`` with both arrays as float32.
    """
    center = np.nanmean(train_vals)
    spread = np.nanstd(train_vals)

    usable = np.isfinite(spread) and spread != 0.0
    if not usable:
        spread = 1e-9

    def _scale(vals: np.ndarray) -> np.ndarray:
        return ((vals - center) / spread).astype(np.float32)

    return _scale(train_vals), _scale(test_vals), (center, spread)

# -----------------------------
# Error smoothing & thresholding
# A time point is labeled Anomaly if its reconstruction error (point-wise MSE)
# is greater than the MAD-based threshold.
# -----------------------------
# Smooths error using exponential moving average
def ema_1d(x: np.ndarray, alpha: float) -> np.ndarray:
    """Exponentially smooth a sequence: ``y[i] = alpha*x[i] + (1-alpha)*y[i-1]``.

    The recursion is seeded with the first sample. Before smoothing, NaN and
    -inf are replaced by 0 and +inf by the largest finite value present. If the
    input holds no finite value at all, zeros of the same shape are returned.

    Args:
        x: Input sequence.
        alpha: Weight of the newest sample.

    Returns:
        float64 array with the smoothed values.
    """
    signal = np.asarray(x, dtype=np.float64)
    finite_mask = np.isfinite(signal)
    if not finite_mask.any():
        return np.zeros_like(signal, dtype=np.float64)

    ceiling = np.nanmax(signal[finite_mask])
    signal = np.nan_to_num(signal, nan=0.0, posinf=ceiling, neginf=0.0)

    weight = float(alpha)
    smoothed = np.empty_like(signal, dtype=np.float64)
    # Seeding with the first sample makes the first update blend it with itself.
    state = signal[0]
    for pos, value in enumerate(signal):
        state = weight * value + (1.0 - weight) * state
        smoothed[pos] = state
    return smoothed

# Computes robust threshold using median deviation
# A robust threshold is a cutoff that is not easily distorted by outliers or extreme values
def mad_threshold(err: np.ndarray, k: float) -> float:
    """Return ``median(err) + k * MAD(err)``, ignoring NaNs.

    MAD is the median absolute deviation from the median. A zero or non-finite
    MAD is replaced by ``1e-9``.

    Args:
        err: Error values.
        k: Multiplier applied to the MAD.

    Returns:
        The threshold as a Python float.
    """
    center = np.nanmedian(err)
    spread = np.nanmedian(np.abs(err - center))

    degenerate = (not np.isfinite(spread)) or spread == 0.0
    if degenerate:
        spread = 1e-9

    return float(center + k * spread)

# Learns anomaly threshold from training errors - either MAD / percentile based
# MAD: Median Absolute Deviation, a robust alternative to mean ± std
def learn_threshold(train_point_err: np.ndarray) -> float:
    """Derive the anomaly threshold from per-timestep training errors.

    The errors are EMA-smoothed first when ``USE_EMA`` is set. With
    ``THR_MODE`` equal to "mad" (case-insensitive) the threshold is
    ``median + K_MAD * MAD``; any other mode uses the ``THR_PCT``-th percentile.

    Args:
        train_point_err: Per-timestep reconstruction error on training data.

    Returns:
        The threshold value.
    """
    errors = train_point_err
    if USE_EMA:
        errors = ema_1d(train_point_err, EMA_ALPHA)

    if THR_MODE.lower() != "mad":
        return float(np.nanpercentile(errors, THR_PCT))
    return mad_threshold(errors, K_MAD)

# Applies threshold, outputs anomaly labels as "Anomaly" or "Normal"
def apply_threshold(point_err: np.ndarray, thr: float) -> np.ndarray:
    """Label each time point "Anomaly" or "Normal" from its reconstruction error.

    The error is EMA-smoothed when ``USE_EMA`` is set and compared with ``thr``
    (strictly greater means flagged). When ``DILATE_STEPS`` is positive, every
    flagged point also flags that many neighbours on each side.

    Args:
        point_err: 1-D per-timestep reconstruction error.
        thr: Threshold, e.g. from ``learn_threshold``.

    Returns:
        Array of "Anomaly"/"Normal" strings with one entry per input point.
    """
    pe = ema_1d(point_err, EMA_ALPHA) if USE_EMA else point_err
    flagged = np.asarray(pe) > thr
    n_points = flagged.size

    # np.convolve rejects empty input, so only dilate when there is data.
    if DILATE_STEPS > 0 and n_points > 0:
        # int64 keeps the neighbour count from overflowing for wide kernels.
        kernel = np.ones(2 * DILATE_STEPS + 1, dtype=np.int64)
        spread = np.convolve(flagged.astype(np.int64), kernel, mode="full")
        # Centre slice of the full convolution. Unlike mode="same", this stays
        # n_points long even when the series is shorter than the kernel.
        flagged = spread[DILATE_STEPS:DILATE_STEPS + n_points] > 0

    return np.where(flagged, "Anomaly", "Normal")

# -----------------------------
# Main loop per residence
# -----------------------------
# For every residence: train one autoencoder on its training file, learn an
# error threshold from the training windows, then label every matching CSV and
# write per-file predictions plus one summary CSV per residence.
for residence in RESIDENCES:
    print(f"\n==== Processing {residence} (Model={MODEL_NAME}, THR_MODE={THR_MODE}, STRIDE(TR/TE)={FAST_WINDOW_STRIDE}/{INFER_WINDOW_STRIDE}) ====")

    # -----------------------------
    # TRAINING PROCESS
    # -----------------------------
    # Get appropriate data
    train_path = INPUT_TRAIN_PATTERN.format(residence=residence)
    if not os.path.exists(train_path):
        print(f"[WARN] Training file not found: {train_path}. Skipping residence.")
        continue

    df_train_full = read_csv_safe(train_path)
    # Order the training rows chronologically; rows with unparseable timestamps
    # become NaT and are placed by sort_values' default NaN handling.
    if "timestamp" in df_train_full.columns:
        try:
            df_train_full["_ts_sort"] = pd.to_datetime(df_train_full["timestamp"], errors="coerce")
            df_train_full = df_train_full.sort_values("_ts_sort").drop(columns=["_ts_sort"])
        except Exception:
            pass

    n = len(df_train_full)
    if n < WIN:
        print(f"[WARN] Not enough samples ({n}) for window={WIN}. Skipping residence.")
        continue

    # Determine 80% split
    # Only the first 80% of rows are used for training; the remaining 20% are
    # not referenced again in this loop.
    # NOTE(review): the n < WIN guard above checks the full file, not the 80%
    # slice; a too-short slice is caught later by the "0 windows" check.
    split_idx = int(n * 0.8)
    s_train_full = pd.to_numeric(df_train_full["active_power"], errors="coerce").replace([np.inf, -np.inf], np.nan).fillna(0.0).values
    s_train = s_train_full[:split_idx]

    # Standardize data and create windows
    if USE_STANDARDIZE:
        # s_train is passed twice because only the training statistics and the
        # scaled training series are needed here.
        s_train_std, _, (mu_ap, sd_ap) = standardize_train_then_apply(s_train, s_train)
        s_model_train = s_train_std
    else:
        mu_ap, sd_ap = 0.0, 1.0
        s_model_train = s_train.astype(np.float32)

    # Vectorized training windows (stride = FAST_WINDOW_STRIDE)
    Xtr_np, covers_tr, len_tr = build_windows_fast(s_model_train, WIN, FAST_WINDOW_STRIDE)
    if Xtr_np.shape[0] == 0:
        print(f"[WARN] Windowing returned 0 windows. Skipping residence.")
        continue

    ds = TensorDataset(torch.from_numpy(Xtr_np))  # stores [N, WIN, 1]
    dl = DataLoader(
        ds, batch_size=BATCH, shuffle=True, drop_last=False,
        num_workers=NUM_WORKERS, pin_memory=(PIN_MEMORY and DEVICE=="cuda"),
        persistent_workers=PERSISTENT_WORKERS
    )

    # Create the Autoencoder model and compile
    model = AE_MLP(WIN, AE_HIDDEN1, AE_HIDDEN2, AE_LATENT, DROPOUT).to(DEVICE)
    model = maybe_compile(model)

    # Request the fused AdamW on CUDA; fall back to the plain constructor when
    # the installed torch does not accept the `fused` keyword.
    fused_ok = (DEVICE == "cuda")
    try:
        opt = torch.optim.AdamW(model.parameters(), lr=LR, fused=fused_ok)
    except TypeError:
        opt = torch.optim.AdamW(model.parameters(), lr=LR)
    loss_fn = nn.MSELoss()

    # Early-stopping state: best epoch loss so far and count of non-improving epochs.
    best_loss = float("inf"); bad = 0
    scaler = torch.cuda.amp.GradScaler(enabled=(USE_AMP and DEVICE=="cuda"))

    # NOTE(review): tracemalloc traces Python-level allocations; GPU memory is
    # presumably not reflected in TrainPeakMB - confirm before reporting it.
    tracemalloc.start()
    t0 = time.perf_counter()

    model.train()
    # Training Loop
    for epoch in range(1, EPOCHS + 1):
        epoch_loss = 0.0
        for (xb,) in dl:  # xb: [B, WIN, 1]
            xb = xb.to(DEVICE, non_blocking=True)
            opt.zero_grad(set_to_none=True)
            if USE_AMP and DEVICE == "cuda":
                with torch.cuda.amp.autocast():
                    recon = model(xb)         # [B, WIN, 1]
                    loss  = loss_fn(recon, xb)
                scaler.scale(loss).backward()
                scaler.step(opt)
                scaler.update()
            else:
                recon = model(xb)
                loss  = loss_fn(recon, xb)
                loss.backward()
                opt.step()
            # Weight the batch mean by batch size so the epoch value is a per-window mean.
            epoch_loss += loss.item() * xb.size(0)

        epoch_loss /= len(ds)
        # Log roughly five times over the configured number of epochs.
        if epoch % max(1, EPOCHS // 5) == 0:
            print(f"  Epoch {epoch}/{EPOCHS}  |  MSE={epoch_loss:.6f}")

        # Early stopping is driven by the training loss; no validation set is used.
        if EARLY_STOP:
            if epoch_loss + MIN_DELTA < best_loss:
                best_loss = epoch_loss; bad = 0
            else:
                bad += 1
                if bad >= PATIENCE:
                    print(f"  Early stopping at epoch {epoch} (best MSE={best_loss:.6f})")
                    break

    train_time_sec = time.perf_counter() - t0
    train_current, train_peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    train_peak_mb = mb(train_peak)

    #### This block learns an anomaly threshold from training data, using the model's reconstruction error.
    # NOTE(review): unlike inference, all training windows are moved to the
    # device and reconstructed in one pass here, without batching or OOM fallback.
    model.eval()
    with torch.no_grad():
        xtr = torch.from_numpy(Xtr_np).to(DEVICE)
        if USE_AMP and DEVICE == "cuda":
            with torch.cuda.amp.autocast():
                rtr = model(xtr)
        else:
            rtr = model(xtr)

    # aggregate MSE per index
    # Same accumulation as pointwise_mse_batched, applied to tensors that are
    # already reconstructed. The function is re-defined on every residence iteration.
    def pointwise_mse_from_windows_fast(x_windows: torch.Tensor, x_recon: torch.Tensor, covers: np.ndarray, length: int):
        err = (x_windows - x_recon).pow(2).squeeze(-1).float().cpu().numpy()
        sums = np.zeros(length, dtype=np.float64); cnts = np.zeros(length, dtype=np.int64)
        np.add.at(sums, covers.ravel(), err.ravel())
        np.add.at(cnts, covers.ravel(), 1)
        # Indices no window covers keep an error of 0 instead of dividing by zero.
        cnts = np.maximum(cnts, 1)
        return (sums / cnts).astype(np.float64)

    train_point_err = pointwise_mse_from_windows_fast(xtr, rtr, covers_tr, length=len_tr)

    # Threshold is learned
    thr = learn_threshold(train_point_err)
    print(f" Trained {MODEL_NAME}: train_point_err[{len(train_point_err)}], threshold={thr:.6e}")
    print(f" TrainingTimeSec={train_time_sec:.3f}, TrainPeakMB={train_peak_mb}")

    # -----------------------------
    # INFERENCE PROCESS
    # -----------------------------
    # Get the files for the residence
    # The glob matches every CSV starting with the residence name in the MERGED
    # folder, which includes the training file itself.
    pattern = INPUT_ALL_PATTERN.format(residence=residence)
    all_files = sorted(glob.glob(pattern))
    if not all_files:
        print(f"[WARN] No files found for pattern: {pattern}")
        continue

    summary_rows = []
    tracemalloc.start()
    t_inf0 = time.perf_counter()

    for in_path in all_files:
        try:
            base = os.path.basename(in_path)
            out_path = os.path.join(OUTPUT_DIR_PRED, f"{os.path.splitext(base)[0]}_{MODEL_NAME}.csv")

            # Loads clean active_power to the right format
            # NOTE(review): inference files are scored in file order; they are
            # not sorted by timestamp the way the training file is.
            df = read_csv_safe(in_path).copy()
            ap = pd.to_numeric(df["active_power"], errors="coerce").replace([np.inf, -np.inf], np.nan).fillna(0.0).values

            # Files shorter than one window cannot be scored: label every row
            # "Normal", still write the output and a summary row, then move on.
            if len(ap) < WIN:
                df["prediction_anomaly"] = "Normal"
                save_csv_overwrite(df, out_path)
                if "ground_truth_anomaly" in df.columns:
                    m = safe_metrics(df["ground_truth_anomaly"], df["prediction_anomaly"])
                else:
                    # No ground truth available: counts are zero and scores undefined.
                    m = {"Total": len(df), "TP":0,"TN":0,"FP":0,"FN":0,
                         "ActualNormal":0,"ActualAnomaly":0,
                         "Accuracy":np.nan,"Precision":np.nan,"Recall":np.nan,"F1-Score":np.nan,
                         "Normal_%":np.nan,"Anomaly_%":np.nan}
                # Timing / inference-memory fields are placeholders filled in after the file loop.
                summary_rows.append({
                    "Filename": base, "Accuracy": m.get("Accuracy", np.nan),
                    "Precision": m.get("Precision", np.nan), "Recall": m.get("Recall", np.nan),
                    "F1-Score": m.get("F1-Score", np.nan), "Normal_%": m.get("Normal_%", np.nan),
                    "Anomaly_%": m.get("Anomaly_%", np.nan), "Total": m.get("Total", 0),
                    "TP": m.get("TP",0), "TN": m.get("TN",0), "FP": m.get("FP",0), "FN": m.get("FN",0),
                    "ActualNormal": m.get("ActualNormal",0), "ActualAnomaly": m.get("ActualAnomaly",0),
                    "TrainingTimeSec": None, "InferenceTimeSec": None,
                    "TrainPeakMB": train_peak_mb, "InferencePeakMB": None,
                })
                print(f"  [SKIP short] {base} (len<{WIN})")
                continue

            # Standardize using TRAIN stats
            # The zero guard on sd_ap is defensive: standardize_train_then_apply
            # already replaces a zero std with 1e-9.
            s_te = ((ap - mu_ap) / (sd_ap if sd_ap != 0 else 1e-9)).astype(np.float32) if USE_STANDARDIZE else ap.astype(np.float32)

            # Build Windows
            Xte_view, covers_te, len_te = build_windows_fast(s_te, WIN, INFER_WINDOW_STRIDE)

            # Run the trained autoencoder
            point_err = pointwise_mse_batched(
                model, Xte_view, covers_te, length=len_te, device=DEVICE,
                use_amp=(USE_AMP and DEVICE=="cuda"),
                win_batch=INFER_WIN_BATCH, win_batch_min=INFER_WIN_BATCH_MIN
            )

            # Apply the thresholds and save
            pred_labels = apply_threshold(point_err, thr)
            df["prediction_anomaly"] = pd.Series(pred_labels, index=df.index).astype(str)
            save_csv_overwrite(df, out_path)

            if "ground_truth_anomaly" in df.columns:
                m = safe_metrics(df["ground_truth_anomaly"], df["prediction_anomaly"])
            else:
                # No ground truth available: counts are zero and scores undefined.
                m = {"Total": len(df), "TP":0,"TN":0,"FP":0,"FN":0,
                     "ActualNormal":0,"ActualAnomaly":0,
                     "Accuracy":np.nan,"Precision":np.nan,"Recall":np.nan,"F1-Score":np.nan,
                     "Normal_%":np.nan,"Anomaly_%":np.nan}

            # Timing / inference-memory fields are placeholders filled in after the file loop.
            summary_rows.append({
                "Filename": base,
                "Accuracy": m["Accuracy"],
                "Precision": m["Precision"],
                "Recall": m["Recall"],
                "F1-Score": m["F1-Score"],
                "Normal_%": m["Normal_%"],
                "Anomaly_%": m["Anomaly_%"],
                "Total": m["Total"],
                "TP": m["TP"], "TN": m["TN"], "FP": m["FP"], "FN": m["FN"],
                "ActualNormal": m["ActualNormal"], "ActualAnomaly": m["ActualAnomaly"],
                "TrainingTimeSec": None,
                "InferenceTimeSec": None,
                "TrainPeakMB": train_peak_mb,
                "InferencePeakMB": None,
            })

            print(f"  ✔ {base}  |  Acc={m['Accuracy']:.4f}  N%={m['Normal_%']:.2f}  A%={m['Anomaly_%']:.2f}")

            if DEVICE == "cuda":
                torch.cuda.empty_cache()
            gc.collect()

        except Exception as e:
            # A failing file is reported and skipped; it gets no summary row.
            print(f"  [ERROR] {in_path}: {e}")

    # Time and peak memory are measured once across ALL files of the residence,
    # so every summary row below receives the same values.
    inference_time_sec = time.perf_counter() - t_inf0
    inf_current, inf_peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    inference_peak_mb = mb(inf_peak)

    for row in summary_rows:
        row["TrainingTimeSec"] = round(train_time_sec, 6)
        row["InferenceTimeSec"] = round(inference_time_sec, 6)
        row["InferencePeakMB"] = inference_peak_mb

    summary_df = pd.DataFrame(summary_rows)
    col_order = [
        "Filename",
        "Accuracy", "Precision", "Recall", "F1-Score",
        "Normal_%", "Anomaly_%", "Total",
        "TP", "TN", "FP", "FN", "ActualNormal", "ActualAnomaly",
        "TrainingTimeSec", "InferenceTimeSec", "TrainPeakMB", "InferencePeakMB"
    ]
    # NOTE(review): if every file failed, summary_rows is empty and this column
    # selection likely raises KeyError, aborting the remaining residences - confirm.
    summary_df = summary_df[col_order]
    summary_path = os.path.join(OUTPUT_DIR_SUMMARY, f"{residence}_ANOMALY_{MODEL_NAME}_OUTLINE.csv")
    save_csv_overwrite(summary_df, summary_path)

    print(f" InferenceTimeSec={inference_time_sec:.3f}, InferencePeakMB={inference_peak_mb}")
    print(f" ✔ Summary saved: {summary_path}")

print("\nDone.")
