# Benchmark MLP – configurations et seuils

Ce notebook compare plusieurs configurations de MLP (profondeur, activation, dropout, batch norm) sur un même split, et évalue différents seuils de décision.

Guide:
- Ajustez les chemins et options dans la cellule de configuration.
- Définissez la liste `CONFIGS` (chaque dict = une config modèle).
- `THRESHOLDS` liste les seuils de décision testés : pour chaque configuration, le seuil qui maximise le F1 sur la validation est retenu, puis appliqué au jeu de test.
- Les meilleurs résultats sont affichés et, en option, sauvegardés dans `Pytorch_models/`.


In [None]:
# --- Configuration ---
from pathlib import Path
import torch

# Input data
DATA_CSV = Path("DONNES_MLP/train_data_all.csv")  # adjust to your local layout if needed
HEALTHY_TAG = "HC"  # `disease` value mapped to label 0; anything else is label 1
RUN_PREFIX = "bench_mlp"
SEED = 41

# Training hyper-parameters shared by every benchmarked configuration
TRAIN_CFG = dict(
    batch_size=64,
    lr=1e-3,
    weight_decay=1e-4,
    epochs=80,
    patience=10,
    neg_weight=10.0,
)

# Decision thresholds swept on the validation set
THRESHOLDS = [0.3, 0.4, 0.5, 0.6, 0.7]

# Augmentation switch and the per-split count handed to augment_df
AUGMENT = True
AUG_TRAIN_N, AUG_VAL_N, AUG_TEST_N = 5, 8, 8

# How many of the top-ranked runs get written to disk
SAVE_TOP_K = 1

# MLP configurations to compare (edit / extend freely).
# `dropout` is either one float or one value per hidden layer.
CONFIGS = [
    dict(
        name="relu_256x128x64_do0.5_bn",
        in_features=430,
        hidden_dims=[256, 128, 64],
        activation="relu",
        batch_norm=True,
        dropout=0.5,
    ),
    dict(
        name="gelu_512x256_do0.4_bn",
        in_features=430,
        hidden_dims=[512, 256],
        activation="gelu",
        batch_norm=True,
        dropout=0.4,
    ),
    dict(
        name="leaky_512x256x128x64_do_0.1_0.2_0.2_0.1_bn",
        in_features=430,
        hidden_dims=[512, 256, 128, 64],
        activation="leaky_relu",
        batch_norm=True,
        dropout=[0.1, 0.2, 0.2, 0.1],
    ),
    dict(
        name="elu_256x64_do0.3_noBN",
        in_features=430,
        hidden_dims=[256, 64],
        activation="elu",
        batch_norm=False,
        dropout=0.3,
    ),
]

# Use the GPU when one is visible to torch, otherwise stay on the CPU
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print("Device:", device)


In [None]:
# --- Imports ---
import numpy as np
import pandas as pd
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score, f1_score
from tqdm.auto import tqdm
import json, time

from robust_evaluation_tools.robust_MLP import build_mlp_from_config, MODEL_DIR
from robust_evaluation_tools.synthectic_sites_generations import augment_df
from robust_evaluation_tools.MLP_train import (
    PatientDataset, make_loaders, train_epoch, eval_epoch, fit
)


In [None]:
# --- Data loading and preparation ---
assert DATA_CSV.exists(), f"Fichier introuvable: {DATA_CSV}"
df_raw = pd.read_csv(DATA_CSV)
# Drop the ventricle rows, but only when the table actually has a `bundle` column
if 'bundle' in df_raw.columns:
    _is_ventricle = df_raw['bundle'].isin(['left_ventricle', 'right_ventricle'])
    df_raw = df_raw.loc[~_is_ventricle].copy()
print('Shape après filtre:', df_raw.shape)

# Helpers de features
def compute_zscore(df, value_col="mean_no_cov"):
    """Return *df* with an added ``zscore`` column, standardised per bundle.

    For every ``metric_bundle`` group the mean and sample standard deviation
    of ``value_col`` are computed from *df* itself, and each row is scaled as
    ``(value - group_mean) / group_std``.

    Args:
        df: Long-format table with at least ``metric_bundle`` and
            ``value_col`` columns.
        value_col: Name of the column to standardise.

    Returns:
        A new DataFrame (the input is not modified) with the extra
        ``zscore`` column; the temporary mean/std columns are dropped.
    """
    stats = (df.groupby("metric_bundle")[value_col]
               .agg(['mean', 'std'])
               .rename(columns={'mean': 'global_mean', 'std': 'global_std'}))
    # The sample std is 0 for a constant group and NaN for a group with a
    # single observation. Both are replaced by a tiny epsilon so the z-score
    # comes out as 0 instead of inf/NaN, which would poison the feature matrix.
    stats['global_std'] = stats['global_std'].fillna(0).replace(0, 1e-6)
    df = df.merge(stats, on="metric_bundle", how="left")
    df["zscore"] = (df[value_col] - df["global_mean"]) / df["global_std"]
    return df.drop(columns=["global_mean", "global_std"])

def build_feature_matrix(df, value_col="zscore", bundle_col="metric_bundle", healthy_tag=HEALTHY_TAG):
    """Pivot the long table into one row per subject plus a binary label.

    Args:
        df: Long-format table with ``sid``, ``disease``, ``bundle_col`` and
            ``value_col`` columns.
        value_col: Column whose values fill the feature cells.
        bundle_col: Column whose distinct values become the feature columns.
        healthy_tag: ``disease`` value that maps to label 0.

    Returns:
        A DataFrame with ``sid`` as a regular column, one column per
        ``bundle_col`` value, and ``label`` (1 when the first ``disease``
        entry of the subject differs from ``healthy_tag``, else 0).
    """
    wide = df.pivot(index="sid", columns=bundle_col, values=value_col)
    first_disease = df.groupby("sid")["disease"].first()
    is_case = first_disease.ne(healthy_tag).astype(int)
    # `assign` aligns the label on the `sid` index before it is turned back into a column
    wide = wide.assign(label=is_case)
    return wide.reset_index(drop=False)

def make_X_Y(df, value_col="zscore"):
    """Turn a long-format table into ``(X, y)`` float32 arrays.

    Z-scores are always derived from the ``mean_no_cov`` column, using the
    statistics of the *df* passed in; ``value_col`` only selects which column
    is pivoted into features.

    Args:
        df: Long-format table accepted by ``compute_zscore`` and
            ``build_feature_matrix``.
        value_col: Column used as feature values in the pivot.

    Returns:
        ``X``: one row per subject, one column per bundle (float32).
        ``y``: the matching binary labels (float32).
    """
    scored = compute_zscore(df, value_col="mean_no_cov")
    # The subject id is only an identifier, not a feature
    wide = build_feature_matrix(scored, value_col=value_col).drop(columns=["sid"])
    feature_part = wide.drop(columns="label")
    X = feature_part.values.astype(np.float32)
    y = wide["label"].values.astype(np.float32)
    return X, y

# Subject-level train/val/test split (50/25/25), stratified on the binary label
from sklearn.model_selection import train_test_split
sid_label = df_raw.groupby('sid')['disease'].first().ne(HEALTHY_TAG).astype(int)
sids, labels = sid_label.index.values, sid_label.values
# First cut: half of the subjects go to train, the other half is held out
s_train, s_temp, y_train_sid, y_temp_sid = train_test_split(
    sids, labels,
    test_size=0.5, stratify=labels, random_state=SEED,
)
# Second cut: the held-out half is divided evenly into validation and test
s_val, s_test, y_val_sid, y_test_sid = train_test_split(
    s_temp, y_temp_sid,
    test_size=0.5, stratify=y_temp_sid, random_state=SEED,
)

# Select the rows of each split by subject id so no subject spans two splits
df_train, df_val, df_test = [
    df_raw[df_raw['sid'].isin(subset)].copy()
    for subset in (s_train, s_val, s_test)
]

# Optional augmentation, applied to each split separately
if AUGMENT:
    df_train, df_val, df_test = [
        augment_df(frame, int(n_aug))
        for frame, n_aug in (
            (df_train, AUG_TRAIN_N),
            (df_val, AUG_VAL_N),
            (df_test, AUG_TEST_N),
        )
    ]

# Feature matrices and labels, built independently for each split
(X_train, y_train), (X_val, y_val), (X_test, y_test) = map(
    make_X_Y, (df_train, df_val, df_test)
)

print('Train:', X_train.shape, 'Val:', X_val.shape, 'Test:', X_test.shape)


In [None]:
# --- DataLoaders (helper imported from MLP_train) ---
from robust_evaluation_tools.MLP_train import make_loaders
# Train / validation / test loaders, all built with the configured batch size
train_dl, val_dl, test_dl = make_loaders(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    int(TRAIN_CFG["batch_size"]),
)


In [None]:
# --- Training helpers (imported from MLP_train) ---
from robust_evaluation_tools.MLP_train import train_epoch, eval_epoch, fit
# Negative-class weight from the training config, defaulting to 10.0.
# NOTE(review): NEG_WEIGHT is not passed to fit() by the benchmark loop of this
# notebook - confirm whether it is still meant to be used.
NEG_WEIGHT = float(TRAIN_CFG["neg_weight"]) if "neg_weight" in TRAIN_CFG else 10.0


In [None]:
# --- Benchmark of the configurations ---
# For each entry of CONFIGS: build the model, train it with fit(), pick the
# decision threshold that maximises F1 on validation, then report AUC and F1
# on the test set at that threshold.
results = []
crit = nn.BCEWithLogitsLoss(reduction='none')
MODEL_DIR.mkdir(parents=True, exist_ok=True)

for cfg in CONFIGS:
    cfg_name = cfg.get('name', 'cfg')
    run_name = f"{RUN_PREFIX}_{cfg_name}"
    # Re-seed torch before every configuration so weight initialisation and any
    # torch-driven shuffling/dropout start from the same RNG state: runs become
    # reproducible and configs are compared under the same random draw.
    torch.manual_seed(SEED)
    model = build_mlp_from_config(cfg).to(device)
    # NOTE(review): neither `device` nor NEG_WEIGHT is forwarded to fit() here -
    # confirm that fit() resolves them on its own.
    model, tr_losses, val_losses, best_auc = fit(
        model, train_dl, val_dl,
        epochs=int(TRAIN_CFG['epochs']),
        lr=float(TRAIN_CFG['lr']),
        wd=float(TRAIN_CFG['weight_decay']),
        patience=int(TRAIN_CFG['patience']),
    )
    # Validation metrics + threshold sweep (ties keep the first threshold listed)
    val_loss, val_auc, _, val_probs, val_labels = eval_epoch(model, val_dl, crit)
    best_f1, best_thr = -1.0, None
    for thr in THRESHOLDS:
        f1 = f1_score(val_labels, (val_probs > thr).astype(int))
        if f1 > best_f1:
            best_f1, best_thr = f1, thr
    # Test metrics at the threshold selected on validation (NaN when THRESHOLDS is empty)
    test_loss, test_auc, _, test_probs, test_labels = eval_epoch(model, test_dl, crit)
    test_f1 = f1_score(test_labels, (test_probs > best_thr).astype(int)) if best_thr is not None else float('nan')

    # state_dict() hands back references to the live parameters on the training
    # device. Keep a detached CPU copy instead, so the results list does not pin
    # every model's weights on the GPU and the saved checkpoint loads on any machine.
    cpu_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    results.append({
        'name': cfg_name,
        'val_auc': float(val_auc),
        'val_best_f1': float(best_f1),
        'val_best_thr': float(best_thr if best_thr is not None else 0.5),
        'test_auc': float(test_auc),
        'test_f1_at_best_thr': float(test_f1),
        'config': cfg,
        'run_name': run_name,
        'state_dict': cpu_state,
    })

# Rank by validation AUC, then by best validation F1 (best first)
results_sorted = sorted(results, key=lambda r: (r['val_auc'], r['val_best_f1']), reverse=True)
# Summary table (last expression of the cell, so the notebook displays it)
pd.DataFrame([{k: v for k, v in r.items() if k not in ('config','state_dict')} for r in results_sorted])


In [None]:
# --- Save the best models (optional) ---
# Keep the first SAVE_TOP_K entries of the ranking; nothing is saved when
# SAVE_TOP_K is falsy or the ranking is empty.
if SAVE_TOP_K and len(results_sorted):
    to_save = results_sorted[:int(SAVE_TOP_K)]
else:
    to_save = []

saved = []
for best in to_save:
    run_name = best['run_name']
    # Weights first, then the model config together with the optimiser settings used
    torch.save(best['state_dict'], MODEL_DIR / f"{run_name}_weights.pt")
    params = dict(
        best['config'],
        lr=float(TRAIN_CFG['lr']),
        weight_decay=float(TRAIN_CFG['weight_decay']),
    )
    with open(MODEL_DIR / f"{run_name}_params.json", 'w') as fp:
        fp.write(json.dumps(params, indent=2))
    saved.append(run_name)

print('Saved runs:', saved)
