
# Benchmark de modelos para *late fusion* con **LazyPredict**
Usa `X.npy`, `y.npy` y (opcional) `video_ids.npy` de tu pipeline (RLV + UCF).  
Calcula *features* tabulares (estadísticos + velocidad + distancias por pares), ejecuta **LazyPredict** en *split* por **video**, y reporta **F1 macro** y **ROC AUC** (ventana y agregación a nivel **video** por top-k).


In [None]:
# ==== LazyPredict con datos de pose_sequences (solo X.npy/y.npy/video_ids.npy) ====
import numpy as np, warnings
from pathlib import Path
from sklearn.model_selection import GroupShuffleSplit, StratifiedGroupKFold
from sklearn.metrics import average_precision_score, roc_auc_score, f1_score, precision_recall_curve, log_loss
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC
from lazypredict.Supervised import LazyClassifier

# Silence library warnings so the sweep output below stays readable.
warnings.filterwarnings("ignore")

# Folder holding the preprocessed arrays.
DATA = Path("pose_sequences")

# ---- Load only the preprocessed arrays ----
# Original author's note on X.npy: (N,2,T,17), normalized to [0,1].
X_seq = np.load(DATA / "X.npy")
y = np.load(DATA / "y.npy")
vids = np.load(DATA / "video_ids.npy")
N, _, T, K = X_seq.shape
# Flatten every sample to one row of 2*T*K float32 features.
X = X_seq.reshape(N, 2 * T * K).astype(np.float32)
print(f"[LOAD] X: {X.shape}  y: {y.shape}  videos únicos: {len(np.unique(vids))}")

# ---- Hold-out split grouped by video id (no video appears on both sides) ----
gss = GroupShuffleSplit(n_splits=1, test_size=0.20, random_state=13)
tr, te = next(gss.split(X, y, groups=vids))
X_tr, X_te = X[tr], X[te]
y_tr, y_te = y[tr], y[te]
vids_te = vids[te]
print(f"[SPLIT] train={len(tr)}  test={len(te)}  grupos_test={len(np.unique(vids_te))}")

# ---- 1) LazyPredict on the hold-out split (quick screening) ----
print("\n=== LazyPredict: ranking rápido en holdout ===")
clf = LazyClassifier(
    verbose=0,
    ignore_warnings=True,
    random_state=13,
    classifiers="all",
)
models_df, preds = clf.fit(X_tr, X_te, y_tr, y_te)
# Rank once by the "ROC AUC" column; reused for the display and the top-N pick.
ranked_models = models_df.sort_values(by="ROC AUC", ascending=False)
display(ranked_models.head(15))

# ---- 2) Pick the top-N names for the stricter grouped cross-validation ----
TOP_N = 6
top_names = ranked_models.head(TOP_N).index.tolist()
print(f"\n[TOP-{TOP_N}] para validación rigurosa (AP):", top_names)

# Mapeo de nombres LazyPredict -> modelos comparables
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier

def make_model(name: str):
    """Return an unfitted scikit-learn estimator comparable to a LazyPredict name.

    Matching is done on substrings of ``name`` and the first matching branch
    wins, so the order of the checks matters (``LinearSVC`` is tested before
    the generic ``SVC`` check, which also catches ``NuSVC``).

    Names without a dedicated branch used to fall through silently to a
    logistic regression, so distinct leaderboard entries (for example
    ``XGBClassifier`` and ``LGBMClassifier``) were evaluated as the very same
    model and reported identical metrics. This version:

    * maps ``QuadraticDiscriminantAnalysis`` to the scikit-learn class;
    * maps ``XGB*`` / ``LGBM*`` to scikit-learn's histogram-based gradient
      boosting as a boosted-tree stand-in and says so on stdout;
    * still falls back to logistic regression for anything else, but prints
      a notice (``print`` is used because warnings may be globally filtered).

    Args:
        name: Model name as it appears in the LazyPredict leaderboard index.

    Returns:
        A fresh, unfitted estimator. Scale-sensitive models are wrapped in a
        ``Pipeline`` with steps ``"scaler"`` (``StandardScaler``) and ``"clf"``.
    """

    def scaled(final_estimator):
        # Standardize features before scale-sensitive estimators.
        return Pipeline([("scaler", StandardScaler()), ("clf", final_estimator)])

    if name.lower().startswith("logistic"):
        return scaled(LogisticRegression(max_iter=4000, class_weight="balanced", n_jobs=-1))
    if "RidgeClassifier" in name:
        return scaled(RidgeClassifier(class_weight="balanced"))
    if "LinearSVC" in name:
        return scaled(LinearSVC(class_weight="balanced", random_state=13))
    if "SVC" in name and "Linear" not in name:
        # random_state pins the internal procedure behind probability=True.
        return scaled(SVC(kernel="rbf", probability=True, class_weight="balanced",
                          random_state=13))
    if "QuadraticDiscriminant" in name:
        from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
        return scaled(QuadraticDiscriminantAnalysis())
    if "ExtraTrees" in name:
        return ExtraTreesClassifier(n_estimators=600, class_weight="balanced_subsample",
                                    n_jobs=-1, random_state=13)
    if "RandomForest" in name:
        return RandomForestClassifier(n_estimators=600, class_weight="balanced_subsample",
                                      n_jobs=-1, random_state=13)
    if name.startswith(("XGB", "LGBM")):
        # xgboost / lightgbm are not imported by this file; use the closest
        # scikit-learn estimator and make the substitution visible.
        from sklearn.ensemble import HistGradientBoostingClassifier
        print(f"[INFO] '{name}' se evalúa con HistGradientBoostingClassifier "
              f"(sustituto de scikit-learn).")
        return HistGradientBoostingClassifier(random_state=13)
    if "GradientBoosting" in name:
        return GradientBoostingClassifier(random_state=13)
    if "AdaBoost" in name:
        return AdaBoostClassifier(random_state=13)

    # Reasonable fallback, now announced so the substitution is never silent.
    print(f"[WARN] Sin mapeo para '{name}'; se usa LogisticRegression como sustituto.")
    return scaled(LogisticRegression(max_iter=4000, class_weight="balanced", n_jobs=-1))

def _best_f1_threshold(prec, rec, thr, default=0.5):
    """Return the threshold with the highest F1 on a precision-recall curve.

    ``prec`` and ``rec`` are one element longer than ``thr``: the trailing
    (precision=1, recall=0) point has no threshold, and ``prec[i]``/``rec[i]``
    belong to ``thr[i]``. ``default`` is returned when ``thr`` is empty.
    """
    if len(thr) == 0:
        return default
    # Drop the trailing point so F1 indices line up one-to-one with thr.
    p, r = prec[:-1], rec[:-1]
    f1s = 2 * p * r / (p + r + 1e-9)  # epsilon avoids 0/0
    return float(thr[int(np.nanargmax(f1s))])


def rigorous_ap(model, X, y, vids, n_splits=5):
    """Score ``model`` with out-of-fold predictions from grouped, stratified CV.

    Folds come from ``StratifiedGroupKFold`` with ``vids`` as groups, so all
    rows sharing a group id land in the same fold. Positive-class
    probabilities (column 1 of ``predict_proba``) are collected out-of-fold
    and scored once over the full dataset. Labels are assumed binary 0/1.

    Estimators whose final step lacks ``predict_proba`` (or is a
    ``LinearSVC``) are wrapped in a sigmoid ``CalibratedClassifierCV``.
    Otherwise ``model`` itself is refitted on each fold, so it is left fitted
    on the last training fold.

    Args:
        model: Unfitted estimator or ``Pipeline``.
        X: Feature matrix, indexable by integer arrays.
        y: Label array aligned with ``X``.
        vids: Group id per row, used to keep groups inside a single fold.
        n_splits: Number of CV folds.

    Returns:
        Dict with keys ``"AP"``, ``"ROC_AUC"``, ``"LogLoss"`` (NaN if it
        cannot be computed), ``"F1@tau"`` and ``"tau"``. ``tau`` is the mean
        of the per-fold F1-optimal thresholds; those are tuned on the same
        validation folds that are scored, so ``F1@tau`` is optimistic.
    """
    cv = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=13)
    oof = np.zeros(len(y), dtype=float)
    taus = []

    # Whether calibration is needed depends only on the model: decide once.
    final = model.steps[-1][1] if isinstance(model, Pipeline) else model
    needs_cal = not hasattr(final, "predict_proba") or isinstance(final, LinearSVC)

    for tr, va in cv.split(X, y, groups=vids):
        # NOTE(review): cv=3 is given without groups, so the inner calibration
        # folds are presumably not video-grouped -- confirm if that matters.
        est = CalibratedClassifierCV(model, cv=3, method="sigmoid") if needs_cal else model
        est.fit(X[tr], y[tr])
        p = est.predict_proba(X[va])[:, 1]
        oof[va] = p

        prec, rec, thr = precision_recall_curve(y[va], p)
        # Bug fix: the old code read thr[idx-1] (off by one) and fell back to
        # 0.5 whenever the best F1 sat at index 0.
        taus.append(_best_f1_threshold(prec, rec, thr))

    ap = average_precision_score(y, oof)
    auc = roc_auc_score(y, oof)
    try:
        ll = log_loss(y, np.c_[1 - oof, oof], labels=[0, 1])
    except ValueError:
        # Best effort, e.g. labels outside {0, 1}.
        ll = np.nan
    tau_g = float(np.mean(taus))
    f1 = f1_score(y, (oof >= tau_g).astype(int))
    return {"AP": ap, "ROC_AUC": auc, "LogLoss": ll, "F1@tau": f1, "tau": tau_g}

# Re-evaluate each shortlisted model with grouped CV and collect one tuple per
# model: (name, AP, ROC_AUC, LogLoss, F1@tau, tau).
METRIC_KEYS = ("AP", "ROC_AUC", "LogLoss", "F1@tau", "tau")
rig = []
for name in top_names:
    m = make_model(name)
    met = rigorous_ap(m, X, y, vids, n_splits=5)
    rig.append((name, *(met[key] for key in METRIC_KEYS)))
    print(f"[RIG] {name:20s}  AP={met['AP']:.5f}  ROC_AUC={met['ROC_AUC']:.5f}  F1@tau={met['F1@tau']:.4f}  tau≈{met['tau']:.3f}")

# Final order: highest AP first, lower log-loss breaks ties.
rig.sort(key=lambda row: (-row[1], row[3]))
print("\n=== RANKING FINAL (AP, Group-KFold) ===")
for r in rig:
    print(f"{r[0]:20s}  AP={r[1]:.5f}  ROC_AUC={r[2]:.5f}  LogLoss={r[3]:.5f}  F1@tau={r[4]:.4f}  tau={r[5]:.3f}")


[LOAD] X: (50304, 1088)  y: (50304,)  videos únicos: 2127
[SPLIT] train=40046  test=10258  grupos_test=426

=== LazyPredict: ranking rápido en holdout ===


 97%|█████████▋| 28/29 [56:43<02:21, 141.92s/it]  

[LightGBM] [Info] Number of positive: 23558, number of negative: 16488
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.201460 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 276717
[LightGBM] [Info] Number of data points in the train set: 40046, number of used features: 1088
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.588273 -> initscore=0.356833
[LightGBM] [Info] Start training from score 0.356833


100%|██████████| 29/29 [56:55<00:00, 117.76s/it]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
XGBClassifier,0.79,0.77,0.77,0.79,24.06
LGBMClassifier,0.79,0.77,0.77,0.78,11.33
SVC,0.78,0.76,0.76,0.78,732.97
ExtraTreesClassifier,0.77,0.74,0.74,0.76,28.22
RandomForestClassifier,0.77,0.74,0.74,0.76,130.99
QuadraticDiscriminantAnalysis,0.74,0.74,0.74,0.74,7.69
NuSVC,0.76,0.73,0.73,0.75,1091.58
BaggingClassifier,0.74,0.73,0.73,0.74,559.57
LogisticRegression,0.7,0.68,0.68,0.7,4.49
AdaBoostClassifier,0.7,0.68,0.68,0.7,161.0



[TOP-6] para validación rigurosa (AP): ['XGBClassifier', 'LGBMClassifier', 'SVC', 'ExtraTreesClassifier', 'RandomForestClassifier', 'QuadraticDiscriminantAnalysis']
[RIG] XGBClassifier         AP=0.79984  ROC_AUC=0.76824  F1@tau=0.7899  tau≈0.237
[RIG] LGBMClassifier        AP=0.79984  ROC_AUC=0.76824  F1@tau=0.7899  tau≈0.237
