# Phase 4 — Event-level evaluation + Multi-label training + Robustness package

Goals of Phase 4:
1) Convert the **pair-level** table into **event-level** predictions (one prediction per event, removing the “multiple pairs per event” ambiguity)
2) Train and evaluate **multiple HLT labels** (one model per label) using the SAME GroupKFold splits for every label
3) Add robustness: calibration, threshold selection, drift vs. run/lumi, and a clean export of artifacts

Input: `/kaggle/input/datasets/katakuricharlotte/parquet-triggeremu/parquet_dimuon/*.parquet` (the Phase 2 output, attached as a Kaggle dataset)

Output (saved to `/kaggle/working/phase4_artifacts/`):
- `metrics_per_label.csv` (pair-level + event-level)
- `thresholds.csv` (operating points)
- `models/` (joblib models per label)
- `plots/` (ROC/PR, stability, calibration)
- `config.json`


In [1]:
# Cell 1 — Install deps
!pip -q install lightgbm scikit-learn shap pyarrow fastparquet matplotlib seaborn joblib


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h

In [None]:
# Cell 2 — Imports & config
from pathlib import Path
import glob, json
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import GroupKFold
from sklearn.metrics import (
    roc_auc_score, average_precision_score,
    roc_curve, precision_recall_curve,
    brier_score_loss
)
from sklearn.calibration import calibration_curve
import lightgbm as lgb
import joblib

# Seed NumPy's global RNG so np.random-based steps are repeatable across runs.
SEED = 42
np.random.seed(SEED)

# Input parquet directory and the root folder for everything this phase writes.
PARQUET_DIR = Path("/kaggle/input/datasets/katakuricharlotte/parquet-triggeremu/parquet_dimuon")
OUT = Path("/kaggle/working/phase4_artifacts")

# Create the artifact sub-folders up front; exist_ok keeps re-running the cell safe.
for artifact_subdir in ("models", "plots"):
    (OUT / artifact_subdir).mkdir(parents=True, exist_ok=True)

# Discover the input files in a deterministic (sorted) order.
parquet_pattern = str(PARQUET_DIR / "*.parquet")
parquet_files = sorted(glob.glob(parquet_pattern))

# Last expression of the cell: show how many files were found and the first few.
len(parquet_files), parquet_files[:3]
