1 — Imports, configuración y rutas

In [53]:
# 1 — Imports, configuración y rutas
import json, os, warnings, time, re, glob
from pathlib import Path
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import (
    average_precision_score, precision_recall_curve, roc_auc_score, roc_curve,
    f1_score, recall_score, balanced_accuracy_score, confusion_matrix, precision_score
)
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import mutual_info_classif

# Balanceo (SMOTENC)
from imblearn.over_sampling import SMOTENC, SMOTE

# LightGBM
from lightgbm import LGBMClassifier
try:
    from lightgbm import early_stopping, log_evaluation
    _LGBM_CB_OK = True
except Exception:
    _LGBM_CB_OK = False

# === Toggles de experimento ===
USE_REDUCED = False
USE_BALANCED_TRAIN = True
BALANCE_IN_CV = True
RANDOM_STATE = 42
DO_TUNE = True
DO_CV_BASELINE = True
DO_CV_TUNED = True
CV_FOLDS = 5
MI_TOPK = 30

# === Nombres y rutas ===
ROOT = Path.cwd().parent
EXP_NAME = f"LGBM_{'REDUCED' if USE_REDUCED else 'FULL'}_{'SMOTENC' if USE_BALANCED_TRAIN else 'IMB'}"
ARTIF_DIR = ROOT / "artifacts" / EXP_NAME
OUT_RESULTS = ARTIF_DIR / "results"
OUT_FIGS    = ARTIF_DIR / "figs"
OUT_PREDS   = ARTIF_DIR / "preds"
OUT_PARAMS  = ARTIF_DIR / "best_params"
for p in [OUT_RESULTS, OUT_FIGS, OUT_PREDS, OUT_PARAMS]:
    p.mkdir(parents=True, exist_ok=True)

# Dataset preprocesado
DATA_DIR = ROOT / "preproc_datasets" / "full"

print("Exp:", EXP_NAME)
print("DATA_DIR:", DATA_DIR)
print("ARTIF_DIR:", ARTIF_DIR)

Exp: LGBM_FULL_SMOTENC
DATA_DIR: /Users/luistejada/Downloads/TFE Churn Bancario/preproc_datasets/full
ARTIF_DIR: /Users/luistejada/Downloads/TFE Churn Bancario/artifacts/LGBM_FULL_SMOTENC


2 — Carga de artefactos (X, y, features)

In [54]:
def load_xy_full(dir_full: Path):
    X_train = np.load(dir_full / "X_train_full.npy")
    X_val   = np.load(dir_full / "X_val_full.npy")
    X_test  = np.load(dir_full / "X_test_full.npy")

    y_train = pd.read_parquet(dir_full / "y_train.parquet")["Exited"].to_numpy()
    y_val   = pd.read_parquet(dir_full / "y_val.parquet")["Exited"].to_numpy()
    y_test  = pd.read_parquet(dir_full / "y_test.parquet")["Exited"].to_numpy()

    feat = pd.read_parquet(dir_full / "feature_names_full.parquet")["feature"].tolist()
    return X_train, y_train, X_val, y_val, X_test, y_test, feat

X_train, y_train, X_val, y_val, X_test, y_test, feature_names = load_xy_full(DATA_DIR)
print("Shapes:", X_train.shape, X_val.shape, X_test.shape)
print("y train/val/test:", y_train.shape, y_val.shape, y_test.shape)
print("n features:", len(feature_names))

Shapes: (6000, 15) (2000, 15) (2000, 15)
y train/val/test: (6000,) (2000,) (2000,)
n features: 15


3 — Métricas y selección de umbral

In [55]:
def pr_auc(y_true, y_proba): 
    return float(average_precision_score(y_true, y_proba))

def roc_auc(y_true, y_proba): 
    return float(roc_auc_score(y_true, y_proba))

def find_best_threshold(y_true, y_proba, metric="f1"):
    thr_grid = np.linspace(0.0, 1.0, 1001)
    best_thr, best_score = 0.5, -1.0
    for thr in thr_grid:
        y_pred = (y_proba >= thr).astype(int)
        if metric == "f1":
            score = f1_score(y_true, y_pred, zero_division=0)
        elif metric == "recall":
            score = recall_score(y_true, y_pred, zero_division=0)
        else:
            raise ValueError("metric no soportada")
        if score > best_score:
            best_score, best_thr = score, thr
    return float(best_thr), float(best_score)

def compute_all_metrics(y_true, y_proba, thr):
    y_pred = (y_proba >= thr).astype(int)
    return {
        "pr_auc": pr_auc(y_true, y_proba),
        "roc_auc": roc_auc(y_true, y_proba),
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "f1": f1_score(y_true, y_pred, zero_division=0),
        "recall": recall_score(y_true, y_pred, zero_division=0),
        "bal_acc": balanced_accuracy_score(y_true, y_pred)
    }

def sanitize_params(p, use_balanced_train=True):
    p = dict(p)
    p.pop("verbose", None)
    p.setdefault("verbosity", -1)
    p["metric"] = "average_precision"
    if use_balanced_train:
        p["class_weight"] = None
    return p

def pred_proba_best(mdl, X):
    """Usa siempre la mejor iteración si existe (blindaje para reloads)."""
    it = getattr(mdl, "best_iteration_", None)
    if it is not None:
        return mdl.predict_proba(X, num_iteration=it)[:, 1]
    return mdl.predict_proba(X)[:, 1]

4 - Helpers MI Top-K y balanceo in-memory

In [56]:
import numpy as np
import pandas as pd
from sklearn.feature_selection import mutual_info_classif
from imblearn.over_sampling import SMOTENC, SMOTE

def fit_mi_selector(X_tr, y_tr, topk=30, seed=42):
    mi = mutual_info_classif(X_tr, y_tr, random_state=seed)
    order = np.argsort(mi)[::-1][:min(topk, X_tr.shape[1])]
    return order, mi

def apply_keep_idx(X, keep_idx):
    return X[:, keep_idx] if keep_idx is not None else X

def _bool_mask_from_indices(indices, n_features):
    mask = np.zeros(n_features, dtype=bool)
    mask[np.asarray(indices, dtype=int)] = True
    return mask

def load_categorical_mask(dir_full, feature_names, X_sample=None):
    n_features = len(feature_names)

    # 1) feature_names_full.parquet con is_cat
    fn_parq = dir_full / "feature_names_full.parquet"
    if fn_parq.exists():
        df = pd.read_parquet(fn_parq)
        if "is_cat" in df.columns:
            m = df["is_cat"].astype(bool).to_numpy()
            if len(m) == n_features:
                print("[CAT] Cargado is_cat desde feature_names_full.parquet")
                return m

    # 2) JSONs con índices o máscara
    for jname in ["categorical_indices_full.json", "categorical_indices.json",
                  "categorical_features_full.json", "categorical_features.json",
                  "preproc_meta.json", "enc_meta.json", "metadata.json"]:
        f = dir_full / jname
        if f.exists():
            try:
                data = json.loads(f.read_text())
                for key in ["categorical_indices", "cat_indices", "categorical_features", "categorical_cols_idx"]:
                    if key in data:
                        idx = data[key]
                        if isinstance(idx, dict) and "indices" in idx:
                            idx = idx["indices"]
                        mask = _bool_mask_from_indices(idx, n_features)
                        print(f"[CAT] Cargado índices categóricos desde {jname} clave '{key}'")
                        return mask
                for key in ["categorical_mask", "categorical_features_mask", "is_cat_mask"]:
                    if key in data and len(data[key]) == n_features:
                        print(f"[CAT] Cargada máscara categórica desde {jname} clave '{key}'")
                        return np.asarray(data[key], dtype=bool)
            except Exception:
                pass

    # 3) NPY de máscara
    for nname in ["categorical_mask_full.npy", "categorical_mask.npy", "cat_mask_full.npy", "cat_mask.npy"]:
        f = dir_full / nname
        if f.exists():
            m = np.load(f)
            if m.shape[0] == n_features:
                print(f"[CAT] Cargada máscara categórica desde {nname}")
                return m.astype(bool)

    # 4) Fallback heurístico
    print("[CAT] Metadatos categóricos no encontrados. Usando heurística...")
    if X_sample is None:
        return np.zeros(n_features, dtype=bool)

    n = min(20000, X_sample.shape[0])
    sample = X_sample[:n]
    mask = np.zeros(n_features, dtype=bool)
    for j in range(n_features):
        vals = np.unique(sample[:, j])
        if vals.size <= 2 and np.all(np.isin(vals, [0, 1])):
            mask[j] = True
        elif vals.size <= 10 and np.allclose(vals, np.round(vals), atol=1e-6):
            mask[j] = True
    print(f"[CAT] Heurística: {mask.sum()} columnas categóricas detectadas de {n_features}")
    return mask

def reduce_mask(mask, keep_idx):
    if keep_idx is None:
        return mask
    if mask is None:
        return None
    mask = np.asarray(mask)
    return mask[keep_idx]

def _to_cat_indices(categorical_mask, n_features):
    if categorical_mask is None:
        return []
    arr = np.asarray(categorical_mask)

    if arr.dtype == bool:
        if arr.shape[0] != n_features:
            raise ValueError(
                f"categorical_mask length ({arr.shape[0]}) != n_features ({n_features})"
            )
        return np.flatnonzero(arr).tolist()

    return [int(i) for i in arr.ravel().tolist()]

def maybe_resample(X_tr, y_tr, categorical_mask=None, RANDOM_STATE=42):

    cat_idx = _to_cat_indices(categorical_mask, X_tr.shape[1]) if categorical_mask is not None else []

    if len(cat_idx) == 0:
        sm = SMOTE(random_state=RANDOM_STATE)
        return sm.fit_resample(X_tr, y_tr)
    else:
        sm = SMOTENC(categorical_features=cat_idx, random_state=RANDOM_STATE)
        return sm.fit_resample(X_tr, y_tr)

5 — Hiperparámetros persistentes

In [57]:
VIEW_TAG = "REDUCED" if USE_REDUCED else "FULL"
BAL_TAG  = "SMOTENC" if USE_BALANCED_TRAIN else "IMB"
BEST_HP_FILE = OUT_PARAMS / f"BEST_LGBM_{VIEW_TAG}_{BAL_TAG}.json"

def get_lgbm_defaults(seed=RANDOM_STATE):
    mdl = LGBMClassifier(random_state=seed)
    return mdl.get_params()

def load_best_or_default():
    if BEST_HP_FILE.exists():
        try:
            best = json.loads(BEST_HP_FILE.read_text())
            print("[HP] Cargando mejores hiperparámetros previos:", BEST_HP_FILE.name)
            base = get_lgbm_defaults()
            base.update(best)
            return base, True
        except Exception as e:
            print("[HP] Aviso: no se pudo leer BEST (uso defaults).", e)
    print("[HP] Usando hiperparámetros DEFAULT.")
    return get_lgbm_defaults(), False

seed_params, loaded_best_flag = load_best_or_default()

[HP] Cargando mejores hiperparámetros previos: BEST_LGBM_FULL_SMOTENC.json


6 — Entrenamiento BASELINE partiendo del seed

In [58]:
# 6 — Entrenamiento BASELINE partiendo del seed
_fit_kwargs = {}
_callbacks = []
if _LGBM_CB_OK:
    _callbacks = [early_stopping(stopping_rounds=200), log_evaluation(period=50)]
else:
    _fit_kwargs["early_stopping_rounds"] = 200

# Asegurar compatibilidad de parámetros y evitar class_weight cuando hay sobre-muestreo
seed_params = sanitize_params(seed_params, use_balanced_train=USE_BALANCED_TRAIN)

# === Cargar máscara categórica desde metadatos del preprocesamiento ===
CAT_MASK_FULL = load_categorical_mask(DATA_DIR, feature_names, X_sample=X_train)

# === Reducción MI Top-K ===
keep_idx_global = None
feature_names_used = feature_names
X_train_fit, X_val_fit, X_test_fit = X_train, X_val, X_test

if USE_REDUCED:
    keep_idx_global, _mi = fit_mi_selector(X_train, y_train, topk=MI_TOPK, seed=RANDOM_STATE)
    X_train_fit = apply_keep_idx(X_train, keep_idx_global)
    X_val_fit   = apply_keep_idx(X_val,   keep_idx_global)
    X_test_fit  = apply_keep_idx(X_test,  keep_idx_global)
    feature_names_used = [feature_names[i] for i in keep_idx_global]

# máscara alineada con las features
CAT_MASK_USED = reduce_mask(CAT_MASK_FULL, keep_idx_global)

# === Balanceo SOLO en train ===
X_train_final, y_train_final = X_train_fit, y_train
if USE_BALANCED_TRAIN:
    X_train_final, y_train_final = maybe_resample(X_train_fit, y_train, categorical_mask=CAT_MASK_USED)

model = LGBMClassifier(**seed_params)

_fit_call = dict(
    eval_set=[(X_val_fit, y_val)],
    eval_metric="aucpr",
    **_fit_kwargs
)
if _LGBM_CB_OK:
    _fit_call["callbacks"] = _callbacks

model.fit(X_train_final, y_train_final, **_fit_call)

best_iter = getattr(model, "best_iteration_", None)
print(f"[BASELINE] best_iteration: {best_iter}")

# Validación y umbral óptimo
proba_val = pred_proba_best(model, X_val_fit)
thr_val, best_f1_val = find_best_threshold(y_val, proba_val, metric="f1")
print(f"[BASELINE] Mejor umbral (val) por F1: {thr_val:.3f} | F1(val)={best_f1_val:.4f}")

val_metrics = compute_all_metrics(y_val, proba_val, thr_val)
val_metrics

[CAT] Metadatos categóricos no encontrados. Usando heurística...
[CAT] Heurística: 10 columnas categóricas detectadas de 15
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.669681
[100]	valid_0's average_precision: 0.68309
[150]	valid_0's average_precision: 0.691147
[200]	valid_0's average_precision: 0.694647
[250]	valid_0's average_precision: 0.69566
[300]	valid_0's average_precision: 0.69594
[350]	valid_0's average_precision: 0.693782
[400]	valid_0's average_precision: 0.692243
Early stopping, best iteration is:
[236]	valid_0's average_precision: 0.696435
[BASELINE] best_iteration: 236
[BASELINE] Mejor umbral (val) por F1: 0.392 | F1(val)=0.6394


{'pr_auc': 0.6964347470013181,
 'roc_auc': 0.8585935704579772,
 'precision': 0.5953389830508474,
 'f1': 0.6393629124004551,
 'recall': 0.6904176904176904,
 'bal_acc': 0.7852590649200819}

7 - Optimización incremental

In [59]:
import optuna
from optuna.samplers import TPESampler

tuned_model = None

N_TRIALS = 40
STUDY_NAME = f"LGBM_{VIEW_TAG}_{BAL_TAG}_AP"
SAMPLER = TPESampler(seed=RANDOM_STATE, multivariate=True, group=False)
study = optuna.create_study(direction="maximize", study_name=STUDY_NAME, sampler=SAMPLER)

SEARCH_KEYS = [
    "learning_rate", "n_estimators", "num_leaves", "max_depth",
    "min_child_samples", "subsample", "colsample_bytree",
    "reg_alpha", "reg_lambda", "min_split_gain", "class_weight"
]

def suggest_lgbm_params(trial, base=None):
    p = {}
    p["learning_rate"]     = trial.suggest_float("learning_rate", 0.005, 0.2, log=True)
    p["n_estimators"]      = trial.suggest_int("n_estimators", 800, 3000, step=50)
    p["num_leaves"]        = trial.suggest_int("num_leaves", 16, 256)
    p["max_depth"]         = trial.suggest_categorical("max_depth", [-1, 4, 6, 8, 10])
    p["min_child_samples"] = trial.suggest_int("min_child_samples", 10, 200)
    p["subsample"]         = trial.suggest_float("subsample", 0.6, 1.0)
    p["colsample_bytree"]  = trial.suggest_float("colsample_bytree", 0.6, 1.0)
    p["reg_alpha"]         = trial.suggest_float("reg_alpha", 1e-4, 10.0, log=True)
    p["reg_lambda"]        = trial.suggest_float("reg_lambda", 1e-4, 10.0, log=True)
    p["min_split_gain"]    = trial.suggest_float("min_split_gain", 0.0, 1.0)
    if USE_BALANCED_TRAIN:
        p["class_weight"] = None
    else:
        p["class_weight"] = trial.suggest_categorical("class_weight", [None, "balanced"])

    p["random_state"] = RANDOM_STATE
    p["n_jobs"] = -1
    p["metric"] = "average_precision"
    p["verbosity"] = -1
    return p

# máscara a usar en Optuna
_MASK_FOR_TUNING = CAT_MASK_USED if USE_REDUCED else CAT_MASK_FULL
_X_FOR_TUNING = X_train_fit
_Y_FOR_TUNING = y_train
_X_VAL_FOR_TUNING = X_val_fit
_Y_VAL_FOR_TUNING = y_val

def objective(trial):
    hp = suggest_lgbm_params(trial)
    hp = sanitize_params(hp, use_balanced_train=USE_BALANCED_TRAIN)

    mdl = LGBMClassifier(**hp)

    X_tr, y_tr = _X_FOR_TUNING, _Y_FOR_TUNING
    if USE_BALANCED_TRAIN:
        X_tr, y_tr = maybe_resample(X_tr, y_tr, categorical_mask=_MASK_FOR_TUNING)

    fit_call = dict(
        eval_set=[(_X_VAL_FOR_TUNING, _Y_VAL_FOR_TUNING)],
        eval_metric="aucpr",
        **_fit_kwargs
    )
    if _LGBM_CB_OK:
        fit_call["callbacks"] = _callbacks

    mdl.fit(X_tr, y_tr, **fit_call)
    proba_val_t = pred_proba_best(mdl, _X_VAL_FOR_TUNING)
    ap = average_precision_score(_Y_VAL_FOR_TUNING, proba_val_t)
    trial.set_user_attr("best_iteration", getattr(mdl, "best_iteration_", None))
    return ap

print(f"[OPTUNA] Iniciando estudio '{STUDY_NAME}' con {N_TRIALS} pruebas...")
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)

best = study.best_trial
print(f"[OPTUNA] Mejor AP(val): {best.value:.6f}")
print(f"[OPTUNA] Params ganadores:", best.params)
print(f"[OPTUNA] best_iteration (del trial):", best.user_attrs.get("best_iteration"))

best_params = dict(best.params)
best_params.update({
    "random_state": RANDOM_STATE,
    "n_jobs": -1,
    "metric": "average_precision",
    "verbosity": -1
})
with open(BEST_HP_FILE, "w", encoding="utf-8") as f:
    json.dump(best_params, f, indent=2, ensure_ascii=False)
print("[OPTUNA] Guardado BEST en:", BEST_HP_FILE.name)

tuned_model = LGBMClassifier(**best_params)

fit_call = dict(
    eval_set=[(_X_VAL_FOR_TUNING, _Y_VAL_FOR_TUNING)],
    eval_metric="aucpr",
    **_fit_kwargs
)
if _LGBM_CB_OK:
    fit_call["callbacks"] = _callbacks

X_train_tuned, y_train_tuned = _X_FOR_TUNING, _Y_FOR_TUNING
if USE_BALANCED_TRAIN:
    X_train_tuned, y_train_tuned = maybe_resample(_X_FOR_TUNING, _Y_FOR_TUNING, categorical_mask=_MASK_FOR_TUNING)

tuned_model.fit(X_train_tuned, y_train_tuned, **fit_call)
print("[OPTUNA] Reentreno final completado. best_iteration_ =", getattr(tuned_model, "best_iteration_", None))

[I 2025-12-09 14:45:45,998] A new study created in memory with name: LGBM_FULL_SMOTENC_AP


[OPTUNA] Iniciando estudio 'LGBM_FULL_SMOTENC_AP' con 40 pruebas...
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.649198
[100]	valid_0's average_precision: 0.659042
[150]	valid_0's average_precision: 0.66209
[200]	valid_0's average_precision: 0.665066
[250]	valid_0's average_precision: 0.66948
[300]	valid_0's average_precision: 0.672239
[350]	valid_0's average_precision: 0.673856
[400]	valid_0's average_precision: 0.674555
[450]	valid_0's average_precision: 0.674824
[500]	valid_0's average_precision: 0.674914


[I 2025-12-09 14:45:48,338] Trial 0 finished with value: 0.6756544443881454 and parameters: {'learning_rate': 0.019906996673933378, 'n_estimators': 2900, 'num_leaves': 192, 'max_depth': 10, 'min_child_samples': 124, 'subsample': 0.8832290311184181, 'colsample_bytree': 0.608233797718321, 'reg_alpha': 7.072114131472227, 'reg_lambda': 1.452824663751602, 'min_split_gain': 0.21233911067827616}. Best is trial 0 with value: 0.6756544443881454.


[550]	valid_0's average_precision: 0.675518
[600]	valid_0's average_precision: 0.675257
[650]	valid_0's average_precision: 0.675257
[700]	valid_0's average_precision: 0.675257
[750]	valid_0's average_precision: 0.675257
Early stopping, best iteration is:
[559]	valid_0's average_precision: 0.675654
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.661508
[100]	valid_0's average_precision: 0.658636
[150]	valid_0's average_precision: 0.659526
[200]	valid_0's average_precision: 0.660313


[I 2025-12-09 14:45:50,016] Trial 1 finished with value: 0.6626238031642654 and parameters: {'learning_rate': 0.009778325945801386, 'n_estimators': 1200, 'num_leaves': 89, 'max_depth': 8, 'min_child_samples': 65, 'subsample': 0.7465447373174767, 'colsample_bytree': 0.7824279936868144, 'reg_alpha': 0.8431013932082461, 'reg_lambda': 0.0009962513222055108, 'min_split_gain': 0.5142344384136116}. Best is trial 0 with value: 0.6756544443881454.


Early stopping, best iteration is:
[47]	valid_0's average_precision: 0.662624
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.660876
[100]	valid_0's average_precision: 0.670874
[150]	valid_0's average_precision: 0.676532
[200]	valid_0's average_precision: 0.67804


[I 2025-12-09 14:45:51,907] Trial 2 finished with value: 0.680014195168313 and parameters: {'learning_rate': 0.04446862319918233, 'n_estimators': 900, 'num_leaves': 162, 'max_depth': 8, 'min_child_samples': 68, 'subsample': 0.6390688456025535, 'colsample_bytree': 0.8736932106048627, 'reg_alpha': 0.015876781526923997, 'reg_lambda': 0.0004075596440072873, 'min_split_gain': 0.4951769101112702}. Best is trial 2 with value: 0.680014195168313.


[250]	valid_0's average_precision: 0.679721
[300]	valid_0's average_precision: 0.679721
[350]	valid_0's average_precision: 0.679721
[400]	valid_0's average_precision: 0.679721
Early stopping, best iteration is:
[227]	valid_0's average_precision: 0.680014
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.591641
[100]	valid_0's average_precision: 0.602881
[150]	valid_0's average_precision: 0.608848
[200]	valid_0's average_precision: 0.618294
[250]	valid_0's average_precision: 0.626954
[300]	valid_0's average_precision: 0.63324
[350]	valid_0's average_precision: 0.644292
[400]	valid_0's average_precision: 0.645347
[450]	valid_0's average_precision: 0.646027
[500]	valid_0's average_precision: 0.654557
[550]	valid_0's average_precision: 0.659983
[600]	valid_0's average_precision: 0.6624
[650]	valid_0's average_precision: 0.664146
[700]	valid_0's average_precision: 0.666175
[750]	valid_0's average_precision: 0.667311
[800]	valid_0's average_pre

[I 2025-12-09 14:45:56,507] Trial 3 finished with value: 0.6701583199848199 and parameters: {'learning_rate': 0.005676262589955587, 'n_estimators': 2800, 'num_leaves': 78, 'max_depth': -1, 'min_child_samples': 195, 'subsample': 0.9100531293444458, 'colsample_bytree': 0.9757995766256756, 'reg_alpha': 2.9794544625913595, 'reg_lambda': 0.09761125443110447, 'min_split_gain': 0.9218742350231168}. Best is trial 2 with value: 0.680014195168313.


Early stopping, best iteration is:
[997]	valid_0's average_precision: 0.670158
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.660945
[100]	valid_0's average_precision: 0.662264
[150]	valid_0's average_precision: 0.665067
[200]	valid_0's average_precision: 0.66269
[250]	valid_0's average_precision: 0.663326


[I 2025-12-09 14:45:57,902] Trial 4 finished with value: 0.6660339046544103 and parameters: {'learning_rate': 0.006930112765148064, 'n_estimators': 1200, 'num_leaves': 26, 'max_depth': 8, 'min_child_samples': 63, 'subsample': 0.8170784332632994, 'colsample_bytree': 0.6563696899899051, 'reg_alpha': 1.025616274847307, 'reg_lambda': 0.0002359137306347715, 'min_split_gain': 0.9868869366005173}. Best is trial 2 with value: 0.680014195168313.


[300]	valid_0's average_precision: 0.665186
Early stopping, best iteration is:
[144]	valid_0's average_precision: 0.666034
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.67059
[100]	valid_0's average_precision: 0.685216
[150]	valid_0's average_precision: 0.68706
[200]	valid_0's average_precision: 0.686142
[250]	valid_0's average_precision: 0.686534
[300]	valid_0's average_precision: 0.688838
[350]	valid_0's average_precision: 0.68758
[400]	valid_0's average_precision: 0.685525


[I 2025-12-09 14:45:59,551] Trial 5 finished with value: 0.6892733705293801 and parameters: {'learning_rate': 0.08632815369661433, 'n_estimators': 1200, 'num_leaves': 17, 'max_depth': -1, 'min_child_samples': 78, 'subsample': 0.6463476238100518, 'colsample_bytree': 0.9452413703502374, 'reg_alpha': 0.13076473382928538, 'reg_lambda': 0.004513257622008946, 'min_split_gain': 0.06355835028602363}. Best is trial 5 with value: 0.6892733705293801.


[450]	valid_0's average_precision: 0.682962
[500]	valid_0's average_precision: 0.683155
Early stopping, best iteration is:
[316]	valid_0's average_precision: 0.689273
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.626329
[100]	valid_0's average_precision: 0.633323
[150]	valid_0's average_precision: 0.646379
[200]	valid_0's average_precision: 0.655292
[250]	valid_0's average_precision: 0.658691
[300]	valid_0's average_precision: 0.66157
[350]	valid_0's average_precision: 0.663362
[400]	valid_0's average_precision: 0.665246
[450]	valid_0's average_precision: 0.666775
[500]	valid_0's average_precision: 0.667339
[550]	valid_0's average_precision: 0.669083
[600]	valid_0's average_precision: 0.671129
[650]	valid_0's average_precision: 0.673024
[700]	valid_0's average_precision: 0.67374
[750]	valid_0's average_precision: 0.6748
[800]	valid_0's average_precision: 0.675739
[850]	valid_0's average_precision: 0.676864
[900]	valid_0's average_prec

[I 2025-12-09 14:46:01,724] Trial 6 finished with value: 0.6814142348264602 and parameters: {'learning_rate': 0.015746438450976667, 'n_estimators': 1500, 'num_leaves': 191, 'max_depth': 4, 'min_child_samples': 155, 'subsample': 0.8245108790277985, 'colsample_bytree': 0.9083868719818244, 'reg_alpha': 0.02944272359149678, 'reg_lambda': 0.04108318894699928, 'min_split_gain': 0.42754101835854963}. Best is trial 5 with value: 0.6892733705293801.


[1300]	valid_0's average_precision: 0.680951
[1350]	valid_0's average_precision: 0.680575
[1400]	valid_0's average_precision: 0.680271
Early stopping, best iteration is:
[1207]	valid_0's average_precision: 0.681414
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.659389
[100]	valid_0's average_precision: 0.65661
[150]	valid_0's average_precision: 0.658476


[I 2025-12-09 14:46:02,772] Trial 7 finished with value: 0.6624091151648945 and parameters: {'learning_rate': 0.005491525066424382, 'n_estimators': 1000, 'num_leaves': 23, 'max_depth': 8, 'min_child_samples': 88, 'subsample': 0.9022204554172195, 'colsample_bytree': 0.6915192661966489, 'reg_alpha': 0.00024260488932164486, 'reg_lambda': 0.0028103296447636083, 'min_split_gain': 0.16122128725400442}. Best is trial 5 with value: 0.6892733705293801.


[200]	valid_0's average_precision: 0.657563
Early stopping, best iteration is:
[42]	valid_0's average_precision: 0.662409
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.669481


[I 2025-12-09 14:46:03,504] Trial 8 finished with value: 0.6704573832756178 and parameters: {'learning_rate': 0.15431261011638706, 'n_estimators': 2600, 'num_leaves': 168, 'max_depth': 8, 'min_child_samples': 164, 'subsample': 0.9584365199693973, 'colsample_bytree': 0.7272013899887455, 'reg_alpha': 0.000355025561231308, 'reg_lambda': 0.001379354235277248, 'min_split_gain': 0.4271077886262563}. Best is trial 5 with value: 0.6892733705293801.


[100]	valid_0's average_precision: 0.667988
[150]	valid_0's average_precision: 0.664768
[200]	valid_0's average_precision: 0.664999
[250]	valid_0's average_precision: 0.664999
Early stopping, best iteration is:
[87]	valid_0's average_precision: 0.670457
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.671118


[I 2025-12-09 14:46:04,120] Trial 9 finished with value: 0.6765909067417198 and parameters: {'learning_rate': 0.10220655100897388, 'n_estimators': 2700, 'num_leaves': 17, 'max_depth': -1, 'min_child_samples': 190, 'subsample': 0.7292811728083021, 'colsample_bytree': 0.8075162486973464, 'reg_alpha': 0.3274121520988885, 'reg_lambda': 0.0065788201191231774, 'min_split_gain': 0.9717820827209607}. Best is trial 5 with value: 0.6892733705293801.


[100]	valid_0's average_precision: 0.67527
[150]	valid_0's average_precision: 0.675066
[200]	valid_0's average_precision: 0.674836
[250]	valid_0's average_precision: 0.674836
Early stopping, best iteration is:
[83]	valid_0's average_precision: 0.676591
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.676968
[100]	valid_0's average_precision: 0.671785
[150]	valid_0's average_precision: 0.669748


[I 2025-12-09 14:46:05,065] Trial 10 finished with value: 0.6793339259556741 and parameters: {'learning_rate': 0.11160520780208899, 'n_estimators': 1200, 'num_leaves': 25, 'max_depth': -1, 'min_child_samples': 154, 'subsample': 0.6304655048035832, 'colsample_bytree': 0.9486757327801338, 'reg_alpha': 0.4625809500171002, 'reg_lambda': 0.02958571414754742, 'min_split_gain': 0.24741350827669434}. Best is trial 5 with value: 0.6892733705293801.


[200]	valid_0's average_precision: 0.669024
[250]	valid_0's average_precision: 0.669024
Early stopping, best iteration is:
[54]	valid_0's average_precision: 0.679334
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.628984
[100]	valid_0's average_precision: 0.64279
[150]	valid_0's average_precision: 0.653763
[200]	valid_0's average_precision: 0.657911
[250]	valid_0's average_precision: 0.661286
[300]	valid_0's average_precision: 0.664635
[350]	valid_0's average_precision: 0.667544
[400]	valid_0's average_precision: 0.669038
[450]	valid_0's average_precision: 0.670893
[500]	valid_0's average_precision: 0.672265
[550]	valid_0's average_precision: 0.673249
[600]	valid_0's average_precision: 0.674043
[650]	valid_0's average_precision: 0.674651
[700]	valid_0's average_precision: 0.674684
[750]	valid_0's average_precision: 0.675102
[800]	valid_0's average_precision: 0.675216
[850]	valid_0's average_precision: 0.675532
[900]	valid_0's average_pr

[I 2025-12-09 14:46:06,789] Trial 11 finished with value: 0.6759279316878005 and parameters: {'learning_rate': 0.023266614206805883, 'n_estimators': 2000, 'num_leaves': 245, 'max_depth': 4, 'min_child_samples': 163, 'subsample': 0.9019902779453758, 'colsample_bytree': 0.9351456153675048, 'reg_alpha': 2.815473529852417, 'reg_lambda': 0.003557587072605683, 'min_split_gain': 0.39560419853905165}. Best is trial 5 with value: 0.6892733705293801.


Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.667203
[100]	valid_0's average_precision: 0.683339
[150]	valid_0's average_precision: 0.683832
[200]	valid_0's average_precision: 0.684878
[250]	valid_0's average_precision: 0.683704
[300]	valid_0's average_precision: 0.67796
[350]	valid_0's average_precision: 0.677414


[I 2025-12-09 14:46:08,153] Trial 12 finished with value: 0.6857574516292847 and parameters: {'learning_rate': 0.12639865324310517, 'n_estimators': 1150, 'num_leaves': 43, 'max_depth': 4, 'min_child_samples': 49, 'subsample': 0.7038072679919442, 'colsample_bytree': 0.9797447789375463, 'reg_alpha': 0.0698312618749519, 'reg_lambda': 0.001149422634021616, 'min_split_gain': 0.09326545536393184}. Best is trial 5 with value: 0.6892733705293801.


[400]	valid_0's average_precision: 0.674205
Early stopping, best iteration is:
[240]	valid_0's average_precision: 0.685757
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.671538
[100]	valid_0's average_precision: 0.685001
[150]	valid_0's average_precision: 0.691138
[200]	valid_0's average_precision: 0.691121
[250]	valid_0's average_precision: 0.687963
[300]	valid_0's average_precision: 0.686182


[I 2025-12-09 14:46:09,035] Trial 13 finished with value: 0.6922081052076836 and parameters: {'learning_rate': 0.0968972456365368, 'n_estimators': 2000, 'num_leaves': 80, 'max_depth': 4, 'min_child_samples': 37, 'subsample': 0.6369743380933038, 'colsample_bytree': 0.950519614978828, 'reg_alpha': 0.06058181602847856, 'reg_lambda': 0.002215571184493588, 'min_split_gain': 0.1688131998720223}. Best is trial 13 with value: 0.6922081052076836.


[350]	valid_0's average_precision: 0.6821
Early stopping, best iteration is:
[187]	valid_0's average_precision: 0.692208
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.683067
[100]	valid_0's average_precision: 0.680267
[150]	valid_0's average_precision: 0.664416
[200]	valid_0's average_precision: 0.658774


[I 2025-12-09 14:46:11,189] Trial 14 finished with value: 0.6905876083409045 and parameters: {'learning_rate': 0.09819167862083925, 'n_estimators': 1600, 'num_leaves': 146, 'max_depth': -1, 'min_child_samples': 126, 'subsample': 0.6147083677176132, 'colsample_bytree': 0.8568057696843706, 'reg_alpha': 0.027560179756205217, 'reg_lambda': 0.0002969077237519454, 'min_split_gain': 0.025869914068978477}. Best is trial 13 with value: 0.6922081052076836.


[250]	valid_0's average_precision: 0.656679
Early stopping, best iteration is:
[73]	valid_0's average_precision: 0.690588
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.664604


[I 2025-12-09 14:46:12,235] Trial 15 finished with value: 0.6709473165171316 and parameters: {'learning_rate': 0.08815961994797127, 'n_estimators': 2450, 'num_leaves': 75, 'max_depth': 10, 'min_child_samples': 53, 'subsample': 0.6040568606200313, 'colsample_bytree': 0.9387070577400086, 'reg_alpha': 0.5331481964372673, 'reg_lambda': 0.040302749561892705, 'min_split_gain': 0.35087159055922157}. Best is trial 13 with value: 0.6922081052076836.


[100]	valid_0's average_precision: 0.670748
[150]	valid_0's average_precision: 0.669532
[200]	valid_0's average_precision: 0.669532
[250]	valid_0's average_precision: 0.669532
Early stopping, best iteration is:
[80]	valid_0's average_precision: 0.670947
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.673066
[100]	valid_0's average_precision: 0.689406
[150]	valid_0's average_precision: 0.683034
[200]	valid_0's average_precision: 0.678564
[250]	valid_0's average_precision: 0.674028


[I 2025-12-09 14:46:14,650] Trial 16 finished with value: 0.6894064595249314 and parameters: {'learning_rate': 0.0629050400921003, 'n_estimators': 1100, 'num_leaves': 232, 'max_depth': -1, 'min_child_samples': 115, 'subsample': 0.6338595983121055, 'colsample_bytree': 0.8144677967905564, 'reg_alpha': 0.0006258047230043463, 'reg_lambda': 0.00018033939383481294, 'min_split_gain': 0.02117032378431194}. Best is trial 13 with value: 0.6922081052076836.


[300]	valid_0's average_precision: 0.671125
Early stopping, best iteration is:
[100]	valid_0's average_precision: 0.689406
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.67798


[I 2025-12-09 14:46:15,483] Trial 17 finished with value: 0.6809676514139752 and parameters: {'learning_rate': 0.09178207143054895, 'n_estimators': 2000, 'num_leaves': 167, 'max_depth': -1, 'min_child_samples': 114, 'subsample': 0.6787905946961714, 'colsample_bytree': 0.7496292654704676, 'reg_alpha': 1.843367640988326, 'reg_lambda': 0.0008639079649502969, 'min_split_gain': 0.33673333296394103}. Best is trial 13 with value: 0.6922081052076836.


[100]	valid_0's average_precision: 0.677669
[150]	valid_0's average_precision: 0.677656
[200]	valid_0's average_precision: 0.677656
[250]	valid_0's average_precision: 0.677656
Early stopping, best iteration is:
[73]	valid_0's average_precision: 0.680968
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.650636
[100]	valid_0's average_precision: 0.665222
[150]	valid_0's average_precision: 0.674329
[200]	valid_0's average_precision: 0.683186
[250]	valid_0's average_precision: 0.685536
[300]	valid_0's average_precision: 0.685243
[350]	valid_0's average_precision: 0.683019
[400]	valid_0's average_precision: 0.683937


[I 2025-12-09 14:46:17,142] Trial 18 finished with value: 0.6859261316525435 and parameters: {'learning_rate': 0.04525788185174506, 'n_estimators': 2600, 'num_leaves': 197, 'max_depth': 6, 'min_child_samples': 78, 'subsample': 0.6893281617223777, 'colsample_bytree': 0.9347650346135907, 'reg_alpha': 0.058236980239855074, 'reg_lambda': 0.0002875635681518399, 'min_split_gain': 0.04411923996036053}. Best is trial 13 with value: 0.6922081052076836.


Early stopping, best iteration is:
[240]	valid_0's average_precision: 0.685926
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.639362
[100]	valid_0's average_precision: 0.646999
[150]	valid_0's average_precision: 0.654103
[200]	valid_0's average_precision: 0.658954
[250]	valid_0's average_precision: 0.664097
[300]	valid_0's average_precision: 0.668155
[350]	valid_0's average_precision: 0.669002
[400]	valid_0's average_precision: 0.673648
[450]	valid_0's average_precision: 0.675185
[500]	valid_0's average_precision: 0.678257
[550]	valid_0's average_precision: 0.680624
[600]	valid_0's average_precision: 0.680932
[650]	valid_0's average_precision: 0.680897
[700]	valid_0's average_precision: 0.681933
[750]	valid_0's average_precision: 0.682096
[800]	valid_0's average_precision: 0.682545
[850]	valid_0's average_precision: 0.68205
[900]	valid_0's average_precision: 0.681961


[I 2025-12-09 14:46:18,812] Trial 19 finished with value: 0.6827522784178828 and parameters: {'learning_rate': 0.0204496765699653, 'n_estimators': 2250, 'num_leaves': 33, 'max_depth': 4, 'min_child_samples': 109, 'subsample': 0.6591641187804929, 'colsample_bytree': 0.866226768018493, 'reg_alpha': 0.5353725497629153, 'reg_lambda': 0.00512395342222733, 'min_split_gain': 0.287559581283407}. Best is trial 13 with value: 0.6922081052076836.


[950]	valid_0's average_precision: 0.681993
Early stopping, best iteration is:
[790]	valid_0's average_precision: 0.682752
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.674464
[100]	valid_0's average_precision: 0.687748
[150]	valid_0's average_precision: 0.677735
[200]	valid_0's average_precision: 0.673473
[250]	valid_0's average_precision: 0.668267
Early stopping, best iteration is:
[80]	valid_0's average_precision: 0.690169


[I 2025-12-09 14:46:20,973] Trial 20 finished with value: 0.6901688974397252 and parameters: {'learning_rate': 0.07226639920449827, 'n_estimators': 2550, 'num_leaves': 71, 'max_depth': -1, 'min_child_samples': 98, 'subsample': 0.605297901568919, 'colsample_bytree': 0.7890836132067094, 'reg_alpha': 0.004132175623442268, 'reg_lambda': 0.0009805018679087218, 'min_split_gain': 0.07123314947054762}. Best is trial 13 with value: 0.6922081052076836.


Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.671246
[100]	valid_0's average_precision: 0.681638


[I 2025-12-09 14:46:22,339] Trial 21 finished with value: 0.6830664062793997 and parameters: {'learning_rate': 0.05584558535090659, 'n_estimators': 2200, 'num_leaves': 52, 'max_depth': -1, 'min_child_samples': 89, 'subsample': 0.6023163306184741, 'colsample_bytree': 0.7888307653615044, 'reg_alpha': 0.0003418157305580054, 'reg_lambda': 0.00580376845173071, 'min_split_gain': 0.4873171861017841}. Best is trial 13 with value: 0.6922081052076836.


[150]	valid_0's average_precision: 0.677007
[200]	valid_0's average_precision: 0.678192
[250]	valid_0's average_precision: 0.678192
Early stopping, best iteration is:
[84]	valid_0's average_precision: 0.683066
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.671678
[100]	valid_0's average_precision: 0.68583
[150]	valid_0's average_precision: 0.678915
[200]	valid_0's average_precision: 0.672307
[250]	valid_0's average_precision: 0.666681


[I 2025-12-09 14:46:25,823] Trial 22 finished with value: 0.685846433538468 and parameters: {'learning_rate': 0.07168456287527604, 'n_estimators': 2150, 'num_leaves': 89, 'max_depth': -1, 'min_child_samples': 96, 'subsample': 0.6449774839393205, 'colsample_bytree': 0.8353014619352742, 'reg_alpha': 0.014620699232622705, 'reg_lambda': 0.000442919238872098, 'min_split_gain': 0.022458110239471696}. Best is trial 13 with value: 0.6922081052076836.


Early stopping, best iteration is:
[98]	valid_0's average_precision: 0.685846
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.681824
[100]	valid_0's average_precision: 0.688774
[150]	valid_0's average_precision: 0.690137
[200]	valid_0's average_precision: 0.685243
[250]	valid_0's average_precision: 0.679766


[I 2025-12-09 14:46:27,078] Trial 23 finished with value: 0.6937671420019428 and parameters: {'learning_rate': 0.17988556126799463, 'n_estimators': 2700, 'num_leaves': 44, 'max_depth': 6, 'min_child_samples': 76, 'subsample': 0.6708774513067567, 'colsample_bytree': 0.7430244354277076, 'reg_alpha': 0.00010132038587194589, 'reg_lambda': 0.007185480757400133, 'min_split_gain': 0.047588177386444504}. Best is trial 23 with value: 0.6937671420019428.


[300]	valid_0's average_precision: 0.675301
Early stopping, best iteration is:
[135]	valid_0's average_precision: 0.693767
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.676943
[100]	valid_0's average_precision: 0.683306
[150]	valid_0's average_precision: 0.679542
[200]	valid_0's average_precision: 0.678233


[I 2025-12-09 14:46:28,251] Trial 24 finished with value: 0.6864966991036772 and parameters: {'learning_rate': 0.1516153316113088, 'n_estimators': 2700, 'num_leaves': 23, 'max_depth': 10, 'min_child_samples': 35, 'subsample': 0.7817494093209323, 'colsample_bytree': 0.840412009100321, 'reg_alpha': 0.00036953986635734486, 'reg_lambda': 0.0032708341675769465, 'min_split_gain': 0.10938694187575364}. Best is trial 23 with value: 0.6937671420019428.


[250]	valid_0's average_precision: 0.678021
Early stopping, best iteration is:
[85]	valid_0's average_precision: 0.686497
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.673985
[100]	valid_0's average_precision: 0.689181
[150]	valid_0's average_precision: 0.681507
[200]	valid_0's average_precision: 0.680428


[I 2025-12-09 14:46:29,108] Trial 25 finished with value: 0.6898070774498181 and parameters: {'learning_rate': 0.134048869237695, 'n_estimators': 2400, 'num_leaves': 66, 'max_depth': 6, 'min_child_samples': 154, 'subsample': 0.7046204922935544, 'colsample_bytree': 0.691126505587233, 'reg_alpha': 0.00031443221670244035, 'reg_lambda': 0.012232748735371628, 'min_split_gain': 0.1027844907809374}. Best is trial 23 with value: 0.6937671420019428.


[250]	valid_0's average_precision: 0.684215
Early stopping, best iteration is:
[90]	valid_0's average_precision: 0.689807
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.663888
[100]	valid_0's average_precision: 0.679662
[150]	valid_0's average_precision: 0.67751
[200]	valid_0's average_precision: 0.677562


[I 2025-12-09 14:46:30,531] Trial 26 finished with value: 0.6800945550622964 and parameters: {'learning_rate': 0.08488809213830671, 'n_estimators': 2650, 'num_leaves': 130, 'max_depth': 6, 'min_child_samples': 67, 'subsample': 0.7060846722925868, 'colsample_bytree': 0.8354677762395447, 'reg_alpha': 0.0009820433419143397, 'reg_lambda': 0.09605802729928539, 'min_split_gain': 0.17967898479600297}. Best is trial 23 with value: 0.6937671420019428.


[250]	valid_0's average_precision: 0.672
[300]	valid_0's average_precision: 0.672936
Early stopping, best iteration is:
[102]	valid_0's average_precision: 0.680095
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.673582
[100]	valid_0's average_precision: 0.678796
[150]	valid_0's average_precision: 0.682778
[200]	valid_0's average_precision: 0.687159
[250]	valid_0's average_precision: 0.689226
[300]	valid_0's average_precision: 0.691291
[350]	valid_0's average_precision: 0.693168
[400]	valid_0's average_precision: 0.692776
[450]	valid_0's average_precision: 0.689599
[500]	valid_0's average_precision: 0.687842


[I 2025-12-09 14:46:32,466] Trial 27 finished with value: 0.6940357332343345 and parameters: {'learning_rate': 0.04362872946154633, 'n_estimators': 2950, 'num_leaves': 84, 'max_depth': 6, 'min_child_samples': 39, 'subsample': 0.6150719443388198, 'colsample_bytree': 0.6495808754701029, 'reg_alpha': 0.00024231919624716984, 'reg_lambda': 0.001223974597996582, 'min_split_gain': 0.01893996490776941}. Best is trial 27 with value: 0.6940357332343345.


[550]	valid_0's average_precision: 0.68592
Early stopping, best iteration is:
[381]	valid_0's average_precision: 0.694036
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.669526
[100]	valid_0's average_precision: 0.674919
[150]	valid_0's average_precision: 0.674697
[200]	valid_0's average_precision: 0.675197
[250]	valid_0's average_precision: 0.679221
[300]	valid_0's average_precision: 0.681473
[350]	valid_0's average_precision: 0.685317
[400]	valid_0's average_precision: 0.686886
[450]	valid_0's average_precision: 0.686188
[500]	valid_0's average_precision: 0.687436
[550]	valid_0's average_precision: 0.688697
[600]	valid_0's average_precision: 0.688506
[650]	valid_0's average_precision: 0.689914
[700]	valid_0's average_precision: 0.689413
[750]	valid_0's average_precision: 0.689573


[I 2025-12-09 14:46:35,426] Trial 28 finished with value: 0.6903353815430644 and parameters: {'learning_rate': 0.020830362984007166, 'n_estimators': 2500, 'num_leaves': 118, 'max_depth': 6, 'min_child_samples': 19, 'subsample': 0.6818221021419233, 'colsample_bytree': 0.6120217137832059, 'reg_alpha': 0.014279518310770114, 'reg_lambda': 0.001269625992510396, 'min_split_gain': 0.06443704582651885}. Best is trial 27 with value: 0.6940357332343345.


[800]	valid_0's average_precision: 0.689224
[850]	valid_0's average_precision: 0.689654
Early stopping, best iteration is:
[658]	valid_0's average_precision: 0.690335
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.675675
[100]	valid_0's average_precision: 0.677451
[150]	valid_0's average_precision: 0.67651
[200]	valid_0's average_precision: 0.676849


[I 2025-12-09 14:46:38,415] Trial 29 finished with value: 0.6793937264850278 and parameters: {'learning_rate': 0.07209884338494811, 'n_estimators': 2600, 'num_leaves': 181, 'max_depth': 10, 'min_child_samples': 19, 'subsample': 0.6192431059200858, 'colsample_bytree': 0.6314745815632127, 'reg_alpha': 0.0001803066483556883, 'reg_lambda': 0.0053472754515608185, 'min_split_gain': 0.04976359843181262}. Best is trial 27 with value: 0.6940357332343345.


[250]	valid_0's average_precision: 0.675473
Early stopping, best iteration is:
[71]	valid_0's average_precision: 0.679394
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.671484
[100]	valid_0's average_precision: 0.683208
[150]	valid_0's average_precision: 0.686076
[200]	valid_0's average_precision: 0.684289
[250]	valid_0's average_precision: 0.682363
[300]	valid_0's average_precision: 0.681224


[I 2025-12-09 14:46:40,037] Trial 30 finished with value: 0.6878437444420145 and parameters: {'learning_rate': 0.06817630476056928, 'n_estimators': 2700, 'num_leaves': 36, 'max_depth': 8, 'min_child_samples': 51, 'subsample': 0.694731196260598, 'colsample_bytree': 0.6492666311404743, 'reg_alpha': 0.0002671864962099014, 'reg_lambda': 0.008723635787826842, 'min_split_gain': 0.10487058039352706}. Best is trial 27 with value: 0.6940357332343345.


Early stopping, best iteration is:
[141]	valid_0's average_precision: 0.687844
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.683897
[100]	valid_0's average_precision: 0.687873


[I 2025-12-09 14:46:40,688] Trial 31 finished with value: 0.6911917564819916 and parameters: {'learning_rate': 0.14403221776025815, 'n_estimators': 2550, 'num_leaves': 88, 'max_depth': 4, 'min_child_samples': 19, 'subsample': 0.6122307388993724, 'colsample_bytree': 0.8853299679820834, 'reg_alpha': 0.04141952314781954, 'reg_lambda': 0.0016696504765061544, 'min_split_gain': 0.32212421803682956}. Best is trial 27 with value: 0.6940357332343345.


[150]	valid_0's average_precision: 0.68454
[200]	valid_0's average_precision: 0.68278
[250]	valid_0's average_precision: 0.68278
Early stopping, best iteration is:
[78]	valid_0's average_precision: 0.691192
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.666917


[I 2025-12-09 14:46:41,481] Trial 32 finished with value: 0.6835260845348519 and parameters: {'learning_rate': 0.16517265200404338, 'n_estimators': 2600, 'num_leaves': 80, 'max_depth': 4, 'min_child_samples': 19, 'subsample': 0.6316274010382068, 'colsample_bytree': 0.9455562370804077, 'reg_alpha': 0.005922912094477865, 'reg_lambda': 0.004223683653328875, 'min_split_gain': 0.5140044725357775}. Best is trial 27 with value: 0.6940357332343345.


[100]	valid_0's average_precision: 0.680566
[150]	valid_0's average_precision: 0.681854
[200]	valid_0's average_precision: 0.681854
[250]	valid_0's average_precision: 0.681854
Early stopping, best iteration is:
[94]	valid_0's average_precision: 0.683526
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.677002
[100]	valid_0's average_precision: 0.689236
[150]	valid_0's average_precision: 0.691241
[200]	valid_0's average_precision: 0.69141
[250]	valid_0's average_precision: 0.689054


[I 2025-12-09 14:46:42,279] Trial 33 finished with value: 0.694277612698997 and parameters: {'learning_rate': 0.15678602416893928, 'n_estimators': 2800, 'num_leaves': 45, 'max_depth': 4, 'min_child_samples': 85, 'subsample': 0.6178428016850243, 'colsample_bytree': 0.752256862165041, 'reg_alpha': 0.00019797157674667282, 'reg_lambda': 0.03782582342164131, 'min_split_gain': 0.025267229083416713}. Best is trial 33 with value: 0.694277612698997.


[300]	valid_0's average_precision: 0.687669
[350]	valid_0's average_precision: 0.685665
Early stopping, best iteration is:
[169]	valid_0's average_precision: 0.694278
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.677944
[100]	valid_0's average_precision: 0.685818
[150]	valid_0's average_precision: 0.679097
[200]	valid_0's average_precision: 0.676799
[250]	valid_0's average_precision: 0.672709


[I 2025-12-09 14:46:43,012] Trial 34 finished with value: 0.6879969675864455 and parameters: {'learning_rate': 0.18715267807355368, 'n_estimators': 1800, 'num_leaves': 96, 'max_depth': 4, 'min_child_samples': 56, 'subsample': 0.6667380383542079, 'colsample_bytree': 0.7197188276286746, 'reg_alpha': 0.0005893563794702811, 'reg_lambda': 0.11622950123613142, 'min_split_gain': 0.032082759191435765}. Best is trial 33 with value: 0.694277612698997.


Early stopping, best iteration is:
[86]	valid_0's average_precision: 0.687997
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.648143
[100]	valid_0's average_precision: 0.649789
[150]	valid_0's average_precision: 0.655405
[200]	valid_0's average_precision: 0.659221
[250]	valid_0's average_precision: 0.662913
[300]	valid_0's average_precision: 0.666901
[350]	valid_0's average_precision: 0.667414
[400]	valid_0's average_precision: 0.669524
[450]	valid_0's average_precision: 0.672953
[500]	valid_0's average_precision: 0.675267
[550]	valid_0's average_precision: 0.67737
[600]	valid_0's average_precision: 0.678293
[650]	valid_0's average_precision: 0.680446
[700]	valid_0's average_precision: 0.682244
[750]	valid_0's average_precision: 0.684842
[800]	valid_0's average_precision: 0.68636
[850]	valid_0's average_precision: 0.687945
[900]	valid_0's average_precision: 0.688059
[950]	valid_0's average_precision: 0.687996
[1000]	valid_0's average_pr

[I 2025-12-09 14:46:45,688] Trial 35 finished with value: 0.6918427594251736 and parameters: {'learning_rate': 0.01570200993126221, 'n_estimators': 3000, 'num_leaves': 69, 'max_depth': 4, 'min_child_samples': 37, 'subsample': 0.6177680062239366, 'colsample_bytree': 0.7399345606198131, 'reg_alpha': 0.0003086541340937849, 'reg_lambda': 0.03146836029100747, 'min_split_gain': 0.018609338794905356}. Best is trial 33 with value: 0.694277612698997.


[1550]	valid_0's average_precision: 0.690272
[1600]	valid_0's average_precision: 0.690361
Early stopping, best iteration is:
[1411]	valid_0's average_precision: 0.691843
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.669681
[100]	valid_0's average_precision: 0.68309
[150]	valid_0's average_precision: 0.691147
[200]	valid_0's average_precision: 0.694647
[250]	valid_0's average_precision: 0.69566
[300]	valid_0's average_precision: 0.69594
[350]	valid_0's average_precision: 0.693782


[I 2025-12-09 14:46:46,750] Trial 36 finished with value: 0.6964347470013181 and parameters: {'learning_rate': 0.10766792175362039, 'n_estimators': 2750, 'num_leaves': 34, 'max_depth': 4, 'min_child_samples': 109, 'subsample': 0.7334701365259861, 'colsample_bytree': 0.7909280979547718, 'reg_alpha': 0.0008825496524176745, 'reg_lambda': 0.2832044524407928, 'min_split_gain': 0.009848973222124005}. Best is trial 36 with value: 0.6964347470013181.


[400]	valid_0's average_precision: 0.692243
Early stopping, best iteration is:
[236]	valid_0's average_precision: 0.696435
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.673255
[100]	valid_0's average_precision: 0.689808
[150]	valid_0's average_precision: 0.68427
[200]	valid_0's average_precision: 0.684468
[250]	valid_0's average_precision: 0.67945


[I 2025-12-09 14:46:47,479] Trial 37 finished with value: 0.6904881840219965 and parameters: {'learning_rate': 0.1770650514696194, 'n_estimators': 2800, 'num_leaves': 21, 'max_depth': 4, 'min_child_samples': 69, 'subsample': 0.6498930572522668, 'colsample_bytree': 0.85505616963291, 'reg_alpha': 0.002722860004096894, 'reg_lambda': 0.3095416105134431, 'min_split_gain': 0.045594663951182314}. Best is trial 36 with value: 0.6964347470013181.


[300]	valid_0's average_precision: 0.679321
Early stopping, best iteration is:
[102]	valid_0's average_precision: 0.690488
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.659568
[100]	valid_0's average_precision: 0.674276
[150]	valid_0's average_precision: 0.683339
[200]	valid_0's average_precision: 0.685121
[250]	valid_0's average_precision: 0.683872
[300]	valid_0's average_precision: 0.683449
[350]	valid_0's average_precision: 0.684369


[I 2025-12-09 14:46:48,252] Trial 38 finished with value: 0.6856806196633409 and parameters: {'learning_rate': 0.09439820685378629, 'n_estimators': 2000, 'num_leaves': 18, 'max_depth': 4, 'min_child_samples': 144, 'subsample': 0.7064279998953301, 'colsample_bytree': 0.7057054573600376, 'reg_alpha': 0.014523636493984624, 'reg_lambda': 0.9084844724424889, 'min_split_gain': 0.1276135795342389}. Best is trial 36 with value: 0.6964347470013181.


Early stopping, best iteration is:
[194]	valid_0's average_precision: 0.685681
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.676086
[100]	valid_0's average_precision: 0.685383
[150]	valid_0's average_precision: 0.684673
[200]	valid_0's average_precision: 0.684529
[250]	valid_0's average_precision: 0.684529


[I 2025-12-09 14:46:49,092] Trial 39 finished with value: 0.6863925291944197 and parameters: {'learning_rate': 0.10444636959696961, 'n_estimators': 2500, 'num_leaves': 55, 'max_depth': 6, 'min_child_samples': 56, 'subsample': 0.6047426688945986, 'colsample_bytree': 0.6359589101899661, 'reg_alpha': 0.00043006707647222093, 'reg_lambda': 0.00015744078163520723, 'min_split_gain': 0.35983464423894834}. Best is trial 36 with value: 0.6964347470013181.


[300]	valid_0's average_precision: 0.684529
Early stopping, best iteration is:
[132]	valid_0's average_precision: 0.686393
[OPTUNA] Mejor AP(val): 0.696435
[OPTUNA] Params ganadores: {'learning_rate': 0.10766792175362039, 'n_estimators': 2750, 'num_leaves': 34, 'max_depth': 4, 'min_child_samples': 109, 'subsample': 0.7334701365259861, 'colsample_bytree': 0.7909280979547718, 'reg_alpha': 0.0008825496524176745, 'reg_lambda': 0.2832044524407928, 'min_split_gain': 0.009848973222124005}
[OPTUNA] best_iteration (del trial): 236
[OPTUNA] Guardado BEST en: BEST_LGBM_FULL_SMOTENC.json
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.669681
[100]	valid_0's average_precision: 0.68309
[150]	valid_0's average_precision: 0.691147
[200]	valid_0's average_precision: 0.694647
[250]	valid_0's average_precision: 0.69566
[300]	valid_0's average_precision: 0.69594
[350]	valid_0's average_precision: 0.693782
[400]	valid_0's average_precision: 0.692243
Early s

8 — Cross-Validation (OOF) para baseline y tuned

In [60]:
def run_oof_cv(model_params, X, y, k_folds=CV_FOLDS, seed=RANDOM_STATE, exp_suffix="BASELINE", categorical_mask=None):
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=seed)
    oof_proba = np.zeros_like(y, dtype=float)
    fold_rows = []

    base_params = dict(model_params)
    base_params.pop("verbose", None)
    base_params.setdefault("verbosity", -1)
    base_params.setdefault("metric", "average_precision")

    _fit_kwargs_local = {}
    _callbacks_local = []
    if _LGBM_CB_OK:
        _callbacks_local = [early_stopping(stopping_rounds=200), log_evaluation(period=50)]
    else:
        _fit_kwargs_local["early_stopping_rounds"] = 200

    for f, (tr_idx, va_idx) in enumerate(skf.split(X, y), 1):
        X_tr, X_va = X[tr_idx], X[va_idx]
        y_tr, y_va = y[tr_idx], y[va_idx]

        # Balanceo SOLO en train del fold
        if USE_BALANCED_TRAIN and BALANCE_IN_CV:
            X_tr, y_tr = maybe_resample(X_tr, y_tr, categorical_mask=categorical_mask)

        mdl = LGBMClassifier(**base_params)
        _fit_call = dict(
            eval_set=[(X_va, y_va)],
            eval_metric="aucpr",
            **_fit_kwargs_local
        )
        if _LGBM_CB_OK:
            _fit_call["callbacks"] = _callbacks_local

        mdl.fit(X_tr, y_tr, **_fit_call)
        proba_va = pred_proba_best(mdl, X_va)
        oof_proba[va_idx] = proba_va

        fold_rows.append({
            "fold": f,
            "pr_auc": average_precision_score(y_va, proba_va),
            "roc_auc": roc_auc_score(y_va, proba_va)
        })

    oof_pr = average_precision_score(y, oof_proba)
    oof_roc = roc_auc_score(y, oof_proba)
    thr_oof, _ = find_best_threshold(y, oof_proba, metric="f1")
    y_oof_pred = (oof_proba >= thr_oof).astype(int)
    oof_f1  = f1_score(y, y_oof_pred, zero_division=0)
    oof_rec = recall_score(y, y_oof_pred, zero_division=0)
    oof_bal = balanced_accuracy_score(y, y_oof_pred)

    cv_tag = f"{EXP_NAME}_{exp_suffix}_CV{CV_FOLDS}"
    cv_csv = OUT_RESULTS / f"cv_summary_{cv_tag}.csv"
    folds_df = pd.DataFrame(fold_rows)
    agg_row = pd.DataFrame([{
        "fold": "OOF", "pr_auc": oof_pr, "roc_auc": oof_roc,
        "thr": thr_oof, "f1": oof_f1, "recall": oof_rec, "bal_acc": oof_bal
    }])
    cv_df = pd.concat([folds_df, agg_row], ignore_index=True)
    cv_df.to_csv(cv_csv, index=False)

    oof_path = OUT_PREDS / f"oof_{cv_tag}.parquet"
    pd.DataFrame({"oof_proba": oof_proba, "y_true": y}).to_parquet(oof_path, index=False)

    print(f"[CV-{exp_suffix}] Guardados: {cv_csv.name} | {oof_path.name}")
    return {"oof_pr_auc": oof_pr, "oof_roc_auc": oof_roc, "thr": thr_oof,
            "oof_f1": oof_f1, "oof_recall": oof_rec, "oof_bal_acc": oof_bal}

9 — Evaluación en test + guardados

In [61]:
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, average_precision_score, roc_curve, roc_auc_score, confusion_matrix

def plot_pr_curve(y_true, y_proba, title, out_path):
    prec, rec, _ = precision_recall_curve(y_true, y_proba)
    ap = average_precision_score(y_true, y_proba)
    plt.figure(figsize=(6,5))
    plt.step(rec, prec, where='post')
    plt.xlabel('Recall'); plt.ylabel('Precision')
    plt.title(f'{title} (AP={ap:.4f})')
    plt.grid(True, linestyle='--', alpha=.3)
    plt.tight_layout()
    plt.savefig(out_path, dpi=150); plt.close()

def plot_roc_curve(y_true, y_proba, title, out_path):
    fpr, tpr, _ = roc_curve(y_true, y_proba)
    auc = roc_auc_score(y_true, y_proba)
    plt.figure(figsize=(6,5))
    plt.plot(fpr, tpr, lw=2)
    plt.plot([0,1],[0,1], 'k--', lw=1)
    plt.xlabel('False Positive Rate'); plt.ylabel('True Positive Rate')
    plt.title(f'{title} (AUC={auc:.4f})')
    plt.grid(True, linestyle='--', alpha=.3)
    plt.tight_layout()
    plt.savefig(out_path, dpi=150); plt.close()

def plot_confusion(y_true, y_pred, title, out_path, normalize=False):
    norm = 'true' if normalize else None
    cm = confusion_matrix(y_true, y_pred, normalize=norm)
    plt.figure(figsize=(5,4))
    im = plt.imshow(cm, interpolation='nearest', cmap='Blues')
    plt.title(title)
    plt.colorbar(im, fraction=0.046, pad=0.04)
    ticks = np.arange(2)
    plt.xticks(ticks, ['0','1']); plt.yticks(ticks, ['0','1'])
    thresh = cm.max()/2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            txt = f'{cm[i,j]:.2f}' if normalize else str(cm[i,j])
            plt.text(j, i, txt, ha='center', va='center',
                     color='white' if cm[i,j] > thresh else 'black')
    plt.ylabel('True label'); plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.savefig(out_path, dpi=150); plt.close()

base = EXP_NAME

# === BASELINE ===
proba_val = pred_proba_best(model, X_val_fit)
proba_test = pred_proba_best(model, X_test_fit)

# CV baseline usando las features y máscara efectivas
cv_base = run_oof_cv(seed_params, X_train_fit, y_train, exp_suffix="BASELINE", categorical_mask=CAT_MASK_USED)
thr_oof = cv_base["thr"]
thr_used = thr_oof

y_pred_test = (proba_test >= thr_used).astype(int)
test_metrics = compute_all_metrics(y_test, proba_test, thr_used)

# Guardar HP baseline
params_seed_path = OUT_PARAMS / f"{base}_BASE_seed_params.json"
with open(params_seed_path, "w", encoding="utf-8") as f:
    json.dump(seed_params, f, indent=2, ensure_ascii=False)

params_fitted_path = OUT_PARAMS / f"{base}_BASE_fitted_params.json"
with open(params_fitted_path, "w", encoding="utf-8") as f:
    json.dump(model.get_params(), f, indent=2, ensure_ascii=False)

# Figuras baseline
plot_pr_curve(y_val,  proba_val,  f"{base} — PR (val)",  OUT_FIGS / f"{base}_pr_val.png")
plot_pr_curve(y_test, proba_test, f"{base} — PR (test)", OUT_FIGS / f"{base}_pr_test.png")
plot_roc_curve(y_val,  proba_val,  f"{base} — ROC (val)",  OUT_FIGS / f"{base}_roc_val.png")
plot_roc_curve(y_test, proba_test, f"{base} — ROC (test)", OUT_FIGS / f"{base}_roc_test.png")
plot_confusion(y_test, y_pred_test,
               f"{base} — Confusion (test @thr_used={thr_used:.3f})",
               OUT_FIGS / f"{base}_cm_test.png")

# Importancias baseline
try:
    imp_gain = model.booster_.feature_importance(importance_type="gain")
except Exception:
    imp_gain = model.feature_importances_
imp_df = pd.DataFrame({"feature": feature_names_used[:len(imp_gain)], "importance_gain": imp_gain})\
         .sort_values("importance_gain", ascending=False)
imp_path = OUT_RESULTS / f"{base}_feature_importances.csv"
imp_df.to_csv(imp_path, index=False)

# Preds test baseline
preds_path = OUT_PREDS / f"preds_test_{base}.parquet"
pd.DataFrame({"proba": proba_test, "y_true": y_test}).to_parquet(preds_path, index=False)

# Registro de resultados baseline
row_base = {
    "model": base,
    "thr_val": thr_val,
    "thr_oof": thr_oof,
    "thr_used": thr_used,
    "val_pr_auc": val_metrics["pr_auc"],
    "val_roc_auc": val_metrics["roc_auc"],
    "val_precision": val_metrics["precision"],
    "val_f1": val_metrics["f1"],
    "val_recall": val_metrics["recall"],
    "val_bal_acc": val_metrics["bal_acc"],
    "test_pr_auc": test_metrics["pr_auc"],
    "test_roc_auc": test_metrics["roc_auc"],
    "test_precision": test_metrics["precision"],
    "test_f1": test_metrics["f1"],
    "test_recall": test_metrics["recall"],
    "test_bal_acc": test_metrics["bal_acc"],
    "best_iteration": getattr(model, "best_iteration_", None)
}
res_csv = OUT_RESULTS / "baselines.csv"
pd.DataFrame([row_base]).to_csv(
    res_csv,
    mode=("a" if res_csv.exists() else "w"),
    index=False,
    header=not res_csv.exists()
)

print("[OK][BASE] Guardados:",
      "\n  - Seed HPs   :", params_seed_path.name,
      "\n  - Fitted HPs :", params_fitted_path.name,
      "\n  - Importancias:", imp_path.name,
      "\n  - Preds test  :", preds_path.name,
      "\n  - Baselines   :", res_csv.name)

# === TUNED ===
if tuned_model is not None:
    proba_val_tuned  = pred_proba_best(tuned_model, X_val_fit)
    thr_val_tuned, _ = find_best_threshold(y_val, proba_val_tuned, metric="f1")
    val_metrics_tuned = compute_all_metrics(y_val, proba_val_tuned, thr_val_tuned)

    proba_test_tuned = pred_proba_best(tuned_model, X_test_fit)

    # CV para tuned
    cv_tuned = run_oof_cv(tuned_model.get_params(), X_train_fit, y_train, exp_suffix="TUNED", categorical_mask=CAT_MASK_USED)
    thr_oof_tuned = cv_tuned["thr"]
    thr_used_tuned = thr_oof_tuned

    y_pred_test_tuned = (proba_test_tuned >= thr_used_tuned).astype(int)
    test_metrics_tuned = compute_all_metrics(y_test, proba_test_tuned, thr_used_tuned)

    tuned_fitted_path = OUT_PARAMS / f"{base}_TUNED_fitted_params.json"
    with open(tuned_fitted_path, "w", encoding="utf-8") as f:
        json.dump(tuned_model.get_params(), f, indent=2, ensure_ascii=False)

    base_t = base + "_TUNED"
    plot_pr_curve(y_val,  proba_val_tuned,  f"{base_t} — PR (val)",  OUT_FIGS / f"{base_t}_pr_val.png")
    plot_pr_curve(y_test, proba_test_tuned, f"{base_t} — PR (test)", OUT_FIGS / f"{base_t}_pr_test.png")
    plot_roc_curve(y_val,  proba_val_tuned,  f"{base_t} — ROC (val)",  OUT_FIGS / f"{base_t}_roc_val.png")
    plot_roc_curve(y_test, proba_test_tuned, f"{base_t} — ROC (test)", OUT_FIGS / f"{base_t}_roc_test.png")
    plot_confusion(y_test, y_pred_test_tuned,
                   f"{base_t} — Confusion (test @thr_used={thr_used_tuned:.3f})",
                   OUT_FIGS / f"{base_t}_cm_test.png")

    try:
        imp_gain_t = tuned_model.booster_.feature_importance(importance_type="gain")
    except Exception:
        imp_gain_t = tuned_model.feature_importances_
    imp_t_path = OUT_RESULTS / f"{base_t}_feature_importances.csv"
    pd.DataFrame({"feature": feature_names_used[:len(imp_gain_t)], "importance_gain": imp_gain_t})\
      .sort_values("importance_gain", ascending=False)\
      .to_csv(imp_t_path, index=False)

    preds_t_path = OUT_PREDS / f"preds_test_{base_t}.parquet"
    pd.DataFrame({"proba": proba_test_tuned, "y_true": y_test}).to_parquet(preds_t_path, index=False)

    row_t = {
        "model": base_t,
        "thr_val": thr_val_tuned,
        "thr_oof": thr_oof_tuned,
        "thr_used": thr_used_tuned,
        "val_pr_auc": val_metrics_tuned["pr_auc"],
        "val_roc_auc": val_metrics_tuned["roc_auc"],
        "val_precision": val_metrics_tuned["precision"],
        "val_f1": val_metrics_tuned["f1"],
        "val_recall": val_metrics_tuned["recall"],
        "val_bal_acc": val_metrics_tuned["bal_acc"],
        "test_pr_auc": test_metrics_tuned["pr_auc"],
        "test_roc_auc": test_metrics_tuned["roc_auc"],
        "test_precision": test_metrics_tuned["precision"],
        "test_f1": test_metrics_tuned["f1"],
        "test_recall": test_metrics_tuned["recall"],
        "test_bal_acc": test_metrics_tuned["bal_acc"],
        "best_iteration": getattr(tuned_model, "best_iteration_", None)
    }
    pd.DataFrame([row_t]).to_csv(
        res_csv,
        mode=("a" if res_csv.exists() else "w"),
        index=False,
        header=not res_csv.exists()
    )

    print("[OK][TUNED] Guardados:",
          "\n  - Fitted HPs :", tuned_fitted_path.name,
          "\n  - Importancias:", imp_t_path.name,
          "\n  - Preds test  :", preds_t_path.name,
          "\n  - Baselines   :", res_csv.name)

Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.676361
[100]	valid_0's average_precision: 0.677282
[150]	valid_0's average_precision: 0.67827
[200]	valid_0's average_precision: 0.676592
[250]	valid_0's average_precision: 0.675391
Early stopping, best iteration is:
[56]	valid_0's average_precision: 0.680367
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.680816
[100]	valid_0's average_precision: 0.67861
[150]	valid_0's average_precision: 0.675773
[200]	valid_0's average_precision: 0.674951
[250]	valid_0's average_precision: 0.677869
Early stopping, best iteration is:
[66]	valid_0's average_precision: 0.682887
Training until validation scores don't improve for 200 rounds
[50]	valid_0's average_precision: 0.697127
[100]	valid_0's average_precision: 0.711845
[150]	valid_0's average_precision: 0.712725
[200]	valid_0's average_precision: 0.714068
[250]	valid_0's average_precision: 0.711009
[30

10 - Mejores Resultados

In [62]:
AGGREGATE_ALL_RUNS = False

def safe(v, fmt=".4f"):
    try:
        return f"{float(v):{fmt}}"
    except Exception:
        return "NA"

base_csv = OUT_RESULTS / "baselines.csv"
if not base_csv.exists():
    raise FileNotFoundError(f"No existe {base_csv}")

df = pd.read_csv(base_csv)

# Esquema final 
needed = [
    "model",
    "thr_val", "thr_oof", "thr_used",
    "val_pr_auc","val_roc_auc","val_precision","val_f1","val_recall","val_bal_acc",
    "test_pr_auc","test_roc_auc","test_precision","test_f1","test_recall","test_bal_acc",
    "best_iteration"
]

# Columnas faltantes
for c in needed:
    if c not in df.columns:
        df[c] = pd.NA

# Ordena
df = df[needed].copy()

# Convierte numéricas
num_cols = [c for c in needed if c not in ("model",)]
for c in num_cols:
    df[c] = pd.to_numeric(df[c], errors="coerce")

if AGGREGATE_ALL_RUNS:
    root_art = ARTIF_DIR.parent
    for p in (root_art).glob("LGBM_*/results/baselines.csv"):
        if p == base_csv:
            continue
        try:
            d2 = pd.read_csv(p)
            for c in needed:
                if c not in d2.columns:
                    d2[c] = pd.NA
            d2 = d2[needed]
            for c in num_cols:
                d2[c] = pd.to_numeric(d2[c], errors="coerce")
            df = pd.concat([df, d2], ignore_index=True)
        except Exception:
            pass

if df.empty:
    raise ValueError("El dataframe de resultados está vacío.")

def best_by(metric):
    if metric not in df.columns or df[metric].dropna().empty:
        return None
    r = df.loc[df[metric].idxmax()]
    print(
        f"- {metric}: {r['model']} | "
        f"PR-AUC={safe(r['test_pr_auc'])} | "
        f"ROC-AUC={safe(r['test_roc_auc'])} | "
        f"F1={safe(r['test_f1'])} | "
        f"Recall={safe(r['test_recall'])} | "
        f"Precision={safe(r['test_precision'])} | "
        f"thr_used={safe(r['thr_used'], '.3f')} | "
        f"best_iter={int(r['best_iteration']) if pd.notna(r['best_iteration']) else 'NA'}"
    )
    return r

print("=== MEJORES EN TEST (por métrica) ===")
winners = {}
for m in ["test_pr_auc","test_roc_auc","test_recall","test_f1","test_precision"]:
    w = best_by(m)
    if w is not None: winners[m] = w

cv_files = list(OUT_RESULTS.glob("cv_summary_*_CV*.csv"))
if cv_files:
    print("\n=== RESUMEN CV-OOF (por experimento) ===")
    rows = []
    for f in cv_files:
        tag = re.sub(r"^cv_summary_|\.csv$", "", f.name)
        cv = pd.read_csv(f)
        oof = cv.loc[cv["fold"] == "OOF"]
        if not oof.empty:
            r = oof.iloc[0]
            rows.append({
                "tag": tag,
                "pr_auc": r.get("pr_auc"),
                "roc_auc": r.get("roc_auc"),
                "f1": r.get("f1"),
                "recall": r.get("recall"),
                "bal_acc": r.get("bal_acc"),
                "thr": r.get("thr"),
            })
    if rows:
        print(pd.DataFrame(rows).sort_values(["pr_auc","roc_auc"], ascending=False).to_string(index=False))
else:
    print("\n(No se hallaron archivos de CV para este experimento)")

# SOTA
SOTA = {"LGBM": {"AUC": 0.914, "Recall": 0.881, "Precision": 0.948}}
if "test_roc_auc" in winners:
    bt = winners["test_roc_auc"]
    d_auc = float(bt["test_roc_auc"]) - SOTA["LGBM"]["AUC"]
    d_rec = float(bt["test_recall"])   - SOTA["LGBM"]["Recall"]
    print("\n=== COMPARACIÓN SOTA vs. MEJOR TEST ===")
    print(f"Paper LGBM: AUC={SOTA['LGBM']['AUC']:.3f} | Recall={SOTA['LGBM']['Recall']:.3f} | Precision={SOTA['LGBM']['Precision']:.3f}")
    print(f"Mejor  : AUC={safe(bt['test_roc_auc'])} | Recall={safe(bt['test_recall'])} | Precision={safe(bt['test_precision'])}")
    print(f"Deltas    : ΔAUC={d_auc:+.3f} | ΔRecall={d_rec:+.3f}")

# Normalización con backup
backup = OUT_RESULTS / "baselines_legacy_backup.csv"
base_csv.replace(backup)
df.to_csv(base_csv, index=False)
print("[OK] Normalizado. Backup:", backup.name)

=== MEJORES EN TEST (por métrica) ===
- test_pr_auc: LGBM_FULL_SMOTENC_TUNED | PR-AUC=0.7037 | ROC-AUC=0.8599 | F1=0.6291 | Recall=0.5897 | Precision=0.6742 | thr_used=0.527 | best_iter=236
- test_roc_auc: LGBM_FULL_SMOTENC | PR-AUC=0.6979 | ROC-AUC=0.8617 | F1=0.6235 | Recall=0.5921 | Precision=0.6585 | thr_used=0.525 | best_iter=53
- test_recall: LGBM_FULL_SMOTENC | PR-AUC=0.6979 | ROC-AUC=0.8617 | F1=0.6235 | Recall=0.5921 | Precision=0.6585 | thr_used=0.525 | best_iter=53
- test_f1: LGBM_FULL_SMOTENC_TUNED | PR-AUC=0.7037 | ROC-AUC=0.8599 | F1=0.6291 | Recall=0.5897 | Precision=0.6742 | thr_used=0.527 | best_iter=236
- test_precision: LGBM_FULL_SMOTENC_TUNED | PR-AUC=0.7037 | ROC-AUC=0.8599 | F1=0.6291 | Recall=0.5897 | Precision=0.6742 | thr_used=0.527 | best_iter=236

=== RESUMEN CV-OOF (por experimento) ===
                           tag   pr_auc  roc_auc       f1   recall  bal_acc   thr
   LGBM_FULL_SMOTENC_TUNED_CV5 0.680747 0.851699 0.614302 0.642682 0.763773 0.527
LGBM_FULL_