# High-ROC AUC Tabular Neural Net (PyTorch) — Stratified K-Fold

This notebook is auto-generated to target **ROC AUC** for the Kaggle Playground S5E8 "Binary Classification with a Bank Dataset".  
It uses:
- A **neural network** (PyTorch) with **categorical embeddings** + **numeric standardization**
- **Stratified K-Fold** CV with **OOF** predictions
- **Early stopping** + **LR scheduler**
- Organized, timestamped **run folders** with metrics, models, logs, and **graphs**

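> Tip: Put `train.csv` and `test.csv` in a `playground-series-s5e8/` folder next to this notebook, or point `TRAIN_PATH` / `TEST_PATH` in the **CONFIG** section at wherever they live. Also edit **CONFIG** if your ID or target column names differ.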


In [9]:
# =========================
# CONFIG
# =========================
# Every tunable of the run lives in this cell; later cells only read these names.
COMPETITION_NAME = "playground-series-s5e8"  # label only
ID_COL = "id"        # row identifier: excluded from the features, written to OOF/submission files
TARGET_COL = "y"     # binary target column in the training CSV

# Files
TRAIN_PATH = "playground-series-s5e8/train.csv"
TEST_PATH = "playground-series-s5e8/test.csv"  # must contain ID_COL and every feature column of train

# CV
N_SPLITS = 5         # number of stratified folds (also the number of models averaged for the submission)
RANDOM_SEED = 2025   # seeds Python/NumPy/PyTorch and the fold shuffling

# Model / Training
BATCH_SIZE = 4096
EPOCHS = 100                 # upper bound on epochs per fold; early stopping may end a fold sooner
PATIENCE = 12                # early stopping
BASE_LR = 1e-3               # initial learning rate handed to the optimizer
WEIGHT_DECAY = 1e-5
HIDDEN_LAYERS = [512, 256, 128]   # widths of the MLP hidden layers, in order
DROPOUT = 0.2                # dropout probability applied after each hidden layer
USE_CLASS_WEIGHTS = True     # when True the loss up-weights positives by the negative/positive ratio

# Output (auto-filled below)
# These placeholders are overwritten with real paths by the "IMPORTS & FOLDERS" cell.
RUN_STAMP = None
RUN_DIR = None               # root for this run
FIGS_DIR = None
FOLDS_DIR = None
LOGS_DIR = None
MODELS_DIR = None
ARTIFACTS_DIR = None

VERBOSE_EVERY = 1            # print the per-epoch metrics line every this many epochs
SAVE_OOF = True              # write oof_predictions.csv into the run folder


In [10]:
# =========================
# IMPORTS & FOLDERS
# =========================
import os, gc, math, random, json, time
from pathlib import Path
from typing import List, Tuple, Dict

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, RocCurveDisplay
from sklearn.preprocessing import StandardScaler, LabelEncoder

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

def set_seed(seed: int = 42):
    """Make the run repeatable by seeding every RNG the notebook relies on.

    Seeds, in order, Python's ``random``, NumPy's legacy global generator,
    PyTorch's CPU generator and all CUDA generators, and switches cuDNN to
    deterministic mode with benchmark mode turned off.

    Args:
        seed: Integer seed shared by all generators.
    """
    # cuDNN flags are independent of the seeding calls below.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Seed every RNG before anything stochastic runs.
set_seed(RANDOM_SEED)

# Prefer the GPU when one is visible; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

# One timestamped root per run, with a fixed set of sub-folders beneath it.
RUN_STAMP = time.strftime("%Y-%m-%d_%H-%M-%S")
RUN_DIR = Path("runs") / RUN_STAMP
FIGS_DIR, FOLDS_DIR, LOGS_DIR, MODELS_DIR, ARTIFACTS_DIR = (
    RUN_DIR / leaf for leaf in ("figs", "folds", "logs", "models", "artifacts")
)
for run_path in (RUN_DIR, FIGS_DIR, FOLDS_DIR, LOGS_DIR, MODELS_DIR, ARTIFACTS_DIR):
    run_path.mkdir(parents=True, exist_ok=True)

# Snapshot of the settings used, stored next to the run's outputs.
cfg = {
    "COMPETITION_NAME": COMPETITION_NAME,
    "ID_COL": ID_COL,
    "TARGET_COL": TARGET_COL,
    "TRAIN_PATH": TRAIN_PATH,
    "TEST_PATH": TEST_PATH,
    "N_SPLITS": N_SPLITS,
    "RANDOM_SEED": RANDOM_SEED,
    "BATCH_SIZE": BATCH_SIZE,
    "EPOCHS": EPOCHS,
    "PATIENCE": PATIENCE,
    "BASE_LR": BASE_LR,
    "WEIGHT_DECAY": WEIGHT_DECAY,
    "HIDDEN_LAYERS": HIDDEN_LAYERS,
    "DROPOUT": DROPOUT,
    "USE_CLASS_WEIGHTS": USE_CLASS_WEIGHTS,
    "RUN_STAMP": RUN_STAMP,
}
with open(RUN_DIR / "config.json", "w") as cfg_file:
    cfg_file.write(json.dumps(cfg, indent=2))

print("Run folder:", RUN_DIR.as_posix())


Device: cpu
Run folder: runs/2025-08-12_09-12-57


In [11]:
# =========================
# LOAD DATA
# =========================
# Read both CSVs named in the CONFIG cell.
train, test = pd.read_csv(TRAIN_PATH), pd.read_csv(TEST_PATH)
print("Train:", train.shape, "Test:", test.shape)

# Fail early if the configured column names do not match the files.
assert TARGET_COL in train.columns, f"TARGET_COL '{TARGET_COL}' missing"
assert ID_COL in train.columns, f"ID_COL '{ID_COL}' missing"
assert ID_COL in test.columns, f"ID_COL '{ID_COL}' missing in test"

# Features are every training column except the target and the row id,
# kept in file order; each of them must also exist in the test file.
feature_cols = []
for column_name in train.columns:
    if column_name == TARGET_COL or column_name == ID_COL:
        continue
    feature_cols.append(column_name)

test_columns = test.columns
missing_in_test = [name for name in feature_cols if name not in test_columns]
assert not missing_in_test, f"Missing features in test: {missing_in_test}"

y = train[TARGET_COL].values
positive_rate = y.mean()
print("Target positive rate:", positive_rate.round(6))


Train: (750000, 18) Test: (250000, 17)
Target positive rate: 0.120651


### Feature typing & preprocessing

- **Categoricals**: object dtype or low-cardinality integers → label-encoded + **embeddings**  
- **Numerics**: standardized via `StandardScaler`  
- Rare categories combined into `__RARE__` for stability


In [12]:
# =========================
# FEATURE TYPING
# =========================
# Split the features into categorical and numeric groups:
#   - object-dtype columns are categorical,
#   - integer columns with at most 30 distinct training values are categorical,
#   - everything else is numeric.
obj_cols, lowcard_int_cols = [], []
for col in feature_cols:
    col_dtype = train[col].dtype
    if col_dtype == 'object':
        obj_cols.append(col)
    if str(col_dtype).startswith('int') and train[col].nunique() <= 30:
        lowcard_int_cols.append(col)

# Both lists are sorted by name so the column order is stable between runs.
cat_cols = sorted(set(obj_cols) | set(lowcard_int_cols))
num_cols = sorted(col for col in feature_cols if col not in cat_cols)
print(f"Categoricals ({len(cat_cols)}):", cat_cols[:20])
print(f"Numerics ({len(num_cols)}):", num_cols[:20])

# Rare category handling
RARE_NAME = "__RARE__"   # replacement label for infrequent levels
MIN_CAT_COUNT = 25       # levels seen fewer times than this are collapsed

def apply_rare(series: pd.Series, min_count: int = MIN_CAT_COUNT) -> pd.Series:
    """Collapse infrequent values of ``series`` into the ``RARE_NAME`` label.

    Args:
        series: Column whose values are counted and filtered.
        min_count: Values occurring strictly fewer than this many times in
            ``series`` are replaced.

    Returns:
        A new Series of the same length and index; ``series`` is not modified.
    """
    frequency = series.value_counts()
    is_infrequent = (frequency < min_count).to_numpy()
    infrequent_levels = frequency.index[is_infrequent]
    return series.mask(series.isin(infrequent_levels), RARE_NAME)

# Label-encode the categoricals.
# The vocabulary of each column is decided by TRAIN only: rare levels are
# collapsed using training counts, and any test value that is not one of the
# resulting training levels is mapped to RARE_NAME as well. This guarantees
# that a given raw value gets the same integer code in both frames (collapsing
# train and test independently could send the same value to different codes).
# The encoder is still fit on train+test so RARE_NAME has a code even when it
# only occurs in test.
encoders = {}
for c in cat_cols:
    s_tr = apply_rare(train[c].astype(str)).fillna("NA")
    known_levels = set(s_tr.unique())
    s_te = test[c].astype(str).fillna("NA")
    s_te = s_te.where(s_te.isin(known_levels), RARE_NAME)

    le = LabelEncoder()
    le.fit(pd.concat([s_tr, s_te], axis=0))
    encoders[c] = le
    train[c] = le.transform(s_tr)
    test[c] = le.transform(s_te)

# Standardize numerics (fit on train only to avoid leakage)
scaler = None
if len(num_cols) > 0:
    scaler = StandardScaler()
    train[num_cols] = scaler.fit_transform(train[num_cols])
    test[num_cols] = scaler.transform(test[num_cols])

# Build categorical cardinalities list for embeddings.
# Codes run from 0 to len(classes_) - 1 over train+test, so the embedding table
# must be sized from the encoder, not from the number of distinct codes that
# happen to appear in train (which can be smaller and would make a larger code
# index past the end of the table).
cat_cardinalities = [int(len(encoders[c].classes_)) for c in cat_cols]
cat_cardinalities


Categoricals (9): ['contact', 'default', 'education', 'housing', 'job', 'loan', 'marital', 'month', 'poutcome']
Numerics (7): ['age', 'balance', 'campaign', 'day', 'duration', 'pdays', 'previous']


[3, 2, 4, 2, 12, 2, 3, 12, 4]

In [13]:
# =========================
# DATASET
# =========================
class TabDataset(Dataset):
    """Map-style dataset over a DataFrame split into numeric and categorical parts.

    Args:
        df: Frame holding the feature columns.
        y: Optional NumPy array of targets aligned with ``df``; ``None`` for
            unlabelled data.
        num_cols: Names of numeric columns (stored as float32). When empty or
            ``None`` a zero-width array is stored instead.
        cat_cols: Names of categorical code columns (stored as int64). When
            empty or ``None`` a zero-width array is stored instead.

    Items are ``(num_row, cat_row)`` without targets and
    ``(num_row, cat_row, target)`` with targets.
    """

    def __init__(self, df, y=None, num_cols=None, cat_cols=None):
        n_rows = len(df)

        if num_cols:
            self.num = df[num_cols].values.astype(np.float32)
        else:
            self.num = np.zeros((n_rows, 0), np.float32)

        if cat_cols:
            self.cat = df[cat_cols].values.astype(np.int64)
        else:
            self.cat = np.zeros((n_rows, 0), np.int64)

        self.y = None if y is None else y.astype(np.float32)

    def __len__(self):
        # The numeric block always has one row per sample, even when it is zero-width.
        return self.num.shape[0]

    def __getitem__(self, idx):
        sample = (self.num[idx], self.cat[idx])
        if self.y is not None:
            sample = sample + (self.y[idx],)
        return sample


In [14]:
# =========================
# MODEL
# =========================
class TabularNN(nn.Module):
    """MLP over concatenated categorical embeddings and numeric features.

    Args:
        num_dim: Number of numeric input features (0 disables the numeric branch).
        cat_cardinalities: Table size for each categorical column, in column
            order (an empty list disables the embedding branch).
        hidden_layers: Widths of the hidden Linear layers, in order.
        dropout: Dropout probability applied after every hidden ReLU.

    ``forward`` returns one logit per row (no sigmoid is applied).
    """

    def __init__(self, num_dim, cat_cardinalities, hidden_layers, dropout=0.2):
        super().__init__()
        self.has_cat = len(cat_cardinalities) > 0
        self.has_num = num_dim > 0

        # Embeddings: width follows a size heuristic clamped to [4, 50].
        emb_total = 0
        if self.has_cat:
            emb_widths = [
                int(min(50, max(4, round(1.6 * (card ** 0.56)))))
                for card in cat_cardinalities
            ]
            tables = [
                nn.Embedding(card, width)
                for card, width in zip(cat_cardinalities, emb_widths)
            ]
            self.emb_layers = nn.ModuleList(tables)
            emb_total = sum(emb_widths)

        numeric_width = num_dim if self.has_num else 0
        in_dim = numeric_width + emb_total

        # Hidden stack: Linear -> ReLU -> Dropout per entry, then a single-logit head.
        widths = [in_dim, *hidden_layers]
        blocks = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            blocks.extend((nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)))
        blocks.append(nn.Linear(widths[-1], 1))
        self.mlp = nn.Sequential(*blocks)

    def forward(self, x_num, x_cat):
        parts = []
        if self.has_cat:
            # Column i of x_cat is looked up in the i-th embedding table.
            looked_up = []
            for col, table in enumerate(self.emb_layers):
                looked_up.append(table(x_cat[:, col]))
            parts.append(torch.cat(looked_up, dim=1))
        if self.has_num:
            parts.append(x_num)

        if len(parts) > 1:
            fused = torch.cat(parts, dim=1)
        else:
            fused = parts[0]
        return self.mlp(fused).squeeze(1)


In [15]:
# =========================
# TRAINING HELPERS
# =========================
def epoch_loop(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Args:
        model: Module called as ``model(x_num, x_cat)`` and returning logits.
        loader: Iterable of batches. Labelled batches are
            ``(x_num, x_cat, y)``; unlabelled batches ``(x_num, x_cat)`` are
            accepted in evaluation mode only.
        criterion: Loss called as ``criterion(logit, y)``.
        optimizer: When not ``None`` the model is put in train mode and one
            optimizer step is taken per batch; otherwise the model is put in
            eval mode and gradients are disabled.

    Returns:
        ``(avg_loss, preds, y_true)`` where ``avg_loss`` is the unweighted mean
        of the per-batch losses (``None`` if no batch had labels), ``preds`` is
        a NumPy array of sigmoid probabilities for every row seen, and
        ``y_true`` is the concatenated labels (``None`` if there were none).

    Raises:
        ValueError: If a batch is not a 2- or 3-tuple, or has no labels while
            training.
    """
    is_train = optimizer is not None
    if is_train:
        model.train()
    else:
        model.eval()

    # Move batches to wherever the model's weights live instead of relying on
    # a notebook-level `device` global; fall back to CPU for parameter-free models.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    losses = []
    preds = []
    targs = []

    for batch in loader:
        # Decide labelled vs. unlabelled from the batch arity rather than by
        # catching every exception around the unpacking.
        if len(batch) == 3:
            x_num, x_cat, y = batch
        elif len(batch) == 2 and not is_train:
            x_num, x_cat = batch
            y = None
        else:
            raise ValueError(
                f"Unexpected batch of length {len(batch)}: expected (x_num, x_cat, y)"
                + ("" if is_train else " or (x_num, x_cat)")
            )

        x_num = x_num.to(run_device)
        x_cat = x_cat.to(run_device)
        if y is not None:
            y = y.to(run_device)

        with torch.set_grad_enabled(is_train):
            logit = model(x_num, x_cat)
            prob = torch.sigmoid(logit)
            loss = criterion(logit, y) if y is not None else None

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if loss is not None:
            losses.append(loss.item())
            targs.append(y.detach().cpu().numpy())
        preds.append(prob.detach().cpu().numpy())

    preds = np.concatenate(preds) if len(preds) > 0 else np.array([])
    y_true = np.concatenate(targs) if len(targs) > 0 else None
    avg_loss = float(np.mean(losses)) if losses else None
    return avg_loss, preds, y_true

class EarlyStopper:
    """Track the best metric seen so far and keep a CPU copy of the best weights.

    Args:
        patience: Number of consecutive non-improving ``step`` calls after
            which ``should_stop`` becomes true.
        mode: ``"max"`` if larger metrics are better; any other value is
            treated as smaller-is-better.
        min_delta: Margin by which a metric must beat the current best to
            count as an improvement.
    """

    def __init__(self, patience=10, mode="max", min_delta=1e-6):
        self.patience = patience
        self.mode = mode
        self.min_delta = min_delta
        self.count = 0            # consecutive steps without improvement
        self.best_state = None    # snapshot of the best state_dict, on CPU
        if mode == "max":
            self.best = -np.inf
        else:
            self.best = np.inf

    def step(self, metric, model):
        """Record ``metric``; return True (and snapshot ``model``) on improvement."""
        if self.mode == "max":
            improved = metric > self.best + self.min_delta
        else:
            improved = metric < self.best - self.min_delta

        if not improved:
            self.count += 1
            return False

        self.best = metric
        self.count = 0
        # Clone so later training steps cannot alter the stored weights.
        snapshot = {}
        for name, tensor in model.state_dict().items():
            snapshot[name] = tensor.cpu().clone()
        self.best_state = snapshot
        return True

    def should_stop(self):
        """Return True once ``patience`` non-improving steps have accumulated."""
        return self.count >= self.patience


In [16]:
# =========================
# STRATIFIED K-FOLD TRAINING
# =========================
# Shuffled stratified folds, reproducible through RANDOM_SEED.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_SEED)

# Out-of-fold predictions (one per training row) and one column of test
# predictions per fold; per-fold summary rows are collected in metrics_rows.
metrics_rows = []
oof = np.zeros(len(train), dtype=np.float32)
test_preds = np.zeros((len(test), N_SPLITS), dtype=np.float32)

# Optional positive-class weight for the loss: negatives / positives,
# with both the ratio and its denominator floored at 1e-6.
pos_weight = None
if USE_CLASS_WEIGHTS:
    pos_ratio = train[TARGET_COL].mean()
    neg_per_pos = (1.0 - pos_ratio) / max(1e-6, pos_ratio)
    pos_weight = torch.tensor([max(1e-6, neg_per_pos)], dtype=torch.float32, device=device)

# Pinned host memory only matters for host->GPU copies, so request it only
# when running on CUDA (asking for it on CPU is pointless).
use_pinned_memory = device.type == "cuda"

# The test frame is identical for every fold: build its dataset/loader once
# instead of re-materialising the arrays inside the loop.
te_ds = TabDataset(test, None, num_cols, cat_cols)
te_loader = DataLoader(te_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=use_pinned_memory)

for fold, (tr_idx, va_idx) in enumerate(skf.split(train[feature_cols], train[TARGET_COL])):
    print(f"\n===== Fold {fold} =====")
    tr_df = train.iloc[tr_idx].reset_index(drop=True)
    va_df = train.iloc[va_idx].reset_index(drop=True)

    tr_ds = TabDataset(tr_df, tr_df[TARGET_COL].values, num_cols, cat_cols)
    va_ds = TabDataset(va_df, va_df[TARGET_COL].values, num_cols, cat_cols)

    tr_loader = DataLoader(tr_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, pin_memory=use_pinned_memory)
    va_loader = DataLoader(va_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=use_pinned_memory)

    # A fresh model, loss, optimizer, scheduler and early stopper per fold.
    model = TabularNN(num_dim=len(num_cols), cat_cardinalities=cat_cardinalities, hidden_layers=HIDDEN_LAYERS, dropout=DROPOUT).to(device)

    if pos_weight is not None:
        criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    else:
        criterion = nn.BCEWithLogitsLoss()

    optimizer = torch.optim.AdamW(model.parameters(), lr=BASE_LR, weight_decay=WEIGHT_DECAY)
    # Halve the LR after 3 epochs without a validation-AUC improvement.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.5, patience=3)

    early = EarlyStopper(patience=PATIENCE, mode="max")

    fold_log = []
    best_auc = -np.inf

    for epoch in range(1, EPOCHS+1):
        tr_loss, _, _ = epoch_loop(model, tr_loader, criterion, optimizer=optimizer)
        va_loss, va_pred, va_true = epoch_loop(model, va_loader, criterion, optimizer=None)
        va_auc = roc_auc_score(va_true, va_pred)

        scheduler.step(va_auc)

        # Note: the logged LR is the value after the scheduler update for this epoch.
        fold_log.append(dict(epoch=epoch, train_loss=tr_loss, val_loss=va_loss, val_auc=float(va_auc), lr=float(optimizer.param_groups[0]['lr'])))

        if epoch % VERBOSE_EVERY == 0:
            print(f"Epoch {epoch:03d} | tr_loss={tr_loss:.5f} | va_loss={va_loss:.5f} | va_auc={va_auc:.6f} | lr={optimizer.param_groups[0]['lr']:.2e}")

        improved = early.step(va_auc, model)
        if improved:
            best_auc = va_auc

        if early.should_stop():
            print(f"Early stopping at epoch {epoch}. Best AUC: {best_auc:.6f}")
            break

    # load best weights (no snapshot exists if no epoch ever improved,
    # e.g. EPOCHS == 0; in that case keep the current weights)
    if early.best_state is not None:
        model.load_state_dict(early.best_state)

    # Save fold artifacts
    fold_dir = FOLDS_DIR / f"fold_{fold}"
    fold_dir.mkdir(exist_ok=True)

    # Save metrics log
    log_df = pd.DataFrame(fold_log)
    log_df.to_csv(fold_dir / "train_log.csv", index=False)

    # Save model
    torch.save(model.state_dict(), fold_dir / "model.pth")

    # Validation predictions (recomputed with the restored best weights)
    _, va_pred, va_true = epoch_loop(model, va_loader, criterion, optimizer=None)
    oof[va_idx] = va_pred.squeeze()

    # Test predictions
    _, te_pred, _ = epoch_loop(model, te_loader, criterion, optimizer=None)
    test_preds[:, fold] = te_pred.squeeze()

    # Make plots (ROC + learning curves)
    # ROC
    fig_roc, ax = plt.subplots()
    RocCurveDisplay.from_predictions(va_true, va_pred, ax=ax)
    ax.set_title(f"Fold {fold} ROC")
    fig_roc.savefig(fold_dir / "roc_curve.png", bbox_inches="tight")
    plt.close(fig_roc)

    # Learning curves
    fig_lc, ax = plt.subplots()
    ax.plot(log_df["epoch"], log_df["train_loss"], label="train_loss")
    ax.plot(log_df["epoch"], log_df["val_loss"], label="val_loss")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.set_title(f"Fold {fold} Loss")
    ax.legend()
    fig_lc.savefig(fold_dir / "loss_curve.png", bbox_inches="tight")
    plt.close(fig_lc)

    # AUC over epochs
    fig_auc, ax = plt.subplots()
    ax.plot(log_df["epoch"], log_df["val_auc"], label="val_auc")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("ROC AUC")
    ax.set_title(f"Fold {fold} Val AUC")
    ax.legend()
    fig_auc.savefig(fold_dir / "val_auc_curve.png", bbox_inches="tight")
    plt.close(fig_auc)

    metrics_rows.append(dict(fold=fold, best_val_auc=float(best_auc)))

# Aggregate metrics: per-fold best AUC rows plus one "OOF" row holding the AUC
# over all out-of-fold predictions. The frame is built in one go from the
# records instead of being enlarged row-by-row.
overall_auc = roc_auc_score(train[TARGET_COL].values, oof)
metrics_df = pd.DataFrame(metrics_rows + [dict(fold="OOF", best_val_auc=float(overall_auc))])
metrics_df.to_csv(RUN_DIR / "metrics.csv", index=False)
print("\nOOF ROC AUC:", overall_auc)

# Save OOF
if SAVE_OOF:
    oof_df = pd.DataFrame({ID_COL: train[ID_COL].values, "oof": oof, TARGET_COL: train[TARGET_COL].values})
    oof_df.to_csv(RUN_DIR / "oof_predictions.csv", index=False)

# Ensemble test preds: plain mean of the per-fold probabilities
test_pred_mean = test_preds.mean(axis=1)
sub = pd.DataFrame({ID_COL: test[ID_COL].values, TARGET_COL: test_pred_mean})
sub.to_csv(RUN_DIR / "submission.csv", index=False)
print("Submission saved to:", (RUN_DIR / "submission.csv").as_posix())

# Overall ROC curve, computed once over all OOF predictions
fig_all, ax = plt.subplots()
RocCurveDisplay.from_predictions(train[TARGET_COL].values, oof, ax=ax)
ax.set_title("OOF ROC Curve")
fig_all.savefig(FIGS_DIR / "oof_roc_curve.png", bbox_inches="tight")
plt.close(fig_all)

# Histogram of OOF probs
fig_hist, ax = plt.subplots()
ax.hist(oof, bins=50)
ax.set_title("OOF Prediction Distribution")
ax.set_xlabel("Predicted probability")
fig_hist.savefig(FIGS_DIR / "oof_pred_hist.png", bbox_inches="tight")
plt.close(fig_hist)

# Save quick README for the run
with open(RUN_DIR / "README.txt", "w") as f:
    f.write(
        "This folder contains outputs for a single run.\n"
        "- metrics.csv: per-fold best AUC + OOF AUC\n"
        "- oof_predictions.csv: OOF probabilities with IDs and targets\n"
        "- submission.csv: ready for Kaggle submit\n"
        "- folds/*: per-fold model.pth, training logs, and plots (ROC, losses, AUC)\n"
        "- figs/*: overall figures (OOF ROC, hist)\n"
        "- models/*, logs/*, artifacts/*: reserved for extras\n"
    )

print("All artifacts saved under:", RUN_DIR.as_posix())


===== Fold 0 =====




Epoch 001 | tr_loss=0.56081 | va_loss=0.48479 | va_auc=0.950139 | lr=1.00e-03




Epoch 002 | tr_loss=0.48116 | va_loss=0.46048 | va_auc=0.953995 | lr=1.00e-03




Epoch 003 | tr_loss=0.46457 | va_loss=0.44816 | va_auc=0.956861 | lr=1.00e-03




Epoch 004 | tr_loss=0.45323 | va_loss=0.44106 | va_auc=0.958306 | lr=1.00e-03




Epoch 005 | tr_loss=0.44729 | va_loss=0.43939 | va_auc=0.958996 | lr=1.00e-03




Epoch 006 | tr_loss=0.44338 | va_loss=0.43684 | va_auc=0.959726 | lr=1.00e-03




Epoch 007 | tr_loss=0.43813 | va_loss=0.43156 | va_auc=0.960174 | lr=1.00e-03




Epoch 008 | tr_loss=0.43579 | va_loss=0.43064 | va_auc=0.960332 | lr=1.00e-03




Epoch 009 | tr_loss=0.43400 | va_loss=0.43330 | va_auc=0.960480 | lr=1.00e-03




Epoch 010 | tr_loss=0.43114 | va_loss=0.42970 | va_auc=0.960326 | lr=1.00e-03




Epoch 011 | tr_loss=0.42959 | va_loss=0.42848 | va_auc=0.960940 | lr=1.00e-03




Epoch 012 | tr_loss=0.42759 | va_loss=0.42648 | va_auc=0.961084 | lr=1.00e-03




Epoch 013 | tr_loss=0.42587 | va_loss=0.43473 | va_auc=0.960974 | lr=1.00e-03




Epoch 014 | tr_loss=0.42567 | va_loss=0.42559 | va_auc=0.961277 | lr=1.00e-03




Epoch 015 | tr_loss=0.42318 | va_loss=0.42567 | va_auc=0.961230 | lr=1.00e-03




Epoch 016 | tr_loss=0.42223 | va_loss=0.42631 | va_auc=0.961478 | lr=1.00e-03




Epoch 017 | tr_loss=0.42071 | va_loss=0.42547 | va_auc=0.961505 | lr=1.00e-03




Epoch 018 | tr_loss=0.41909 | va_loss=0.42489 | va_auc=0.961495 | lr=1.00e-03




Epoch 019 | tr_loss=0.41856 | va_loss=0.42466 | va_auc=0.961616 | lr=1.00e-03




Epoch 020 | tr_loss=0.41744 | va_loss=0.42271 | va_auc=0.961864 | lr=1.00e-03




Epoch 021 | tr_loss=0.41471 | va_loss=0.42386 | va_auc=0.961749 | lr=1.00e-03




Epoch 022 | tr_loss=0.41525 | va_loss=0.42344 | va_auc=0.961829 | lr=1.00e-03




Epoch 023 | tr_loss=0.41295 | va_loss=0.43033 | va_auc=0.961917 | lr=1.00e-03




Epoch 024 | tr_loss=0.41299 | va_loss=0.42143 | va_auc=0.962145 | lr=1.00e-03




Epoch 025 | tr_loss=0.41091 | va_loss=0.42296 | va_auc=0.961818 | lr=1.00e-03




Epoch 026 | tr_loss=0.41065 | va_loss=0.42242 | va_auc=0.961986 | lr=1.00e-03




Epoch 027 | tr_loss=0.40898 | va_loss=0.42089 | va_auc=0.962314 | lr=1.00e-03




Epoch 028 | tr_loss=0.40795 | va_loss=0.42254 | va_auc=0.962353 | lr=1.00e-03




Epoch 029 | tr_loss=0.40582 | va_loss=0.42110 | va_auc=0.962506 | lr=1.00e-03




Epoch 030 | tr_loss=0.40577 | va_loss=0.42422 | va_auc=0.962241 | lr=1.00e-03




Epoch 031 | tr_loss=0.40361 | va_loss=0.42421 | va_auc=0.962354 | lr=1.00e-03




Epoch 032 | tr_loss=0.40360 | va_loss=0.42644 | va_auc=0.962236 | lr=1.00e-03




Epoch 033 | tr_loss=0.40163 | va_loss=0.42493 | va_auc=0.962090 | lr=5.00e-04




Epoch 034 | tr_loss=0.39534 | va_loss=0.42248 | va_auc=0.962472 | lr=5.00e-04




Epoch 035 | tr_loss=0.39362 | va_loss=0.42505 | va_auc=0.962310 | lr=5.00e-04




Epoch 036 | tr_loss=0.39305 | va_loss=0.42622 | va_auc=0.962466 | lr=5.00e-04




Epoch 037 | tr_loss=0.39167 | va_loss=0.42353 | va_auc=0.962561 | lr=2.50e-04




Epoch 038 | tr_loss=0.38680 | va_loss=0.42670 | va_auc=0.962426 | lr=2.50e-04




Epoch 039 | tr_loss=0.38531 | va_loss=0.42571 | va_auc=0.962382 | lr=2.50e-04




Epoch 040 | tr_loss=0.38563 | va_loss=0.42963 | va_auc=0.962388 | lr=2.50e-04




Epoch 041 | tr_loss=0.38489 | va_loss=0.42788 | va_auc=0.962427 | lr=1.25e-04




Epoch 042 | tr_loss=0.38234 | va_loss=0.42823 | va_auc=0.962375 | lr=1.25e-04




Epoch 043 | tr_loss=0.38101 | va_loss=0.42743 | va_auc=0.962482 | lr=1.25e-04




Epoch 044 | tr_loss=0.38102 | va_loss=0.42930 | va_auc=0.962401 | lr=1.25e-04




Epoch 045 | tr_loss=0.38122 | va_loss=0.42831 | va_auc=0.962399 | lr=6.25e-05




Epoch 046 | tr_loss=0.38009 | va_loss=0.42861 | va_auc=0.962368 | lr=6.25e-05




Epoch 047 | tr_loss=0.37975 | va_loss=0.42950 | va_auc=0.962365 | lr=6.25e-05




Epoch 048 | tr_loss=0.37845 | va_loss=0.42970 | va_auc=0.962331 | lr=6.25e-05




Epoch 049 | tr_loss=0.37856 | va_loss=0.43068 | va_auc=0.962309 | lr=3.13e-05
Early stopping at epoch 49. Best AUC: 0.962561





===== Fold 1 =====




Epoch 001 | tr_loss=0.56358 | va_loss=0.48233 | va_auc=0.949643 | lr=1.00e-03




Epoch 002 | tr_loss=0.47970 | va_loss=0.46206 | va_auc=0.953899 | lr=1.00e-03




Epoch 003 | tr_loss=0.46010 | va_loss=0.44676 | va_auc=0.957203 | lr=1.00e-03




Epoch 004 | tr_loss=0.44911 | va_loss=0.44170 | va_auc=0.958493 | lr=1.00e-03




Epoch 005 | tr_loss=0.44210 | va_loss=0.43679 | va_auc=0.959361 | lr=1.00e-03




Epoch 006 | tr_loss=0.43835 | va_loss=0.43684 | va_auc=0.959379 | lr=1.00e-03




Epoch 007 | tr_loss=0.43455 | va_loss=0.43293 | va_auc=0.960059 | lr=1.00e-03




Epoch 008 | tr_loss=0.43257 | va_loss=0.43166 | va_auc=0.960345 | lr=1.00e-03




Epoch 009 | tr_loss=0.42980 | va_loss=0.43345 | va_auc=0.960248 | lr=1.00e-03




Epoch 010 | tr_loss=0.42736 | va_loss=0.43001 | va_auc=0.960553 | lr=1.00e-03




Epoch 011 | tr_loss=0.42702 | va_loss=0.43013 | va_auc=0.960450 | lr=1.00e-03




Epoch 012 | tr_loss=0.42463 | va_loss=0.43100 | va_auc=0.960601 | lr=1.00e-03




Epoch 013 | tr_loss=0.42334 | va_loss=0.42811 | va_auc=0.960879 | lr=1.00e-03




Epoch 014 | tr_loss=0.42090 | va_loss=0.43049 | va_auc=0.960923 | lr=1.00e-03




Epoch 015 | tr_loss=0.42062 | va_loss=0.42930 | va_auc=0.960982 | lr=1.00e-03




Epoch 016 | tr_loss=0.41946 | va_loss=0.42838 | va_auc=0.960938 | lr=1.00e-03




Epoch 017 | tr_loss=0.41664 | va_loss=0.42677 | va_auc=0.961278 | lr=1.00e-03




Epoch 018 | tr_loss=0.41597 | va_loss=0.42676 | va_auc=0.961453 | lr=1.00e-03




Epoch 019 | tr_loss=0.41420 | va_loss=0.42844 | va_auc=0.961517 | lr=1.00e-03




Epoch 020 | tr_loss=0.41250 | va_loss=0.42594 | va_auc=0.961614 | lr=1.00e-03




Epoch 021 | tr_loss=0.41165 | va_loss=0.42606 | va_auc=0.961708 | lr=1.00e-03




Epoch 022 | tr_loss=0.41184 | va_loss=0.42532 | va_auc=0.961834 | lr=1.00e-03




Epoch 023 | tr_loss=0.41035 | va_loss=0.42464 | va_auc=0.961693 | lr=1.00e-03




Epoch 024 | tr_loss=0.40942 | va_loss=0.42318 | va_auc=0.962024 | lr=1.00e-03




Epoch 025 | tr_loss=0.40741 | va_loss=0.42330 | va_auc=0.961948 | lr=1.00e-03




Epoch 026 | tr_loss=0.40605 | va_loss=0.42594 | va_auc=0.962120 | lr=1.00e-03




Epoch 027 | tr_loss=0.40504 | va_loss=0.42642 | va_auc=0.961941 | lr=1.00e-03




Epoch 028 | tr_loss=0.40393 | va_loss=0.42505 | va_auc=0.962082 | lr=5.00e-04




Epoch 029 | tr_loss=0.39747 | va_loss=0.42486 | va_auc=0.962225 | lr=5.00e-04




Epoch 030 | tr_loss=0.39607 | va_loss=0.42498 | va_auc=0.962404 | lr=5.00e-04




Epoch 031 | tr_loss=0.39561 | va_loss=0.42554 | va_auc=0.962302 | lr=5.00e-04




Epoch 032 | tr_loss=0.39354 | va_loss=0.42503 | va_auc=0.962300 | lr=5.00e-04




Epoch 033 | tr_loss=0.39382 | va_loss=0.42817 | va_auc=0.962063 | lr=5.00e-04




Epoch 034 | tr_loss=0.39150 | va_loss=0.42794 | va_auc=0.962287 | lr=2.50e-04




Epoch 035 | tr_loss=0.38884 | va_loss=0.42801 | va_auc=0.962375 | lr=2.50e-04




Epoch 036 | tr_loss=0.38759 | va_loss=0.42834 | va_auc=0.962283 | lr=2.50e-04




Epoch 037 | tr_loss=0.38694 | va_loss=0.42470 | va_auc=0.962315 | lr=2.50e-04




Epoch 038 | tr_loss=0.38638 | va_loss=0.43011 | va_auc=0.962304 | lr=1.25e-04




Epoch 039 | tr_loss=0.38433 | va_loss=0.42583 | va_auc=0.962337 | lr=1.25e-04




Epoch 040 | tr_loss=0.38388 | va_loss=0.42975 | va_auc=0.962237 | lr=1.25e-04




Epoch 041 | tr_loss=0.38302 | va_loss=0.42676 | va_auc=0.962386 | lr=1.25e-04




Epoch 042 | tr_loss=0.38307 | va_loss=0.42882 | va_auc=0.962361 | lr=6.25e-05
Early stopping at epoch 42. Best AUC: 0.962404





===== Fold 2 =====




Epoch 001 | tr_loss=0.56429 | va_loss=0.48106 | va_auc=0.949930 | lr=1.00e-03




Epoch 002 | tr_loss=0.48063 | va_loss=0.46103 | va_auc=0.953946 | lr=1.00e-03




Epoch 003 | tr_loss=0.46237 | va_loss=0.44695 | va_auc=0.957325 | lr=1.00e-03




Epoch 004 | tr_loss=0.45220 | va_loss=0.43937 | va_auc=0.958719 | lr=1.00e-03




Epoch 005 | tr_loss=0.44594 | va_loss=0.43726 | va_auc=0.958983 | lr=1.00e-03




Epoch 006 | tr_loss=0.44171 | va_loss=0.43459 | va_auc=0.959856 | lr=1.00e-03




Epoch 007 | tr_loss=0.43789 | va_loss=0.43744 | va_auc=0.959876 | lr=1.00e-03




Epoch 008 | tr_loss=0.43602 | va_loss=0.43282 | va_auc=0.960189 | lr=1.00e-03




Epoch 009 | tr_loss=0.43277 | va_loss=0.43087 | va_auc=0.960234 | lr=1.00e-03




Epoch 010 | tr_loss=0.43073 | va_loss=0.43013 | va_auc=0.960477 | lr=1.00e-03




Epoch 011 | tr_loss=0.42957 | va_loss=0.42894 | va_auc=0.960554 | lr=1.00e-03




Epoch 012 | tr_loss=0.42761 | va_loss=0.43043 | va_auc=0.960415 | lr=1.00e-03




Epoch 013 | tr_loss=0.42630 | va_loss=0.42779 | va_auc=0.960909 | lr=1.00e-03




Epoch 014 | tr_loss=0.42506 | va_loss=0.42698 | va_auc=0.960814 | lr=1.00e-03




Epoch 015 | tr_loss=0.42327 | va_loss=0.42787 | va_auc=0.960863 | lr=1.00e-03




Epoch 016 | tr_loss=0.42189 | va_loss=0.42834 | va_auc=0.960763 | lr=1.00e-03




Epoch 017 | tr_loss=0.42093 | va_loss=0.42623 | va_auc=0.961311 | lr=1.00e-03




Epoch 018 | tr_loss=0.41989 | va_loss=0.42453 | va_auc=0.961321 | lr=1.00e-03




Epoch 019 | tr_loss=0.41715 | va_loss=0.42668 | va_auc=0.961288 | lr=1.00e-03




Epoch 020 | tr_loss=0.41680 | va_loss=0.42496 | va_auc=0.961400 | lr=1.00e-03




Epoch 021 | tr_loss=0.41538 | va_loss=0.42519 | va_auc=0.961404 | lr=5.00e-04




Epoch 022 | tr_loss=0.40992 | va_loss=0.42464 | va_auc=0.961541 | lr=5.00e-04




Epoch 023 | tr_loss=0.40875 | va_loss=0.42397 | va_auc=0.961692 | lr=5.00e-04




Epoch 024 | tr_loss=0.40750 | va_loss=0.42525 | va_auc=0.961760 | lr=5.00e-04




Epoch 025 | tr_loss=0.40602 | va_loss=0.42309 | va_auc=0.961699 | lr=5.00e-04




Epoch 026 | tr_loss=0.40545 | va_loss=0.42276 | va_auc=0.961856 | lr=5.00e-04




Epoch 027 | tr_loss=0.40424 | va_loss=0.42398 | va_auc=0.961693 | lr=5.00e-04




Epoch 028 | tr_loss=0.40353 | va_loss=0.42376 | va_auc=0.961948 | lr=5.00e-04




Epoch 029 | tr_loss=0.40220 | va_loss=0.42355 | va_auc=0.961817 | lr=5.00e-04




Epoch 030 | tr_loss=0.40154 | va_loss=0.42483 | va_auc=0.961744 | lr=2.50e-04




Epoch 031 | tr_loss=0.39880 | va_loss=0.42434 | va_auc=0.961858 | lr=2.50e-04




Epoch 032 | tr_loss=0.39680 | va_loss=0.42332 | va_auc=0.961971 | lr=2.50e-04




Epoch 033 | tr_loss=0.39591 | va_loss=0.42395 | va_auc=0.961927 | lr=2.50e-04




Epoch 034 | tr_loss=0.39645 | va_loss=0.42439 | va_auc=0.961934 | lr=2.50e-04




Epoch 035 | tr_loss=0.39445 | va_loss=0.42726 | va_auc=0.961889 | lr=2.50e-04




Epoch 036 | tr_loss=0.39421 | va_loss=0.42567 | va_auc=0.961991 | lr=1.25e-04




Epoch 037 | tr_loss=0.39203 | va_loss=0.42435 | va_auc=0.961963 | lr=1.25e-04




Epoch 038 | tr_loss=0.39192 | va_loss=0.42546 | va_auc=0.961948 | lr=1.25e-04




Epoch 039 | tr_loss=0.39072 | va_loss=0.42540 | va_auc=0.961896 | lr=1.25e-04




Epoch 040 | tr_loss=0.38970 | va_loss=0.42503 | va_auc=0.961994 | lr=6.25e-05




Epoch 041 | tr_loss=0.38988 | va_loss=0.42609 | va_auc=0.961966 | lr=6.25e-05




Epoch 042 | tr_loss=0.38879 | va_loss=0.42682 | va_auc=0.961969 | lr=6.25e-05




Epoch 043 | tr_loss=0.38906 | va_loss=0.42586 | va_auc=0.961959 | lr=6.25e-05




Epoch 044 | tr_loss=0.38841 | va_loss=0.42544 | va_auc=0.961958 | lr=3.13e-05




Epoch 045 | tr_loss=0.38790 | va_loss=0.42632 | va_auc=0.961988 | lr=3.13e-05




Epoch 046 | tr_loss=0.38714 | va_loss=0.42664 | va_auc=0.961972 | lr=3.13e-05




Epoch 047 | tr_loss=0.38848 | va_loss=0.42590 | va_auc=0.961961 | lr=3.13e-05




Epoch 048 | tr_loss=0.38837 | va_loss=0.42694 | va_auc=0.961981 | lr=1.56e-05




Epoch 049 | tr_loss=0.38764 | va_loss=0.42625 | va_auc=0.961979 | lr=1.56e-05




Epoch 050 | tr_loss=0.38771 | va_loss=0.42693 | va_auc=0.961978 | lr=1.56e-05




Epoch 051 | tr_loss=0.38688 | va_loss=0.42644 | va_auc=0.961961 | lr=1.56e-05




Epoch 052 | tr_loss=0.38684 | va_loss=0.42613 | va_auc=0.961967 | lr=7.81e-06
Early stopping at epoch 52. Best AUC: 0.961994





===== Fold 3 =====




Epoch 001 | tr_loss=0.55230 | va_loss=0.47401 | va_auc=0.950882 | lr=1.00e-03




Epoch 002 | tr_loss=0.47949 | va_loss=0.45585 | va_auc=0.954987 | lr=1.00e-03




Epoch 003 | tr_loss=0.46153 | va_loss=0.44026 | va_auc=0.958120 | lr=1.00e-03




Epoch 004 | tr_loss=0.45067 | va_loss=0.43506 | va_auc=0.959393 | lr=1.00e-03




Epoch 005 | tr_loss=0.44408 | va_loss=0.43392 | va_auc=0.959941 | lr=1.00e-03




Epoch 006 | tr_loss=0.44129 | va_loss=0.43029 | va_auc=0.960545 | lr=1.00e-03




Epoch 007 | tr_loss=0.43733 | va_loss=0.42720 | va_auc=0.960796 | lr=1.00e-03




Epoch 008 | tr_loss=0.43466 | va_loss=0.42581 | va_auc=0.960981 | lr=1.00e-03




Epoch 009 | tr_loss=0.43217 | va_loss=0.42641 | va_auc=0.960933 | lr=1.00e-03




Epoch 010 | tr_loss=0.43126 | va_loss=0.42391 | va_auc=0.961472 | lr=1.00e-03




Epoch 011 | tr_loss=0.42989 | va_loss=0.42452 | va_auc=0.961496 | lr=1.00e-03




Epoch 012 | tr_loss=0.42780 | va_loss=0.42181 | va_auc=0.961723 | lr=1.00e-03




Epoch 013 | tr_loss=0.42631 | va_loss=0.42276 | va_auc=0.961763 | lr=1.00e-03




Epoch 014 | tr_loss=0.42451 | va_loss=0.42461 | va_auc=0.961572 | lr=1.00e-03




Epoch 015 | tr_loss=0.42333 | va_loss=0.42153 | va_auc=0.961910 | lr=1.00e-03




Epoch 016 | tr_loss=0.42203 | va_loss=0.41978 | va_auc=0.962134 | lr=1.00e-03




Epoch 017 | tr_loss=0.42006 | va_loss=0.42139 | va_auc=0.961930 | lr=1.00e-03




Epoch 018 | tr_loss=0.41948 | va_loss=0.42017 | va_auc=0.962198 | lr=1.00e-03




Epoch 019 | tr_loss=0.41775 | va_loss=0.42001 | va_auc=0.962097 | lr=1.00e-03




Epoch 020 | tr_loss=0.41685 | va_loss=0.42163 | va_auc=0.961949 | lr=5.00e-04




Epoch 021 | tr_loss=0.41099 | va_loss=0.41795 | va_auc=0.962556 | lr=5.00e-04




Epoch 022 | tr_loss=0.40997 | va_loss=0.41879 | va_auc=0.962547 | lr=5.00e-04




Epoch 023 | tr_loss=0.40901 | va_loss=0.41802 | va_auc=0.962669 | lr=5.00e-04




Epoch 024 | tr_loss=0.40846 | va_loss=0.41954 | va_auc=0.962614 | lr=5.00e-04




Epoch 025 | tr_loss=0.40599 | va_loss=0.41866 | va_auc=0.962681 | lr=5.00e-04




Epoch 026 | tr_loss=0.40516 | va_loss=0.41871 | va_auc=0.962756 | lr=5.00e-04




Epoch 027 | tr_loss=0.40398 | va_loss=0.42252 | va_auc=0.962589 | lr=2.50e-04




Epoch 028 | tr_loss=0.40149 | va_loss=0.41913 | va_auc=0.962857 | lr=2.50e-04




Epoch 029 | tr_loss=0.40080 | va_loss=0.41870 | va_auc=0.962853 | lr=2.50e-04




Epoch 030 | tr_loss=0.39913 | va_loss=0.41982 | va_auc=0.962845 | lr=2.50e-04




Epoch 031 | tr_loss=0.39845 | va_loss=0.41870 | va_auc=0.962875 | lr=2.50e-04




Epoch 032 | tr_loss=0.39851 | va_loss=0.41917 | va_auc=0.962842 | lr=1.25e-04




Epoch 033 | tr_loss=0.39591 | va_loss=0.41920 | va_auc=0.962950 | lr=1.25e-04




Epoch 034 | tr_loss=0.39495 | va_loss=0.41967 | va_auc=0.962942 | lr=1.25e-04




Epoch 035 | tr_loss=0.39529 | va_loss=0.41871 | va_auc=0.962977 | lr=1.25e-04




Epoch 036 | tr_loss=0.39412 | va_loss=0.41902 | va_auc=0.962902 | lr=1.25e-04




Epoch 037 | tr_loss=0.39364 | va_loss=0.41949 | va_auc=0.962884 | lr=1.25e-04




Epoch 038 | tr_loss=0.39371 | va_loss=0.42113 | va_auc=0.962914 | lr=1.25e-04




Epoch 039 | tr_loss=0.39304 | va_loss=0.42080 | va_auc=0.962869 | lr=6.25e-05




Epoch 040 | tr_loss=0.39139 | va_loss=0.41985 | va_auc=0.962917 | lr=6.25e-05




Epoch 041 | tr_loss=0.39158 | va_loss=0.42061 | va_auc=0.962899 | lr=6.25e-05




Epoch 042 | tr_loss=0.39100 | va_loss=0.42073 | va_auc=0.962904 | lr=6.25e-05




Epoch 043 | tr_loss=0.39162 | va_loss=0.42037 | va_auc=0.962901 | lr=3.13e-05




Epoch 044 | tr_loss=0.39095 | va_loss=0.42009 | va_auc=0.962919 | lr=3.13e-05




Epoch 045 | tr_loss=0.38964 | va_loss=0.42108 | va_auc=0.962912 | lr=3.13e-05




Epoch 046 | tr_loss=0.38985 | va_loss=0.42079 | va_auc=0.962891 | lr=3.13e-05




Epoch 047 | tr_loss=0.39074 | va_loss=0.42063 | va_auc=0.962904 | lr=1.56e-05
Early stopping at epoch 47. Best AUC: 0.962977





===== Fold 4 =====




Epoch 001 | tr_loss=0.56643 | va_loss=0.48171 | va_auc=0.949871 | lr=1.00e-03




Epoch 002 | tr_loss=0.48399 | va_loss=0.46260 | va_auc=0.954225 | lr=1.00e-03




Epoch 003 | tr_loss=0.46367 | va_loss=0.44959 | va_auc=0.957571 | lr=1.00e-03




Epoch 004 | tr_loss=0.45267 | va_loss=0.43929 | va_auc=0.959016 | lr=1.00e-03




Epoch 005 | tr_loss=0.44656 | va_loss=0.43969 | va_auc=0.959508 | lr=1.00e-03




Epoch 006 | tr_loss=0.44166 | va_loss=0.43474 | va_auc=0.960153 | lr=1.00e-03




Epoch 007 | tr_loss=0.43744 | va_loss=0.43246 | va_auc=0.960554 | lr=1.00e-03




Epoch 008 | tr_loss=0.43549 | va_loss=0.43244 | va_auc=0.960441 | lr=1.00e-03




Epoch 009 | tr_loss=0.43373 | va_loss=0.43034 | va_auc=0.960990 | lr=1.00e-03




Epoch 010 | tr_loss=0.43116 | va_loss=0.42898 | va_auc=0.960913 | lr=1.00e-03




Epoch 011 | tr_loss=0.42887 | va_loss=0.42864 | va_auc=0.961140 | lr=1.00e-03




Epoch 012 | tr_loss=0.42706 | va_loss=0.42760 | va_auc=0.961327 | lr=1.00e-03




Epoch 013 | tr_loss=0.42656 | va_loss=0.43000 | va_auc=0.960662 | lr=1.00e-03




Epoch 014 | tr_loss=0.42496 | va_loss=0.42651 | va_auc=0.961354 | lr=1.00e-03




Epoch 015 | tr_loss=0.42328 | va_loss=0.42403 | va_auc=0.961700 | lr=1.00e-03




Epoch 016 | tr_loss=0.42233 | va_loss=0.42643 | va_auc=0.961572 | lr=1.00e-03




Epoch 017 | tr_loss=0.42050 | va_loss=0.42359 | va_auc=0.961946 | lr=1.00e-03




Epoch 018 | tr_loss=0.41858 | va_loss=0.42190 | va_auc=0.962108 | lr=1.00e-03




Epoch 019 | tr_loss=0.41746 | va_loss=0.42262 | va_auc=0.962049 | lr=1.00e-03




Epoch 020 | tr_loss=0.41621 | va_loss=0.42130 | va_auc=0.962336 | lr=1.00e-03




Epoch 021 | tr_loss=0.41532 | va_loss=0.42129 | va_auc=0.962242 | lr=1.00e-03




Epoch 022 | tr_loss=0.41401 | va_loss=0.42088 | va_auc=0.962414 | lr=1.00e-03




Epoch 023 | tr_loss=0.41297 | va_loss=0.42341 | va_auc=0.962330 | lr=1.00e-03




Epoch 024 | tr_loss=0.41152 | va_loss=0.42074 | va_auc=0.962503 | lr=1.00e-03




Epoch 025 | tr_loss=0.41012 | va_loss=0.42148 | va_auc=0.962473 | lr=1.00e-03




Epoch 026 | tr_loss=0.40868 | va_loss=0.42194 | va_auc=0.962559 | lr=1.00e-03




Epoch 027 | tr_loss=0.40733 | va_loss=0.42066 | va_auc=0.962618 | lr=1.00e-03




Epoch 028 | tr_loss=0.40626 | va_loss=0.42181 | va_auc=0.962511 | lr=1.00e-03




Epoch 029 | tr_loss=0.40417 | va_loss=0.42275 | va_auc=0.962417 | lr=1.00e-03




Epoch 030 | tr_loss=0.40309 | va_loss=0.42720 | va_auc=0.962044 | lr=1.00e-03




Epoch 031 | tr_loss=0.40331 | va_loss=0.42244 | va_auc=0.962668 | lr=5.00e-04




Epoch 032 | tr_loss=0.39592 | va_loss=0.42160 | va_auc=0.962752 | lr=5.00e-04




Epoch 033 | tr_loss=0.39353 | va_loss=0.42325 | va_auc=0.962755 | lr=5.00e-04




Epoch 034 | tr_loss=0.39309 | va_loss=0.42444 | va_auc=0.962824 | lr=5.00e-04




Epoch 035 | tr_loss=0.39202 | va_loss=0.42373 | va_auc=0.962796 | lr=5.00e-04




Epoch 036 | tr_loss=0.39076 | va_loss=0.42876 | va_auc=0.962636 | lr=2.50e-04




Epoch 037 | tr_loss=0.38733 | va_loss=0.42396 | va_auc=0.962680 | lr=2.50e-04




Epoch 038 | tr_loss=0.38666 | va_loss=0.42498 | va_auc=0.962775 | lr=2.50e-04




Epoch 039 | tr_loss=0.38561 | va_loss=0.42501 | va_auc=0.962769 | lr=2.50e-04




Epoch 040 | tr_loss=0.38487 | va_loss=0.42496 | va_auc=0.962833 | lr=1.25e-04




Epoch 041 | tr_loss=0.38246 | va_loss=0.42427 | va_auc=0.962717 | lr=1.25e-04




Epoch 042 | tr_loss=0.38176 | va_loss=0.42561 | va_auc=0.962733 | lr=1.25e-04




Epoch 043 | tr_loss=0.38143 | va_loss=0.42549 | va_auc=0.962678 | lr=1.25e-04




Epoch 044 | tr_loss=0.38054 | va_loss=0.42913 | va_auc=0.962665 | lr=6.25e-05




Epoch 045 | tr_loss=0.37948 | va_loss=0.42794 | va_auc=0.962599 | lr=6.25e-05




Epoch 046 | tr_loss=0.37980 | va_loss=0.42775 | va_auc=0.962630 | lr=6.25e-05




Epoch 047 | tr_loss=0.38015 | va_loss=0.42716 | va_auc=0.962630 | lr=6.25e-05




Epoch 048 | tr_loss=0.37901 | va_loss=0.42795 | va_auc=0.962714 | lr=3.13e-05




Epoch 049 | tr_loss=0.37817 | va_loss=0.42834 | va_auc=0.962650 | lr=3.13e-05




Epoch 050 | tr_loss=0.37875 | va_loss=0.42798 | va_auc=0.962622 | lr=3.13e-05




Epoch 051 | tr_loss=0.37799 | va_loss=0.42771 | va_auc=0.962626 | lr=3.13e-05




Epoch 052 | tr_loss=0.37824 | va_loss=0.42808 | va_auc=0.962653 | lr=1.56e-05
Early stopping at epoch 52. Best AUC: 0.962833





OOF ROC AUC: 0.9625154905879971
Submission saved to: runs/2025-08-12_09-12-57/submission.csv
All artifacts saved under: runs/2025-08-12_09-12-57
