# Swin-T Tiny Training for RSNA Cervical Spine Fracture Detection

# Goal: Train 5-fold Swin-T tiny (seed=789) for diversity in 4-way ensemble with ConvNeXt v1/v2 + RegNet.
# Adapt from 02_baseline_cnn.ipynb: same MIP data, augs, BCEWithLogitsLoss, pos_weight, sampler, HFlip TTA.
# Model: timm.create_model('swin_tiny_patch4_window7_224', num_classes=7, in_chans=3, drop_rate=0.3, drop_path_rate=0.1).
# Resolution: 224x224 (Swin native), bone-windowed MIPs resized.
# Save: fold_{}_swin.pth, oof_logits_swin_tta.npy.
# After training, blend as 4th model with capped weights (Swin <=0.4), re-calibrate temps/LR/alpha, target CV<0.40 silver.
# Estimated time: ~2h on A10 GPU (5 folds, 20 epochs each with early stopping).

# Next: Load data/folds, define model/dataset, train loop, generate OOF TTA logits, compute CV WLL, request expert review.

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import timm
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold
from sklearn.metrics import log_loss
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import os
import random
import subprocess
from scipy.special import expit as sigmoid
from collections import OrderedDict
import warnings
warnings.filterwarnings('ignore')

# GPU check from best practices
print(subprocess.run(['bash', '-lc', 'nvidia-smi || true'], capture_output=True, text=True).stdout)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Set seed for reproducibility
SEED = 789
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_everything(SEED)

# Load data (train_mips.csv already includes labels, no merge needed)
df = pd.read_csv('data/train_mips.csv')
print(f'df shape: {df.shape}, columns: {df.columns.tolist()}')
label_cols = ['C1','C2','C3','C4','C5','C6','C7']
y = df[label_cols].fillna(0).values  # Fill NaN with 0 if any
groups = df['StudyInstanceUID'].values
print(f'y shape: {y.shape}, groups shape: {groups.shape}, NaN in y: {np.isnan(y).sum()}')

# Folds (GroupKFold n_splits=5, random_state=42)
gkf = GroupKFold(n_splits=5)
folds = []
for fold, (tr_idx, va_idx) in enumerate(gkf.split(df, y, groups=groups)):
    folds.append((tr_idx, va_idx))
print(f'Loaded {len(df)} samples, 5 folds ready.')

# Model definition
def build_swin_model():
    model = timm.create_model('swin_tiny_patch4_window7_224',
                              pretrained=False, num_classes=7, in_chans=3,
                              drop_rate=0.3, drop_path_rate=0.1)
    model = model.to(device)
    return model

# Dataset (adapt to 224x224)
class MIPDataset(Dataset):
    def __init__(self, df, labels, transform=None):
        self.df = df.reset_index(drop=True)
        self.labels = labels
        self.transform = transform
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        uid = self.df.iloc[idx]['StudyInstanceUID']
        mip_path = f'data/mips/train/{uid}.npy'
        if not os.path.exists(mip_path):
            print(f'Missing MIP for {uid}')
            # For subsample, assume all exist or skip
            mip = np.zeros((3, 224, 224), dtype=np.float32)  # fallback zero
        else:
            mip = np.load(mip_path).astype(np.float32)  # (3,H,W)
        img = np.transpose(mip, (1,2,0))  # HWC
        if self.transform:
            augmented = self.transform(image=img)
            img = augmented['image']
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return img, label

# Transforms (224x224, same augs as ConvNeXt)
train_transform = A.Compose([
    A.Resize(224, 224),
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Normalize(mean=0.5, std=0.5),
    ToTensorV2()
])
val_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=0.5, std=0.5),
    ToTensorV2()
])

# Loss and sampler (BCEWithLogitsLoss with pos_weight, WeightedRandomSampler)
pos_weight = torch.tensor([2.0] * 7).to(device)  # adjust if needed
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

def get_sampler(df, labels):
    class_counts = np.sum(labels, axis=0)
    n_samples = len(df)
    weights = 1.0 / (class_counts + 1e-6)
    sample_weights = np.mean([weights[c] for c in range(7) if labels[:,c].sum() > 0], axis=0) * np.ones(n_samples)
    return WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

print('Setup complete: Swin-T model, 224 res transforms, data/folds ready. Next: implement training loop for 5 folds, save weights/OOF TTA logits, compute CV WLL.')

Fri Sep 26 08:26:40 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.06             Driver Version: 550.144.06     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A10-24Q                 On  |   00000002:00:00.0 Off |                    0 |
| N/A   N/A    P0             N/A /  N/A  |   14924MiB /  24512MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [5]:
# Training loop: 5 folds, Swin-T, BCEWithLogitsLoss, AdamW lr=1e-4, early stopping on val WLL, HFlip TTA for OOF
import time
from sklearn.metrics import log_loss
def wll(y_true, p_pred):
    p_pred = np.clip(p_pred, 1e-6, 1-1e-6)
    losses = [log_loss(y_true[:,i], p_pred[:,i], labels=[0,1]) for i in range(7)]
    return np.average(losses, weights=[1]*7)

N_FOLDS = 5
BATCH_SIZE = 16  # Smaller for Swin-T
EPOCHS = 20
LR = 1e-4
PATIENCE = 5
oof_logits = np.zeros((len(df), 7), dtype=np.float32)
fold_scores = []
for fold in range(N_FOLDS):
    tr_idx, va_idx = folds[fold]
    print(f'\n=== Fold {fold+1}/{N_FOLDS} ===')
    tr_df = df.iloc[tr_idx].reset_index(drop=True)
    va_df = df.iloc[va_idx].reset_index(drop=True)
    tr_labels = y[tr_idx]
    va_labels = y[va_idx]
    
    tr_ds = MIPDataset(tr_df, tr_labels, train_transform)
    va_ds = MIPDataset(va_df, va_labels, val_transform)
    
    sampler = get_sampler(tr_df, tr_labels)
    tr_dl = DataLoader(tr_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=4, pin_memory=True)
    va_dl = DataLoader(va_ds, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)
    
    model = build_swin_model()
    optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)
    
    best_val_loss = float('inf')
    patience_counter = 0
    fold_start = time.time()
    
    for epoch in range(EPOCHS):
        epoch_start = time.time()
        model.train()
        tr_loss = 0.0
        for imgs, lbls in tqdm(tr_dl, desc=f'Epoch {epoch+1} Train'):
            imgs, lbls = imgs.to(device), lbls.to(device)
            optimizer.zero_grad()
            logits = model(imgs)
            loss = criterion(logits, lbls)
            loss.backward()
            optimizer.step()
            tr_loss += loss.item()
        tr_loss /= len(tr_dl)
        
        model.eval()
        va_logits = []
        va_loss = 0.0
        with torch.no_grad():
            for imgs, lbls in tqdm(va_dl, desc=f'Epoch {epoch+1} Val'):
                imgs, lbls = imgs.to(device), lbls.to(device)
                logits = model(imgs)
                loss = criterion(logits, lbls)
                va_loss += loss.item()
                va_logits.append(logits.cpu().numpy())
        va_loss /= len(va_dl)
        va_logits = np.concatenate(va_logits, axis=0)
        p_va = sigmoid(va_logits)
        val_wll = wll(va_labels, p_va)
        
        scheduler.step(val_wll)
        
        elapsed = time.time() - epoch_start
        print(f'Epoch {epoch+1}: tr_loss={tr_loss:.4f}, val_loss={va_loss:.4f}, val_wll={val_wll:.4f}, lr={optimizer.param_groups[0]["lr"]:.2e}, time={elapsed:.1f}s')
        
        if val_wll < best_val_loss:
            best_val_loss = val_wll
            patience_counter = 0
            torch.save(model.state_dict(), f'fold_{fold+1}_swin.pth')
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f'Early stopping at epoch {epoch+1}')
                break
    
    fold_time = time.time() - fold_start
    print(f'Fold {fold+1} done: best_val_wll={best_val_loss:.4f}, time={fold_time/60:.1f}min')
    fold_scores.append(best_val_loss)
    
    # OOF TTA logits (HFlip)
    model.load_state_dict(torch.load(f'fold_{fold+1}_swin.pth', map_location='cpu', weights_only=True))
    model.to(device).eval()
    fold_oof_logits = []
    with torch.no_grad():
        for imgs, _ in va_dl:
            imgs = imgs.to(device)
            logits = model(imgs)
            logits_f = model(torch.flip(imgs, dims=[3]))
            tta_logits = 0.5 * (logits + logits_f)
            fold_oof_logits.append(tta_logits.cpu().numpy())
    oof_logits[va_idx] = np.concatenate(fold_oof_logits, axis=0)
    
# Compute CV WLL
oof_p = sigmoid(oof_logits)
cv_wll = wll(y, oof_p)
print(f'\nSwin-T CV WLL: {cv_wll:.4f} (mean fold: {np.mean(fold_scores):.4f} \u00b1 {np.std(fold_scores):.4f})')
np.save('oof_logits_swin_tta.npy', oof_logits)
print('Saved oof_logits_swin_tta.npy. Next: ensemble with 3-way, re-optimize weights/LR/alpha, target <0.40 WLL, request expert review.')



=== Fold 1/5 ===


Epoch 1 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 1 Train:   9%|▉         | 1/11 [00:00<00:02,  3.63it/s]

Epoch 1 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.23it/s]

Epoch 1 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.84it/s]

Epoch 1 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.66it/s]

Epoch 1 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.18it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00, 11.76it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00,  9.50it/s]




Epoch 1 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1 Val:  50%|█████     | 1/2 [00:00<00:00,  3.84it/s]

Epoch 1 Val: 100%|██████████| 2/2 [00:00<00:00,  6.38it/s]




Epoch 1: tr_loss=0.7431, val_loss=0.5065, val_wll=0.3765, lr=1.00e-04, time=1.5s


Epoch 2 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 2 Train:   9%|▉         | 1/11 [00:00<00:02,  3.61it/s]

Epoch 2 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.18it/s]

Epoch 2 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.78it/s]

Epoch 2 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.65it/s]

Epoch 2 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.14it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00, 11.69it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00,  9.46it/s]




Epoch 2 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 2 Val:  50%|█████     | 1/2 [00:00<00:00,  3.76it/s]

Epoch 2 Val: 100%|██████████| 2/2 [00:00<00:00,  6.19it/s]




Epoch 2: tr_loss=0.5950, val_loss=0.4930, val_wll=0.3586, lr=1.00e-04, time=1.5s


Epoch 3 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 3 Train:   9%|▉         | 1/11 [00:00<00:02,  3.63it/s]

Epoch 3 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.18it/s]

Epoch 3 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.77it/s]

Epoch 3 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.62it/s]

Epoch 3 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.14it/s]

Epoch 3 Train: 100%|██████████| 11/11 [00:01<00:00, 11.75it/s]

Epoch 3 Val:  50%|█████     | 1/2 [00:00<00:00,  3.69it/s]

Epoch 3 Val: 100%|██████████| 2/2 [00:00<00:00,  6.15it/s]




Epoch 3: tr_loss=0.5710, val_loss=0.5179, val_wll=0.3741, lr=1.00e-04, time=1.5s


Epoch 4 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 4 Train:   9%|▉         | 1/11 [00:00<00:02,  3.79it/s]

Epoch 4 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.36it/s]

Epoch 4 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.91it/s]

Epoch 4 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.74it/s]

Epoch 4 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.24it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00, 11.74it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00,  9.61it/s]




Epoch 4 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 4 Val:  50%|█████     | 1/2 [00:00<00:00,  3.68it/s]

Epoch 4 Val: 100%|██████████| 2/2 [00:00<00:00,  6.05it/s]




Epoch 4: tr_loss=0.5404, val_loss=0.4999, val_wll=0.3822, lr=1.00e-04, time=1.5s


Epoch 5 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 5 Train:   9%|▉         | 1/11 [00:00<00:02,  3.61it/s]

Epoch 5 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.19it/s]

Epoch 5 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.77it/s]

Epoch 5 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.64it/s]

Epoch 5 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.13it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00, 11.68it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00,  9.42it/s]




Epoch 5 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 5 Val:  50%|█████     | 1/2 [00:00<00:00,  3.65it/s]

Epoch 5 Val: 100%|██████████| 2/2 [00:00<00:00,  6.07it/s]




Epoch 5: tr_loss=0.5440, val_loss=0.5009, val_wll=0.3620, lr=1.00e-04, time=1.5s


Epoch 6 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 6 Train:   9%|▉         | 1/11 [00:00<00:02,  3.66it/s]

Epoch 6 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.25it/s]

Epoch 6 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.83it/s]

Epoch 6 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.69it/s]

Epoch 6 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.19it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00, 11.75it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00,  9.50it/s]




Epoch 6 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 6 Val:  50%|█████     | 1/2 [00:00<00:00,  3.69it/s]

Epoch 6 Val: 100%|██████████| 2/2 [00:00<00:00,  6.07it/s]




Epoch 6: tr_loss=0.6004, val_loss=0.4993, val_wll=0.3837, lr=5.00e-05, time=1.5s


Epoch 7 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 7 Train:   9%|▉         | 1/11 [00:00<00:02,  3.82it/s]

Epoch 7 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.40it/s]

Epoch 7 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.94it/s]

Epoch 7 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.75it/s]

Epoch 7 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.23it/s]

Epoch 7 Train: 100%|██████████| 11/11 [00:01<00:00, 11.73it/s]

Epoch 7 Train: 100%|██████████| 11/11 [00:01<00:00,  9.61it/s]




Epoch 7 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 7 Val:  50%|█████     | 1/2 [00:00<00:00,  3.69it/s]

Epoch 7 Val: 100%|██████████| 2/2 [00:00<00:00,  6.00it/s]

Epoch 7: tr_loss=0.6026, val_loss=0.5038, val_wll=0.3767, lr=5.00e-05, time=1.5s
Early stopping at epoch 7
Fold 1 done: best_val_wll=0.3586, time=0.2min






=== Fold 2/5 ===


Epoch 1 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 1 Train:   9%|▉         | 1/11 [00:00<00:02,  3.54it/s]

Epoch 1 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.11it/s]

Epoch 1 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.72it/s]

Epoch 1 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.58it/s]

Epoch 1 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.12it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00, 11.64it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00,  9.38it/s]




Epoch 1 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1 Val:  50%|█████     | 1/2 [00:00<00:00,  3.79it/s]

Epoch 1 Val: 100%|██████████| 2/2 [00:00<00:00,  6.25it/s]




Epoch 1: tr_loss=0.6303, val_loss=0.5260, val_wll=0.3924, lr=1.00e-04, time=1.5s


Epoch 2 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 2 Train:   9%|▉         | 1/11 [00:00<00:02,  3.56it/s]

Epoch 2 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.10it/s]

Epoch 2 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.70it/s]

Epoch 2 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.58it/s]

Epoch 2 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.12it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00, 11.64it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00,  9.43it/s]




Epoch 2 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 2 Val:  50%|█████     | 1/2 [00:00<00:00,  3.77it/s]

Epoch 2 Val: 100%|██████████| 2/2 [00:00<00:00,  6.26it/s]




Epoch 2: tr_loss=0.5032, val_loss=0.5317, val_wll=0.3792, lr=1.00e-04, time=1.5s


Epoch 3 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 3 Train:   9%|▉         | 1/11 [00:00<00:02,  3.65it/s]

Epoch 3 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.22it/s]

Epoch 3 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.80it/s]

Epoch 3 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.64it/s]

Epoch 3 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.14it/s]

Epoch 3 Train: 100%|██████████| 11/11 [00:01<00:00, 11.66it/s]

Epoch 3 Train: 100%|██████████| 11/11 [00:01<00:00,  9.44it/s]




Epoch 3 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 3 Val:  50%|█████     | 1/2 [00:00<00:00,  3.75it/s]

Epoch 3 Val: 100%|██████████| 2/2 [00:00<00:00,  6.20it/s]




Epoch 3: tr_loss=0.5394, val_loss=0.5389, val_wll=0.4049, lr=1.00e-04, time=1.5s


Epoch 4 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 4 Train:   9%|▉         | 1/11 [00:00<00:02,  3.49it/s]

Epoch 4 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.04it/s]

Epoch 4 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.68it/s]

Epoch 4 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.56it/s]

Epoch 4 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.10it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00, 11.66it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00,  9.42it/s]




Epoch 4 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 4 Val:  50%|█████     | 1/2 [00:00<00:00,  3.90it/s]

Epoch 4 Val: 100%|██████████| 2/2 [00:00<00:00,  6.31it/s]




Epoch 4: tr_loss=0.4962, val_loss=0.5163, val_wll=0.3664, lr=1.00e-04, time=1.5s


Epoch 5 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 5 Train:   9%|▉         | 1/11 [00:00<00:02,  3.78it/s]

Epoch 5 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.34it/s]

Epoch 5 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.90it/s]

Epoch 5 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.71it/s]

Epoch 5 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.19it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00, 11.67it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00,  9.52it/s]




Epoch 5 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 5 Val:  50%|█████     | 1/2 [00:00<00:00,  3.73it/s]

Epoch 5 Val: 100%|██████████| 2/2 [00:00<00:00,  6.08it/s]




Epoch 5: tr_loss=0.5650, val_loss=0.5153, val_wll=0.3874, lr=1.00e-04, time=1.5s


Epoch 6 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 6 Train:   9%|▉         | 1/11 [00:00<00:02,  3.62it/s]

Epoch 6 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.19it/s]

Epoch 6 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.76it/s]

Epoch 6 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.60it/s]

Epoch 6 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.10it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00, 11.61it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00,  9.40it/s]




Epoch 6 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 6 Val:  50%|█████     | 1/2 [00:00<00:00,  3.67it/s]

Epoch 6 Val: 100%|██████████| 2/2 [00:00<00:00,  6.11it/s]




Epoch 6: tr_loss=0.5235, val_loss=0.4985, val_wll=0.3819, lr=1.00e-04, time=1.5s


Epoch 7 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 7 Train:   9%|▉         | 1/11 [00:00<00:02,  3.59it/s]

Epoch 7 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.15it/s]

Epoch 7 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.73it/s]

Epoch 7 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.57it/s]

Epoch 7 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.08it/s]

Epoch 7 Train: 100%|██████████| 11/11 [00:01<00:00, 11.59it/s]

Epoch 7 Train: 100%|██████████| 11/11 [00:01<00:00,  9.43it/s]




Epoch 7 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 7 Val:  50%|█████     | 1/2 [00:00<00:00,  3.81it/s]

Epoch 7 Val: 100%|██████████| 2/2 [00:00<00:00,  6.30it/s]




Epoch 7: tr_loss=0.6120, val_loss=0.5445, val_wll=0.4092, lr=1.00e-04, time=1.5s


Epoch 8 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 8 Train:   9%|▉         | 1/11 [00:00<00:02,  3.71it/s]

Epoch 8 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.29it/s]

Epoch 8 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.83it/s]

Epoch 8 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.64it/s]

Epoch 8 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.11it/s]

Epoch 8 Train: 100%|██████████| 11/11 [00:01<00:00, 11.61it/s]

Epoch 8 Train: 100%|██████████| 11/11 [00:01<00:00,  9.47it/s]




Epoch 8 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 8 Val:  50%|█████     | 1/2 [00:00<00:00,  3.79it/s]

Epoch 8 Val: 100%|██████████| 2/2 [00:00<00:00,  6.27it/s]




Epoch 8: tr_loss=0.5789, val_loss=0.5214, val_wll=0.4012, lr=5.00e-05, time=1.5s


Epoch 9 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 9 Train:   9%|▉         | 1/11 [00:00<00:02,  3.58it/s]

Epoch 9 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.15it/s]

Epoch 9 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.75it/s]

Epoch 9 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.61it/s]

Epoch 9 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.11it/s]

Epoch 9 Train: 100%|██████████| 11/11 [00:01<00:00, 11.66it/s]

Epoch 9 Train: 100%|██████████| 11/11 [00:01<00:00,  9.42it/s]




Epoch 9 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 9 Val:  50%|█████     | 1/2 [00:00<00:00,  3.66it/s]

Epoch 9 Val: 100%|██████████| 2/2 [00:00<00:00,  5.99it/s]

Epoch 9: tr_loss=0.4904, val_loss=0.5069, val_wll=0.3719, lr=5.00e-05, time=1.5s
Early stopping at epoch 9
Fold 2 done: best_val_wll=0.3664, time=0.2min






=== Fold 3/5 ===


Epoch 1 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 1 Train:   9%|▉         | 1/11 [00:00<00:02,  3.67it/s]

Epoch 1 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.23it/s]

Epoch 1 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.78it/s]

Epoch 1 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.60it/s]

Epoch 1 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.09it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00, 11.58it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00,  9.38it/s]




Epoch 1 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1 Val:  50%|█████     | 1/2 [00:00<00:00,  3.50it/s]

Epoch 1 Val: 100%|██████████| 2/2 [00:00<00:00,  5.80it/s]




Epoch 1: tr_loss=0.5994, val_loss=0.3363, val_wll=0.2748, lr=1.00e-04, time=1.5s


Epoch 2 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 2 Train:   9%|▉         | 1/11 [00:00<00:02,  3.71it/s]

Epoch 2 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.27it/s]

Epoch 2 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.82it/s]

Epoch 2 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.64it/s]

Epoch 2 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.11it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00, 11.63it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00,  9.49it/s]




Epoch 2 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 2 Val:  50%|█████     | 1/2 [00:00<00:00,  3.62it/s]

Epoch 2 Val: 100%|██████████| 2/2 [00:00<00:00,  6.07it/s]




Epoch 2: tr_loss=0.6129, val_loss=0.4643, val_wll=0.4163, lr=1.00e-04, time=1.5s


Epoch 3 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 3 Train:   9%|▉         | 1/11 [00:00<00:02,  3.70it/s]

Epoch 3 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.26it/s]

Epoch 3 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.81it/s]

Epoch 3 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.61it/s]

Epoch 3 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.10it/s]

Epoch 3 Train: 100%|██████████| 11/11 [00:01<00:00, 11.63it/s]

Epoch 3 Train: 100%|██████████| 11/11 [00:01<00:00,  9.45it/s]




Epoch 3 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 3 Val:  50%|█████     | 1/2 [00:00<00:00,  3.64it/s]

Epoch 3 Val: 100%|██████████| 2/2 [00:00<00:00,  6.05it/s]




Epoch 3: tr_loss=0.6767, val_loss=0.3781, val_wll=0.3285, lr=1.00e-04, time=1.5s


Epoch 4 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 4 Train:   9%|▉         | 1/11 [00:00<00:02,  3.49it/s]

Epoch 4 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.08it/s]

Epoch 4 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.71it/s]

Epoch 4 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.59it/s]

Epoch 4 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.12it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00, 11.67it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00,  9.39it/s]




Epoch 4 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 4 Val:  50%|█████     | 1/2 [00:00<00:00,  3.53it/s]

Epoch 4 Val: 100%|██████████| 2/2 [00:00<00:00,  5.88it/s]




Epoch 4: tr_loss=0.7399, val_loss=0.4702, val_wll=0.4079, lr=1.00e-04, time=1.5s


Epoch 5 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 5 Train:   9%|▉         | 1/11 [00:00<00:02,  3.69it/s]

Epoch 5 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.29it/s]

Epoch 5 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.88it/s]

Epoch 5 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.72it/s]

Epoch 5 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.20it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00, 11.73it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00,  9.54it/s]




Epoch 5 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 5 Val:  50%|█████     | 1/2 [00:00<00:00,  3.57it/s]

Epoch 5 Val: 100%|██████████| 2/2 [00:00<00:00,  5.95it/s]




Epoch 5: tr_loss=0.6246, val_loss=0.3922, val_wll=0.3462, lr=5.00e-05, time=1.5s


Epoch 6 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 6 Train:   9%|▉         | 1/11 [00:00<00:02,  3.77it/s]

Epoch 6 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.36it/s]

Epoch 6 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.91it/s]

Epoch 6 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.73it/s]

Epoch 6 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.22it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00, 11.72it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00,  9.61it/s]




Epoch 6 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 6 Val:  50%|█████     | 1/2 [00:00<00:00,  3.49it/s]

Epoch 6 Val: 100%|██████████| 2/2 [00:00<00:00,  5.86it/s]

Epoch 6: tr_loss=0.6489, val_loss=0.3935, val_wll=0.3383, lr=5.00e-05, time=1.5s
Early stopping at epoch 6
Fold 3 done: best_val_wll=0.2748, time=0.2min






=== Fold 4/5 ===


Epoch 1 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 1 Train:   9%|▉         | 1/11 [00:00<00:02,  3.50it/s]

Epoch 1 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.07it/s]

Epoch 1 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.69it/s]

Epoch 1 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.57it/s]

Epoch 1 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.09it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00, 11.64it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00,  9.39it/s]




Epoch 1 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1 Val:  50%|█████     | 1/2 [00:00<00:00,  3.64it/s]

Epoch 1 Val: 100%|██████████| 2/2 [00:00<00:00,  5.90it/s]




Epoch 1: tr_loss=0.5584, val_loss=0.6986, val_wll=0.3926, lr=1.00e-04, time=1.5s


Epoch 2 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 2 Train:   9%|▉         | 1/11 [00:00<00:02,  3.81it/s]

Epoch 2 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.38it/s]

Epoch 2 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.92it/s]

Epoch 2 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.74it/s]

Epoch 2 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.21it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00, 11.73it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00,  9.55it/s]




Epoch 2 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 2 Val:  50%|█████     | 1/2 [00:00<00:00,  3.61it/s]

Epoch 2 Val: 100%|██████████| 2/2 [00:00<00:00,  6.03it/s]




Epoch 2: tr_loss=0.5773, val_loss=0.6907, val_wll=0.4297, lr=1.00e-04, time=1.5s


Epoch 3 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 3 Train:   9%|▉         | 1/11 [00:00<00:02,  3.80it/s]

Epoch 3 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.37it/s]

Epoch 3 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.91it/s]

Epoch 3 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.72it/s]

Epoch 3 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.20it/s]

Epoch 3 Train: 100%|██████████| 11/11 [00:01<00:00, 11.76it/s]

Epoch 3 Train: 100%|██████████| 11/11 [00:01<00:00,  9.58it/s]




Epoch 3 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 3 Val:  50%|█████     | 1/2 [00:00<00:00,  3.63it/s]

Epoch 3 Val: 100%|██████████| 2/2 [00:00<00:00,  6.07it/s]




Epoch 3: tr_loss=0.5346, val_loss=0.7310, val_wll=0.4117, lr=1.00e-04, time=1.5s


Epoch 4 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 4 Train:   9%|▉         | 1/11 [00:00<00:02,  3.72it/s]

Epoch 4 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.29it/s]

Epoch 4 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.84it/s]

Epoch 4 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.70it/s]

Epoch 4 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.21it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00, 11.71it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00,  9.52it/s]




Epoch 4 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 4 Val:  50%|█████     | 1/2 [00:00<00:00,  3.54it/s]

Epoch 4 Val: 100%|██████████| 2/2 [00:00<00:00,  5.86it/s]




Epoch 4: tr_loss=0.5428, val_loss=0.6920, val_wll=0.4008, lr=1.00e-04, time=1.5s


Epoch 5 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 5 Train:   9%|▉         | 1/11 [00:00<00:03,  3.18it/s]

Epoch 5 Train:  27%|██▋       | 3/11 [00:00<00:01,  6.71it/s]

Epoch 5 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.42it/s]

Epoch 5 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.38it/s]

Epoch 5 Train:  82%|████████▏ | 9/11 [00:01<00:00,  9.97it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00, 11.54it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00,  9.19it/s]




Epoch 5 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 5 Val:  50%|█████     | 1/2 [00:00<00:00,  3.55it/s]

Epoch 5 Val: 100%|██████████| 2/2 [00:00<00:00,  5.94it/s]




Epoch 5: tr_loss=0.5104, val_loss=0.7336, val_wll=0.4059, lr=5.00e-05, time=1.5s


Epoch 6 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 6 Train:   9%|▉         | 1/11 [00:00<00:02,  3.59it/s]

Epoch 6 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.19it/s]

Epoch 6 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.79it/s]

Epoch 6 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.64it/s]

Epoch 6 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.16it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00, 11.69it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00,  9.49it/s]




Epoch 6 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 6 Val:  50%|█████     | 1/2 [00:00<00:00,  3.55it/s]

Epoch 6 Val: 100%|██████████| 2/2 [00:00<00:00,  5.95it/s]

Epoch 6: tr_loss=0.5655, val_loss=0.6719, val_wll=0.4118, lr=5.00e-05, time=1.5s
Early stopping at epoch 6
Fold 4 done: best_val_wll=0.3926, time=0.2min






=== Fold 5/5 ===


Epoch 1 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 1 Train:   9%|▉         | 1/11 [00:00<00:02,  3.74it/s]

Epoch 1 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.33it/s]

Epoch 1 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.90it/s]

Epoch 1 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.74it/s]

Epoch 1 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.22it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00, 11.78it/s]

Epoch 1 Train: 100%|██████████| 11/11 [00:01<00:00,  9.57it/s]




Epoch 1 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1 Val:  50%|█████     | 1/2 [00:00<00:00,  3.86it/s]

Epoch 1 Val: 100%|██████████| 2/2 [00:00<00:00,  6.26it/s]




Epoch 1: tr_loss=0.6231, val_loss=0.6791, val_wll=0.3713, lr=1.00e-04, time=1.5s


Epoch 2 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 2 Train:   9%|▉         | 1/11 [00:00<00:02,  3.71it/s]

Epoch 2 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.28it/s]

Epoch 2 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.85it/s]

Epoch 2 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.69it/s]

Epoch 2 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.17it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00, 11.71it/s]

Epoch 2 Train: 100%|██████████| 11/11 [00:01<00:00,  9.55it/s]




Epoch 2 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 2 Val:  50%|█████     | 1/2 [00:00<00:00,  3.99it/s]

Epoch 2 Val: 100%|██████████| 2/2 [00:00<00:00,  6.54it/s]




Epoch 2: tr_loss=0.5311, val_loss=0.6536, val_wll=0.3860, lr=1.00e-04, time=1.5s


Epoch 3 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 3 Train:   9%|▉         | 1/11 [00:00<00:02,  3.98it/s]

Epoch 3 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.57it/s]

Epoch 3 Train:  45%|████▌     | 5/11 [00:00<00:00,  9.06it/s]

Epoch 3 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.84it/s]

Epoch 3 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.30it/s]

Epoch 3 Train: 100%|██████████| 11/11 [00:01<00:00, 11.82it/s]

Epoch 3 Train: 100%|██████████| 11/11 [00:01<00:00,  9.71it/s]




Epoch 3 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 3 Val:  50%|█████     | 1/2 [00:00<00:00,  3.83it/s]

Epoch 3 Val: 100%|██████████| 2/2 [00:00<00:00,  6.30it/s]




Epoch 3: tr_loss=0.5412, val_loss=0.6061, val_wll=0.3455, lr=1.00e-04, time=1.5s


Epoch 4 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 4 Train:   9%|▉         | 1/11 [00:00<00:02,  3.65it/s]

Epoch 4 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.24it/s]

Epoch 4 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.83it/s]

Epoch 4 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.66it/s]

Epoch 4 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.15it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00, 11.73it/s]

Epoch 4 Train: 100%|██████████| 11/11 [00:01<00:00,  9.49it/s]




Epoch 4 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 4 Val:  50%|█████     | 1/2 [00:00<00:00,  3.76it/s]

Epoch 4 Val: 100%|██████████| 2/2 [00:00<00:00,  6.15it/s]




Epoch 4: tr_loss=0.5848, val_loss=0.6090, val_wll=0.3648, lr=1.00e-04, time=1.5s


Epoch 5 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 5 Train:   9%|▉         | 1/11 [00:00<00:02,  3.75it/s]

Epoch 5 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.32it/s]

Epoch 5 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.86it/s]

Epoch 5 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.69it/s]

Epoch 5 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.18it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00, 11.72it/s]

Epoch 5 Train: 100%|██████████| 11/11 [00:01<00:00,  9.51it/s]




Epoch 5 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 5 Val:  50%|█████     | 1/2 [00:00<00:00,  3.84it/s]

Epoch 5 Val: 100%|██████████| 2/2 [00:00<00:00,  6.30it/s]




Epoch 5: tr_loss=0.5356, val_loss=0.5959, val_wll=0.3595, lr=1.00e-04, time=1.5s


Epoch 6 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 6 Train:   9%|▉         | 1/11 [00:00<00:02,  3.57it/s]

Epoch 6 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.15it/s]

Epoch 6 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.76it/s]

Epoch 6 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.63it/s]

Epoch 6 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.15it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00, 11.68it/s]

Epoch 6 Train: 100%|██████████| 11/11 [00:01<00:00,  9.47it/s]




Epoch 6 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 6 Val:  50%|█████     | 1/2 [00:00<00:00,  4.01it/s]

Epoch 6 Val: 100%|██████████| 2/2 [00:00<00:00,  6.57it/s]




Epoch 6: tr_loss=0.5225, val_loss=0.6363, val_wll=0.3406, lr=1.00e-04, time=1.5s


Epoch 7 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 7 Train:   9%|▉         | 1/11 [00:00<00:02,  3.86it/s]

Epoch 7 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.43it/s]

Epoch 7 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.94it/s]

Epoch 7 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.76it/s]

Epoch 7 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.24it/s]

Epoch 7 Train: 100%|██████████| 11/11 [00:01<00:00, 11.76it/s]

Epoch 7 Train: 100%|██████████| 11/11 [00:01<00:00,  9.62it/s]




Epoch 7 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 7 Val:  50%|█████     | 1/2 [00:00<00:00,  3.73it/s]

Epoch 7 Val: 100%|██████████| 2/2 [00:00<00:00,  6.17it/s]




Epoch 7: tr_loss=0.5688, val_loss=0.6164, val_wll=0.3506, lr=1.00e-04, time=1.5s


Epoch 8 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 8 Train:   9%|▉         | 1/11 [00:00<00:02,  3.81it/s]

Epoch 8 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.37it/s]

Epoch 8 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.92it/s]

Epoch 8 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.72it/s]

Epoch 8 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.21it/s]

Epoch 8 Train: 100%|██████████| 11/11 [00:01<00:00, 11.81it/s]

Epoch 8 Train: 100%|██████████| 11/11 [00:01<00:00,  9.56it/s]




Epoch 8 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 8 Val:  50%|█████     | 1/2 [00:00<00:00,  3.86it/s]

Epoch 8 Val: 100%|██████████| 2/2 [00:00<00:00,  6.39it/s]




Epoch 8: tr_loss=0.5066, val_loss=0.6193, val_wll=0.3725, lr=1.00e-04, time=1.5s


Epoch 9 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 9 Train:   9%|▉         | 1/11 [00:00<00:02,  3.54it/s]

Epoch 9 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.10it/s]

Epoch 9 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.73it/s]

Epoch 9 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.61it/s]

Epoch 9 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.13it/s]

Epoch 9 Train: 100%|██████████| 11/11 [00:01<00:00, 11.67it/s]

Epoch 9 Train: 100%|██████████| 11/11 [00:01<00:00,  9.39it/s]




Epoch 9 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 9 Val:  50%|█████     | 1/2 [00:00<00:00,  3.86it/s]

Epoch 9 Val: 100%|██████████| 2/2 [00:00<00:00,  6.37it/s]




Epoch 9: tr_loss=0.6047, val_loss=0.6003, val_wll=0.3384, lr=1.00e-04, time=1.5s


Epoch 10 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 10 Train:   9%|▉         | 1/11 [00:00<00:02,  3.58it/s]

Epoch 10 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.14it/s]

Epoch 10 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.74it/s]

Epoch 10 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.61it/s]

Epoch 10 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.13it/s]

Epoch 10 Train: 100%|██████████| 11/11 [00:01<00:00, 11.69it/s]

Epoch 10 Train: 100%|██████████| 11/11 [00:01<00:00,  9.41it/s]




Epoch 10 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 10 Val:  50%|█████     | 1/2 [00:00<00:00,  4.00it/s]

Epoch 10 Val: 100%|██████████| 2/2 [00:00<00:00,  6.57it/s]




Epoch 10: tr_loss=0.6050, val_loss=0.5800, val_wll=0.3731, lr=1.00e-04, time=1.5s


Epoch 11 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 11 Train:   9%|▉         | 1/11 [00:00<00:02,  3.80it/s]

Epoch 11 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.39it/s]

Epoch 11 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.93it/s]

Epoch 11 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.75it/s]

Epoch 11 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.22it/s]

Epoch 11 Train: 100%|██████████| 11/11 [00:01<00:00, 11.79it/s]

Epoch 11 Train: 100%|██████████| 11/11 [00:01<00:00,  9.63it/s]




Epoch 11 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 11 Val:  50%|█████     | 1/2 [00:00<00:00,  3.75it/s]

Epoch 11 Val: 100%|██████████| 2/2 [00:00<00:00,  6.08it/s]




Epoch 11: tr_loss=0.6221, val_loss=0.5911, val_wll=0.3707, lr=1.00e-04, time=1.5s


Epoch 12 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 12 Train:   9%|▉         | 1/11 [00:00<00:02,  3.71it/s]

Epoch 12 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.27it/s]

Epoch 12 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.84it/s]

Epoch 12 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.68it/s]

Epoch 12 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.17it/s]

Epoch 12 Train: 100%|██████████| 11/11 [00:01<00:00, 11.74it/s]

Epoch 12 Train: 100%|██████████| 11/11 [00:01<00:00,  9.52it/s]




Epoch 12 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 12 Val:  50%|█████     | 1/2 [00:00<00:00,  3.71it/s]

Epoch 12 Val: 100%|██████████| 2/2 [00:00<00:00,  6.18it/s]




Epoch 12: tr_loss=0.5907, val_loss=0.5619, val_wll=0.3794, lr=1.00e-04, time=1.5s


Epoch 13 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 13 Train:   9%|▉         | 1/11 [00:00<00:02,  3.56it/s]

Epoch 13 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.14it/s]

Epoch 13 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.75it/s]

Epoch 13 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.61it/s]

Epoch 13 Train:  82%|████████▏ | 9/11 [00:01<00:00, 10.13it/s]

Epoch 13 Train: 100%|██████████| 11/11 [00:01<00:00, 11.65it/s]

Epoch 13 Train: 100%|██████████| 11/11 [00:01<00:00,  9.41it/s]




Epoch 13 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 13 Val:  50%|█████     | 1/2 [00:00<00:00,  3.75it/s]

Epoch 13 Val: 100%|██████████| 2/2 [00:00<00:00,  6.21it/s]




Epoch 13: tr_loss=0.5176, val_loss=0.6012, val_wll=0.3554, lr=5.00e-05, time=1.5s


Epoch 14 Train:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch 14 Train:   9%|▉         | 1/11 [00:00<00:02,  3.71it/s]

Epoch 14 Train:  27%|██▋       | 3/11 [00:00<00:01,  7.30it/s]

Epoch 14 Train:  45%|████▌     | 5/11 [00:00<00:00,  8.86it/s]

Epoch 14 Train:  64%|██████▎   | 7/11 [00:00<00:00,  9.68it/s]

Epoch 14 Train:  82%|████████▏ | 9/11 [00:00<00:00, 10.15it/s]

Epoch 14 Train: 100%|██████████| 11/11 [00:01<00:00, 11.67it/s]

Epoch 14 Train: 100%|██████████| 11/11 [00:01<00:00,  9.51it/s]




Epoch 14 Val:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 14 Val:  50%|█████     | 1/2 [00:00<00:00,  3.72it/s]

Epoch 14 Val: 100%|██████████| 2/2 [00:00<00:00,  6.09it/s]

Epoch 14: tr_loss=0.4993, val_loss=0.6044, val_wll=0.3428, lr=5.00e-05, time=1.5s
Early stopping at epoch 14
Fold 5 done: best_val_wll=0.3384, time=0.4min






Swin-T CV WLL: 0.3463 (mean fold: 0.3462 ± 0.0397)
Saved oof_logits_swin_tta.npy. Next: ensemble with 3-way, re-optimize weights/LR/alpha, target <0.40 WLL, request expert review.
