# Plan to medal - Herbarium 2021 (FGVC8)

Objectives:
- Establish robust GPU-enabled pipeline quickly.
- Validate data loading from provided metadata and image structure.
- Build a fast, strong baseline (image classifier with transfer learning, e.g., timm/torchvision).
- Lock CV protocol mirroring test (stratified KFold on species).
- Iterate: augmentations, resolution, label smoothing, class-balanced loss, mixup/cutmix. Cache checkpoints.
- Ensembling if time allows (multi-seed, different backbones).

Milestones:
1) Environment + data sanity checks.
2) EDA: label distribution, image counts, sources, leakage checks.
3) Baseline model: pretrained backbone (e.g., convnext_tiny / efficientnet_b3).
4) CV + OOF + test inference, submission.csv.
5) Error analysis and quick improvements (augmentations, loss, re-balancing).

Notes:
- Metric: macro F1, long-tail: prioritize class-balanced sampling/loss.
- Always log progress and elapsed time per fold.
- Save OOF and test logits for future blends.

Next: Run environment & data checks.

In [21]:
import os, json, random, time, subprocess, sys
from pathlib import Path
import pandas as pd

t0 = time.time()
print("=== GPU check: nvidia-smi ===", flush=True)
try:
    print(subprocess.run(['bash','-lc','nvidia-smi || true'], capture_output=True, text=True).stdout)
except Exception as e:
    print("nvidia-smi failed:", e)

ROOT = Path('.')
TRAIN_DIR = ROOT / 'train'
TEST_DIR = ROOT / 'test'
TRAIN_META = TRAIN_DIR / 'metadata.json'
TEST_META = TEST_DIR / 'metadata.json'
SAMPLE_SUB = ROOT / 'sample_submission.csv'

print("=== Files existence ===", flush=True)
for p in [TRAIN_DIR, TEST_DIR, TRAIN_DIR/'images', TEST_DIR/'images', TRAIN_META, TEST_META, SAMPLE_SUB]:
    print(f"{p}: {'OK' if p.exists() else 'MISSING'}")

def load_coco_like(pth):
    with open(pth, 'r') as f:
        data = json.load(f)
    # Expected keys: annotations, images, categories
    anns = pd.DataFrame(data.get('annotations', []))
    imgs = pd.DataFrame(data.get('images', []))
    cats = pd.DataFrame(data.get('categories', []))
    return anns, imgs, cats

print("\n=== Load train/test COCO-like metadata ===", flush=True)
tr_anns, tr_imgs, tr_cats = load_coco_like(TRAIN_META)
te_anns, te_imgs, te_cats = load_coco_like(TEST_META)  # test usually has images and no annotations
print("train anns/imgs/cats shapes:", tr_anns.shape, tr_imgs.shape, tr_cats.shape)
print("test anns/imgs/cats shapes:", te_anns.shape, te_imgs.shape, te_cats.shape)
print("train anns head:\n", tr_anns.head(3))
print("train imgs head:\n", tr_imgs.head(3))
print("cats head:\n", tr_cats.head(3))

# Identify columns
species_col = None
for c in ['category_id','species_id','label','target']:
    if c in tr_anns.columns:
        species_col = c; break
image_id_col = 'image_id' if 'image_id' in tr_anns.columns else None
specimen_col = None
for c in ['specimen_id','specimen','record_id']:
    if c in tr_imgs.columns:
        specimen_col = c; break
file_col = None
for c in ['file_name','file','path']:
    if c in tr_imgs.columns:
        file_col = c; break

print(f"Detected -> species: {species_col}, specimen: {specimen_col}, file: {file_col}, image_id_col: {image_id_col}")
assert species_col is not None and image_id_col is not None, "Train annotations must have category_id/species and image_id"
assert file_col is not None and 'id' in tr_imgs.columns, "Train images must have id and file_name"

# Merge anns with imgs to get file paths and specimen
train_df = tr_anns.merge(tr_imgs, left_on=image_id_col, right_on='id', how='inner', suffixes=('_ann','_img'))
# Build correct absolute file paths (file_name already includes 'images/...')
train_df['file_path'] = train_df[file_col].apply(lambda x: TRAIN_DIR / x)
te_imgs['file_path'] = te_imgs[file_col].apply(lambda x: TEST_DIR / x)
print("train_df merged shape:", train_df.shape)

print("\n=== Basic stats ===", flush=True)
n_species = train_df[species_col].nunique()
n_imgs = len(train_df)
n_specimens = train_df[specimen_col].nunique() if specimen_col and specimen_col in train_df.columns else None
print("species unique:", n_species, "images:", n_imgs, "specimens unique:", n_specimens)
cnts = train_df[species_col].value_counts()
print("per-class count head:", cnts.head().to_dict())
print("per-class count tail:", cnts.tail().to_dict())

print("\n=== Path existence checks (sample) ===", flush=True)
def build_path(split, rel):
    base = TRAIN_DIR if split=='train' else TEST_DIR
    return base / rel  # file_name already contains 'images/...'
sample_train = train_df.sample(min(20, len(train_df)), random_state=42)
missing_train = 0
for r in sample_train[file_col].tolist():
    p = build_path('train', r)
    if not p.exists():
        missing_train += 1
print(f"Train sample missing paths: {missing_train}/{len(sample_train)}")

print("\n=== Test metadata overview ===", flush=True)
assert 'id' in te_imgs.columns and file_col in te_imgs.columns, "Test images must have id and file_name"
print(te_imgs[[ 'id', file_col ]].head(3))
sample_test = te_imgs.sample(min(20, len(te_imgs)), random_state=42)
missing_test = 0
for r in sample_test[file_col].tolist():
    p = build_path('test', r)
    if not p.exists():
        missing_test += 1
print(f"Test sample missing paths: {missing_test}/{len(sample_test)}")

print("\n=== Sample submission ===", flush=True)
sdf = pd.read_csv(SAMPLE_SUB)
print("sample_submission head:\n", sdf.head())
print("Elapsed: %.2fs" % (time.time()-t0), flush=True)

=== GPU check: nvidia-smi ===


Mon Sep 29 01:51:45 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.06             Driver Version: 550.144.06     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A10-24Q                 On  |   00000002:00:00.0 Off |                    0 |
| N/A   N/A    P0             N/A /  N/A  |    6722MiB /  24512MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

train: OK
test: OK
train/images: OK
test/images: OK
train/metadata.json: OK
test/metadata.json: OK
sample_submission.csv: OK

=== Load train/test COCO-like metadata ===


train anns/imgs/cats shapes: (1779953, 4) (1779953, 5) (64500, 4)
test anns/imgs/cats shapes: (0, 0) (477806, 5) (0, 0)
train anns head:
    category_id       id  image_id  institution_id
0        60492  1608432   1608432               0
1        60492   796948    796948               0
2        60492   994447    994447               0
train imgs head:
                    file_name  height       id  license  width
0  images/604/92/1608432.jpg    1000  1608432        0    671
1   images/604/92/796948.jpg    1000   796948        0    678
2   images/604/92/994447.jpg    1000   994447        0    671
cats head:
         family  id                             name        order
0  Orchidaceae   0    Aa calceata (Rchb.f.) Schltr.  Asparagales
1  Orchidaceae   1  Aa matthewsii (Rchb.f.) Schltr.  Asparagales
2  Orchidaceae   2      Aa paleacea (Kunth) Rchb.f.  Asparagales
Detected -> species: category_id, specimen: None, file: file_name, image_id_col: image_id


train_df merged shape: (1779953, 10)

=== Basic stats ===


species unique: 64500 images: 1779953 specimens unique: None
per-class count head: {42811: 2647, 25229: 1713, 48372: 1630, 42843: 1324, 22344: 1260}
per-class count tail: {6742: 1, 57300: 1, 44413: 1, 4970: 1, 37044: 1}

=== Path existence checks (sample) ===


Train sample missing paths: 0/20

=== Test metadata overview ===


  id         file_name
0  0  images/000/0.jpg
1  1  images/000/1.jpg
2  2  images/000/2.jpg
Test sample missing paths: 0/20

=== Sample submission ===


sample_submission head:
    Id  Predicted
0   0          0
1   1          0
2   2          0
3   3          0
4   4          0
Elapsed: 14.35s


In [None]:
import json, time
from collections import Counter
import pandas as pd
from pathlib import Path

t0 = time.time()
TRAIN_META = Path('train/metadata.json')
TEST_META = Path('test/metadata.json')
SAMPLE_SUB = Path('sample_submission.csv')

print('=== Build majority-class baseline submission ===', flush=True)
print('Loading train annotations to compute majority class...', flush=True)
with open(TRAIN_META, 'r') as f:
    data = json.load(f)
anns = data.get('annotations', [])
cnt = Counter(a['category_id'] for a in anns)
mode_cat, mode_cnt = cnt.most_common(1)[0]
n_classes = len(data.get('categories', []))
print(f'Mode category_id: {mode_cat} with count {mode_cnt}; total classes: {n_classes}', flush=True)

print('Loading sample_submission to get test Id order...', flush=True)
sdf = pd.read_csv(SAMPLE_SUB)
print('sample_submission shape:', sdf.shape, 'cols:', sdf.columns.tolist())

sub = pd.DataFrame({'Id': sdf['Id'].values, 'Predicted': mode_cat})
sub.to_csv('submission.csv', index=False)
print('Wrote submission.csv')
print(sub.head())
print('Elapsed: %.2fs' % (time.time()-t0))

In [None]:
import os, sys, subprocess, shutil, time
from pathlib import Path

t0 = time.time()
print("=== Install CUDA 12.1 torch stack + vision deps ===", flush=True)

def pip(*args):
    print(">", *args, flush=True)
    subprocess.run([sys.executable, "-m", "pip", *args], check=True)

# Uninstall any existing torch stack (idempotent)
for pkg in ("torch","torchvision","torchaudio"):
    subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", pkg], check=False)

# Clean possible stray site dirs (idempotent)
for d in (
    "/app/.pip-target/torch",
    "/app/.pip-target/torchvision",
    "/app/.pip-target/torchaudio",
    "/app/.pip-target/torch-2.4.1.dist-info",
    "/app/.pip-target/torchvision-0.19.1.dist-info",
    "/app/.pip-target/torchaudio-2.4.1.dist-info",
):
    if os.path.exists(d):
        print("Removing", d); shutil.rmtree(d, ignore_errors=True)

# Install exact cu121 torch stack
pip("install",
    "--index-url", "https://download.pytorch.org/whl/cu121",
    "--extra-index-url", "https://pypi.org/simple",
    "torch==2.4.1", "torchvision==0.19.1", "torchaudio==2.4.1")

# Freeze torch versions for subsequent installs
Path("constraints.txt").write_text("\n".join([
    "torch==2.4.1",
    "torchvision==0.19.1",
    "torchaudio==2.4.1"
]))

# Install vision deps respecting constraints (avoid upgrading torch)
pip("install", "-c", "constraints.txt",
    "timm==1.0.9",
    "albumentations==1.4.14",
    "opencv-python-headless==4.10.0.84",
    "numpy", "pandas", "scikit-learn",
    "accelerate==0.34.2",
    "wandb==0.17.9",
    "einops==0.8.0",
    "--upgrade-strategy", "only-if-needed")

import torch
print("torch:", torch.__version__, "built CUDA:", getattr(torch.version, "cuda", None))
print("CUDA available:", torch.cuda.is_available())
assert str(getattr(torch.version, "cuda", "")).startswith("12.1"), f"Wrong CUDA build: {torch.version.cuda}"
assert torch.cuda.is_available(), "CUDA not available"
print("GPU:", torch.cuda.get_device_name(0))
print("Elapsed install: %.1fs" % (time.time()-t0), flush=True)

In [23]:
import os, time, math, random, json, gc, sys
from pathlib import Path
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold
import timm
from timm.data.mixup import Mixup
from timm.loss import SoftTargetCrossEntropy
from timm.utils.model_ema import ModelEmaV2
from PIL import Image
import torchvision.transforms as T
from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, SequentialLR

torch.backends.cudnn.benchmark = True
torch.set_float32_matmul_precision('high')

ROOT = Path('.')
TRAIN_DIR = ROOT / 'train'
TEST_DIR = ROOT / 'test'
SAMPLE_SUB = ROOT / 'sample_submission.csv'

# Use already loaded metadata via fast reload (small cost compared to images I/O)
with open(TRAIN_DIR / 'metadata.json','r') as f: tr_data = json.load(f)
with open(TEST_DIR / 'metadata.json','r') as f: te_data = json.load(f)
tr_anns = pd.DataFrame(tr_data['annotations'])
tr_imgs = pd.DataFrame(tr_data['images'])
tr_cats = pd.DataFrame(tr_data['categories'])
te_imgs = pd.DataFrame(te_data['images'])

# Merge to get file paths
train_df = tr_anns.merge(tr_imgs, left_on='image_id', right_on='id', how='inner')
train_df['file_path'] = train_df['file_name'].apply(lambda x: TRAIN_DIR / x)
te_imgs['file_path'] = te_imgs['file_name'].apply(lambda x: TEST_DIR / x)

# Label encoding
cats = sorted(train_df['category_id'].unique())
cat2lbl = {c:i for i,c in enumerate(cats)}
lbl2cat = {i:c for c,i in cat2lbl.items()}
train_df['label'] = train_df['category_id'].map(cat2lbl)
NUM_CLASSES = len(cats)
print('NUM_CLASSES:', NUM_CLASSES, 'train images:', len(train_df))

# Transforms using torchvision (avoid albumentations dependency issues)
IMG_SIZE = 256
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

train_tfms_clean = T.Compose([
    T.RandomResizedCrop(IMG_SIZE, scale=(0.6, 1.0), ratio=(0.75, 1.33)),
    T.RandomHorizontalFlip(p=0.5),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])
train_tfms_aug = T.Compose([
    T.RandomResizedCrop(IMG_SIZE, scale=(0.6, 1.0), ratio=(0.75, 1.33)),
    T.RandomHorizontalFlip(p=0.5),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1, hue=0.05),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])
val_tfms = T.Compose([
    T.Resize(IMG_SIZE),
    T.CenterCrop(IMG_SIZE),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])

class HerbDataset(Dataset):
    def __init__(self, df, tfms, is_train=False):
        self.df = df.reset_index(drop=True)
        self.tfms = tfms
        self.is_train = is_train
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        row = self.df.iloc[i]
        fp = row['file_path']
        try:
            img = Image.open(fp).convert('RGB')
        except Exception:
            img = Image.fromarray(np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8))
        img = self.tfms(img)
        label = int(row['label'])
        return img, label

def seed_everything(seed=42):
    random.seed(seed); np.random.seed(seed); torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
seed_everything(42)

# Build folds: exclude singleton classes from validation; 2-fold stratified on labels with >=2 samples
label_counts = train_df['label'].value_counts()
valid_labels = label_counts[label_counts >= 2].index
idx_valid = train_df[train_df['label'].isin(valid_labels)].index.values
y_valid = train_df.loc[idx_valid, 'label'].values
singletons_idx = train_df[~train_df['label'].isin(valid_labels)].index.values
print('Singletons (train-only):', len(singletons_idx))
skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
folds = []
for tr_sub, va_sub in skf.split(idx_valid, y_valid):
    tr_idx = idx_valid[tr_sub]
    va_idx = idx_valid[va_sub]
    tr_idx_full = np.concatenate([tr_idx, singletons_idx])
    folds.append((tr_idx_full, va_idx))
print('Prepared folds:', len(folds), '| fold0 train/val sizes:', len(folds[0][0]), len(folds[0][1]))

def build_model():
    model = timm.create_model('convnext_tiny.fb_in22k', pretrained=True, num_classes=NUM_CLASSES, drop_path_rate=0.1)
    model.to(device)
    model = model.to(memory_format=torch.channels_last)
    return model

def evaluate(model, loader, max_batches=None):
    m = model
    m.eval()
    preds, targs = [], []
    with torch.no_grad():
        for b, (imgs, labels) in enumerate(loader):
            imgs = imgs.to(device, non_blocking=True, memory_format=torch.channels_last)
            labels = labels.to(device, non_blocking=True)
            logits = m(imgs)
            pred = logits.argmax(1)
            preds.append(pred.detach().cpu().numpy())
            targs.append(labels.detach().cpu().numpy())
            if max_batches is not None and (b+1) >= max_batches:
                break
    preds = np.concatenate(preds); targs = np.concatenate(targs)
    f1 = f1_score(targs, preds, average='macro')
    return f1

def run_fold(fold, train_idx, val_idx, epochs=9, batch_size=64, accum_steps=4):
    print(f"\n=== Fold {fold} ===", flush=True)
    tr_df = train_df.loc[train_idx]
    va_df = train_df.loc[val_idx]
    # Datasets (start with clean tfms, will toggle to aug later epochs)
    ds_tr = HerbDataset(tr_df, train_tfms_clean, is_train=True)
    ds_va = HerbDataset(va_df, val_tfms, is_train=False)
    # Mini-val: random 1 image per class (cap at 10k) for stable early F1
    mini_va = va_df.groupby('label', group_keys=False).apply(lambda g: g.sample(1, random_state=42))
    if len(mini_va) > 10000:
        mini_va = mini_va.sample(10000, random_state=42)
    ds_mini = HerbDataset(mini_va, val_tfms, is_train=False)
    # Balanced sampling per fold (1/sqrt(freq)), epoch length 300k
    counts = tr_df['label'].value_counts()
    w = tr_df['label'].map(lambda x: 1.0 / (counts[x] ** 0.5)).values
    sampler = WeightedRandomSampler(w, num_samples=300000, replacement=True)
    dl_tr = DataLoader(ds_tr, batch_size=batch_size, sampler=sampler, shuffle=False,
                       num_workers=12, pin_memory=True, persistent_workers=True,
                       prefetch_factor=2, drop_last=True)
    dl_va = DataLoader(ds_va, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True, persistent_workers=True)
    dl_mini = DataLoader(ds_mini, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, persistent_workers=True)

    model = build_model()
    # Differential LRs: higher LR for head, lower for backbone
    head_names = ['head','classifier','fc']
    head_params, backbone_params = [], []
    for n,p in model.named_parameters():
        (head_params if any(h in n for h in head_names) else backbone_params).append(p)
    optimizer = torch.optim.AdamW([
        {'params': backbone_params, 'lr': 5e-4},
        {'params': head_params, 'lr': 5e-3}
    ], weight_decay=0.05)
    # Scheduler with warmup then cosine (step once per epoch)
    warmup = LinearLR(optimizer, start_factor=0.1, total_iters=1)
    cosine = CosineAnnealingLR(optimizer, T_max=max(1, epochs-1), eta_min=1e-6)
    scheduler = SequentialLR(optimizer, [warmup, cosine], milestones=[1])
    # EMA
    ema = ModelEmaV2(model, decay=0.999, device=device)
    # Losses and mixup setup
    criterion_ce = nn.CrossEntropyLoss(label_smoothing=0.1).to(device)
    criterion_soft = SoftTargetCrossEntropy().to(device)
    mixup_fn = Mixup(mixup_alpha=0.3, cutmix_alpha=0.3, prob=0.0, mode='batch', label_smoothing=0.0, num_classes=NUM_CLASSES)
    scaler = torch.amp.GradScaler('cuda', enabled=True)
    best_score = -1.0
    best_f1 = -1.0
    best_mini = -1.0
    best_path = f'ckpt_fold{fold}.pt'
    global_step = 0

    for epoch in range(1, epochs+1):
        t0 = time.time()
        model.train()
        # Toggle training transforms: enable ColorJitter from epoch 3+
        ds_tr.tfms = train_tfms_aug if epoch >= 3 else train_tfms_clean
        # Mixup schedule: 1-3 off (CE+LS), 4..E-1 on (SoftTarget), last off (CE+LS)
        if epoch <= 3:
            mixup_fn.prob = 0.0
            criterion = criterion_ce
        elif epoch < epochs:
            mixup_fn.prob = 0.3
            criterion = criterion_soft
        else:
            mixup_fn.prob = 0.0
            criterion = criterion_ce

        optimizer.zero_grad(set_to_none=True)
        running_loss = 0.0
        steps_this_epoch = 0
        for step, (imgs, labels) in enumerate(dl_tr):
            imgs = imgs.to(device, non_blocking=True, memory_format=torch.channels_last)
            labels = labels.to(device, non_blocking=True)
            with torch.amp.autocast('cuda', enabled=True):
                if mixup_fn.prob > 0.0:
                    imgs_m, targets = mixup_fn(imgs, labels)
                    logits = model(imgs_m)
                    loss = criterion(logits, targets)
                else:
                    logits = model(imgs)
                    loss = criterion(logits, labels)
            loss = loss / accum_steps
            scaler.scale(loss).backward()
            steps_this_epoch += 1
            if (step + 1) % accum_steps == 0:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                scaler.step(optimizer)
                scaler.update()
                ema.update(model)
                optimizer.zero_grad(set_to_none=True)
            running_loss += loss.item() * accum_steps
            global_step += 1
            if step % 50 == 0:
                elapsed = time.time()-t0
                print(f"Fold {fold} Epoch {epoch} Step {step} Loss {running_loss/(step+1):.4f} Elapsed {elapsed:.1f}s", flush=True)
        # Flush remaining grads if last micro-batch didn't hit accum boundary
        remainder = steps_this_epoch % accum_steps
        if remainder != 0:
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            ema.update(model)
            optimizer.zero_grad(set_to_none=True)

        # Evaluate using EMA weights
        print("Evaluating on mini-val (1/img per class, cap 10k)", flush=True)
        mini_f1 = evaluate(ema.module, dl_mini, max_batches=None)
        # Cap full val; increase cap on final epoch for better selection stability
        max_val_batches = 1500 if epoch == epochs else 500
        print(f"Evaluating on capped validation: {max_val_batches} batches", flush=True)
        f1 = evaluate(ema.module, dl_va, max_batches=max_val_batches)
        sel_score = 0.7 * mini_f1 + 0.3 * f1
        ep_time = time.time()-t0
        print(f"Fold {fold} Epoch {epoch} mini-F1 {mini_f1:.5f} | F1 {f1:.5f} | sel {sel_score:.5f} time {ep_time:.1f}s", flush=True)
        if sel_score > best_score:
            best_score = sel_score
            best_f1 = f1
            best_mini = mini_f1
            torch.save({'model': model.state_dict(), 'ema': ema.state_dict(), 'f1': f1, 'mini_f1': mini_f1, 'sel': sel_score}, best_path)
            print(f"Saved best to {best_path}", flush=True)
        # Step scheduler once per epoch
        scheduler.step()
        gc.collect(); torch.cuda.empty_cache()
    print(f"Fold {fold} best sel {best_score:.5f} (mini {best_mini:.5f} | f1 {best_f1:.5f})", flush=True)
    return best_f1

def build_test_df_in_submission_order():
    sub = pd.read_csv(SAMPLE_SUB)
    # Ensure consistent dtypes for merge
    sub['Id'] = sub['Id'].astype(int)
    te_tmp = te_imgs.copy()
    te_tmp['id'] = te_tmp['id'].astype(int)
    df = sub[['Id']].merge(te_tmp[['id','file_path']], left_on='Id', right_on='id', how='left')
    return df

def infer_fold(fold, batch_size=128):
    ckpt_path = f'ckpt_fold{fold}.pt'
    state = torch.load(ckpt_path, map_location='cpu')
    model = build_model()
    model.load_state_dict(state['model'], strict=True)
    # Rebuild EMA and load
    ema = ModelEmaV2(model, decay=0.999, device=device)
    if 'ema' in state:
        ema.load_state_dict(state['ema'], strict=False)
    m = ema.module if 'ema' in state else model
    m.eval()
    test_df = build_test_df_in_submission_order()
    # Build a test dataset that returns tensors (dummy labels)
    tmp = test_df.copy()
    tmp['label'] = 0
    class TestDataset(Dataset):
        def __init__(self, df, tfms):
            self.df = df.reset_index(drop=True); self.tfms = tfms
        def __len__(self): return len(self.df)
        def __getitem__(self, i):
            fp = self.df.iloc[i]['file_path']
            try:
                img = Image.open(fp).convert('RGB')
            except Exception:
                img = Image.fromarray(np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8))
            img = self.tfms(img)
            return img, 0
    ds_te = TestDataset(tmp, val_tfms)
    dl_te = DataLoader(ds_te, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True, persistent_workers=True)
    pred_lbl = []
    with torch.no_grad():
        for imgs, _ in dl_te:
            imgs = imgs.to(device, non_blocking=True, memory_format=torch.channels_last)
            # TTA 2x: center + hflip
            logits1 = m(imgs)
            imgs_flipped = torch.flip(imgs, dims=[3])
            logits2 = m(imgs_flipped)
            logits = (logits1 + logits2) / 2.0
            pred_lbl.append(logits.argmax(1).detach().cpu().numpy())
    pred_lbl = np.concatenate(pred_lbl).astype(np.int32)
    np.save(f'test_predlbl_fold{fold}.npy', pred_lbl)
    return pred_lbl

# Orchestrate: run folds and generate submission
def train_and_predict(epochs=9, batch_size=64, run_folds=(0,1)):
    fold_scores = []
    for fold, (tr, va) in enumerate(folds):
        if fold not in run_folds:
            continue
        print(f"Starting fold {fold} with {len(tr)} train idx and {len(va)} val idx", flush=True)
        f1 = run_fold(fold, tr, va, epochs=epochs, batch_size=batch_size, accum_steps=4)
        fold_scores.append((fold, f1))
    print('Fold scores:', fold_scores, flush=True)
    # Inference and ensembling (majority vote on per-fold label preds)
    fold_preds = []
    for fold, _ in fold_scores:
        preds = infer_fold(fold, batch_size=128)
        fold_preds.append(preds)
    if len(fold_preds) == 1:
        vote = fold_preds[0]
    else:
        stack = np.stack(fold_preds, 0)
        vote = np.apply_along_axis(lambda x: np.bincount(x).argmax(), 0, stack)
    pred_cat = [lbl2cat[int(x)] for x in vote]
    sub = pd.DataFrame({ 'Id': pd.read_csv(SAMPLE_SUB)['Id'].values, 'Predicted': pred_cat })
    sub.to_csv('submission.csv', index=False)
    print('Wrote submission.csv with shape', sub.shape, flush=True)

print('Ready: IMG_SIZE=256; CV=2 folds with singleton-excluded val. CE+LS epochs 1-3; mixup on from epoch 4..E-1; last epoch CE+LS; 300k samples/epoch; EMA=0.999; grad accumulation x4; scheduler step per epoch. Checkpoint by 0.7*mini + 0.3*capped F1; final epoch cap 1500 batches.', flush=True)

NUM_CLASSES: 64500 train images: 1779953
Singletons (train-only): 7745


Prepared folds: 2 | fold0 train/val sizes: 893849 886104
Ready: IMG_SIZE=256; CV=2 folds with singleton-excluded val. CE+LS epochs 1-3; mixup on from epoch 4..E-1; last epoch CE+LS; 300k samples/epoch; EMA=0.999; grad accumulation x4; scheduler step per epoch. Checkpoint by 0.7*mini + 0.3*capped F1; final epoch cap 1500 batches.


In [None]:
# Quick smoke run on a small subset to validate pipeline end-to-end
import time, numpy as np, pandas as pd
start_time = time.time()
print('Preparing subset for smoke run...', flush=True)

# Subset size (keep small to finish fast); training still outputs 64.5k classes
N = 20000
if len(train_df) > N:
    sub_df = train_df.sample(N, random_state=42).reset_index(drop=True)
    train_df = sub_df
    # Recompute stratification bins on subset
    cls_counts = train_df['category_id'].value_counts()
    freq = train_df['category_id'].map(cls_counts)
    try:
        y_strat = pd.qcut(freq, q=10, duplicates='drop').astype(str)
    except Exception:
        y_strat = pd.cut(freq, bins=10, include_lowest=True).astype(str)
    # Rebuild folds on subset
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    folds = list(skf.split(train_df.index.values, y_strat))
    print('Subset prepared:', len(train_df), 'rows; folds rebuilt:', len(folds), flush=True)
else:
    print('Dataset smaller than subset target; using full set.', flush=True)

print('Starting quick smoke run (1 epoch, fold 0)...', flush=True)
train_and_predict(epochs=1, batch_size=16, run_folds=(0,))
print('Smoke run elapsed: %.1f min' % ((time.time()-start_time)/60.0), flush=True)

In [None]:
import sys, subprocess, time
t0=time.time()
print('Fixing albumentations/albucore mismatch...', flush=True)
def pip(*args):
    print('>', *args, flush=True)
    subprocess.run([sys.executable, '-m', 'pip', *args], check=True)
subprocess.run([sys.executable, '-m', 'pip', 'uninstall', '-y', 'albucore'], check=False)
pip('install', '-c', 'constraints.txt', 'albumentations==1.3.1', '--upgrade-strategy', 'only-if-needed')
import albumentations as A
print('albumentations version:', A.__version__)
print('Done in %.1fs' % (time.time()-t0))

In [None]:
import sys, subprocess, shutil, os, time
t0=time.time()
print('Force-reinstall albumentations 1.3.1 and remove albucore remnants...', flush=True)
def run(*args, check=True):
    print('>', *args, flush=True)
    subprocess.run([sys.executable, '-m', 'pip', *args], check=check)

# Uninstall both to avoid mixed files
run('uninstall', '-y', 'albumentations')
run('uninstall', '-y', 'albucore')

# Clean any stray site dirs that might shadow
for d in (
    '/app/.pip-target/albumentations',
    '/app/.pip-target/albumentations-1.4.14.dist-info',
    '/app/.pip-target/albumentations-1.3.1.dist-info',
    '/app/.pip-target/albucore',
    '/app/.pip-target/albucore-0.0.33.dist-info',
):
    if os.path.exists(d):
        print('Removing', d); shutil.rmtree(d, ignore_errors=True)

# Force reinstall desired version (no albucore dependency)
run('install', 'albumentations==1.3.1', '--no-cache-dir', '--upgrade', '--force-reinstall')

import albumentations as A
print('albumentations version:', A.__version__)
print('Done in %.1fs' % (time.time()-t0), flush=True)

In [24]:
# Train-only extension to 9 epochs (avoid heavy test inference here)
import time
t0 = time.time()
print('Training fold 0 @256, bs=64, epochs=9 (no test inference at end)...', flush=True)
tr_idx, va_idx = folds[0]
best_f1 = run_fold(0, tr_idx, va_idx, epochs=9, batch_size=64, accum_steps=4)
print('Fold0 training complete. Best val F1:', best_f1, flush=True)
print('Elapsed: %.1f min' % ((time.time()-t0)/60.0), flush=True)

Training fold 0 @256, bs=64, epochs=9 (no test inference at end)...



=== Fold 0 ===


  mini_va = va_df.groupby('label', group_keys=False).apply(lambda g: g.sample(1, random_state=42))


Fold 0 Epoch 1 Step 0 Loss 11.0847 Elapsed 1.7s


Fold 0 Epoch 1 Step 50 Loss 11.0946 Elapsed 9.8s


Fold 0 Epoch 1 Step 100 Loss 11.0833 Elapsed 17.9s


Fold 0 Epoch 1 Step 150 Loss 11.0769 Elapsed 26.0s


Fold 0 Epoch 1 Step 200 Loss 11.0722 Elapsed 34.2s


Fold 0 Epoch 1 Step 250 Loss 11.0680 Elapsed 42.4s


Fold 0 Epoch 1 Step 300 Loss 11.0635 Elapsed 50.6s


Fold 0 Epoch 1 Step 350 Loss 11.0596 Elapsed 58.8s


Fold 0 Epoch 1 Step 400 Loss 11.0576 Elapsed 67.0s


Fold 0 Epoch 1 Step 450 Loss 11.0543 Elapsed 75.2s


Fold 0 Epoch 1 Step 500 Loss 11.0522 Elapsed 83.4s


Fold 0 Epoch 1 Step 550 Loss 11.0501 Elapsed 91.6s


Fold 0 Epoch 1 Step 600 Loss 11.0483 Elapsed 99.9s


Fold 0 Epoch 1 Step 650 Loss 11.0459 Elapsed 108.1s


Fold 0 Epoch 1 Step 700 Loss 11.0444 Elapsed 116.4s


Fold 0 Epoch 1 Step 750 Loss 11.0422 Elapsed 124.6s


Fold 0 Epoch 1 Step 800 Loss 11.0410 Elapsed 132.9s


Fold 0 Epoch 1 Step 850 Loss 11.0390 Elapsed 141.1s


Fold 0 Epoch 1 Step 900 Loss 11.0379 Elapsed 149.4s


Fold 0 Epoch 1 Step 950 Loss 11.0362 Elapsed 157.7s


Fold 0 Epoch 1 Step 1000 Loss 11.0345 Elapsed 166.0s


Fold 0 Epoch 1 Step 1050 Loss 11.0328 Elapsed 174.3s


Fold 0 Epoch 1 Step 1100 Loss 11.0312 Elapsed 182.7s


Fold 0 Epoch 1 Step 1150 Loss 11.0293 Elapsed 191.0s


Fold 0 Epoch 1 Step 1200 Loss 11.0269 Elapsed 199.3s


Fold 0 Epoch 1 Step 1250 Loss 11.0244 Elapsed 207.7s


Fold 0 Epoch 1 Step 1300 Loss 11.0218 Elapsed 216.1s


Fold 0 Epoch 1 Step 1350 Loss 11.0182 Elapsed 224.4s


Fold 0 Epoch 1 Step 1400 Loss 11.0144 Elapsed 232.8s


Fold 0 Epoch 1 Step 1450 Loss 11.0097 Elapsed 241.2s


Fold 0 Epoch 1 Step 1500 Loss 11.0057 Elapsed 249.6s


Fold 0 Epoch 1 Step 1550 Loss 11.0016 Elapsed 257.9s


Fold 0 Epoch 1 Step 1600 Loss 10.9967 Elapsed 266.3s


Fold 0 Epoch 1 Step 1650 Loss 10.9917 Elapsed 274.7s


Fold 0 Epoch 1 Step 1700 Loss 10.9866 Elapsed 283.1s


Fold 0 Epoch 1 Step 1750 Loss 10.9825 Elapsed 291.5s


Fold 0 Epoch 1 Step 1800 Loss 10.9772 Elapsed 299.9s


Fold 0 Epoch 1 Step 1850 Loss 10.9718 Elapsed 308.2s


Fold 0 Epoch 1 Step 1900 Loss 10.9660 Elapsed 316.6s


Fold 0 Epoch 1 Step 1950 Loss 10.9603 Elapsed 325.0s


Fold 0 Epoch 1 Step 2000 Loss 10.9551 Elapsed 333.4s


Fold 0 Epoch 1 Step 2050 Loss 10.9495 Elapsed 341.8s


Fold 0 Epoch 1 Step 2100 Loss 10.9437 Elapsed 350.2s


Fold 0 Epoch 1 Step 2150 Loss 10.9374 Elapsed 358.5s


Fold 0 Epoch 1 Step 2200 Loss 10.9324 Elapsed 366.9s


Fold 0 Epoch 1 Step 2250 Loss 10.9254 Elapsed 375.3s


Fold 0 Epoch 1 Step 2300 Loss 10.9196 Elapsed 383.7s


Fold 0 Epoch 1 Step 2350 Loss 10.9137 Elapsed 392.0s


Fold 0 Epoch 1 Step 2400 Loss 10.9078 Elapsed 400.4s


Fold 0 Epoch 1 Step 2450 Loss 10.9018 Elapsed 408.8s


Fold 0 Epoch 1 Step 2500 Loss 10.8953 Elapsed 417.2s


Fold 0 Epoch 1 Step 2550 Loss 10.8895 Elapsed 425.6s


Fold 0 Epoch 1 Step 2600 Loss 10.8836 Elapsed 434.0s


Fold 0 Epoch 1 Step 2650 Loss 10.8777 Elapsed 442.4s


Fold 0 Epoch 1 Step 2700 Loss 10.8716 Elapsed 450.8s


Fold 0 Epoch 1 Step 2750 Loss 10.8653 Elapsed 459.2s


Fold 0 Epoch 1 Step 2800 Loss 10.8582 Elapsed 467.6s


Fold 0 Epoch 1 Step 2850 Loss 10.8516 Elapsed 476.0s


Fold 0 Epoch 1 Step 2900 Loss 10.8445 Elapsed 484.5s


Fold 0 Epoch 1 Step 2950 Loss 10.8378 Elapsed 492.8s


Fold 0 Epoch 1 Step 3000 Loss 10.8311 Elapsed 501.3s


Fold 0 Epoch 1 Step 3050 Loss 10.8245 Elapsed 509.7s


Fold 0 Epoch 1 Step 3100 Loss 10.8177 Elapsed 518.1s


Fold 0 Epoch 1 Step 3150 Loss 10.8105 Elapsed 526.5s


Fold 0 Epoch 1 Step 3200 Loss 10.8028 Elapsed 534.9s


Fold 0 Epoch 1 Step 3250 Loss 10.7960 Elapsed 543.3s


Fold 0 Epoch 1 Step 3300 Loss 10.7886 Elapsed 551.7s


Fold 0 Epoch 1 Step 3350 Loss 10.7812 Elapsed 560.1s


Fold 0 Epoch 1 Step 3400 Loss 10.7740 Elapsed 568.5s


Fold 0 Epoch 1 Step 3450 Loss 10.7665 Elapsed 576.9s


Fold 0 Epoch 1 Step 3500 Loss 10.7580 Elapsed 585.3s


Fold 0 Epoch 1 Step 3550 Loss 10.7501 Elapsed 593.7s


Fold 0 Epoch 1 Step 3600 Loss 10.7419 Elapsed 602.1s


Fold 0 Epoch 1 Step 3650 Loss 10.7334 Elapsed 610.6s


Fold 0 Epoch 1 Step 3700 Loss 10.7251 Elapsed 619.0s


Fold 0 Epoch 1 Step 3750 Loss 10.7170 Elapsed 627.4s


Fold 0 Epoch 1 Step 3800 Loss 10.7084 Elapsed 635.8s


Fold 0 Epoch 1 Step 3850 Loss 10.6999 Elapsed 644.2s


Fold 0 Epoch 1 Step 3900 Loss 10.6914 Elapsed 652.7s


Fold 0 Epoch 1 Step 3950 Loss 10.6822 Elapsed 661.1s


Fold 0 Epoch 1 Step 4000 Loss 10.6735 Elapsed 669.6s


Fold 0 Epoch 1 Step 4050 Loss 10.6645 Elapsed 678.0s


Fold 0 Epoch 1 Step 4100 Loss 10.6551 Elapsed 686.4s


Fold 0 Epoch 1 Step 4150 Loss 10.6459 Elapsed 694.9s


Fold 0 Epoch 1 Step 4200 Loss 10.6367 Elapsed 703.3s


Fold 0 Epoch 1 Step 4250 Loss 10.6270 Elapsed 711.7s


Fold 0 Epoch 1 Step 4300 Loss 10.6170 Elapsed 720.2s


Fold 0 Epoch 1 Step 4350 Loss 10.6068 Elapsed 728.6s


Fold 0 Epoch 1 Step 4400 Loss 10.5971 Elapsed 737.1s


Fold 0 Epoch 1 Step 4450 Loss 10.5866 Elapsed 745.5s


Fold 0 Epoch 1 Step 4500 Loss 10.5761 Elapsed 754.0s


Fold 0 Epoch 1 Step 4550 Loss 10.5658 Elapsed 762.4s


Fold 0 Epoch 1 Step 4600 Loss 10.5557 Elapsed 770.8s


Fold 0 Epoch 1 Step 4650 Loss 10.5455 Elapsed 779.3s


Evaluating on mini-val (1/img per class, cap 10k)


Evaluating on capped validation: 500 batches


Fold 0 Epoch 1 mini-F1 0.00005 | F1 0.00147 | sel 0.00048 time 839.3s


Saved best to ckpt_fold0.pt




Fold 0 Epoch 2 Step 0 Loss 9.6302 Elapsed 0.7s


Fold 0 Epoch 2 Step 50 Loss 10.3585 Elapsed 9.1s


Fold 0 Epoch 2 Step 100 Loss 10.4840 Elapsed 17.4s


Fold 0 Epoch 2 Step 150 Loss 10.5256 Elapsed 25.7s


Fold 0 Epoch 2 Step 200 Loss 10.5332 Elapsed 34.1s


Fold 0 Epoch 2 Step 250 Loss 10.5341 Elapsed 42.5s


Fold 0 Epoch 2 Step 300 Loss 10.5231 Elapsed 50.8s


Fold 0 Epoch 2 Step 350 Loss 10.5196 Elapsed 59.2s


Fold 0 Epoch 2 Step 400 Loss 10.5110 Elapsed 67.6s


Fold 0 Epoch 2 Step 450 Loss 10.5008 Elapsed 75.9s


Fold 0 Epoch 2 Step 500 Loss 10.4918 Elapsed 84.3s


Fold 0 Epoch 2 Step 550 Loss 10.4786 Elapsed 92.7s


Fold 0 Epoch 2 Step 600 Loss 10.4654 Elapsed 101.0s


Fold 0 Epoch 2 Step 650 Loss 10.4506 Elapsed 109.4s


Fold 0 Epoch 2 Step 700 Loss 10.4343 Elapsed 117.8s


Fold 0 Epoch 2 Step 750 Loss 10.4185 Elapsed 126.2s


Fold 0 Epoch 2 Step 800 Loss 10.4021 Elapsed 134.5s


Fold 0 Epoch 2 Step 850 Loss 10.3879 Elapsed 142.9s


Fold 0 Epoch 2 Step 900 Loss 10.3741 Elapsed 151.3s


Fold 0 Epoch 2 Step 950 Loss 10.3607 Elapsed 159.7s


Fold 0 Epoch 2 Step 1000 Loss 10.3433 Elapsed 168.1s


Fold 0 Epoch 2 Step 1050 Loss 10.3280 Elapsed 176.5s


Fold 0 Epoch 2 Step 1100 Loss 10.3098 Elapsed 184.9s


Fold 0 Epoch 2 Step 1150 Loss 10.2950 Elapsed 193.2s


Fold 0 Epoch 2 Step 1200 Loss 10.2768 Elapsed 201.7s


Fold 0 Epoch 2 Step 1250 Loss 10.2594 Elapsed 210.0s


Fold 0 Epoch 2 Step 1300 Loss 10.2435 Elapsed 218.5s


Fold 0 Epoch 2 Step 1350 Loss 10.2272 Elapsed 226.9s


Fold 0 Epoch 2 Step 1400 Loss 10.2110 Elapsed 235.3s


Fold 0 Epoch 2 Step 1450 Loss 10.1951 Elapsed 243.7s


Fold 0 Epoch 2 Step 1500 Loss 10.1776 Elapsed 252.1s


Fold 0 Epoch 2 Step 1550 Loss 10.1600 Elapsed 260.5s


Fold 0 Epoch 2 Step 1600 Loss 10.1424 Elapsed 269.0s


Fold 0 Epoch 2 Step 1650 Loss 10.1272 Elapsed 277.4s


Fold 0 Epoch 2 Step 1700 Loss 10.1119 Elapsed 285.8s


Fold 0 Epoch 2 Step 1750 Loss 10.0970 Elapsed 294.3s


Fold 0 Epoch 2 Step 1800 Loss 10.0808 Elapsed 302.7s


Fold 0 Epoch 2 Step 1850 Loss 10.0660 Elapsed 311.1s


Fold 0 Epoch 2 Step 1900 Loss 10.0505 Elapsed 319.6s


Fold 0 Epoch 2 Step 1950 Loss 10.0336 Elapsed 328.0s


Fold 0 Epoch 2 Step 2000 Loss 10.0162 Elapsed 336.4s


Fold 0 Epoch 2 Step 2050 Loss 9.9988 Elapsed 344.8s


Fold 0 Epoch 2 Step 2100 Loss 9.9830 Elapsed 353.3s


Fold 0 Epoch 2 Step 2150 Loss 9.9661 Elapsed 361.7s


Fold 0 Epoch 2 Step 2200 Loss 9.9486 Elapsed 370.1s


Fold 0 Epoch 2 Step 2250 Loss 9.9312 Elapsed 378.5s


Fold 0 Epoch 2 Step 2300 Loss 9.9135 Elapsed 386.9s


Fold 0 Epoch 2 Step 2350 Loss 9.8980 Elapsed 395.3s


Fold 0 Epoch 2 Step 2400 Loss 9.8817 Elapsed 403.7s


Fold 0 Epoch 2 Step 2450 Loss 9.8639 Elapsed 412.1s


Fold 0 Epoch 2 Step 2500 Loss 9.8466 Elapsed 420.5s


Fold 0 Epoch 2 Step 2550 Loss 9.8301 Elapsed 428.9s


Fold 0 Epoch 2 Step 2600 Loss 9.8132 Elapsed 437.3s


Fold 0 Epoch 2 Step 2650 Loss 9.7972 Elapsed 445.6s


Fold 0 Epoch 2 Step 2700 Loss 9.7800 Elapsed 454.0s


Fold 0 Epoch 2 Step 2750 Loss 9.7626 Elapsed 462.4s


Fold 0 Epoch 2 Step 2800 Loss 9.7456 Elapsed 470.8s


Fold 0 Epoch 2 Step 2850 Loss 9.7288 Elapsed 479.2s


Fold 0 Epoch 2 Step 2900 Loss 9.7123 Elapsed 487.6s


Fold 0 Epoch 2 Step 2950 Loss 9.6938 Elapsed 495.9s


Fold 0 Epoch 2 Step 3000 Loss 9.6757 Elapsed 504.3s


Fold 0 Epoch 2 Step 3050 Loss 9.6575 Elapsed 512.7s


Fold 0 Epoch 2 Step 3100 Loss 9.6401 Elapsed 521.1s


Fold 0 Epoch 2 Step 3150 Loss 9.6229 Elapsed 529.5s


Fold 0 Epoch 2 Step 3200 Loss 9.6058 Elapsed 537.9s


Fold 0 Epoch 2 Step 3250 Loss 9.5881 Elapsed 546.3s


Fold 0 Epoch 2 Step 3300 Loss 9.5705 Elapsed 554.8s


Fold 0 Epoch 2 Step 3350 Loss 9.5538 Elapsed 563.2s


Fold 0 Epoch 2 Step 3400 Loss 9.5368 Elapsed 571.6s


Fold 0 Epoch 2 Step 3450 Loss 9.5190 Elapsed 580.0s


Fold 0 Epoch 2 Step 3500 Loss 9.5018 Elapsed 588.4s


Fold 0 Epoch 2 Step 3550 Loss 9.4845 Elapsed 596.8s


Fold 0 Epoch 2 Step 3600 Loss 9.4676 Elapsed 605.2s


Fold 0 Epoch 2 Step 3650 Loss 9.4509 Elapsed 613.6s


Fold 0 Epoch 2 Step 3700 Loss 9.4343 Elapsed 622.0s


Fold 0 Epoch 2 Step 3750 Loss 9.4172 Elapsed 630.4s


Fold 0 Epoch 2 Step 3800 Loss 9.3998 Elapsed 638.9s


Fold 0 Epoch 2 Step 3850 Loss 9.3830 Elapsed 647.3s


Fold 0 Epoch 2 Step 3900 Loss 9.3659 Elapsed 655.7s


Fold 0 Epoch 2 Step 3950 Loss 9.3488 Elapsed 664.1s


Fold 0 Epoch 2 Step 4000 Loss 9.3319 Elapsed 672.5s


Fold 0 Epoch 2 Step 4050 Loss 9.3152 Elapsed 681.0s


Fold 0 Epoch 2 Step 4100 Loss 9.2975 Elapsed 689.4s


Fold 0 Epoch 2 Step 4150 Loss 9.2811 Elapsed 697.8s


Fold 0 Epoch 2 Step 4200 Loss 9.2649 Elapsed 706.2s


Fold 0 Epoch 2 Step 4250 Loss 9.2473 Elapsed 714.6s


Fold 0 Epoch 2 Step 4300 Loss 9.2300 Elapsed 723.1s


Fold 0 Epoch 2 Step 4350 Loss 9.2131 Elapsed 731.5s


Fold 0 Epoch 2 Step 4400 Loss 9.1973 Elapsed 739.9s


Fold 0 Epoch 2 Step 4450 Loss 9.1802 Elapsed 748.3s


Fold 0 Epoch 2 Step 4500 Loss 9.1630 Elapsed 756.8s


Fold 0 Epoch 2 Step 4550 Loss 9.1471 Elapsed 765.2s


Fold 0 Epoch 2 Step 4600 Loss 9.1296 Elapsed 773.6s


Fold 0 Epoch 2 Step 4650 Loss 9.1134 Elapsed 782.1s


Evaluating on mini-val (1/img per class, cap 10k)


Evaluating on capped validation: 500 batches


Fold 0 Epoch 2 mini-F1 0.00346 | F1 0.01624 | sel 0.00729 time 840.7s


Saved best to ckpt_fold0.pt


Fold 0 Epoch 3 Step 0 Loss 7.5914 Elapsed 0.6s


Fold 0 Epoch 3 Step 50 Loss 7.5384 Elapsed 8.9s


Fold 0 Epoch 3 Step 100 Loss 7.5344 Elapsed 17.3s


Fold 0 Epoch 3 Step 150 Loss 7.4998 Elapsed 25.7s


Fold 0 Epoch 3 Step 200 Loss 7.4715 Elapsed 34.0s


Fold 0 Epoch 3 Step 250 Loss 7.4642 Elapsed 42.4s


Fold 0 Epoch 3 Step 300 Loss 7.4620 Elapsed 50.8s


Fold 0 Epoch 3 Step 350 Loss 7.4469 Elapsed 59.1s


Fold 0 Epoch 3 Step 400 Loss 7.4382 Elapsed 67.5s


Fold 0 Epoch 3 Step 450 Loss 7.4291 Elapsed 75.9s


Fold 0 Epoch 3 Step 500 Loss 7.4257 Elapsed 84.3s


Fold 0 Epoch 3 Step 550 Loss 7.4088 Elapsed 92.7s


Fold 0 Epoch 3 Step 600 Loss 7.3917 Elapsed 101.1s


Fold 0 Epoch 3 Step 650 Loss 7.3815 Elapsed 109.4s


Fold 0 Epoch 3 Step 700 Loss 7.3679 Elapsed 117.8s


Fold 0 Epoch 3 Step 750 Loss 7.3553 Elapsed 126.2s


Fold 0 Epoch 3 Step 800 Loss 7.3406 Elapsed 134.6s


Fold 0 Epoch 3 Step 850 Loss 7.3248 Elapsed 143.0s


Fold 0 Epoch 3 Step 900 Loss 7.3198 Elapsed 151.4s


Fold 0 Epoch 3 Step 950 Loss 7.3062 Elapsed 159.8s


Fold 0 Epoch 3 Step 1000 Loss 7.2974 Elapsed 168.2s


Fold 0 Epoch 3 Step 1050 Loss 7.2827 Elapsed 176.5s


Fold 0 Epoch 3 Step 1100 Loss 7.2696 Elapsed 184.9s


Fold 0 Epoch 3 Step 1150 Loss 7.2591 Elapsed 193.3s


Fold 0 Epoch 3 Step 1200 Loss 7.2512 Elapsed 201.7s


Fold 0 Epoch 3 Step 1250 Loss 7.2455 Elapsed 210.1s


Fold 0 Epoch 3 Step 1300 Loss 7.2369 Elapsed 218.5s


Fold 0 Epoch 3 Step 1350 Loss 7.2259 Elapsed 226.9s


Fold 0 Epoch 3 Step 1400 Loss 7.2194 Elapsed 235.3s


Fold 0 Epoch 3 Step 1450 Loss 7.2091 Elapsed 243.7s


Fold 0 Epoch 3 Step 1500 Loss 7.1993 Elapsed 252.2s


Fold 0 Epoch 3 Step 1550 Loss 7.1926 Elapsed 260.6s


Fold 0 Epoch 3 Step 1600 Loss 7.1778 Elapsed 269.0s


Fold 0 Epoch 3 Step 1650 Loss 7.1653 Elapsed 277.4s


Fold 0 Epoch 3 Step 1700 Loss 7.1571 Elapsed 285.8s


Fold 0 Epoch 3 Step 1750 Loss 7.1441 Elapsed 294.2s


Fold 0 Epoch 3 Step 1800 Loss 7.1313 Elapsed 302.7s


Fold 0 Epoch 3 Step 1850 Loss 7.1197 Elapsed 311.1s


Fold 0 Epoch 3 Step 1900 Loss 7.1102 Elapsed 319.5s


Fold 0 Epoch 3 Step 1950 Loss 7.1013 Elapsed 328.0s


Fold 0 Epoch 3 Step 2000 Loss 7.0913 Elapsed 336.4s


Fold 0 Epoch 3 Step 2050 Loss 7.0829 Elapsed 344.8s


Fold 0 Epoch 3 Step 2100 Loss 7.0710 Elapsed 353.2s


Fold 0 Epoch 3 Step 2150 Loss 7.0586 Elapsed 361.6s


Fold 0 Epoch 3 Step 2200 Loss 7.0492 Elapsed 370.0s


Fold 0 Epoch 3 Step 2250 Loss 7.0394 Elapsed 378.4s


Fold 0 Epoch 3 Step 2300 Loss 7.0286 Elapsed 386.9s


Fold 0 Epoch 3 Step 2350 Loss 7.0202 Elapsed 395.3s


Fold 0 Epoch 3 Step 2400 Loss 7.0105 Elapsed 403.7s


Fold 0 Epoch 3 Step 2450 Loss 6.9999 Elapsed 412.1s


Fold 0 Epoch 3 Step 2500 Loss 6.9894 Elapsed 420.5s


Fold 0 Epoch 3 Step 2550 Loss 6.9774 Elapsed 428.9s


Fold 0 Epoch 3 Step 2600 Loss 6.9673 Elapsed 437.3s


Fold 0 Epoch 3 Step 2650 Loss 6.9596 Elapsed 445.7s


Fold 0 Epoch 3 Step 2700 Loss 6.9535 Elapsed 454.1s


Fold 0 Epoch 3 Step 2750 Loss 6.9454 Elapsed 462.5s


Fold 0 Epoch 3 Step 2800 Loss 6.9365 Elapsed 470.9s


Fold 0 Epoch 3 Step 2850 Loss 6.9263 Elapsed 479.3s


Fold 0 Epoch 3 Step 2900 Loss 6.9160 Elapsed 487.7s


Fold 0 Epoch 3 Step 2950 Loss 6.9047 Elapsed 496.1s


Fold 0 Epoch 3 Step 3000 Loss 6.8936 Elapsed 504.5s


Fold 0 Epoch 3 Step 3050 Loss 6.8857 Elapsed 512.9s


Fold 0 Epoch 3 Step 3100 Loss 6.8750 Elapsed 521.3s


Fold 0 Epoch 3 Step 3150 Loss 6.8632 Elapsed 529.7s


Fold 0 Epoch 3 Step 3200 Loss 6.8536 Elapsed 538.1s


Fold 0 Epoch 3 Step 3250 Loss 6.8444 Elapsed 546.5s


Fold 0 Epoch 3 Step 3300 Loss 6.8372 Elapsed 554.9s


Fold 0 Epoch 3 Step 3350 Loss 6.8318 Elapsed 563.3s


Fold 0 Epoch 3 Step 3400 Loss 6.8238 Elapsed 571.7s


Fold 0 Epoch 3 Step 3450 Loss 6.8156 Elapsed 580.1s


Fold 0 Epoch 3 Step 3500 Loss 6.8080 Elapsed 588.5s


Fold 0 Epoch 3 Step 3550 Loss 6.7978 Elapsed 596.9s


Fold 0 Epoch 3 Step 3600 Loss 6.7881 Elapsed 605.3s


Fold 0 Epoch 3 Step 3650 Loss 6.7806 Elapsed 613.7s


Fold 0 Epoch 3 Step 3700 Loss 6.7731 Elapsed 622.1s


Fold 0 Epoch 3 Step 3750 Loss 6.7644 Elapsed 630.5s


Fold 0 Epoch 3 Step 3800 Loss 6.7554 Elapsed 638.9s


Fold 0 Epoch 3 Step 3850 Loss 6.7457 Elapsed 647.3s


Fold 0 Epoch 3 Step 3900 Loss 6.7383 Elapsed 655.7s


Fold 0 Epoch 3 Step 3950 Loss 6.7316 Elapsed 664.1s


Fold 0 Epoch 3 Step 4000 Loss 6.7241 Elapsed 672.5s


Fold 0 Epoch 3 Step 4050 Loss 6.7172 Elapsed 680.9s


Fold 0 Epoch 3 Step 4100 Loss 6.7098 Elapsed 689.3s


Fold 0 Epoch 3 Step 4150 Loss 6.7028 Elapsed 697.7s


Fold 0 Epoch 3 Step 4200 Loss 6.6961 Elapsed 706.1s


Fold 0 Epoch 3 Step 4250 Loss 6.6900 Elapsed 714.5s


Fold 0 Epoch 3 Step 4300 Loss 6.6820 Elapsed 723.0s


Fold 0 Epoch 3 Step 4350 Loss 6.6742 Elapsed 731.4s


Fold 0 Epoch 3 Step 4400 Loss 6.6665 Elapsed 739.8s


Fold 0 Epoch 3 Step 4450 Loss 6.6612 Elapsed 748.2s


Fold 0 Epoch 3 Step 4500 Loss 6.6551 Elapsed 756.7s


Fold 0 Epoch 3 Step 4550 Loss 6.6488 Elapsed 765.1s


Fold 0 Epoch 3 Step 4600 Loss 6.6430 Elapsed 773.5s


Fold 0 Epoch 3 Step 4650 Loss 6.6366 Elapsed 781.9s


Evaluating on mini-val (1/img per class, cap 10k)


Evaluating on capped validation: 500 batches


Fold 0 Epoch 3 mini-F1 0.03784 | F1 0.02877 | sel 0.03512 time 840.5s


Saved best to ckpt_fold0.pt


Fold 0 Epoch 4 Step 0 Loss 4.5660 Elapsed 0.7s


Fold 0 Epoch 4 Step 50 Loss 4.7806 Elapsed 9.1s


Fold 0 Epoch 4 Step 100 Loss 4.7687 Elapsed 17.5s


Fold 0 Epoch 4 Step 150 Loss 4.7743 Elapsed 25.8s


Fold 0 Epoch 4 Step 200 Loss 4.7975 Elapsed 34.2s


Fold 0 Epoch 4 Step 250 Loss 4.8168 Elapsed 42.6s


Fold 0 Epoch 4 Step 300 Loss 4.7973 Elapsed 51.0s


Fold 0 Epoch 4 Step 350 Loss 4.7891 Elapsed 59.4s


Fold 0 Epoch 4 Step 400 Loss 4.7651 Elapsed 67.8s


Fold 0 Epoch 4 Step 450 Loss 4.7521 Elapsed 76.2s


Fold 0 Epoch 4 Step 500 Loss 4.7431 Elapsed 84.6s


Fold 0 Epoch 4 Step 550 Loss 4.7290 Elapsed 93.0s


Fold 0 Epoch 4 Step 600 Loss 4.7122 Elapsed 101.5s


Fold 0 Epoch 4 Step 650 Loss 4.6946 Elapsed 109.9s


Fold 0 Epoch 4 Step 700 Loss 4.6803 Elapsed 118.3s


Fold 0 Epoch 4 Step 750 Loss 4.6599 Elapsed 126.7s


Fold 0 Epoch 4 Step 800 Loss 4.6449 Elapsed 135.1s


Fold 0 Epoch 4 Step 850 Loss 4.6334 Elapsed 143.5s


Fold 0 Epoch 4 Step 900 Loss 4.6288 Elapsed 152.0s


Fold 0 Epoch 4 Step 950 Loss 4.6160 Elapsed 160.4s


Fold 0 Epoch 4 Step 1000 Loss 4.6066 Elapsed 168.8s


Fold 0 Epoch 4 Step 1050 Loss 4.5966 Elapsed 177.2s


Fold 0 Epoch 4 Step 1100 Loss 4.5900 Elapsed 185.6s


Fold 0 Epoch 4 Step 1150 Loss 4.5865 Elapsed 194.1s


Fold 0 Epoch 4 Step 1200 Loss 4.5778 Elapsed 202.5s


Fold 0 Epoch 4 Step 1250 Loss 4.5627 Elapsed 210.9s


Fold 0 Epoch 4 Step 1300 Loss 4.5500 Elapsed 219.3s


Fold 0 Epoch 4 Step 1350 Loss 4.5404 Elapsed 227.7s


Fold 0 Epoch 4 Step 1400 Loss 4.5266 Elapsed 236.1s


Fold 0 Epoch 4 Step 1450 Loss 4.5180 Elapsed 244.6s


Fold 0 Epoch 4 Step 1500 Loss 4.5124 Elapsed 253.0s


Fold 0 Epoch 4 Step 1550 Loss 4.5090 Elapsed 261.4s


Fold 0 Epoch 4 Step 1600 Loss 4.5022 Elapsed 269.8s


Fold 0 Epoch 4 Step 1650 Loss 4.4946 Elapsed 278.2s


Fold 0 Epoch 4 Step 1700 Loss 4.4884 Elapsed 286.7s


Fold 0 Epoch 4 Step 1750 Loss 4.4836 Elapsed 295.1s


Fold 0 Epoch 4 Step 1800 Loss 4.4755 Elapsed 303.5s


Fold 0 Epoch 4 Step 1850 Loss 4.4685 Elapsed 311.9s


Fold 0 Epoch 4 Step 1900 Loss 4.4591 Elapsed 320.4s


Fold 0 Epoch 4 Step 1950 Loss 4.4513 Elapsed 328.8s


Fold 0 Epoch 4 Step 2000 Loss 4.4440 Elapsed 337.2s


Fold 0 Epoch 4 Step 2050 Loss 4.4343 Elapsed 345.7s


Fold 0 Epoch 4 Step 2100 Loss 4.4235 Elapsed 354.1s


Fold 0 Epoch 4 Step 2150 Loss 4.4133 Elapsed 362.6s


Fold 0 Epoch 4 Step 2200 Loss 4.4027 Elapsed 371.0s


Fold 0 Epoch 4 Step 2250 Loss 4.3920 Elapsed 379.4s


Fold 0 Epoch 4 Step 2300 Loss 4.3803 Elapsed 387.9s


Fold 0 Epoch 4 Step 2350 Loss 4.3733 Elapsed 396.3s


Fold 0 Epoch 4 Step 2400 Loss 4.3666 Elapsed 404.8s


Fold 0 Epoch 4 Step 2450 Loss 4.3601 Elapsed 413.2s


Fold 0 Epoch 4 Step 2500 Loss 4.3557 Elapsed 421.7s


Fold 0 Epoch 4 Step 2550 Loss 4.3495 Elapsed 430.1s


Fold 0 Epoch 4 Step 2600 Loss 4.3433 Elapsed 438.5s


Fold 0 Epoch 4 Step 2650 Loss 4.3342 Elapsed 446.9s


Fold 0 Epoch 4 Step 2700 Loss 4.3289 Elapsed 455.4s


Fold 0 Epoch 4 Step 2750 Loss 4.3253 Elapsed 463.8s


Fold 0 Epoch 4 Step 2800 Loss 4.3209 Elapsed 472.2s


Fold 0 Epoch 4 Step 2850 Loss 4.3171 Elapsed 480.6s


Fold 0 Epoch 4 Step 2900 Loss 4.3139 Elapsed 489.1s


Fold 0 Epoch 4 Step 2950 Loss 4.3076 Elapsed 497.5s


Fold 0 Epoch 4 Step 3000 Loss 4.3041 Elapsed 505.9s


Fold 0 Epoch 4 Step 3050 Loss 4.3015 Elapsed 514.3s


Fold 0 Epoch 4 Step 3100 Loss 4.3001 Elapsed 522.8s


Fold 0 Epoch 4 Step 3150 Loss 4.2976 Elapsed 531.2s


Fold 0 Epoch 4 Step 3200 Loss 4.2939 Elapsed 539.6s


Fold 0 Epoch 4 Step 3250 Loss 4.2885 Elapsed 548.0s


Fold 0 Epoch 4 Step 3300 Loss 4.2837 Elapsed 556.5s


Fold 0 Epoch 4 Step 3350 Loss 4.2811 Elapsed 564.9s


Fold 0 Epoch 4 Step 3400 Loss 4.2782 Elapsed 573.3s


Fold 0 Epoch 4 Step 3450 Loss 4.2722 Elapsed 581.7s


Fold 0 Epoch 4 Step 3500 Loss 4.2676 Elapsed 590.1s


Fold 0 Epoch 4 Step 3550 Loss 4.2617 Elapsed 598.5s


Fold 0 Epoch 4 Step 3600 Loss 4.2577 Elapsed 606.9s


Fold 0 Epoch 4 Step 3650 Loss 4.2547 Elapsed 615.4s


Fold 0 Epoch 4 Step 3700 Loss 4.2518 Elapsed 623.8s


Fold 0 Epoch 4 Step 3750 Loss 4.2500 Elapsed 632.2s


Fold 0 Epoch 4 Step 3800 Loss 4.2463 Elapsed 640.6s


Fold 0 Epoch 4 Step 3850 Loss 4.2419 Elapsed 649.1s


Fold 0 Epoch 4 Step 3900 Loss 4.2378 Elapsed 657.5s


Fold 0 Epoch 4 Step 3950 Loss 4.2364 Elapsed 665.9s


Fold 0 Epoch 4 Step 4000 Loss 4.2351 Elapsed 674.4s


Fold 0 Epoch 4 Step 4050 Loss 4.2300 Elapsed 682.8s


Fold 0 Epoch 4 Step 4100 Loss 4.2268 Elapsed 691.2s


Fold 0 Epoch 4 Step 4150 Loss 4.2238 Elapsed 699.7s


Fold 0 Epoch 4 Step 4200 Loss 4.2213 Elapsed 708.1s


Fold 0 Epoch 4 Step 4250 Loss 4.2163 Elapsed 716.5s


Fold 0 Epoch 4 Step 4300 Loss 4.2123 Elapsed 725.0s


Fold 0 Epoch 4 Step 4350 Loss 4.2074 Elapsed 733.4s


Fold 0 Epoch 4 Step 4400 Loss 4.2013 Elapsed 741.8s


Fold 0 Epoch 4 Step 4450 Loss 4.1976 Elapsed 750.3s


Fold 0 Epoch 4 Step 4500 Loss 4.1947 Elapsed 758.7s


Fold 0 Epoch 4 Step 4550 Loss 4.1909 Elapsed 767.1s


Fold 0 Epoch 4 Step 4600 Loss 4.1880 Elapsed 775.6s


Fold 0 Epoch 4 Step 4650 Loss 4.1831 Elapsed 784.0s


Evaluating on mini-val (1/img per class, cap 10k)


Evaluating on capped validation: 500 batches


  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)


Fold 0 Epoch 4 mini-F1 0.09585 | F1 0.03533 | sel 0.07770 time 842.5s


Saved best to ckpt_fold0.pt


Fold 0 Epoch 5 Step 0 Loss 3.5854 Elapsed 0.7s


Fold 0 Epoch 5 Step 50 Loss 3.8799 Elapsed 9.0s


Fold 0 Epoch 5 Step 100 Loss 3.8344 Elapsed 17.4s


Fold 0 Epoch 5 Step 150 Loss 3.8555 Elapsed 25.8s


Fold 0 Epoch 5 Step 200 Loss 3.8387 Elapsed 34.2s


Fold 0 Epoch 5 Step 250 Loss 3.8132 Elapsed 42.6s


Fold 0 Epoch 5 Step 300 Loss 3.7717 Elapsed 51.0s


Fold 0 Epoch 5 Step 350 Loss 3.7310 Elapsed 59.4s


Fold 0 Epoch 5 Step 400 Loss 3.7032 Elapsed 67.8s


Fold 0 Epoch 5 Step 450 Loss 3.6796 Elapsed 76.2s


Fold 0 Epoch 5 Step 500 Loss 3.6727 Elapsed 84.6s


Fold 0 Epoch 5 Step 550 Loss 3.6748 Elapsed 93.0s


Fold 0 Epoch 5 Step 600 Loss 3.6894 Elapsed 101.4s


Fold 0 Epoch 5 Step 650 Loss 3.6990 Elapsed 109.8s


Fold 0 Epoch 5 Step 700 Loss 3.7101 Elapsed 118.3s


Fold 0 Epoch 5 Step 750 Loss 3.7196 Elapsed 126.7s


Fold 0 Epoch 5 Step 800 Loss 3.7303 Elapsed 135.1s


Fold 0 Epoch 5 Step 850 Loss 3.7352 Elapsed 143.5s


Fold 0 Epoch 5 Step 900 Loss 3.7270 Elapsed 151.9s


Fold 0 Epoch 5 Step 950 Loss 3.7260 Elapsed 160.3s


Fold 0 Epoch 5 Step 1000 Loss 3.7208 Elapsed 168.8s


Fold 0 Epoch 5 Step 1050 Loss 3.7166 Elapsed 177.2s


Fold 0 Epoch 5 Step 1100 Loss 3.7132 Elapsed 185.6s


Fold 0 Epoch 5 Step 1150 Loss 3.7165 Elapsed 194.0s


Fold 0 Epoch 5 Step 1200 Loss 3.7142 Elapsed 202.4s


Fold 0 Epoch 5 Step 1250 Loss 3.7040 Elapsed 210.8s


Fold 0 Epoch 5 Step 1300 Loss 3.6935 Elapsed 219.2s


Fold 0 Epoch 5 Step 1350 Loss 3.6862 Elapsed 227.6s


Fold 0 Epoch 5 Step 1400 Loss 3.6789 Elapsed 236.0s


Fold 0 Epoch 5 Step 1450 Loss 3.6658 Elapsed 244.4s


Fold 0 Epoch 5 Step 1500 Loss 3.6572 Elapsed 252.8s


Fold 0 Epoch 5 Step 1550 Loss 3.6480 Elapsed 261.2s


Fold 0 Epoch 5 Step 1600 Loss 3.6438 Elapsed 269.7s


Fold 0 Epoch 5 Step 1650 Loss 3.6404 Elapsed 278.1s


Fold 0 Epoch 5 Step 1700 Loss 3.6383 Elapsed 286.5s


Fold 0 Epoch 5 Step 1750 Loss 3.6351 Elapsed 294.9s


Fold 0 Epoch 5 Step 1800 Loss 3.6312 Elapsed 303.4s


Fold 0 Epoch 5 Step 1850 Loss 3.6290 Elapsed 311.8s


Fold 0 Epoch 5 Step 1900 Loss 3.6272 Elapsed 320.2s


Fold 0 Epoch 5 Step 1950 Loss 3.6256 Elapsed 328.7s


Fold 0 Epoch 5 Step 2000 Loss 3.6218 Elapsed 337.1s


Fold 0 Epoch 5 Step 2050 Loss 3.6200 Elapsed 345.5s


Fold 0 Epoch 5 Step 2100 Loss 3.6147 Elapsed 354.0s


Fold 0 Epoch 5 Step 2150 Loss 3.6054 Elapsed 362.4s


Fold 0 Epoch 5 Step 2200 Loss 3.5965 Elapsed 370.8s


Fold 0 Epoch 5 Step 2250 Loss 3.5883 Elapsed 379.3s


Fold 0 Epoch 5 Step 2300 Loss 3.5786 Elapsed 387.7s


Fold 0 Epoch 5 Step 2350 Loss 3.5687 Elapsed 396.1s


Fold 0 Epoch 5 Step 2400 Loss 3.5604 Elapsed 404.6s


Fold 0 Epoch 5 Step 2450 Loss 3.5541 Elapsed 413.0s


Fold 0 Epoch 5 Step 2500 Loss 3.5483 Elapsed 421.4s


Fold 0 Epoch 5 Step 2550 Loss 3.5458 Elapsed 429.8s


Fold 0 Epoch 5 Step 2600 Loss 3.5399 Elapsed 438.3s


Fold 0 Epoch 5 Step 2650 Loss 3.5354 Elapsed 446.7s


Fold 0 Epoch 5 Step 2700 Loss 3.5330 Elapsed 455.1s


Fold 0 Epoch 5 Step 2750 Loss 3.5322 Elapsed 463.5s


Fold 0 Epoch 5 Step 2800 Loss 3.5292 Elapsed 472.0s


Fold 0 Epoch 5 Step 2850 Loss 3.5269 Elapsed 480.4s


Fold 0 Epoch 5 Step 2900 Loss 3.5246 Elapsed 488.8s


Fold 0 Epoch 5 Step 2950 Loss 3.5231 Elapsed 497.2s


Fold 0 Epoch 5 Step 3000 Loss 3.5225 Elapsed 505.6s


Fold 0 Epoch 5 Step 3050 Loss 3.5205 Elapsed 514.1s


Fold 0 Epoch 5 Step 3100 Loss 3.5160 Elapsed 522.5s


Fold 0 Epoch 5 Step 3150 Loss 3.5111 Elapsed 530.9s


Fold 0 Epoch 5 Step 3200 Loss 3.5081 Elapsed 539.3s


Fold 0 Epoch 5 Step 3250 Loss 3.5054 Elapsed 547.8s


Fold 0 Epoch 5 Step 3300 Loss 3.5038 Elapsed 556.2s


Fold 0 Epoch 5 Step 3350 Loss 3.5013 Elapsed 564.6s


Fold 0 Epoch 5 Step 3400 Loss 3.4982 Elapsed 573.1s


Fold 0 Epoch 5 Step 3450 Loss 3.4956 Elapsed 581.5s


Fold 0 Epoch 5 Step 3500 Loss 3.4935 Elapsed 589.9s


Fold 0 Epoch 5 Step 3550 Loss 3.4936 Elapsed 598.3s


Fold 0 Epoch 5 Step 3600 Loss 3.4927 Elapsed 606.8s


Fold 0 Epoch 5 Step 3650 Loss 3.4895 Elapsed 615.2s


Fold 0 Epoch 5 Step 3700 Loss 3.4874 Elapsed 623.6s


Fold 0 Epoch 5 Step 3750 Loss 3.4876 Elapsed 632.0s


Fold 0 Epoch 5 Step 3800 Loss 3.4847 Elapsed 640.5s


Fold 0 Epoch 5 Step 3850 Loss 3.4835 Elapsed 648.9s


Fold 0 Epoch 5 Step 3900 Loss 3.4810 Elapsed 657.3s


Fold 0 Epoch 5 Step 3950 Loss 3.4782 Elapsed 665.7s


Fold 0 Epoch 5 Step 4000 Loss 3.4770 Elapsed 674.2s


Fold 0 Epoch 5 Step 4050 Loss 3.4744 Elapsed 682.6s


Fold 0 Epoch 5 Step 4100 Loss 3.4715 Elapsed 691.0s


Fold 0 Epoch 5 Step 4150 Loss 3.4664 Elapsed 699.4s


Fold 0 Epoch 5 Step 4200 Loss 3.4611 Elapsed 707.9s


Fold 0 Epoch 5 Step 4250 Loss 3.4586 Elapsed 716.3s


Fold 0 Epoch 5 Step 4300 Loss 3.4557 Elapsed 724.7s


Fold 0 Epoch 5 Step 4350 Loss 3.4526 Elapsed 733.1s


Fold 0 Epoch 5 Step 4400 Loss 3.4508 Elapsed 741.5s


Fold 0 Epoch 5 Step 4450 Loss 3.4500 Elapsed 749.9s


Fold 0 Epoch 5 Step 4500 Loss 3.4469 Elapsed 758.4s


Fold 0 Epoch 5 Step 4550 Loss 3.4440 Elapsed 766.8s


Fold 0 Epoch 5 Step 4600 Loss 3.4403 Elapsed 775.2s


Fold 0 Epoch 5 Step 4650 Loss 3.4366 Elapsed 783.6s


Evaluating on mini-val (1/img per class, cap 10k)


Evaluating on capped validation: 500 batches


  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)


Fold 0 Epoch 5 mini-F1 0.14155 | F1 0.03933 | sel 0.11088 time 842.1s


Saved best to ckpt_fold0.pt


Fold 0 Epoch 6 Step 0 Loss 3.1498 Elapsed 0.6s


Fold 0 Epoch 6 Step 50 Loss 3.1014 Elapsed 9.0s


Fold 0 Epoch 6 Step 100 Loss 3.0566 Elapsed 17.4s


Fold 0 Epoch 6 Step 150 Loss 2.9867 Elapsed 25.8s


Fold 0 Epoch 6 Step 200 Loss 2.9502 Elapsed 34.3s


Fold 0 Epoch 6 Step 250 Loss 2.9226 Elapsed 42.7s


Fold 0 Epoch 6 Step 300 Loss 2.9076 Elapsed 51.1s


Fold 0 Epoch 6 Step 350 Loss 2.8779 Elapsed 59.5s


Fold 0 Epoch 6 Step 400 Loss 2.8698 Elapsed 67.9s


Fold 0 Epoch 6 Step 450 Loss 2.8769 Elapsed 76.3s


Fold 0 Epoch 6 Step 500 Loss 2.8609 Elapsed 84.8s


Fold 0 Epoch 6 Step 550 Loss 2.8523 Elapsed 93.2s


Fold 0 Epoch 6 Step 600 Loss 2.8582 Elapsed 101.6s


Fold 0 Epoch 6 Step 650 Loss 2.8677 Elapsed 110.0s


Fold 0 Epoch 6 Step 700 Loss 2.8713 Elapsed 118.4s


Fold 0 Epoch 6 Step 750 Loss 2.8684 Elapsed 126.8s


Fold 0 Epoch 6 Step 800 Loss 2.8634 Elapsed 135.2s


Fold 0 Epoch 6 Step 850 Loss 2.8616 Elapsed 143.6s


Fold 0 Epoch 6 Step 900 Loss 2.8570 Elapsed 152.0s


Fold 0 Epoch 6 Step 950 Loss 2.8580 Elapsed 160.4s


Fold 0 Epoch 6 Step 1000 Loss 2.8558 Elapsed 168.8s


Fold 0 Epoch 6 Step 1050 Loss 2.8515 Elapsed 177.2s


Fold 0 Epoch 6 Step 1100 Loss 2.8487 Elapsed 185.6s


Fold 0 Epoch 6 Step 1150 Loss 2.8428 Elapsed 194.0s


Fold 0 Epoch 6 Step 1200 Loss 2.8376 Elapsed 202.4s


Fold 0 Epoch 6 Step 1250 Loss 2.8374 Elapsed 210.8s


Fold 0 Epoch 6 Step 1300 Loss 2.8335 Elapsed 219.2s


Fold 0 Epoch 6 Step 1350 Loss 2.8288 Elapsed 227.6s


Fold 0 Epoch 6 Step 1400 Loss 2.8241 Elapsed 236.1s


Fold 0 Epoch 6 Step 1450 Loss 2.8192 Elapsed 244.5s


Fold 0 Epoch 6 Step 1500 Loss 2.8124 Elapsed 252.9s


Fold 0 Epoch 6 Step 1550 Loss 2.8038 Elapsed 261.3s


Fold 0 Epoch 6 Step 1600 Loss 2.7956 Elapsed 269.7s


Fold 0 Epoch 6 Step 1650 Loss 2.7872 Elapsed 278.1s


Fold 0 Epoch 6 Step 1700 Loss 2.7811 Elapsed 286.5s


Fold 0 Epoch 6 Step 1750 Loss 2.7820 Elapsed 294.9s


Fold 0 Epoch 6 Step 1800 Loss 2.7780 Elapsed 303.3s


Fold 0 Epoch 6 Step 1850 Loss 2.7745 Elapsed 311.7s


Fold 0 Epoch 6 Step 1900 Loss 2.7714 Elapsed 320.2s


Fold 0 Epoch 6 Step 1950 Loss 2.7656 Elapsed 328.6s


Fold 0 Epoch 6 Step 2000 Loss 2.7645 Elapsed 337.0s


Fold 0 Epoch 6 Step 2050 Loss 2.7644 Elapsed 345.4s


Fold 0 Epoch 6 Step 2100 Loss 2.7613 Elapsed 353.8s


Fold 0 Epoch 6 Step 2150 Loss 2.7556 Elapsed 362.2s


Fold 0 Epoch 6 Step 2200 Loss 2.7509 Elapsed 370.7s


Fold 0 Epoch 6 Step 2250 Loss 2.7451 Elapsed 379.1s


Fold 0 Epoch 6 Step 2300 Loss 2.7404 Elapsed 387.5s


Fold 0 Epoch 6 Step 2350 Loss 2.7403 Elapsed 395.9s


Fold 0 Epoch 6 Step 2400 Loss 2.7386 Elapsed 404.4s


Fold 0 Epoch 6 Step 2450 Loss 2.7387 Elapsed 412.8s


Fold 0 Epoch 6 Step 2500 Loss 2.7351 Elapsed 421.2s


Fold 0 Epoch 6 Step 2550 Loss 2.7305 Elapsed 429.7s


Fold 0 Epoch 6 Step 2600 Loss 2.7270 Elapsed 438.1s


Fold 0 Epoch 6 Step 2650 Loss 2.7250 Elapsed 446.5s


Fold 0 Epoch 6 Step 2700 Loss 2.7244 Elapsed 455.0s


Fold 0 Epoch 6 Step 2750 Loss 2.7220 Elapsed 463.4s


Fold 0 Epoch 6 Step 2800 Loss 2.7192 Elapsed 471.9s


Fold 0 Epoch 6 Step 2850 Loss 2.7162 Elapsed 480.3s


Fold 0 Epoch 6 Step 2900 Loss 2.7131 Elapsed 488.7s


Fold 0 Epoch 6 Step 2950 Loss 2.7126 Elapsed 497.1s


Fold 0 Epoch 6 Step 3000 Loss 2.7112 Elapsed 505.5s


Fold 0 Epoch 6 Step 3050 Loss 2.7076 Elapsed 514.0s


Fold 0 Epoch 6 Step 3100 Loss 2.7036 Elapsed 522.4s


Fold 0 Epoch 6 Step 3150 Loss 2.7008 Elapsed 530.8s


Fold 0 Epoch 6 Step 3200 Loss 2.6993 Elapsed 539.3s


Fold 0 Epoch 6 Step 3250 Loss 2.6963 Elapsed 547.7s


Fold 0 Epoch 6 Step 3300 Loss 2.6943 Elapsed 556.1s


Fold 0 Epoch 6 Step 3350 Loss 2.6927 Elapsed 564.5s


Fold 0 Epoch 6 Step 3400 Loss 2.6891 Elapsed 573.0s


Fold 0 Epoch 6 Step 3450 Loss 2.6860 Elapsed 581.4s


Fold 0 Epoch 6 Step 3500 Loss 2.6813 Elapsed 589.8s


Fold 0 Epoch 6 Step 3550 Loss 2.6794 Elapsed 598.3s


Fold 0 Epoch 6 Step 3600 Loss 2.6763 Elapsed 606.7s


Fold 0 Epoch 6 Step 3650 Loss 2.6740 Elapsed 615.1s


Fold 0 Epoch 6 Step 3700 Loss 2.6720 Elapsed 623.5s


Fold 0 Epoch 6 Step 3750 Loss 2.6699 Elapsed 631.9s


Fold 0 Epoch 6 Step 3800 Loss 2.6698 Elapsed 640.3s


Fold 0 Epoch 6 Step 3850 Loss 2.6668 Elapsed 648.7s


Fold 0 Epoch 6 Step 3900 Loss 2.6656 Elapsed 657.1s


Fold 0 Epoch 6 Step 3950 Loss 2.6629 Elapsed 665.5s


Fold 0 Epoch 6 Step 4000 Loss 2.6605 Elapsed 673.9s


Fold 0 Epoch 6 Step 4050 Loss 2.6571 Elapsed 682.3s


Fold 0 Epoch 6 Step 4100 Loss 2.6529 Elapsed 690.7s


Fold 0 Epoch 6 Step 4150 Loss 2.6479 Elapsed 699.1s


Fold 0 Epoch 6 Step 4200 Loss 2.6433 Elapsed 707.5s


Fold 0 Epoch 6 Step 4250 Loss 2.6398 Elapsed 715.9s


Fold 0 Epoch 6 Step 4300 Loss 2.6372 Elapsed 724.3s


Fold 0 Epoch 6 Step 4350 Loss 2.6346 Elapsed 732.7s


Fold 0 Epoch 6 Step 4400 Loss 2.6304 Elapsed 741.2s


Fold 0 Epoch 6 Step 4450 Loss 2.6274 Elapsed 749.6s


Fold 0 Epoch 6 Step 4500 Loss 2.6259 Elapsed 758.0s


Fold 0 Epoch 6 Step 4550 Loss 2.6241 Elapsed 766.4s


Fold 0 Epoch 6 Step 4600 Loss 2.6213 Elapsed 774.8s


Fold 0 Epoch 6 Step 4650 Loss 2.6184 Elapsed 783.2s


Evaluating on mini-val (1/img per class, cap 10k)


Evaluating on capped validation: 500 batches


  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)


Fold 0 Epoch 6 mini-F1 0.18055 | F1 0.04283 | sel 0.13923 time 841.7s


Saved best to ckpt_fold0.pt


Fold 0 Epoch 7 Step 0 Loss 1.9333 Elapsed 0.7s


Fold 0 Epoch 7 Step 50 Loss 2.2198 Elapsed 9.1s


Fold 0 Epoch 7 Step 100 Loss 2.2375 Elapsed 17.5s


Fold 0 Epoch 7 Step 150 Loss 2.2152 Elapsed 25.9s


Fold 0 Epoch 7 Step 200 Loss 2.2062 Elapsed 34.3s


Fold 0 Epoch 7 Step 250 Loss 2.1821 Elapsed 42.7s


Fold 0 Epoch 7 Step 300 Loss 2.1654 Elapsed 51.1s


Fold 0 Epoch 7 Step 350 Loss 2.1454 Elapsed 59.5s


Fold 0 Epoch 7 Step 400 Loss 2.1409 Elapsed 67.9s


Fold 0 Epoch 7 Step 450 Loss 2.1298 Elapsed 76.3s


Fold 0 Epoch 7 Step 500 Loss 2.1235 Elapsed 84.8s


Fold 0 Epoch 7 Step 550 Loss 2.1199 Elapsed 93.1s


Fold 0 Epoch 7 Step 600 Loss 2.1109 Elapsed 101.6s


Fold 0 Epoch 7 Step 650 Loss 2.1058 Elapsed 110.0s


Fold 0 Epoch 7 Step 700 Loss 2.0983 Elapsed 118.4s


Fold 0 Epoch 7 Step 750 Loss 2.0943 Elapsed 126.8s


Fold 0 Epoch 7 Step 800 Loss 2.0848 Elapsed 135.2s


Fold 0 Epoch 7 Step 850 Loss 2.0795 Elapsed 143.6s


Fold 0 Epoch 7 Step 900 Loss 2.0752 Elapsed 152.0s


Fold 0 Epoch 7 Step 950 Loss 2.0663 Elapsed 160.4s


Fold 0 Epoch 7 Step 1000 Loss 2.0539 Elapsed 168.8s


Fold 0 Epoch 7 Step 1050 Loss 2.0478 Elapsed 177.2s


Fold 0 Epoch 7 Step 1100 Loss 2.0426 Elapsed 185.6s


Fold 0 Epoch 7 Step 1150 Loss 2.0367 Elapsed 194.0s


Fold 0 Epoch 7 Step 1200 Loss 2.0349 Elapsed 202.4s


Fold 0 Epoch 7 Step 1250 Loss 2.0257 Elapsed 210.8s


Fold 0 Epoch 7 Step 1300 Loss 2.0185 Elapsed 219.3s


Fold 0 Epoch 7 Step 1350 Loss 2.0145 Elapsed 227.7s


Fold 0 Epoch 7 Step 1400 Loss 2.0075 Elapsed 236.1s


Fold 0 Epoch 7 Step 1450 Loss 2.0009 Elapsed 244.5s


Fold 0 Epoch 7 Step 1500 Loss 1.9945 Elapsed 253.0s


Fold 0 Epoch 7 Step 1550 Loss 1.9880 Elapsed 261.4s


Fold 0 Epoch 7 Step 1600 Loss 1.9849 Elapsed 269.8s


Fold 0 Epoch 7 Step 1650 Loss 1.9822 Elapsed 278.2s


Fold 0 Epoch 7 Step 1700 Loss 1.9748 Elapsed 286.6s


Fold 0 Epoch 7 Step 1750 Loss 1.9746 Elapsed 295.1s


Fold 0 Epoch 7 Step 1800 Loss 1.9742 Elapsed 303.5s


Fold 0 Epoch 7 Step 1850 Loss 1.9697 Elapsed 311.9s


Fold 0 Epoch 7 Step 1900 Loss 1.9653 Elapsed 320.3s


Fold 0 Epoch 7 Step 1950 Loss 1.9599 Elapsed 328.7s


Fold 0 Epoch 7 Step 2000 Loss 1.9564 Elapsed 337.2s


Fold 0 Epoch 7 Step 2050 Loss 1.9537 Elapsed 345.6s


Fold 0 Epoch 7 Step 2100 Loss 1.9501 Elapsed 354.0s


Fold 0 Epoch 7 Step 2150 Loss 1.9489 Elapsed 362.4s


Fold 0 Epoch 7 Step 2200 Loss 1.9442 Elapsed 370.8s


Fold 0 Epoch 7 Step 2250 Loss 1.9419 Elapsed 379.2s


Fold 0 Epoch 7 Step 2300 Loss 1.9396 Elapsed 387.7s


Fold 0 Epoch 7 Step 2350 Loss 1.9391 Elapsed 396.1s


Fold 0 Epoch 7 Step 2400 Loss 1.9362 Elapsed 404.5s


Fold 0 Epoch 7 Step 2450 Loss 1.9345 Elapsed 412.9s


Fold 0 Epoch 7 Step 2500 Loss 1.9324 Elapsed 421.3s


Fold 0 Epoch 7 Step 2550 Loss 1.9284 Elapsed 429.8s


Fold 0 Epoch 7 Step 2600 Loss 1.9248 Elapsed 438.2s


Fold 0 Epoch 7 Step 2650 Loss 1.9229 Elapsed 446.6s


Fold 0 Epoch 7 Step 2700 Loss 1.9208 Elapsed 455.0s


Fold 0 Epoch 7 Step 2750 Loss 1.9177 Elapsed 463.5s


Fold 0 Epoch 7 Step 2800 Loss 1.9167 Elapsed 471.9s


Fold 0 Epoch 7 Step 2850 Loss 1.9133 Elapsed 480.3s


Fold 0 Epoch 7 Step 2900 Loss 1.9096 Elapsed 488.8s


Fold 0 Epoch 7 Step 2950 Loss 1.9082 Elapsed 497.2s


Fold 0 Epoch 7 Step 3000 Loss 1.9046 Elapsed 505.6s


Fold 0 Epoch 7 Step 3050 Loss 1.9016 Elapsed 514.1s


Fold 0 Epoch 7 Step 3100 Loss 1.8996 Elapsed 522.5s


Fold 0 Epoch 7 Step 3150 Loss 1.8979 Elapsed 530.9s


Fold 0 Epoch 7 Step 3200 Loss 1.8947 Elapsed 539.4s


Fold 0 Epoch 7 Step 3250 Loss 1.8931 Elapsed 547.8s


Fold 0 Epoch 7 Step 3300 Loss 1.8923 Elapsed 556.2s


Fold 0 Epoch 7 Step 3350 Loss 1.8883 Elapsed 564.6s


Fold 0 Epoch 7 Step 3400 Loss 1.8849 Elapsed 573.1s


Fold 0 Epoch 7 Step 3450 Loss 1.8819 Elapsed 581.5s


Fold 0 Epoch 7 Step 3500 Loss 1.8789 Elapsed 589.9s


Fold 0 Epoch 7 Step 3550 Loss 1.8782 Elapsed 598.3s


Fold 0 Epoch 7 Step 3600 Loss 1.8761 Elapsed 606.7s


Fold 0 Epoch 7 Step 3650 Loss 1.8746 Elapsed 615.1s


Fold 0 Epoch 7 Step 3700 Loss 1.8709 Elapsed 623.5s


Fold 0 Epoch 7 Step 3750 Loss 1.8681 Elapsed 631.9s


Fold 0 Epoch 7 Step 3800 Loss 1.8648 Elapsed 640.3s


Fold 0 Epoch 7 Step 3850 Loss 1.8627 Elapsed 648.7s


Fold 0 Epoch 7 Step 3900 Loss 1.8611 Elapsed 657.1s


Fold 0 Epoch 7 Step 3950 Loss 1.8586 Elapsed 665.5s


Fold 0 Epoch 7 Step 4000 Loss 1.8561 Elapsed 673.9s


Fold 0 Epoch 7 Step 4050 Loss 1.8534 Elapsed 682.3s


Fold 0 Epoch 7 Step 4100 Loss 1.8506 Elapsed 690.7s


Fold 0 Epoch 7 Step 4150 Loss 1.8499 Elapsed 699.1s


Fold 0 Epoch 7 Step 4200 Loss 1.8477 Elapsed 707.5s


Fold 0 Epoch 7 Step 4250 Loss 1.8468 Elapsed 715.9s


Fold 0 Epoch 7 Step 4300 Loss 1.8450 Elapsed 724.3s


Fold 0 Epoch 7 Step 4350 Loss 1.8430 Elapsed 732.7s


Fold 0 Epoch 7 Step 4400 Loss 1.8406 Elapsed 741.2s


Fold 0 Epoch 7 Step 4450 Loss 1.8392 Elapsed 749.6s


Fold 0 Epoch 7 Step 4500 Loss 1.8372 Elapsed 758.0s


Fold 0 Epoch 7 Step 4550 Loss 1.8355 Elapsed 766.4s


Fold 0 Epoch 7 Step 4600 Loss 1.8337 Elapsed 774.8s


Fold 0 Epoch 7 Step 4650 Loss 1.8323 Elapsed 783.2s


Evaluating on mini-val (1/img per class, cap 10k)


Evaluating on capped validation: 500 batches


  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)


Fold 0 Epoch 7 mini-F1 0.21361 | F1 0.04713 | sel 0.16367 time 841.6s


Saved best to ckpt_fold0.pt


Fold 0 Epoch 8 Step 0 Loss 1.3532 Elapsed 0.6s


Fold 0 Epoch 8 Step 50 Loss 1.5759 Elapsed 9.0s


Fold 0 Epoch 8 Step 100 Loss 1.5673 Elapsed 17.3s


Fold 0 Epoch 8 Step 150 Loss 1.5584 Elapsed 25.7s


Fold 0 Epoch 8 Step 200 Loss 1.5500 Elapsed 34.0s


Fold 0 Epoch 8 Step 250 Loss 1.5319 Elapsed 42.4s


Fold 0 Epoch 8 Step 300 Loss 1.5097 Elapsed 50.8s


Fold 0 Epoch 8 Step 350 Loss 1.4916 Elapsed 59.2s


Fold 0 Epoch 8 Step 400 Loss 1.4713 Elapsed 67.6s


Fold 0 Epoch 8 Step 450 Loss 1.4644 Elapsed 76.0s


Fold 0 Epoch 8 Step 500 Loss 1.4572 Elapsed 84.5s


Fold 0 Epoch 8 Step 550 Loss 1.4529 Elapsed 92.9s


Fold 0 Epoch 8 Step 600 Loss 1.4499 Elapsed 101.3s


Fold 0 Epoch 8 Step 650 Loss 1.4421 Elapsed 109.8s


Fold 0 Epoch 8 Step 700 Loss 1.4332 Elapsed 118.2s


Fold 0 Epoch 8 Step 750 Loss 1.4265 Elapsed 126.6s


Fold 0 Epoch 8 Step 800 Loss 1.4210 Elapsed 135.1s


Fold 0 Epoch 8 Step 850 Loss 1.4162 Elapsed 143.5s


Fold 0 Epoch 8 Step 900 Loss 1.4150 Elapsed 151.9s


Fold 0 Epoch 8 Step 950 Loss 1.4120 Elapsed 160.3s


Fold 0 Epoch 8 Step 1000 Loss 1.4107 Elapsed 168.8s


Fold 0 Epoch 8 Step 1050 Loss 1.4016 Elapsed 177.2s


Fold 0 Epoch 8 Step 1100 Loss 1.3964 Elapsed 185.6s


Fold 0 Epoch 8 Step 1150 Loss 1.3911 Elapsed 194.1s


Fold 0 Epoch 8 Step 1200 Loss 1.3846 Elapsed 202.5s


Fold 0 Epoch 8 Step 1250 Loss 1.3800 Elapsed 210.9s


Fold 0 Epoch 8 Step 1300 Loss 1.3750 Elapsed 219.3s


Fold 0 Epoch 8 Step 1350 Loss 1.3688 Elapsed 227.7s


Fold 0 Epoch 8 Step 1400 Loss 1.3650 Elapsed 236.2s


Fold 0 Epoch 8 Step 1450 Loss 1.3606 Elapsed 244.6s


Fold 0 Epoch 8 Step 1500 Loss 1.3572 Elapsed 253.0s


Fold 0 Epoch 8 Step 1550 Loss 1.3530 Elapsed 261.4s


Fold 0 Epoch 8 Step 1600 Loss 1.3493 Elapsed 269.9s


Fold 0 Epoch 8 Step 1650 Loss 1.3476 Elapsed 278.3s


Fold 0 Epoch 8 Step 1700 Loss 1.3444 Elapsed 286.7s


Fold 0 Epoch 8 Step 1750 Loss 1.3425 Elapsed 295.1s


Fold 0 Epoch 8 Step 1800 Loss 1.3397 Elapsed 303.5s


Fold 0 Epoch 8 Step 1850 Loss 1.3351 Elapsed 311.9s


Fold 0 Epoch 8 Step 1900 Loss 1.3317 Elapsed 320.4s


Fold 0 Epoch 8 Step 1950 Loss 1.3282 Elapsed 328.8s


Fold 0 Epoch 8 Step 2000 Loss 1.3253 Elapsed 337.2s


Fold 0 Epoch 8 Step 2050 Loss 1.3224 Elapsed 345.6s


Fold 0 Epoch 8 Step 2100 Loss 1.3169 Elapsed 354.0s


Fold 0 Epoch 8 Step 2150 Loss 1.3131 Elapsed 362.4s


Fold 0 Epoch 8 Step 2200 Loss 1.3094 Elapsed 370.8s


Fold 0 Epoch 8 Step 2250 Loss 1.3070 Elapsed 379.3s


Fold 0 Epoch 8 Step 2300 Loss 1.3056 Elapsed 387.7s


Fold 0 Epoch 8 Step 2350 Loss 1.3027 Elapsed 396.1s


Fold 0 Epoch 8 Step 2400 Loss 1.2999 Elapsed 404.5s


Fold 0 Epoch 8 Step 2450 Loss 1.2971 Elapsed 412.9s


Fold 0 Epoch 8 Step 2500 Loss 1.2944 Elapsed 421.3s


Fold 0 Epoch 8 Step 2550 Loss 1.2923 Elapsed 429.7s


Fold 0 Epoch 8 Step 2600 Loss 1.2897 Elapsed 438.2s


Fold 0 Epoch 8 Step 2650 Loss 1.2860 Elapsed 446.6s


Fold 0 Epoch 8 Step 2700 Loss 1.2836 Elapsed 455.0s


Fold 0 Epoch 8 Step 2750 Loss 1.2816 Elapsed 463.4s


Fold 0 Epoch 8 Step 2800 Loss 1.2805 Elapsed 471.8s


Fold 0 Epoch 8 Step 2850 Loss 1.2777 Elapsed 480.2s


Fold 0 Epoch 8 Step 2900 Loss 1.2759 Elapsed 488.7s


Fold 0 Epoch 8 Step 2950 Loss 1.2736 Elapsed 497.1s


Fold 0 Epoch 8 Step 3000 Loss 1.2713 Elapsed 505.5s


Fold 0 Epoch 8 Step 3050 Loss 1.2700 Elapsed 514.0s


Fold 0 Epoch 8 Step 3100 Loss 1.2677 Elapsed 522.4s


Fold 0 Epoch 8 Step 3150 Loss 1.2651 Elapsed 530.8s


Fold 0 Epoch 8 Step 3200 Loss 1.2634 Elapsed 539.3s


Fold 0 Epoch 8 Step 3250 Loss 1.2616 Elapsed 547.7s


Fold 0 Epoch 8 Step 3300 Loss 1.2594 Elapsed 556.1s


Fold 0 Epoch 8 Step 3350 Loss 1.2572 Elapsed 564.5s


Fold 0 Epoch 8 Step 3400 Loss 1.2545 Elapsed 572.9s


Fold 0 Epoch 8 Step 3450 Loss 1.2522 Elapsed 581.3s


Fold 0 Epoch 8 Step 3500 Loss 1.2496 Elapsed 589.8s


Fold 0 Epoch 8 Step 3550 Loss 1.2472 Elapsed 598.2s


Fold 0 Epoch 8 Step 3600 Loss 1.2456 Elapsed 606.6s


Fold 0 Epoch 8 Step 3650 Loss 1.2432 Elapsed 615.0s


Fold 0 Epoch 8 Step 3700 Loss 1.2419 Elapsed 623.4s


Fold 0 Epoch 8 Step 3750 Loss 1.2408 Elapsed 631.8s


Fold 0 Epoch 8 Step 3800 Loss 1.2387 Elapsed 640.2s


Fold 0 Epoch 8 Step 3850 Loss 1.2373 Elapsed 648.6s


Fold 0 Epoch 8 Step 3900 Loss 1.2354 Elapsed 657.0s


Fold 0 Epoch 8 Step 3950 Loss 1.2342 Elapsed 665.4s


Fold 0 Epoch 8 Step 4000 Loss 1.2328 Elapsed 673.8s


Fold 0 Epoch 8 Step 4050 Loss 1.2302 Elapsed 682.2s


Fold 0 Epoch 8 Step 4100 Loss 1.2282 Elapsed 690.6s


Fold 0 Epoch 8 Step 4150 Loss 1.2261 Elapsed 699.0s


Fold 0 Epoch 8 Step 4200 Loss 1.2245 Elapsed 707.4s


Fold 0 Epoch 8 Step 4250 Loss 1.2234 Elapsed 715.8s


Fold 0 Epoch 8 Step 4300 Loss 1.2214 Elapsed 724.2s


Fold 0 Epoch 8 Step 4350 Loss 1.2198 Elapsed 732.6s


Fold 0 Epoch 8 Step 4400 Loss 1.2175 Elapsed 741.0s


Fold 0 Epoch 8 Step 4450 Loss 1.2160 Elapsed 749.4s


Fold 0 Epoch 8 Step 4500 Loss 1.2141 Elapsed 757.8s


Fold 0 Epoch 8 Step 4550 Loss 1.2125 Elapsed 766.2s


Fold 0 Epoch 8 Step 4600 Loss 1.2104 Elapsed 774.6s


Fold 0 Epoch 8 Step 4650 Loss 1.2091 Elapsed 783.0s


Evaluating on mini-val (1/img per class, cap 10k)


Evaluating on capped validation: 500 batches


  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)


Fold 0 Epoch 8 mini-F1 0.23056 | F1 0.05117 | sel 0.17674 time 841.4s


Saved best to ckpt_fold0.pt


Fold 0 Epoch 9 Step 0 Loss 3.0708 Elapsed 0.7s


Fold 0 Epoch 9 Step 50 Loss 3.1725 Elapsed 9.0s


Fold 0 Epoch 9 Step 100 Loss 3.1310 Elapsed 17.3s


Fold 0 Epoch 9 Step 150 Loss 3.0625 Elapsed 25.6s


Fold 0 Epoch 9 Step 200 Loss 3.0202 Elapsed 34.0s


Fold 0 Epoch 9 Step 250 Loss 2.9915 Elapsed 42.4s


Fold 0 Epoch 9 Step 300 Loss 2.9614 Elapsed 50.8s


Fold 0 Epoch 9 Step 350 Loss 2.9404 Elapsed 59.1s


Fold 0 Epoch 9 Step 400 Loss 2.9224 Elapsed 67.6s


Fold 0 Epoch 9 Step 450 Loss 2.9112 Elapsed 76.0s


Fold 0 Epoch 9 Step 500 Loss 2.8947 Elapsed 84.4s


Fold 0 Epoch 9 Step 550 Loss 2.8834 Elapsed 92.8s


Fold 0 Epoch 9 Step 600 Loss 2.8752 Elapsed 101.2s


Fold 0 Epoch 9 Step 650 Loss 2.8653 Elapsed 109.7s


Fold 0 Epoch 9 Step 700 Loss 2.8579 Elapsed 118.1s


Fold 0 Epoch 9 Step 750 Loss 2.8495 Elapsed 126.5s


Fold 0 Epoch 9 Step 800 Loss 2.8426 Elapsed 134.9s


Fold 0 Epoch 9 Step 850 Loss 2.8365 Elapsed 143.4s


Fold 0 Epoch 9 Step 900 Loss 2.8326 Elapsed 151.8s


Fold 0 Epoch 9 Step 950 Loss 2.8266 Elapsed 160.2s


Fold 0 Epoch 9 Step 1000 Loss 2.8215 Elapsed 168.7s


Fold 0 Epoch 9 Step 1050 Loss 2.8208 Elapsed 177.1s


Fold 0 Epoch 9 Step 1100 Loss 2.8138 Elapsed 185.5s


Fold 0 Epoch 9 Step 1150 Loss 2.8103 Elapsed 193.9s


Fold 0 Epoch 9 Step 1200 Loss 2.8051 Elapsed 202.4s


Fold 0 Epoch 9 Step 1250 Loss 2.8012 Elapsed 210.8s


Fold 0 Epoch 9 Step 1300 Loss 2.7979 Elapsed 219.2s


Fold 0 Epoch 9 Step 1350 Loss 2.7940 Elapsed 227.6s


Fold 0 Epoch 9 Step 1400 Loss 2.7914 Elapsed 236.1s


Fold 0 Epoch 9 Step 1450 Loss 2.7894 Elapsed 244.5s


Fold 0 Epoch 9 Step 1500 Loss 2.7869 Elapsed 252.9s


Fold 0 Epoch 9 Step 1550 Loss 2.7843 Elapsed 261.3s


Fold 0 Epoch 9 Step 1600 Loss 2.7817 Elapsed 269.7s


Fold 0 Epoch 9 Step 1650 Loss 2.7808 Elapsed 278.1s


Fold 0 Epoch 9 Step 1700 Loss 2.7782 Elapsed 286.5s


Fold 0 Epoch 9 Step 1750 Loss 2.7753 Elapsed 294.9s


Fold 0 Epoch 9 Step 1800 Loss 2.7731 Elapsed 303.3s


Fold 0 Epoch 9 Step 1850 Loss 2.7695 Elapsed 311.7s


Fold 0 Epoch 9 Step 1900 Loss 2.7676 Elapsed 320.1s


Fold 0 Epoch 9 Step 1950 Loss 2.7655 Elapsed 328.5s


Fold 0 Epoch 9 Step 2000 Loss 2.7640 Elapsed 336.9s


Fold 0 Epoch 9 Step 2050 Loss 2.7620 Elapsed 345.2s


Fold 0 Epoch 9 Step 2100 Loss 2.7608 Elapsed 353.6s


Fold 0 Epoch 9 Step 2150 Loss 2.7586 Elapsed 362.0s


Fold 0 Epoch 9 Step 2200 Loss 2.7569 Elapsed 370.4s


Fold 0 Epoch 9 Step 2250 Loss 2.7552 Elapsed 378.8s


Fold 0 Epoch 9 Step 2300 Loss 2.7537 Elapsed 387.2s


Fold 0 Epoch 9 Step 2350 Loss 2.7521 Elapsed 395.6s


Fold 0 Epoch 9 Step 2400 Loss 2.7510 Elapsed 404.0s


Fold 0 Epoch 9 Step 2450 Loss 2.7483 Elapsed 412.4s


Fold 0 Epoch 9 Step 2500 Loss 2.7460 Elapsed 420.8s


Fold 0 Epoch 9 Step 2550 Loss 2.7443 Elapsed 429.2s


Fold 0 Epoch 9 Step 2600 Loss 2.7429 Elapsed 437.6s


Fold 0 Epoch 9 Step 2650 Loss 2.7409 Elapsed 446.0s


Fold 0 Epoch 9 Step 2700 Loss 2.7392 Elapsed 454.4s


Fold 0 Epoch 9 Step 2750 Loss 2.7379 Elapsed 462.8s


Fold 0 Epoch 9 Step 2800 Loss 2.7377 Elapsed 471.2s


Fold 0 Epoch 9 Step 2850 Loss 2.7364 Elapsed 479.6s


Fold 0 Epoch 9 Step 2900 Loss 2.7346 Elapsed 488.0s


Fold 0 Epoch 9 Step 2950 Loss 2.7330 Elapsed 496.4s


Fold 0 Epoch 9 Step 3000 Loss 2.7314 Elapsed 504.8s


Fold 0 Epoch 9 Step 3050 Loss 2.7295 Elapsed 513.2s


Fold 0 Epoch 9 Step 3100 Loss 2.7272 Elapsed 521.6s


Fold 0 Epoch 9 Step 3150 Loss 2.7259 Elapsed 530.0s


Fold 0 Epoch 9 Step 3200 Loss 2.7243 Elapsed 538.4s


Fold 0 Epoch 9 Step 3250 Loss 2.7235 Elapsed 546.8s


Fold 0 Epoch 9 Step 3300 Loss 2.7221 Elapsed 555.2s


In [7]:
# Sanity check: try opening a sample of image paths to confirm I/O works
from PIL import Image
import numpy as np
import random, time
from pathlib import Path

def check_opens(df, n=500, split='train'):
    samp = df.sample(min(n, len(df)), random_state=42).reset_index(drop=True)
    ok = 0; fail = 0; failed_paths = []
    t0 = time.time()
    for i in range(len(samp)):
        fp = samp.iloc[i]['file_path']
        try:
            with Image.open(fp) as im:
                im.verify()  # lightweight check
            ok += 1
        except Exception as e:
            fail += 1
            if len(failed_paths) < 10:
                failed_paths.append((str(fp), str(e)))
    print(f"[{split}] Tried {len(samp)} -> OK {ok}, FAIL {fail}, time {time.time()-t0:.1f}s", flush=True)
    if failed_paths:
        print('Sample failures:')
        for p,e in failed_paths:
            print(' -', p, '|', e)

print('=== Image open sanity checks ===', flush=True)
check_opens(train_df, n=500, split='train')
check_opens(te_imgs, n=200, split='test')

=== Image open sanity checks ===


[train] Tried 500 -> OK 500, FAIL 0, time 0.1s


[test] Tried 200 -> OK 200, FAIL 0, time 0.0s


In [17]:
# Inference from saved best checkpoint for fold 0 and write submission
import pandas as pd
print('Running inference with ckpt_fold0.pt ...', flush=True)
preds = infer_fold(0, batch_size=128)
sub_ids = pd.read_csv(SAMPLE_SUB)['Id'].values
pred_cat = [lbl2cat[int(x)] for x in preds]
sub = pd.DataFrame({'Id': sub_ids, 'Predicted': pred_cat})
sub.to_csv('submission.csv', index=False)
print('submission.csv written with shape', sub.shape, flush=True)

Running inference with ckpt_fold0.pt ...


  state = torch.load(ckpt_path, map_location='cpu')


KeyboardInterrupt: 

In [19]:
# Retrieval baseline (DINO/ViT features + FAISS flat IP). Prep while classifier trains.
import sys, subprocess, math, time, faiss, torch, numpy as np, pandas as pd
from pathlib import Path
import torchvision.transforms as T
from PIL import Image
import timm
from torch.utils.data import Dataset, DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

RET_IMG_SIZE = 256
ret_tfms = T.Compose([
    T.Resize(RET_IMG_SIZE),
    T.CenterCrop(RET_IMG_SIZE),
    T.ToTensor(),
    T.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225)),
])

class ImageTensorDS(Dataset):
    def __init__(self, df, tfms):
        self.df = df.reset_index(drop=True); self.tfms = tfms
    def __len__(self): return len(self.df)
    def __getitem__(self, i):
        fp = self.df.iloc[i]['file_path']
        try:
            img = Image.open(fp).convert('RGB')
        except Exception:
            img = Image.fromarray(np.zeros((RET_IMG_SIZE, RET_IMG_SIZE,3), np.uint8))
        return self.tfms(img), i

def build_feat_model():
    # Prefer fast/compact backbone first to reduce download/compute
    names = [
        'vit_base_patch16_224.dino',
        'convnext_base.fb_in22k',
    ]
    last_err = None
    for name in names:
        try:
            m = timm.create_model(name, pretrained=True, num_classes=0)  # features only
            m.to(device); m.eval(); m = m.to(memory_format=torch.channels_last)
            dummy = torch.zeros(1,3,RET_IMG_SIZE,RET_IMG_SIZE, device=device).to(memory_format=torch.channels_last)
            with torch.no_grad(), torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
                out = m(dummy)
            d = out.shape[-1] if out.ndim==2 else out.flatten(1).shape[-1]
            return m, d, name
        except Exception as e:
            last_err = e
            continue
    raise RuntimeError(f'No backbone available: {last_err}')

def extract_embeddings(model, loader, d):
    embs = np.zeros((len(loader.dataset), d), dtype=np.float32)
    t0=time.time()
    with torch.no_grad():
        for b, (imgs, idx) in enumerate(loader):
            imgs = imgs.to(device, non_blocking=True, memory_format=torch.channels_last)
            with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
                out = model(imgs)
            if out.ndim>2:
                out = out.flatten(1)
            out = torch.nn.functional.normalize(out.float(), dim=1)
            embs[idx.numpy()] = out.detach().cpu().numpy().astype(np.float32)
            if b % 50 == 0:
                print(f'Emb batch {b} / {len(loader)} elapsed {time.time()-t0:.1f}s', flush=True)
    return embs

def build_prototypes(train_df, per_class=4, batch_size=128):
    # Use at most 1 image per class (random) for speed on huge label space
    per_class_eff = 1
    grp = (train_df.groupby('label', group_keys=False)
           .apply(lambda g: g.sample(min(len(g), per_class_eff), random_state=42))
           .reset_index(drop=True))
    print('Prototype sampling rows:', len(grp), flush=True)
    ds = ImageTensorDS(grp, ret_tfms)
    dl = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, persistent_workers=False)
    model, d, bname = build_feat_model()
    print(f'Feature backbone: {bname}, dim={d}', flush=True)
    embs = extract_embeddings(model, dl, d)
    # One prototype per class (mean of sampled; with 1, it's the vector itself)
    proto_vecs, proto_lbls = [], []
    for lbl, sub in grp.groupby('label'):
        v = embs[sub.index.values]
        proto_vecs.append(v.mean(axis=0))
        proto_lbls.append(int(lbl))
    proto = np.stack(proto_vecs).astype(np.float32)
    proto = proto / np.linalg.norm(proto, axis=1, keepdims=True).clip(1e-9, None)
    return proto, np.array(proto_lbls, dtype=np.int32), d, bname

def build_faiss_index(proto):
    d = proto.shape[1]
    index = faiss.IndexFlatIP(d)
    index.add(proto)
    return index

def run_retrieval_submission(per_class=4, test_batch=128):
    tr = train_df[['file_path','label']].copy()
    te = build_test_df_in_submission_order()
    # Build prototypes (internally uses 1 per class for speed)
    t0 = time.time()
    proto, proto_lbls, d, bname = build_prototypes(tr, per_class=per_class, batch_size=128)
    print(f'Prototypes: {len(proto)} built in {time.time()-t0:.1f}s', flush=True)
    index = build_faiss_index(proto)
    # Test embeddings
    ds_te = ImageTensorDS(te, ret_tfms)
    dl_te = DataLoader(ds_te, batch_size=min(test_batch, 64), shuffle=False, num_workers=4, pin_memory=True, persistent_workers=False)
    model, d2, _ = build_feat_model()
    assert d2 == d, 'Backbone mismatch'
    t1 = time.time()
    preds = []
    with torch.no_grad():
        for b, (imgs, idx) in enumerate(dl_te):
            imgs = imgs.to(device, non_blocking=True, memory_format=torch.channels_last)
            with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
                out = model(imgs)
            if out.ndim>2: out = out.flatten(1)
            out = torch.nn.functional.normalize(out.float(), dim=1)
            q = out.detach().cpu().numpy().astype(np.float32)
            D, I = index.search(q, 1)
            lbls = proto_lbls[I[:,0]]
            preds.append(lbls)
            if b % 50 == 0:
                print(f'Test batch {b}/{len(dl_te)} elapsed {time.time()-t1:.1f}s', flush=True)
    preds = np.concatenate(preds).astype(np.int32)
    print(f'Test queried in {time.time()-t1:.1f}s', flush=True)
    pred_cat = [lbl2cat[int(x)] for x in preds]
    sub = pd.DataFrame({ 'Id': pd.read_csv(SAMPLE_SUB)['Id'].values, 'Predicted': pred_cat })
    sub.to_csv('submission.csv', index=False)
    print('Retrieval submission.csv written:', sub.shape, flush=True)

print('Retrieval code ready (uses FAISS IndexFlatIP). To run after deps present: \n- pip install faiss-cpu (in a separate cell) \n- run_retrieval_submission(per_class=4)', flush=True)

Retrieval code ready (uses FAISS IndexFlatIP). To run after deps present: 
- pip install faiss-cpu (in a separate cell) 
- run_retrieval_submission(per_class=4)


In [18]:
# Install retrieval dependencies
import sys, subprocess, time, importlib
t0=time.time()
print('Installing faiss-cpu (and huggingface-hub) under torch constraints...', flush=True)
def pip(*args):
    print('>', *args, flush=True)
    subprocess.run([sys.executable, '-m', 'pip', *args], check=True)
pip('install', '-c', 'constraints.txt', 'faiss-cpu', 'huggingface-hub', '--upgrade-strategy', 'only-if-needed')
print('Installed in %.1fs' % (time.time()-t0), flush=True)
import faiss, timm
print('faiss version OK; timm version:', timm.__version__, flush=True)

Installing faiss-cpu (and huggingface-hub) under torch constraints...


> install -c constraints.txt faiss-cpu huggingface-hub --upgrade-strategy only-if-needed


Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 31.4/31.4 MB 183.4 MB/s eta 0:00:00
Collecting huggingface-hub
  Downloading huggingface_hub-0.35.1-py3-none-any.whl (563 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 563.3/563.3 KB 492.3 MB/s eta 0:00:00


Collecting numpy<3.0,>=1.25.0
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 264.1 MB/s eta 0:00:00
Collecting packaging
  Downloading packaging-25.0-py3-none-any.whl (66 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 66.5/66.5 KB 350.7 MB/s eta 0:00:00


Collecting filelock
  Downloading filelock-3.19.1-py3-none-any.whl (15 kB)
Collecting tqdm>=4.42.1
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.5/78.5 KB 439.3 MB/s eta 0:00:00


Collecting pyyaml>=5.1
  Downloading pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (806 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 806.6/806.6 KB 358.6 MB/s eta 0:00:00
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl (64 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.7/64.7 KB 431.1 MB/s eta 0:00:00
Collecting hf-xet<2.0.0,>=1.1.3
  Downloading hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.2/3.2 MB 123.1 MB/s eta 0:00:00
Collecting fsspec>=2023.5.0
  Downloading fsspec-2025.9.0-py3-none-any.whl (199 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.3/199.3 KB 469.0 MB/s eta 0:00:00
Collecting typing-extensions>=3.7.4.3
  Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 KB 385.0 MB/s eta 0:00:00


Collecting idna<4,>=2.5
  Downloading idna-3.10-py3-none-any.whl (70 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 70.4/70.4 KB 348.2 MB/s eta 0:00:00
Collecting charset_normalizer<4,>=2
  Downloading charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (150 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 150.3/150.3 KB 479.1 MB/s eta 0:00:00
Collecting certifi>=2017.4.17
  Downloading certifi-2025.8.3-py3-none-any.whl (161 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 161.2/161.2 KB 497.5 MB/s eta 0:00:00


Collecting urllib3<3,>=1.21.1
  Downloading urllib3-2.5.0-py3-none-any.whl (129 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 129.8/129.8 KB 472.5 MB/s eta 0:00:00


Installing collected packages: urllib3, typing-extensions, tqdm, pyyaml, packaging, numpy, idna, hf-xet, fsspec, filelock, charset_normalizer, certifi, requests, faiss-cpu, huggingface-hub


Successfully installed certifi-2025.8.3 charset_normalizer-3.4.3 faiss-cpu-1.12.0 filelock-3.19.1 fsspec-2025.9.0 hf-xet-1.1.10 huggingface-hub-0.35.1 idna-3.10 numpy-1.26.4 packaging-25.0 pyyaml-6.0.3 requests-2.32.5 tqdm-4.67.1 typing-extensions-4.15.0 urllib3-2.5.0




Installed in 6.1s


faiss version OK; timm version: 1.0.9


In [20]:
# Run retrieval baseline to generate a fast submission
import time
t0=time.time()
print('Starting retrieval submission (per_class=4, test_batch=128)...', flush=True)
run_retrieval_submission(per_class=4, test_batch=128)
print('Retrieval done in %.1f min' % ((time.time()-t0)/60.0), flush=True)

Starting retrieval submission (per_class=4, test_batch=128)...


In [25]:
# Optimized sharded test inference (no TTA) per expert advice
import numpy as np, torch, time, pandas as pd, os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from pathlib import Path

class TestDatasetSharded(Dataset):
    def __init__(self, df, tfms, start=0, end=None):
        self.df_full = df.reset_index(drop=True)
        self.start = int(start)
        self.end = int(end) if end is not None else len(self.df_full)
        self.df = self.df_full.iloc[self.start:self.end].reset_index(drop=True)
        self.tfms = tfms
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        j = i
        fp = self.df.iloc[j]['file_path']
        try:
            img = Image.open(fp).convert('RGB')
        except Exception:
            img = Image.fromarray(np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8))
        img = self.tfms(img)
        return img, self.start + j

def _get_log_priors():
    # Compute log class priors from training data (align to NUM_CLASSES)
    counts = train_df['label'].value_counts().reindex(range(NUM_CLASSES), fill_value=0).values.astype(np.float32)
    counts = np.maximum(counts, 1.0)
    logp = np.log(counts)
    t = torch.from_numpy(logp).to(device)
    return t

def infer_fold_sharded(fold=0, batch_size=256, chunk_size=50000, out_memmap='test_predlbl.int32', tau=1.0):
    # tau: strength of prior correction; 0 disables
    ckpt_path = f'ckpt_fold{fold}.pt'
    print(f'Loading checkpoint: {ckpt_path}', flush=True)
    state = torch.load(ckpt_path, map_location='cpu')
    model = build_model()
    model.load_state_dict(state['model'], strict=True)
    ema = ModelEmaV2(model, decay=0.999, device=device)
    if 'ema' in state:
        ema.load_state_dict(state['ema'], strict=False)
    m = ema.module if 'ema' in state else model
    m.eval()
    test_df = build_test_df_in_submission_order()
    N = len(test_df)
    print('Test size:', N, flush=True)

    # Prepare/resume memmap
    mode = 'r+' if os.path.exists(out_memmap) else 'w+'
    mm = np.memmap(out_memmap, dtype='int32', mode=mode, shape=(N,))

    # Dataloader params per expert
    dl_kwargs = dict(batch_size=batch_size, shuffle=False, num_workers=16, pin_memory=True,
                     persistent_workers=False, prefetch_factor=4, drop_last=False)
    t_all = time.time()
    written = 0
    prior = _get_log_priors() if tau and tau > 0 else None
    with torch.inference_mode():
        for s in range(0, N, chunk_size):
            e = min(s + chunk_size, N)
            print(f'Chunk {s}:{e} ({e-s})', flush=True)
            ds = TestDatasetSharded(test_df, val_tfms, start=s, end=e)
            dl = DataLoader(ds, **dl_kwargs)
            t0 = time.time()
            for b, (imgs, idxs) in enumerate(dl):
                imgs = imgs.to(device, non_blocking=True, memory_format=torch.channels_last)
                with torch.amp.autocast('cuda', dtype=torch.float16, enabled=torch.cuda.is_available()):
                    logits = m(imgs)
                    if prior is not None:
                        logits = logits - float(tau) * prior
                pred = logits.argmax(1).detach().cpu().numpy().astype(np.int32)
                mm[idxs.numpy()] = pred
                written += len(pred)
                if b % 50 == 0:
                    print(f'  batch {b}/{len(dl)} wrote {written}/{N} elapsed {time.time()-t0:.1f}s', flush=True)
            mm.flush()
            Path('last_done.txt').write_text(str(e))
            print(f'Chunk {s}:{e} done in {time.time()-t0:.1f}s (total {time.time()-t_all:.1f}s)', flush=True)
    del mm
    # Map to category ids and write submission
    preds = np.memmap(out_memmap, dtype='int32', mode='r', shape=(N,))
    pred_cat = [lbl2cat[int(x)] for x in preds]
    sub = pd.DataFrame({'Id': pd.read_csv(SAMPLE_SUB)['Id'].values, 'Predicted': pred_cat})
    sub.to_csv('submission.csv', index=False)
    print('submission.csv written with shape', sub.shape, 'total time %.1fs' % (time.time()-t_all), flush=True)
    return 'submission.csv'

print('Sharded inference utility ready: call infer_fold_sharded(fold=0, batch_size=256, chunk_size=50000, tau=1.0) after training.', flush=True)

Sharded inference utility ready: call infer_fold_sharded(fold=0, batch_size=256, chunk_size=50000, tau=1.0) after training.


In [None]:
# Launch sharded inference (no TTA) from best fold-0 checkpoint
import os, time, pandas as pd
t0=time.time()
print('Starting sharded inference: fold=0, bs=256, chunk=50k, tau=1.0', flush=True)
out = infer_fold_sharded(fold=0, batch_size=256, chunk_size=50000, tau=1.0)
assert os.path.exists('submission.csv'), 'submission.csv not found after inference'
sub = pd.read_csv('submission.csv')
print('submission.csv ready:', sub.shape, 'Elapsed: %.1fs' % (time.time()-t0), flush=True)
print(sub.head())

Starting sharded inference: fold=0, bs=256, chunk=50k, tau=1.0


Loading checkpoint: ckpt_fold0.pt


  state = torch.load(ckpt_path, map_location='cpu')


Test size: 477806


Chunk 0:50000 (50000)
