In [1]:
# Setup: install timm (respect existing torch), load JSONs, build label mapping, stratified split
import os, sys, json, math, random, time, shutil, subprocess
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import torch.backends.cudnn as cudnn

def run(cmd):
    print('>', ' '.join(cmd), flush=True)
    subprocess.run(cmd, check=True)

def pip_install_pkgs():
    # Freeze torch stack; avoid re-installing torch/torchvision
    cons = Path('constraints.txt')
    if not cons.exists():
        cons.write_text('torch==2.4.1\ntorchvision==0.19.1\ntorchaudio==2.4.1\n')
    # Install timm without deps to prevent torch reinstallation
    run([sys.executable, '-m', 'pip', 'install', 'timm==1.0.9', '--no-deps'])
    # Install required deps including HF hub BEFORE importing timm so timm sees them
    run([sys.executable, '-m', 'pip', 'install', '-c', 'constraints.txt', 'albumentations', 'opencv-python-headless', 'huggingface_hub', 'safetensors', '--upgrade-strategy', 'only-if-needed'])

print('torch:', torch.__version__, 'cuda build:', getattr(torch.version,'cuda', None), 'cuda avail:', torch.cuda.is_available(), flush=True)
pip_install_pkgs()
import timm

cudnn.benchmark = True
torch.set_float32_matmul_precision('high')
print('GPU:', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU', flush=True)

# Load JSONs
def load_json(fp):
    with open(fp, 'r') as f:
        return json.load(f)
train_js = load_json('train2019.json')
test_js = load_json('test2019.json')

# Build dataframe from train JSON using JSON-based category_id
train_images = {im['id']: im for im in train_js['images']}
rows = []
for ann in train_js['annotations']:
    im = train_images.get(ann['image_id'])
    if im is None: continue
    rows.append({
        'image_id': ann['image_id'],
        'file_name': im['file_name'],
        'category_id': ann['category_id']
    })
df = pd.DataFrame(rows)
print('Train rows:', len(df), 'unique cats:', df['category_id'].nunique(), flush=True)

# Label mapping: map arbitrary category_id -> contiguous [0..C-1]
cat_ids = sorted(df['category_id'].unique().tolist())
cat_id_to_idx = {cid:i for i,cid in enumerate(cat_ids)}
idx_to_cat_id = {i:cid for cid,i in cat_id_to_idx.items()}
df['label'] = df['category_id'].map(cat_id_to_idx)
num_classes = len(cat_ids)
print('num_classes:', num_classes, 'min/max cat_id:', min(cat_ids), max(cat_ids), flush=True)

# Stratified split from train (since official val images are not present in extracted files)
from sklearn.model_selection import StratifiedShuffleSplit
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.0115, random_state=42)  # ~3030 holdout to mirror official val size
train_idx, val_idx = next(sss.split(df.index.values, df['label'].values))
df_train = df.iloc[train_idx].reset_index(drop=True)
df_val = df.iloc[val_idx].reset_index(drop=True)
print('Split -> train:', len(df_train), 'val:', len(df_val), flush=True)

# Save mappings for reuse
Path('artifacts').mkdir(exist_ok=True)
pd.Series(idx_to_cat_id).to_json('artifacts/idx_to_cat_id.json')
pd.Series(cat_id_to_idx).to_json('artifacts/cat_id_to_idx.json')
df_train.to_csv('artifacts/train_split.csv', index=False)
df_val.to_csv('artifacts/val_split.csv', index=False)
print('Prepared splits and mappings.', flush=True)

torch: 2.4.1+cu121 cuda build: 12.1 cuda avail: True


> /usr/bin/python3.11 -m pip install timm==1.0.9 --no-deps


Collecting timm==1.0.9
  Downloading timm-1.0.9-py3-none-any.whl (2.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.3/2.3 MB 57.2 MB/s eta 0:00:00
Installing collected packages: timm


Successfully installed timm-1.0.9
> /usr/bin/python3.11 -m pip install -c constraints.txt albumentations opencv-python-headless huggingface_hub safetensors --upgrade-strategy only-if-needed




Collecting albumentations
  Downloading albumentations-2.0.8-py3-none-any.whl (369 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 369.4/369.4 KB 13.3 MB/s eta 0:00:00
Collecting opencv-python-headless
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (54.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.0/54.0 MB 113.3 MB/s eta 0:00:00
Collecting huggingface_hub
  Downloading huggingface_hub-0.35.1-py3-none-any.whl (563 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 563.3/563.3 KB 234.4 MB/s eta 0:00:00


Collecting safetensors
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (485 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 485.8/485.8 KB 510.4 MB/s eta 0:00:00
Collecting PyYAML
  Downloading pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (806 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 806.6/806.6 KB 508.2 MB/s eta 0:00:00


Collecting scipy>=1.10.0
  Downloading scipy-1.16.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.9 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.9/35.9 MB 183.7 MB/s eta 0:00:00


Collecting pydantic>=2.9.2
  Downloading pydantic-2.11.9-py3-none-any.whl (444 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 444.9/444.9 KB 515.6 MB/s eta 0:00:00
Collecting albucore==0.0.24
  Downloading albucore-0.0.24-py3-none-any.whl (15 kB)


Collecting numpy>=1.24.4
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 223.4 MB/s eta 0:00:00


Collecting simsimd>=5.9.2
  Downloading simsimd-6.5.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (1.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 540.2 MB/s eta 0:00:00


Collecting stringzilla>=3.10.4
  Downloading stringzilla-4.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (496 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 496.5/496.5 KB 507.2 MB/s eta 0:00:00
Collecting opencv-python-headless
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (50.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 50.0/50.0 MB 221.5 MB/s eta 0:00:00


Collecting packaging>=20.9
  Downloading packaging-25.0-py3-none-any.whl (66 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 66.5/66.5 KB 423.0 MB/s eta 0:00:00
Collecting fsspec>=2023.5.0
  Downloading fsspec-2025.9.0-py3-none-any.whl (199 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.3/199.3 KB 505.7 MB/s eta 0:00:00
Collecting tqdm>=4.42.1
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.5/78.5 KB 472.5 MB/s eta 0:00:00
Collecting filelock
  Downloading filelock-3.19.1-py3-none-any.whl (15 kB)
Collecting typing-extensions>=3.7.4.3
  Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 KB 333.7 MB/s eta 0:00:00
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl (64 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.7/64.7 KB 428.8 MB/s eta 0:00:00


Collecting hf-xet<2.0.0,>=1.1.3
  Downloading hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.2/3.2 MB 479.9 MB/s eta 0:00:00
Collecting annotated-types>=0.6.0
  Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)
Collecting typing-inspection>=0.4.0
  Downloading typing_inspection-0.4.1-py3-none-any.whl (14 kB)


Collecting pydantic-core==2.33.2
  Downloading pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 533.8 MB/s eta 0:00:00
Collecting idna<4,>=2.5
  Downloading idna-3.10-py3-none-any.whl (70 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 70.4/70.4 KB 436.5 MB/s eta 0:00:00


Collecting charset_normalizer<4,>=2
  Downloading charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (150 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 150.3/150.3 KB 510.0 MB/s eta 0:00:00
Collecting urllib3<3,>=1.21.1
  Downloading urllib3-2.5.0-py3-none-any.whl (129 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 129.8/129.8 KB 496.1 MB/s eta 0:00:00
Collecting certifi>=2017.4.17
  Downloading certifi-2025.8.3-py3-none-any.whl (161 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 161.2/161.2 KB 455.4 MB/s eta 0:00:00


Installing collected packages: simsimd, urllib3, typing-extensions, tqdm, stringzilla, safetensors, PyYAML, packaging, numpy, idna, hf-xet, fsspec, filelock, charset_normalizer, certifi, annotated-types, typing-inspection, scipy, requests, pydantic-core, opencv-python-headless, pydantic, huggingface_hub, albucore, albumentations


Successfully installed PyYAML-6.0.3 albucore-0.0.24 albumentations-2.0.8 annotated-types-0.7.0 certifi-2025.8.3 charset_normalizer-3.4.3 filelock-3.19.1 fsspec-2025.9.0 hf-xet-1.1.10 huggingface_hub-0.35.1 idna-3.10 numpy-1.26.4 opencv-python-headless-4.11.0.86 packaging-25.0 pydantic-2.11.9 pydantic-core-2.33.2 requests-2.32.5 safetensors-0.6.2 scipy-1.16.2 simsimd-6.5.3 stringzilla-4.0.14 tqdm-4.67.1 typing-extensions-4.15.0 typing-inspection-0.4.1 urllib3-2.5.0




GPU: NVIDIA A10-24Q


Train rows: 232999 unique cats: 1010


num_classes: 1010 min/max cat_id: 0 1009


Split -> train: 230319 val: 2680


Prepared splits and mappings.


In [6]:
# Dataset, transforms, model, and quick smoke-train to validate pipeline (torchvision backbone to avoid HF issues)
import math, time, gc, os
from pathlib import Path
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.transforms as T
import torchvision.models as tvm
from torchvision.transforms import InterpolationMode

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class JsonImageDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        row = self.df.iloc[i]
        img_path = row['file_name']
        with Image.open(img_path) as img:
            img = img.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        label = int(row['label'])
        return img, label

# Torchvision ResNet50 (URL-hosted weights) to bypass HF hub
weights = tvm.ResNet50_Weights.IMAGENET1K_V2
print('Creating model: torchvision resnet50, weights IMAGENET1K_V2', flush=True)
base_model = tvm.resnet50(weights=weights)
in_feats = base_model.fc.in_features
base_model.fc = nn.Linear(in_feats, num_classes)
model = base_model.to(device).to(memory_format=torch.channels_last)

# Transforms (use standard ImageNet mean/std to avoid weights.meta dependency)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img_size = 224
train_tfms = T.Compose([
    T.RandomResizedCrop(img_size, scale=(0.5, 1.0), interpolation=InterpolationMode.BICUBIC),
    T.RandomHorizontalFlip(p=0.5),
    T.AutoAugment(policy=T.AutoAugmentPolicy.IMAGENET),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])
val_tfms = T.Compose([
    T.Resize(int(img_size/0.875), interpolation=InterpolationMode.BICUBIC),
    T.CenterCrop(img_size),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])

# Datasets and loaders (small subsets for smoke test)
n_train_smoke = 2048
n_val_smoke = 512
train_ds_full = JsonImageDataset(df_train, transform=train_tfms)
val_ds_full = JsonImageDataset(df_val, transform=val_tfms)
train_ds = Subset(train_ds_full, np.arange(min(n_train_smoke, len(train_ds_full))))
val_ds = Subset(val_ds_full, np.arange(min(n_val_smoke, len(val_ds_full))))

bs = 96
nw = min(8, os.cpu_count() or 4)
train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=nw, pin_memory=True, persistent_workers=True)
val_loader = DataLoader(val_ds, batch_size=bs, shuffle=False, num_workers=nw, pin_memory=True, persistent_workers=True)

# Optim, loss
lr = 1e-3 * (bs / 256.0)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.05, betas=(0.9, 0.999))
criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(device)
scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

def evaluate(model, loader):
    model.eval()
    correct = 0
    total = 0
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(device, non_blocking=True, memory_format=torch.channels_last)
            yb = yb.to(device, non_blocking=True)
            with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
                logits = model(xb)
            pred = logits.argmax(dim=1)
            correct += (pred == yb).sum().item()
            total += yb.numel()
    acc = correct / max(1, total)
    print(f'Eval: acc={acc:.4f}, n={total}, time={time.time()-t0:.1f}s', flush=True)
    return acc

# One-epoch smoke train
epochs = 1
print(f'Smoke train: epochs={epochs}, bs={bs}, train_n={len(train_ds)}, val_n={len(val_ds)}', flush=True)
best_acc = 0.0
t_start = time.time()
for epoch in range(epochs):
    model.train()
    t0 = time.time()
    running_loss = 0.0
    seen = 0
    for it, (xb, yb) in enumerate(train_loader):
        xb = xb.to(device, non_blocking=True, memory_format=torch.channels_last)
        yb = yb.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
            logits = model(xb)
            loss = criterion(logits, yb)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item() * yb.size(0)
        seen += yb.size(0)
        if (it+1) % 20 == 0:
            elapsed = time.time() - t0
            print(f'ep {epoch} it {it+1}/{math.ceil(len(train_ds)/bs)} loss {running_loss/seen:.4f} elapsed {elapsed:.1f}s', flush=True)
    train_loss = running_loss / max(1, seen)
    print(f'Epoch {epoch}: train_loss={train_loss:.4f} epoch_time={time.time()-t0:.1f}s total_elapsed={time.time()-t_start:.1f}s', flush=True)
    acc = evaluate(model, val_loader)
    best_acc = max(best_acc, acc)

print('Smoke training complete. Best val acc:', f'{best_acc:.4f}')
torch.cuda.empty_cache(); gc.collect()

Creating model: torchvision resnet50, weights IMAGENET1K_V2


Smoke train: epochs=1, bs=96, train_n=2048, val_n=512


  scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


ep 0 it 20/22 loss 6.8466 elapsed 5.1s


Epoch 0: train_loss=6.8426 epoch_time=5.8s total_elapsed=5.8s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Eval: acc=0.0098, n=512, time=1.2s


Smoke training complete. Best val acc: 0.0098


37501

In [9]:
# Stage A Extended: ConvNeXt-Tiny @256px, Mixup=0.3, EMA=0.99985, cosine w/ lr floor; resume to 12 epochs; 2-crop TTA inference
import math, time, gc, os, json, random
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as tvm
from torchvision.transforms import InterpolationMode
from timm.utils import ModelEmaV2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
assert torch.cuda.is_available(), 'CUDA required for timely training'

class JsonImageDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        row = self.df.iloc[i]
        with Image.open(row['file_name']) as img:
            img = img.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, int(row['label'])

# Transforms
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img_size = 256
train_tfms = T.Compose([
    T.RandomResizedCrop(img_size, scale=(0.5, 1.0), interpolation=InterpolationMode.BICUBIC),
    T.RandomHorizontalFlip(0.5),
    T.TrivialAugmentWide(interpolation=InterpolationMode.BICUBIC),
    T.ToTensor(),
    T.Normalize(mean, std),
    T.RandomErasing(p=0.25, scale=(0.02, 0.12), ratio=(0.3, 3.3), value='random'),
])
val_tfms = T.Compose([
    T.Resize(int(img_size/0.875), interpolation=InterpolationMode.BICUBIC),
    T.CenterCrop(img_size),
    T.ToTensor(),
    T.Normalize(mean, std),
])

# Datasets & loaders
train_ds = JsonImageDataset(df_train, transform=train_tfms)
val_ds = JsonImageDataset(df_val, transform=val_tfms)
bs = 128
nw = min(8, os.cpu_count() or 4)
train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=nw, pin_memory=True, persistent_workers=True)
val_loader = DataLoader(val_ds, batch_size=bs, shuffle=False, num_workers=nw, pin_memory=True, persistent_workers=True)
print(f'train_n={len(train_ds)} val_n={len(val_ds)} bs={bs} workers={nw}', flush=True)

# Model: ConvNeXt Tiny
weights = tvm.ConvNeXt_Tiny_Weights.IMAGENET1K_V1
model = tvm.convnext_tiny(weights=weights)
if isinstance(model.classifier, nn.Sequential) and isinstance(model.classifier[-1], nn.Linear):
    in_ch = model.classifier[-1].in_features
    model.classifier[-1] = nn.Linear(in_ch, num_classes)
else:
    # Fallback in case of different head structure
    for name, m in list(model.named_modules())[::-1]:
        if isinstance(m, nn.Linear) and m.in_features > 0:
            setattr(model, name.split('.')[-1], nn.Linear(m.in_features, num_classes))
            break
model = model.to(device).to(memory_format=torch.channels_last)

# Optimizer, scheduler, loss, Mixup, EMA
base_lr = 1e-3
lr_peak = base_lr * (bs / 256.0)
lr_min = 1e-6
optimizer = torch.optim.AdamW(model.parameters(), lr=lr_peak, weight_decay=0.05, betas=(0.9, 0.999))
criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(device)
scaler = torch.amp.GradScaler('cuda', enabled=True)
epochs = 12  # total target epochs
warmup_epochs = 1
steps_per_epoch = math.ceil(len(train_ds)/bs)
total_steps = epochs * steps_per_epoch
warmup_steps = warmup_epochs * steps_per_epoch

def cosine_lr(step):
    # returns multiplicative factor for lr between [lr_min/lr_peak, 1]
    if step < warmup_steps:
        return (step + 1) / max(1, warmup_steps)
    prog = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    cos_val = 0.5 * (1 + math.cos(math.pi * prog))
    scaled = (lr_min / lr_peak) + (1 - (lr_min / lr_peak)) * cos_val
    return scaled

# Mixup utilities
mixup_alpha = 0.3
def mixup_batch(x, y, alpha=mixup_alpha):
    if alpha <= 0:
        return x, y, 1.0, None
    lam = float(np.random.beta(alpha, alpha)) if alpha > 0 else 1.0
    batch_size = x.size(0)
    index = torch.randperm(batch_size, device=x.device)
    y_a, y_b = y, y[index]
    x = x * lam + x[index] * (1.0 - lam)
    return x, (y_a, y_b), lam, index

# EMA
ema_decay = 0.99985
model_ema = ModelEmaV2(model, decay=ema_decay, device=device)

# Optional resume from best EMA checkpoint if exists
ckpt_path = Path('artifacts/ckpt_convnext_tiny_stageA_fast.pth')
best_acc = 0.0
if ckpt_path.exists():
    try:
        ckpt = torch.load(ckpt_path, map_location='cpu')
        model.load_state_dict(ckpt['state_dict'], strict=False)
        model_ema.module.load_state_dict(ckpt['state_dict'], strict=False)
        best_acc = float(ckpt.get('val_acc', 0.0))
        print(f'Resumed weights from {ckpt_path} (prev best_acc={best_acc:.4f})', flush=True)
    except Exception as e:
        print('Resume failed:', e, flush=True)

def evaluate(model_eval, loader):
    model_eval.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(device, non_blocking=True, memory_format=torch.channels_last)
            yb = yb.to(device, non_blocking=True)
            with torch.amp.autocast('cuda', enabled=True):
                logits = model_eval(xb)
            pred = logits.argmax(1)
            correct += (pred == yb).sum().item()
            total += yb.numel()
    return correct / max(1, total)

# Train loop
t0_all = time.time()
step = 0
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    seen = 0
    t0 = time.time()
    for it, (xb, yb) in enumerate(train_loader):
        lr_now = lr_peak * cosine_lr(step)
        for pg in optimizer.param_groups:
            pg['lr'] = lr_now
        step += 1
        xb = xb.to(device, non_blocking=True, memory_format=torch.channels_last)
        yb = yb.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        # Mixup
        xb_m, y_pair, lam, _ = mixup_batch(xb, yb, mixup_alpha)
        with torch.amp.autocast('cuda', enabled=True):
            logits = model(xb_m)
            if isinstance(y_pair, tuple):
                ya, yb2 = y_pair
                # Use criterion for both terms (respects label smoothing) - expert fix
                loss = lam * criterion(logits, ya) + (1.0 - lam) * criterion(logits, yb2)
            else:
                loss = criterion(logits, y_pair)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # EMA update
        model_ema.update(model)
        running_loss += loss.item() * yb.size(0)
        seen += yb.size(0)
        if (it + 1) % 100 == 0:
            print(f'ep {epoch} it {it+1}/{steps_per_epoch} loss {running_loss/max(1,seen):.4f} lr {lr_now:.6f} elapsed {time.time()-t0:.1f}s', flush=True)
    train_loss = running_loss / max(1, seen)
    # Evaluate EMA model
    val_acc = evaluate(model_ema.module, val_loader)
    print(f'Epoch {epoch}: train_loss={train_loss:.4f} val_acc={val_acc:.4f} epoch_time={time.time()-t0:.1f}s total_elapsed={time.time()-t0_all:.1f}s', flush=True)
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save({'state_dict': model_ema.module.state_dict(), 'val_acc': best_acc}, ckpt_path)
        print(f'Saved new best to {ckpt_path} (acc={best_acc:.4f})', flush=True)

print(f'Training done. Best val_acc={best_acc:.4f}', flush=True)
del train_loader; torch.cuda.empty_cache(); gc.collect()

# Inference on test set with 2-crop TTA (orig + hflip) using EMA weights
with open('artifacts/idx_to_cat_id.json','r') as f:
    idx_to_cat_id = {int(k): int(v) for k,v in json.load(f).items()}
with open('test2019.json','r') as f:
    test_js = json.load(f)
test_images = test_js['images']
test_df = pd.DataFrame(test_images)
test_df['path'] = test_df['file_name']

class TestDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df.reset_index(drop=True)
        self.tf = transform
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        row = self.df.iloc[i]
        with Image.open(row['path']) as img:
            img = img.convert('RGB')
        return self.tf(img), int(row['id'])

test_ds = TestDataset(test_df, val_tfms)
test_loader = DataLoader(test_ds, batch_size=256, shuffle=False, num_workers=nw, pin_memory=True, persistent_workers=True)

# Load best EMA ckpt for inference
ckpt = torch.load(ckpt_path, map_location='cpu')
model = tvm.convnext_tiny(weights=None)
if isinstance(model.classifier, nn.Sequential) and isinstance(model.classifier[-1], nn.Linear):
    in_ch = model.classifier[-1].in_features
    model.classifier[-1] = nn.Linear(in_ch, num_classes)
else:
    for name, m in list(model.named_modules())[::-1]:
        if isinstance(m, nn.Linear) and m.in_features > 0:
            setattr(model, name.split('.')[-1], nn.Linear(m.in_features, num_classes))
            break
model.load_state_dict(ckpt['state_dict'], strict=True)
model = model.to(device).to(memory_format=torch.channels_last).eval()

pred_rows = []
t_inf0 = time.time()
with torch.no_grad():
    for it, (xb, ids) in enumerate(test_loader):
        xb = xb.to(device, non_blocking=True, memory_format=torch.channels_last)
        with torch.amp.autocast('cuda', enabled=True):
            logits1 = model(xb)
            logits2 = model(torch.flip(xb, dims=[3]))  # horizontal flip
            logits = (logits1 + logits2) * 0.5
        preds = logits.argmax(1).detach().cpu().numpy()
        for img_id, p in zip(ids.tolist(), preds.tolist()):
            pred_rows.append((img_id, idx_to_cat_id[p]))
        if (it + 1) % 50 == 0:
            print(f'infer it {it+1}/{math.ceil(len(test_ds)/256)} elapsed {time.time()-t_inf0:.1f}s', flush=True)

sub = pd.DataFrame(pred_rows, columns=['image_id','category_id'])
sub = sub.sort_values('image_id').reset_index(drop=True)
sub.to_csv('submission.csv', index=False)
print('Saved submission.csv with', len(sub), 'rows', flush=True)

torch.cuda.empty_cache(); gc.collect()

# Next: Stage B fine-tune @384px (4-5 epochs, lr~1e-4, mixup=0.1, RE p=0.1), 2-crop TTA.

train_n=230319 val_n=2680 bs=128 workers=8


Resumed weights from artifacts/ckpt_convnext_tiny_stageA_fast.pth (prev best_acc=0.6698)


  ckpt = torch.load(ckpt_path, map_location='cpu')


ep 0 it 100/1800 loss 3.3352 lr 0.000028 elapsed 32.2s


ep 0 it 200/1800 loss 3.2771 lr 0.000056 elapsed 63.1s


ep 0 it 300/1800 loss 3.2747 lr 0.000083 elapsed 94.3s


ep 0 it 400/1800 loss 3.2535 lr 0.000111 elapsed 125.5s


ep 0 it 500/1800 loss 3.2130 lr 0.000139 elapsed 157.0s


ep 0 it 600/1800 loss 3.1894 lr 0.000167 elapsed 188.5s


ep 0 it 700/1800 loss 3.1731 lr 0.000194 elapsed 220.2s


ep 0 it 800/1800 loss 3.1653 lr 0.000222 elapsed 251.9s


ep 0 it 900/1800 loss 3.1660 lr 0.000250 elapsed 283.6s


ep 0 it 1000/1800 loss 3.1630 lr 0.000278 elapsed 315.3s


ep 0 it 1100/1800 loss 3.1599 lr 0.000306 elapsed 347.1s


ep 0 it 1200/1800 loss 3.1531 lr 0.000333 elapsed 378.9s


ep 0 it 1300/1800 loss 3.1570 lr 0.000361 elapsed 410.8s


ep 0 it 1400/1800 loss 3.1607 lr 0.000389 elapsed 442.8s


ep 0 it 1500/1800 loss 3.1671 lr 0.000417 elapsed 474.8s


ep 0 it 1600/1800 loss 3.1785 lr 0.000444 elapsed 506.8s


ep 0 it 1700/1800 loss 3.1843 lr 0.000472 elapsed 538.7s


ep 0 it 1800/1800 loss 3.1875 lr 0.000500 elapsed 570.5s


Epoch 0: train_loss=3.1875 val_acc=0.6951 epoch_time=574.6s total_elapsed=574.6s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.6951)


ep 1 it 100/1800 loss 3.3506 lr 0.000500 elapsed 33.0s


ep 1 it 200/1800 loss 3.3426 lr 0.000500 elapsed 64.8s


ep 1 it 300/1800 loss 3.2864 lr 0.000500 elapsed 96.7s


ep 1 it 400/1800 loss 3.3231 lr 0.000500 elapsed 128.5s


ep 1 it 500/1800 loss 3.3394 lr 0.000499 elapsed 160.4s


ep 1 it 600/1800 loss 3.3173 lr 0.000499 elapsed 192.7s


ep 1 it 700/1800 loss 3.3178 lr 0.000498 elapsed 224.7s


ep 1 it 800/1800 loss 3.3138 lr 0.000498 elapsed 256.7s


ep 1 it 900/1800 loss 3.3091 lr 0.000497 elapsed 288.7s


ep 1 it 1000/1800 loss 3.3034 lr 0.000497 elapsed 320.5s


ep 1 it 1100/1800 loss 3.3054 lr 0.000496 elapsed 352.4s


ep 1 it 1200/1800 loss 3.3107 lr 0.000495 elapsed 384.2s


ep 1 it 1300/1800 loss 3.2999 lr 0.000495 elapsed 416.0s


ep 1 it 1400/1800 loss 3.2982 lr 0.000494 elapsed 447.9s


ep 1 it 1500/1800 loss 3.2969 lr 0.000493 elapsed 479.8s


ep 1 it 1600/1800 loss 3.3050 lr 0.000492 elapsed 511.8s


ep 1 it 1700/1800 loss 3.3095 lr 0.000491 elapsed 543.8s


ep 1 it 1800/1800 loss 3.3066 lr 0.000490 elapsed 575.6s


Epoch 1: train_loss=3.3066 val_acc=0.7209 epoch_time=579.5s total_elapsed=1154.4s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.7209)


ep 2 it 100/1800 loss 3.3660 lr 0.000489 elapsed 33.0s


ep 2 it 200/1800 loss 3.2242 lr 0.000488 elapsed 64.8s


ep 2 it 300/1800 loss 3.1940 lr 0.000486 elapsed 96.7s


ep 2 it 400/1800 loss 3.1639 lr 0.000485 elapsed 128.7s


ep 2 it 500/1800 loss 3.1422 lr 0.000484 elapsed 160.7s


ep 2 it 600/1800 loss 3.1159 lr 0.000482 elapsed 192.7s


ep 2 it 700/1800 loss 3.1122 lr 0.000481 elapsed 224.7s


ep 2 it 800/1800 loss 3.1030 lr 0.000479 elapsed 256.5s


ep 2 it 900/1800 loss 3.0938 lr 0.000477 elapsed 288.4s


ep 2 it 1000/1800 loss 3.1118 lr 0.000476 elapsed 320.4s


ep 2 it 1100/1800 loss 3.1057 lr 0.000474 elapsed 352.3s


ep 2 it 1200/1800 loss 3.1155 lr 0.000472 elapsed 384.3s


ep 2 it 1300/1800 loss 3.1171 lr 0.000470 elapsed 416.2s


ep 2 it 1400/1800 loss 3.1205 lr 0.000469 elapsed 448.3s


ep 2 it 1500/1800 loss 3.1170 lr 0.000467 elapsed 480.3s


ep 2 it 1600/1800 loss 3.1280 lr 0.000465 elapsed 512.3s


ep 2 it 1700/1800 loss 3.1296 lr 0.000463 elapsed 544.3s


ep 2 it 1800/1800 loss 3.1389 lr 0.000460 elapsed 576.1s


Epoch 2: train_loss=3.1389 val_acc=0.7410 epoch_time=579.9s total_elapsed=1734.6s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.7410)


ep 3 it 100/1800 loss 2.9167 lr 0.000458 elapsed 33.3s


ep 3 it 200/1800 loss 2.9591 lr 0.000456 elapsed 65.2s


ep 3 it 300/1800 loss 3.0031 lr 0.000454 elapsed 97.1s


ep 3 it 400/1800 loss 3.0138 lr 0.000451 elapsed 128.9s


ep 3 it 500/1800 loss 3.0508 lr 0.000449 elapsed 160.8s


ep 3 it 600/1800 loss 3.0396 lr 0.000447 elapsed 192.6s


ep 3 it 700/1800 loss 3.0393 lr 0.000444 elapsed 224.5s


ep 3 it 800/1800 loss 3.0389 lr 0.000442 elapsed 256.3s


ep 3 it 900/1800 loss 3.0500 lr 0.000439 elapsed 288.1s


ep 3 it 1000/1800 loss 3.0503 lr 0.000436 elapsed 319.9s


ep 3 it 1100/1800 loss 3.0478 lr 0.000434 elapsed 351.9s


ep 3 it 1200/1800 loss 3.0430 lr 0.000431 elapsed 383.9s


ep 3 it 1300/1800 loss 3.0437 lr 0.000428 elapsed 415.9s


ep 3 it 1400/1800 loss 3.0399 lr 0.000426 elapsed 447.9s


ep 3 it 1500/1800 loss 3.0427 lr 0.000423 elapsed 479.7s


ep 3 it 1600/1800 loss 3.0295 lr 0.000420 elapsed 511.6s


ep 3 it 1700/1800 loss 3.0295 lr 0.000417 elapsed 543.4s


ep 3 it 1800/1800 loss 3.0298 lr 0.000414 elapsed 575.1s


Epoch 3: train_loss=3.0298 val_acc=0.7556 epoch_time=578.8s total_elapsed=2313.8s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.7556)


ep 4 it 100/1800 loss 2.6601 lr 0.000411 elapsed 32.9s


ep 4 it 200/1800 loss 2.6969 lr 0.000408 elapsed 64.7s


ep 4 it 300/1800 loss 2.7577 lr 0.000405 elapsed 96.5s


ep 4 it 400/1800 loss 2.8440 lr 0.000402 elapsed 128.3s


ep 4 it 500/1800 loss 2.8778 lr 0.000398 elapsed 160.3s


ep 4 it 600/1800 loss 2.8876 lr 0.000395 elapsed 192.1s


ep 4 it 700/1800 loss 2.8967 lr 0.000392 elapsed 224.1s


ep 4 it 800/1800 loss 2.9092 lr 0.000389 elapsed 256.1s


ep 4 it 900/1800 loss 2.9226 lr 0.000385 elapsed 288.1s


ep 4 it 1000/1800 loss 2.9314 lr 0.000382 elapsed 319.9s


ep 4 it 1100/1800 loss 2.9441 lr 0.000379 elapsed 351.8s


ep 4 it 1200/1800 loss 2.9528 lr 0.000375 elapsed 383.8s


ep 4 it 1300/1800 loss 2.9581 lr 0.000372 elapsed 415.8s


ep 4 it 1400/1800 loss 2.9583 lr 0.000368 elapsed 447.7s


ep 4 it 1500/1800 loss 2.9617 lr 0.000365 elapsed 479.5s


ep 4 it 1600/1800 loss 2.9651 lr 0.000361 elapsed 511.4s


ep 4 it 1700/1800 loss 2.9611 lr 0.000358 elapsed 543.4s


ep 4 it 1800/1800 loss 2.9663 lr 0.000354 elapsed 575.2s


Epoch 4: train_loss=2.9663 val_acc=0.7638 epoch_time=578.9s total_elapsed=2893.1s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.7638)


ep 5 it 100/1800 loss 3.0492 lr 0.000351 elapsed 33.2s


ep 5 it 200/1800 loss 2.9001 lr 0.000347 elapsed 65.0s


ep 5 it 300/1800 loss 2.8430 lr 0.000343 elapsed 96.9s


ep 5 it 400/1800 loss 2.8270 lr 0.000340 elapsed 128.9s


ep 5 it 500/1800 loss 2.8428 lr 0.000336 elapsed 160.9s


ep 5 it 600/1800 loss 2.8206 lr 0.000332 elapsed 192.9s


ep 5 it 700/1800 loss 2.8142 lr 0.000328 elapsed 224.8s


ep 5 it 800/1800 loss 2.8123 lr 0.000325 elapsed 256.6s


ep 5 it 900/1800 loss 2.8005 lr 0.000321 elapsed 288.6s


ep 5 it 1000/1800 loss 2.8040 lr 0.000317 elapsed 320.5s


ep 5 it 1100/1800 loss 2.8136 lr 0.000313 elapsed 352.4s


ep 5 it 1200/1800 loss 2.8288 lr 0.000309 elapsed 384.4s


ep 5 it 1300/1800 loss 2.8341 lr 0.000306 elapsed 416.4s


ep 5 it 1400/1800 loss 2.8396 lr 0.000302 elapsed 448.5s


ep 5 it 1500/1800 loss 2.8250 lr 0.000298 elapsed 480.5s


ep 5 it 1600/1800 loss 2.8103 lr 0.000294 elapsed 512.5s


ep 5 it 1700/1800 loss 2.8045 lr 0.000290 elapsed 544.5s


ep 5 it 1800/1800 loss 2.7986 lr 0.000286 elapsed 576.3s


Epoch 5: train_loss=2.7986 val_acc=0.7806 epoch_time=580.1s total_elapsed=3473.4s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.7806)


ep 6 it 100/1800 loss 2.7335 lr 0.000282 elapsed 33.1s


ep 6 it 200/1800 loss 2.7744 lr 0.000278 elapsed 65.0s


ep 6 it 300/1800 loss 2.7503 lr 0.000274 elapsed 97.0s


ep 6 it 400/1800 loss 2.7660 lr 0.000270 elapsed 129.0s


ep 6 it 500/1800 loss 2.7489 lr 0.000266 elapsed 160.9s


ep 6 it 600/1800 loss 2.7559 lr 0.000262 elapsed 192.7s


ep 6 it 700/1800 loss 2.7313 lr 0.000258 elapsed 224.5s


ep 6 it 800/1800 loss 2.7326 lr 0.000254 elapsed 256.4s


ep 6 it 900/1800 loss 2.7386 lr 0.000251 elapsed 288.4s


ep 6 it 1000/1800 loss 2.7548 lr 0.000247 elapsed 320.2s


ep 6 it 1100/1800 loss 2.7482 lr 0.000243 elapsed 352.1s


ep 6 it 1200/1800 loss 2.7448 lr 0.000239 elapsed 383.9s


ep 6 it 1300/1800 loss 2.7432 lr 0.000235 elapsed 415.9s


ep 6 it 1400/1800 loss 2.7379 lr 0.000231 elapsed 447.9s


ep 6 it 1500/1800 loss 2.7340 lr 0.000227 elapsed 479.9s


ep 6 it 1600/1800 loss 2.7310 lr 0.000223 elapsed 511.7s


ep 6 it 1700/1800 loss 2.7260 lr 0.000219 elapsed 543.6s


ep 6 it 1800/1800 loss 2.7271 lr 0.000215 elapsed 575.3s


Epoch 6: train_loss=2.7271 val_acc=0.7918 epoch_time=579.1s total_elapsed=4052.8s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.7918)


ep 7 it 100/1800 loss 2.6061 lr 0.000211 elapsed 32.9s


ep 7 it 200/1800 loss 2.5467 lr 0.000207 elapsed 64.7s


ep 7 it 300/1800 loss 2.5729 lr 0.000203 elapsed 96.5s


ep 7 it 400/1800 loss 2.6166 lr 0.000199 elapsed 128.6s


ep 7 it 500/1800 loss 2.6388 lr 0.000196 elapsed 160.6s


ep 7 it 600/1800 loss 2.6561 lr 0.000192 elapsed 192.5s


ep 7 it 700/1800 loss 2.6325 lr 0.000188 elapsed 224.5s


ep 7 it 800/1800 loss 2.6117 lr 0.000184 elapsed 256.5s


ep 7 it 900/1800 loss 2.6109 lr 0.000180 elapsed 288.5s


ep 7 it 1000/1800 loss 2.6027 lr 0.000176 elapsed 320.5s


ep 7 it 1100/1800 loss 2.5971 lr 0.000173 elapsed 352.4s


ep 7 it 1200/1800 loss 2.5998 lr 0.000169 elapsed 384.3s


ep 7 it 1300/1800 loss 2.6062 lr 0.000165 elapsed 416.2s


ep 7 it 1400/1800 loss 2.6001 lr 0.000161 elapsed 448.2s


ep 7 it 1500/1800 loss 2.5942 lr 0.000158 elapsed 480.2s


ep 7 it 1600/1800 loss 2.5976 lr 0.000154 elapsed 512.2s


ep 7 it 1700/1800 loss 2.5849 lr 0.000151 elapsed 544.1s


ep 7 it 1800/1800 loss 2.5769 lr 0.000147 elapsed 575.8s


Epoch 7: train_loss=2.5769 val_acc=0.7996 epoch_time=579.6s total_elapsed=4632.8s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.7996)


ep 8 it 100/1800 loss 2.5521 lr 0.000143 elapsed 33.0s


ep 8 it 200/1800 loss 2.5223 lr 0.000140 elapsed 64.9s


ep 8 it 300/1800 loss 2.5041 lr 0.000136 elapsed 96.8s


ep 8 it 400/1800 loss 2.4817 lr 0.000133 elapsed 128.7s


ep 8 it 500/1800 loss 2.5068 lr 0.000129 elapsed 160.7s


ep 8 it 600/1800 loss 2.5249 lr 0.000126 elapsed 192.7s


ep 8 it 700/1800 loss 2.5159 lr 0.000122 elapsed 224.6s


ep 8 it 800/1800 loss 2.5075 lr 0.000119 elapsed 256.6s


ep 8 it 900/1800 loss 2.5078 lr 0.000116 elapsed 288.5s


ep 8 it 1000/1800 loss 2.4949 lr 0.000112 elapsed 320.5s


ep 8 it 1100/1800 loss 2.4914 lr 0.000109 elapsed 352.5s


ep 8 it 1200/1800 loss 2.4882 lr 0.000106 elapsed 384.5s


ep 8 it 1300/1800 loss 2.4769 lr 0.000103 elapsed 416.6s


ep 8 it 1400/1800 loss 2.4866 lr 0.000099 elapsed 448.5s


ep 8 it 1500/1800 loss 2.4776 lr 0.000096 elapsed 480.5s


ep 8 it 1600/1800 loss 2.4689 lr 0.000093 elapsed 512.5s


ep 8 it 1700/1800 loss 2.4603 lr 0.000090 elapsed 544.5s


ep 8 it 1800/1800 loss 2.4642 lr 0.000087 elapsed 576.3s


Epoch 8: train_loss=2.4642 val_acc=0.8063 epoch_time=580.0s total_elapsed=5213.0s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.8063)


ep 9 it 100/1800 loss 2.3584 lr 0.000084 elapsed 33.0s


ep 9 it 200/1800 loss 2.3389 lr 0.000081 elapsed 64.8s


ep 9 it 300/1800 loss 2.3504 lr 0.000078 elapsed 96.6s


ep 9 it 400/1800 loss 2.3961 lr 0.000076 elapsed 128.4s


ep 9 it 500/1800 loss 2.4076 lr 0.000073 elapsed 160.3s


ep 9 it 600/1800 loss 2.4155 lr 0.000070 elapsed 192.1s


ep 9 it 700/1800 loss 2.3963 lr 0.000067 elapsed 223.9s


ep 9 it 800/1800 loss 2.4063 lr 0.000065 elapsed 255.8s


ep 9 it 900/1800 loss 2.4020 lr 0.000062 elapsed 287.6s


ep 9 it 1000/1800 loss 2.3876 lr 0.000059 elapsed 319.6s


ep 9 it 1100/1800 loss 2.3769 lr 0.000057 elapsed 351.6s


ep 9 it 1200/1800 loss 2.3715 lr 0.000054 elapsed 383.5s


ep 9 it 1300/1800 loss 2.3720 lr 0.000052 elapsed 415.3s


ep 9 it 1400/1800 loss 2.3850 lr 0.000050 elapsed 447.2s


ep 9 it 1500/1800 loss 2.3836 lr 0.000047 elapsed 479.0s


ep 9 it 1600/1800 loss 2.3797 lr 0.000045 elapsed 510.9s


ep 9 it 1700/1800 loss 2.3749 lr 0.000043 elapsed 542.9s


ep 9 it 1800/1800 loss 2.3778 lr 0.000041 elapsed 574.7s


Epoch 9: train_loss=2.3778 val_acc=0.8160 epoch_time=578.4s total_elapsed=5791.8s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.8160)


ep 10 it 100/1800 loss 2.2043 lr 0.000039 elapsed 33.2s


ep 10 it 200/1800 loss 2.2677 lr 0.000036 elapsed 65.0s


ep 10 it 300/1800 loss 2.3434 lr 0.000034 elapsed 96.8s


ep 10 it 400/1800 loss 2.3537 lr 0.000032 elapsed 129.0s


ep 10 it 500/1800 loss 2.3733 lr 0.000031 elapsed 160.9s


ep 10 it 600/1800 loss 2.3407 lr 0.000029 elapsed 192.7s


ep 10 it 700/1800 loss 2.3667 lr 0.000027 elapsed 224.6s


ep 10 it 800/1800 loss 2.3697 lr 0.000025 elapsed 256.6s


ep 10 it 900/1800 loss 2.3585 lr 0.000024 elapsed 288.6s


ep 10 it 1000/1800 loss 2.3401 lr 0.000022 elapsed 320.6s


ep 10 it 1100/1800 loss 2.3431 lr 0.000020 elapsed 352.6s


ep 10 it 1200/1800 loss 2.3541 lr 0.000019 elapsed 384.6s


ep 10 it 1300/1800 loss 2.3520 lr 0.000017 elapsed 416.6s


ep 10 it 1400/1800 loss 2.3455 lr 0.000016 elapsed 448.5s


ep 10 it 1500/1800 loss 2.3440 lr 0.000015 elapsed 480.5s


ep 10 it 1600/1800 loss 2.3452 lr 0.000013 elapsed 512.3s


ep 10 it 1700/1800 loss 2.3422 lr 0.000012 elapsed 544.2s


ep 10 it 1800/1800 loss 2.3439 lr 0.000011 elapsed 576.0s


Epoch 10: train_loss=2.3439 val_acc=0.8235 epoch_time=579.8s total_elapsed=6371.9s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.8235)


ep 11 it 100/1800 loss 2.1789 lr 0.000010 elapsed 33.0s


ep 11 it 200/1800 loss 2.2233 lr 0.000009 elapsed 64.9s


ep 11 it 300/1800 loss 2.2568 lr 0.000008 elapsed 96.8s


ep 11 it 400/1800 loss 2.2592 lr 0.000007 elapsed 128.8s


ep 11 it 500/1800 loss 2.2631 lr 0.000006 elapsed 160.8s


ep 11 it 600/1800 loss 2.2876 lr 0.000006 elapsed 192.8s


ep 11 it 700/1800 loss 2.2673 lr 0.000005 elapsed 224.7s


ep 11 it 800/1800 loss 2.2655 lr 0.000004 elapsed 256.6s


ep 11 it 900/1800 loss 2.2785 lr 0.000004 elapsed 288.6s


ep 11 it 1000/1800 loss 2.2792 lr 0.000003 elapsed 320.6s


ep 11 it 1100/1800 loss 2.2783 lr 0.000003 elapsed 352.6s


ep 11 it 1200/1800 loss 2.2959 lr 0.000002 elapsed 384.6s


ep 11 it 1300/1800 loss 2.3008 lr 0.000002 elapsed 416.7s


ep 11 it 1400/1800 loss 2.2989 lr 0.000002 elapsed 448.7s


ep 11 it 1500/1800 loss 2.2892 lr 0.000001 elapsed 480.6s


ep 11 it 1600/1800 loss 2.2913 lr 0.000001 elapsed 512.4s


ep 11 it 1700/1800 loss 2.2885 lr 0.000001 elapsed 544.4s


ep 11 it 1800/1800 loss 2.2858 lr 0.000001 elapsed 576.1s


Epoch 11: train_loss=2.2858 val_acc=0.8272 epoch_time=579.9s total_elapsed=6952.1s


Saved new best to artifacts/ckpt_convnext_tiny_stageA_fast.pth (acc=0.8272)


Training done. Best val_acc=0.8272


  ckpt = torch.load(ckpt_path, map_location='cpu')


infer it 50/126 elapsed 23.1s


infer it 100/126 elapsed 43.3s


Saved submission.csv with 32214 rows


52

In [10]:
# Validate and fix submission.csv header/types for Kaggle format
import pandas as pd
import numpy as np
from pathlib import Path

sub_fp = Path('submission.csv')
assert sub_fp.exists(), 'submission.csv not found'
sub = pd.read_csv(sub_fp)
print('Before:', sub.shape, sub.columns.tolist(), sub.dtypes.to_dict())

# Kaggle iNat2019 expects columns: id, category_id
if 'image_id' in sub.columns and 'id' not in sub.columns:
    sub = sub.rename(columns={'image_id': 'id'})

# Ensure correct columns and dtypes
assert set(sub.columns) == {'id','category_id'}, f'Unexpected columns: {sub.columns.tolist()}'
sub['id'] = sub['id'].astype(np.int64)
sub['category_id'] = sub['category_id'].astype(np.int64)
sub = sub.sort_values('id').reset_index(drop=True)

sub.to_csv(sub_fp, index=False)
print('After:', sub.shape, sub.columns.tolist(), sub.dtypes.to_dict())
print(sub.head().to_string(index=False))
print(sub.tail().to_string(index=False))

Before: (32214, 2) ['image_id', 'category_id'] {'image_id': dtype('int64'), 'category_id': dtype('int64')}
After: (32214, 2) ['id', 'category_id'] {'id': dtype('int64'), 'category_id': dtype('int64')}
 id  category_id
 20          167
 86          159
101          194
108          471
112          410
    id  category_id
265186          353
265188          902
265193          350
265194          712
265197          328


In [12]:
# Force submission header to ['image_id','category_id'] (some competitions expect this exact header)
import pandas as pd
from pathlib import Path

sub_fp = Path('submission.csv')
assert sub_fp.exists(), 'submission.csv not found'
sub = pd.read_csv(sub_fp)
print('Current columns:', sub.columns.tolist(), 'shape:', sub.shape, flush=True)
if 'id' in sub.columns and 'image_id' not in sub.columns:
    sub = sub.rename(columns={'id': 'image_id'})
assert set(sub.columns) == {'image_id','category_id'}, f'Unexpected columns: {sub.columns.tolist()}'
sub = sub[['image_id','category_id']].copy()
sub['image_id'] = sub['image_id'].astype('int64')
sub['category_id'] = sub['category_id'].astype('int64')
sub = sub.sort_values('image_id').reset_index(drop=True)
sub.to_csv(sub_fp, index=False)
print('Fixed columns:', sub.columns.tolist(), 'shape:', sub.shape, flush=True)
print(sub.head().to_string(index=False))
print(sub.tail().to_string(index=False))

Current columns: ['id', 'category_id'] shape: (32214, 2)


Fixed columns: ['image_id', 'category_id'] shape: (32214, 2)


 image_id  category_id
       20          167
       86          159
      101          194
      108          471
      112          410
 image_id  category_id
   265186          353
   265188          902
   265193          350
   265194          712
   265197          328


In [11]:
# Robust submission validator: ensure rows/ids match test2019.json and header ['id','category_id']
import json, pandas as pd, numpy as np
from pathlib import Path

sub_fp = Path('submission.csv')
assert sub_fp.exists(), 'submission.csv not found'
sub = pd.read_csv(sub_fp)
if 'image_id' in sub.columns and 'id' not in sub.columns:
    sub = sub.rename(columns={'image_id':'id'})
assert 'id' in sub.columns and 'category_id' in sub.columns, f'Unexpected cols: {sub.columns.tolist()}'

# Load test ids from JSON
with open('test2019.json','r') as f:
    test_js = json.load(f)
test_ids = pd.DataFrame({'id': [int(im['id']) for im in test_js['images']]})
test_ids = test_ids.sort_values('id').reset_index(drop=True)

# Check id coverage
sub_ids = set(sub['id'].astype(int).tolist())
test_id_set = set(test_ids['id'].tolist())
missing = test_id_set - sub_ids
extra = sub_ids - test_id_set
print(f'missing_in_sub={len(missing)} extra_in_sub={len(extra)} sub_n={len(sub)} test_n={len(test_ids)}', flush=True)

# Restrict to test ids, drop extras, align order exactly to test ids
sub = sub[['id','category_id']].copy()
sub['id'] = sub['id'].astype(np.int64)
sub['category_id'] = sub['category_id'].astype(np.int64)
sub = test_ids.merge(sub, on='id', how='left')
assert len(sub) == len(test_ids), 'Row count mismatch after align'
assert sub['category_id'].notna().all(), 'Found NaNs in category_id after align; predictions missing for some ids'

sub = sub[['id','category_id']].astype({'id':'int64','category_id':'int64'})
sub.to_csv(sub_fp, index=False)
print('Final submission:', sub.shape, sub.columns.tolist(), sub.dtypes.to_dict(), flush=True)
print(sub.head().to_string(index=False))
print(sub.tail().to_string(index=False))

missing_in_sub=0 extra_in_sub=0 sub_n=32214 test_n=32214


Final submission: (32214, 2) ['id', 'category_id'] {'id': dtype('int64'), 'category_id': dtype('int64')}


 id  category_id
 20          167
 86          159
101          194
108          471
112          410
    id  category_id
265186          353
265188          902
265193          350
265194          712
265197          328


In [13]:
# Convert submission.csv header to ['id','predicted'] (MLE-Benchmark/Kaggle variant expectation)
import pandas as pd
from pathlib import Path

sub_fp = Path('submission.csv')
assert sub_fp.exists(), 'submission.csv not found'
sub = pd.read_csv(sub_fp)
cols = sub.columns.tolist()
if 'image_id' in cols and 'id' not in cols:
    sub = sub.rename(columns={'image_id': 'id'})
if 'category_id' in sub.columns and 'predicted' not in sub.columns:
    sub = sub.rename(columns={'category_id': 'predicted'})
assert set(sub.columns) == {'id','predicted'}, f'Unexpected columns: {sub.columns.tolist()}'
sub['id'] = sub['id'].astype('int64')
sub['predicted'] = sub['predicted'].astype('int64')
sub = sub.sort_values('id').reset_index(drop=True)
sub.to_csv(sub_fp, index=False)
print('submission.csv fixed to columns:', sub.columns.tolist(), 'shape:', sub.shape, flush=True)
print(sub.head().to_string(index=False))
print(sub.tail().to_string(index=False))

submission.csv fixed to columns: ['id', 'predicted'] shape: (32214, 2)


 id  predicted
 20        167
 86        159
101        194
108        471
112        410
    id  predicted
265186        353
265188        902
265193        350
265194        712
265197        328
