# Plan to Win a Medal: Plant Pathology 2021 - FGVC8

Objectives:
- Achieve micro-F1 medal threshold via strong CV, GPU-accelerated CNNs, and robust ensembling.

Strategy Overview:
1) Environment & GPU:
- Install and validate PyTorch + GPU. If GPU not available, stop and re-provision.

2) Data audit & CV definition:
- Inspect train.csv, labels format (multi-label vs multi-class), image counts, missing values.
- Determine proper CV (StratifiedKFold for multilabel via iterative stratification). Save folds to disk for reuse.

3) Baseline model (fast):
- Pretrained timm backbone (e.g., tf_efficientnet_b3_ns or convnext_tiny) @ 384px.
- Loss: BCEWithLogitsLoss for multilabel.
- Augmentations: light albumentations (flips, brightness/contrast).
- 5-fold CV, early stopping, mixed precision, gradient accumulation if needed.
- Log OOF predictions and per-class F1; compute micro-F1.

4) Iterations:
- Improve aug: color jitter, CutMix/Mixup for multilabel.
- Higher res (512), stronger backbones (swin_t, convnext_base) if time allows.
- TTA for test (e.g., 4-8 flips).
- Blend diverse seeds/backbones via logits average; calibrate threshold via OOF to maximize micro-F1.

5) Submission:
- Use OOF-optimized thresholds per class or global, apply to test logits.
- Save predictions to submission.csv matching sample_submission format.

Checkpoints for Expert Review:
- After this plan.
- After data audit + CV folds.
- After first baseline OOF.
- After each major model/ensemble improvement.

Risk Mitigation:
- Subsample smoke runs to validate pipeline.
- Ensure no leakage: fit transforms within folds only.
- Save artifacts (folds, OOF logits, test logits).

Next:
- Run environment check + quick dataset inspection.

In [1]:
# Environment check + quick dataset audit
import sys, subprocess, time, os, json, gc
from pathlib import Path
import pandas as pd

def pip_install(pkgs):
    print(f"Installing: {pkgs}", flush=True)
    subprocess.run([sys.executable, '-m', 'pip', 'install', '-q'] + pkgs, check=True)

# 1) Install torch and basic libs
pip_install(['torch'])
try:
    import torch
    print(f"GPU Available: {torch.cuda.is_available()}")
    print(f"GPU Count: {torch.cuda.device_count()}")
    if torch.cuda.is_available():
        print(f"GPU Name: {torch.cuda.get_device_name(0)}")
        props = torch.cuda.get_device_properties(0)
        print(f"GPU Memory: {props.total_memory / 1024**3:.1f} GB")
except Exception as e:
    print('Torch import/check failed:', e)

# 2) Inspect files
base = Path('.')
print('CWD:', base.resolve())
print('Files:', os.listdir())

train_csv = Path('train.csv')
ss_csv = Path('sample_submission.csv')
assert train_csv.exists() and ss_csv.exists(), 'Missing train.csv or sample_submission.csv'

df = pd.read_csv(train_csv)
print('train.csv shape:', df.shape)
print(df.head(3))

# Expected columns: image, labels (space-separated) or one-hot; infer
cols = df.columns.tolist()
print('Columns:', cols)
label_cols = [c for c in cols if c not in ('image','id','image_id','filepath','path')]
if 'labels' in df.columns:
    # space-separated multilabel
    all_labels = sorted({l for s in df['labels'].astype(str).tolist() for l in s.split()})
    print('Detected multilabel with label set:', all_labels)
    # one-hot frame
    for l in all_labels:
        df[l] = df['labels'].astype(str).str.contains(fr'\b{l}\b').astype(int)
    label_cols = all_labels
elif set({'image','id','image_id'}).intersection(df.columns) and len(label_cols) > 0:
    # likely one-hot multilabel
    print('Detected one-hot multilabel columns:', label_cols)
else:
    print('Unable to confidently detect label schema; will inspect more later.')

# Basic sanity checks
if 'healthy' in df.columns:
    conflict = ((df[[c for c in df.columns if c != 'labels' and c != 'healthy' and c in label_cols]].sum(axis=1) > 0) & (df['healthy'] == 1)).sum()
    print('Rows with healthy + disease (should be few/none):', int(conflict))

# Image dirs
train_dir = Path('train_images')
test_dir = Path('test_images')
print('Train images dir exists:', train_dir.exists(), 'count:', len(list(train_dir.glob('*.jpg'))))
print('Test images dir exists:', test_dir.exists(), 'count:', len(list(test_dir.glob('*.jpg'))))

# Sample submission check
ss = pd.read_csv(ss_csv)
print('sample_submission shape:', ss.shape)
print(ss.head(3))

print('Environment & audit complete.')

Installing: ['torch']


GPU Available: True
GPU Count: 1
GPU Name: NVIDIA A10-24Q
GPU Memory: 23.7 GB
CWD: /var/lib/simon/agent_run_states/plant-pathology-2021-fgvc8-20250921-221229
Files: ['sample_submission.csv', '.00_eda_and_planning_kernel_state.json', 'docker_run.log', 'requirements.txt', 'test_images', 'submission.csv', 'train_images', 'train.csv', 'agent_metadata', 'task.txt', '00_eda_and_planning.ipynb', 'description.md']
train.csv shape: (14905, 2)
                  image                      labels
0  95cb4b75ad1d842a.jpg                     complex
1  be80d450dac87d7c.jpg  frog_eye_leaf_spot complex
2  80cac5abcfa9c9b4.jpg                     healthy
Columns: ['image', 'labels']
Detected multilabel with label set: ['complex', 'frog_eye_leaf_spot', 'healthy', 'powdery_mildew', 'rust', 'scab']
Rows with healthy + disease (should be few/none): 0
Train images dir exists: True count: 14905
Test images dir exists: True count: 3727
sample_submission shape: (3727, 2)
                  image   labels
0  ca6

In [3]:
# Create multilabel folds (Iterative Stratification) and save
import sys, subprocess, os
from pathlib import Path
import pandas as pd
import numpy as np

def pip_install(pkgs):
    print(f"Installing: {pkgs}", flush=True)
    subprocess.run([sys.executable, '-m', 'pip', 'install', '-q'] + pkgs, check=True)

# Install needed libs for CV and modeling later
pip_install(['iterative-stratification', 'timm', 'albumentations', 'opencv-python-headless'])

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

train_csv = Path('train.csv')
df = pd.read_csv(train_csv)

# Parse multilabels from space-separated 'labels' column
assert 'labels' in df.columns and 'image' in df.columns, 'Expected columns image and labels'
all_labels = sorted({l for s in df['labels'].astype(str).tolist() for l in s.split()})
for l in all_labels:
    df[l] = df['labels'].astype(str).str.contains(fr'\b{l}\b').astype(int)

label_cols = all_labels
X = df[['image']].copy()
Y = df[label_cols].values

print('Label set:', label_cols)
print('Class frequencies:', df[label_cols].sum().to_dict())

# Build 5-fold multilabel iterative stratification
mskf = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=42)
df['fold'] = -1
for fold, (trn_idx, val_idx) in enumerate(mskf.split(X, Y)):
    df.loc[val_idx, 'fold'] = fold
    print(f'Assigned fold {fold}: val size {len(val_idx)}')

assert (df['fold'] >= 0).all(), 'Some rows not assigned a fold'

# Inspect per-fold label distribution
for fold in range(5):
    cnts = df[df['fold'] == fold][label_cols].sum().to_dict()
    print(f'Fold {fold} counts:', cnts)

# Save folds to disk
folds_path = Path('folds.csv')
df[['image', 'labels', 'fold'] + label_cols].to_csv(folds_path, index=False)
print('Saved folds to', folds_path.resolve())

Installing: ['iterative-stratification', 'timm', 'albumentations', 'opencv-python-headless']








Label set: ['complex', 'frog_eye_leaf_spot', 'healthy', 'powdery_mildew', 'rust', 'scab']
Class frequencies: {'complex': 1711, 'frog_eye_leaf_spot': 3511, 'healthy': 3673, 'powdery_mildew': 1008, 'rust': 1650, 'scab': 4600}
Assigned fold 0: val size 2977


Assigned fold 1: val size 2992
Assigned fold 2: val size 2975
Assigned fold 3: val size 2993
Assigned fold 4: val size 2968
Fold 0 counts: {'complex': 342, 'frog_eye_leaf_spot': 702, 'healthy': 735, 'powdery_mildew': 202, 'rust': 330, 'scab': 920}
Fold 1 counts: {'complex': 342, 'frog_eye_leaf_spot': 702, 'healthy': 734, 'powdery_mildew': 202, 'rust': 330, 'scab': 920}
Fold 2 counts: {'complex': 343, 'frog_eye_leaf_spot': 703, 'healthy': 735, 'powdery_mildew': 201, 'rust': 330, 'scab': 920}
Fold 3 counts: {'complex': 342, 'frog_eye_leaf_spot': 702, 'healthy': 734, 'powdery_mildew': 202, 'rust': 330, 'scab': 920}
Fold 4 counts: {'complex': 342, 'frog_eye_leaf_spot': 702, 'healthy': 735, 'powdery_mildew': 201, 'rust': 330, 'scab': 920}
Saved folds to /var/lib/simon/agent_run_states/plant-pathology-2021-fgvc8-20250921-221229/folds.csv


In [4]:
# Near-duplicate detection (phash) and fold assignment by groups
import sys, subprocess, os, math, time
from pathlib import Path
import pandas as pd
import numpy as np

def pip_install(pkgs):
    print(f"Installing: {pkgs}", flush=True)
    subprocess.run([sys.executable, '-m', 'pip', 'install', '-q'] + pkgs, check=True)

pip_install(['imagehash', 'Pillow'])
from PIL import Image
import imagehash
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

train_df = pd.read_csv('train.csv')
all_labels = sorted({l for s in train_df['labels'].astype(str).tolist() for l in s.split()})
for l in all_labels:
    train_df[l] = train_df['labels'].astype(str).str.contains(fr'\b{l}\b').astype(int)
label_cols = all_labels

img_dir = Path('train_images')
hash_cache_path = Path('phash_train.csv')

def compute_phashes(df):
    rows = []
    t0 = time.time()
    for i, img_name in enumerate(df['image'].tolist()):
        p = img_dir / img_name
        try:
            with Image.open(p) as im:
                im = im.convert('RGB')
                h = imagehash.phash(im)  # 64-bit hash
        except Exception as e:
            print(f'Error hashing {img_name}: {e}', flush=True)
            h = None
        rows.append({'image': img_name, 'phash': str(h) if h is not None else ''})
        if (i+1) % 1000 == 0:
            elapsed = time.time() - t0
            print(f'Hashed {i+1}/{len(df)} images in {elapsed:.1f}s', flush=True)
    return pd.DataFrame(rows)

if hash_cache_path.exists():
    ph = pd.read_csv(hash_cache_path)
    print('Loaded cached phashes:', ph.shape)
else:
    ph = compute_phashes(train_df)
    ph.to_csv(hash_cache_path, index=False)
    print('Saved phashes to', hash_cache_path.resolve())

# Union-Find for grouping near duplicates
parent = {}
rank = {}
imgs = train_df['image'].tolist()
for img in imgs:
    parent[img] = img
    rank[img] = 0

def find(x):
    while parent[x] != x:
        parent[x] = parent[parent[x]]
        x = parent[x]
    return x

def union(a, b):
    ra, rb = find(a), find(b)
    if ra == rb: return
    if rank[ra] < rank[rb]:
        parent[ra] = rb
    elif rank[ra] > rank[rb]:
        parent[rb] = ra
    else:
        parent[rb] = ra
        rank[ra] += 1

# Build LSH buckets using overlapping slices of the hex hash to limit comparisons
ph_map = dict(zip(ph['image'], ph['phash']))
buckets = {}  # key -> list of image names
slice_len = 6  # hex chars per slice
overlaps = [(0, slice_len), (4, 4+slice_len), (8, 8+slice_len)]  # overlapping windows

def add_to_bucket(key, img):
    if key not in buckets: buckets[key] = []
    buckets[key].append(img)

valid = 0
for img, h in ph_map.items():
    if not h or len(h) < 16:
        continue
    valid += 1
    for s, e in overlaps:
        if e <= len(h):
            add_to_bucket(f'{s}:{h[s:e]}', img)
print('Valid hashed images:', valid, 'Buckets:', len(buckets))

def hamming_hex(a, b):
    # a,b hex strings of equal length
    if len(a) != len(b):
        return 64  # large
    # Convert hex to int and count bits
    return bin(int(a, 16) ^ int(b, 16)).count('1')

threshold = 5  # max Hamming distance to consider duplicates
checked_pairs = 0
linked = 0
t0 = time.time()
for key, lst in buckets.items():
    n = len(lst)
    if n <= 1: continue
    # pairwise within bucket
    for i in range(n):
        hi = ph_map.get(lst[i], '')
        if not hi: continue
        for j in range(i+1, n):
            hj = ph_map.get(lst[j], '')
            if not hj: continue
            checked_pairs += 1
            if hamming_hex(hi, hj) <= threshold:
                union(lst[i], lst[j])
                linked += 1
    if checked_pairs and checked_pairs % 100000 == 0:
        print(f'Checked {checked_pairs} pairs, linked {linked}, elapsed {time.time()-t0:.1f}s', flush=True)
print(f'Pair checks: {checked_pairs}, links: {linked}, time: {time.time()-t0:.1f}s')

# Build groups
groups = {}  # root -> list of images
for img in imgs:
    r = find(img)
    groups.setdefault(r, []).append(img)
group_sizes = [len(v) for v in groups.values()]
print('Groups:', len(groups), 'Avg size:', np.mean(group_sizes), 'Max:', np.max(group_sizes))

# Representative per group
rep_list = []
rep_index = {}  # image -> rep
for root, members in groups.items():
    # pick lexicographically smallest as representative
    rep = sorted(members)[0]
    for m in members:
        rep_index[m] = rep
    rep_list.append(rep)
rep_df = train_df.set_index('image').loc[rep_list].reset_index()

# Multilabel stratification on representatives
X_rep = rep_df[['image']].copy()
Y_rep = rep_df[label_cols].values
mskf = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=42)
rep_df['fold'] = -1
for fold, (_, val_idx) in enumerate(mskf.split(X_rep, Y_rep)):
    rep_df.loc[val_idx, 'fold'] = fold
assert (rep_df['fold'] >= 0).all()

# Broadcast folds to all members
rep_fold_map = dict(zip(rep_df['image'], rep_df['fold']))
train_df['fold_grouped'] = train_df['image'].map(lambda x: rep_fold_map[rep_index[x]])

# Inspect distribution
for fold in range(5):
    cnts = train_df[train_df['fold_grouped'] == fold][label_cols].sum().to_dict()
    print(f'Grouped Fold {fold} counts:', cnts)

# Save grouped folds
out_path = Path('folds_grouped.csv')
cols_out = ['image', 'labels', 'fold_grouped'] + label_cols
train_df[cols_out].to_csv(out_path, index=False)
print('Saved grouped folds to', out_path.resolve())

Installing: ['imagehash', 'Pillow']




Hashed 1000/14905 images in 62.4s


Hashed 2000/14905 images in 123.8s


Hashed 3000/14905 images in 184.8s


Hashed 4000/14905 images in 245.7s


Hashed 5000/14905 images in 307.1s


Hashed 6000/14905 images in 368.2s


Hashed 7000/14905 images in 429.7s


Hashed 8000/14905 images in 491.1s


Hashed 9000/14905 images in 552.4s


Hashed 10000/14905 images in 613.5s


Hashed 11000/14905 images in 675.0s


Hashed 12000/14905 images in 735.8s


Hashed 13000/14905 images in 797.0s


Hashed 14000/14905 images in 858.1s


Saved phashes to /var/lib/simon/agent_run_states/plant-pathology-2021-fgvc8-20250921-221229/phash_train.csv
Valid hashed images: 14905 Buckets: 44576
Pair checks: 139, links: 63, time: 0.0s
Groups: 14879 Avg size: 1.0017474292627193 Max: 2


Grouped Fold 0 counts: {'complex': 343, 'frog_eye_leaf_spot': 702, 'healthy': 735, 'powdery_mildew': 201, 'rust': 333, 'scab': 920}
Grouped Fold 1 counts: {'complex': 342, 'frog_eye_leaf_spot': 703, 'healthy': 735, 'powdery_mildew': 201, 'rust': 330, 'scab': 920}
Grouped Fold 2 counts: {'complex': 341, 'frog_eye_leaf_spot': 702, 'healthy': 734, 'powdery_mildew': 202, 'rust': 329, 'scab': 920}
Grouped Fold 3 counts: {'complex': 342, 'frog_eye_leaf_spot': 702, 'healthy': 734, 'powdery_mildew': 202, 'rust': 327, 'scab': 920}
Grouped Fold 4 counts: {'complex': 343, 'frog_eye_leaf_spot': 702, 'healthy': 735, 'powdery_mildew': 202, 'rust': 331, 'scab': 920}
Saved grouped folds to /var/lib/simon/agent_run_states/plant-pathology-2021-fgvc8-20250921-221229/folds_grouped.csv


In [None]:
# Smoke baseline training: convnext_tiny @384, 1-fold, 3 epochs, AMP
import os, time, math, json, random, gc
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image
from typing import List, Tuple

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
import albumentations as A
from albumentations.pytorch import ToTensorV2
import timm
from sklearn.metrics import f1_score

SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

# Config
IMG_SIZE = 384
BATCH_SIZE = 32
EPOCHS = 3
FOLD = 0
MODEL_NAME = 'convnext_tiny'  # timm
LR = 2e-4
WD = 1e-4
NUM_WORKERS = 4
OUTPUT_DIR = Path('outputs_smoke'); OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Data & labels
train_csv = pd.read_csv('train.csv')
all_labels = sorted({l for s in train_csv['labels'].astype(str).tolist() for l in s.split()})
label_to_idx = {l:i for i,l in enumerate(all_labels)}
num_classes = len(all_labels)
print('Labels:', all_labels)

# Use grouped folds if available
folds_path = Path('folds_grouped.csv') if Path('folds_grouped.csv').exists() else Path('folds.csv')
folds = pd.read_csv(folds_path)
fold_col = 'fold_grouped' if 'fold_grouped' in folds.columns else 'fold'
print('Using folds from', folds_path, 'with fold column', fold_col)

# Build one-hot targets
def labels_to_onehot(s: str) -> np.ndarray:
    vec = np.zeros(num_classes, dtype=np.float32)
    if isinstance(s, str):
        for t in s.split():
            if t in label_to_idx: vec[label_to_idx[t]] = 1.0
    return vec

folds['target_vec'] = folds['labels'].astype(str).apply(labels_to_onehot)

train_df = folds[folds[fold_col] != FOLD].reset_index(drop=True)
valid_df = folds[folds[fold_col] == FOLD].reset_index(drop=True)
print('Train size:', len(train_df), 'Valid size:', len(valid_df))

IMG_DIR = Path('train_images')
TEST_DIR = Path('test_images')

class PlantDataset(Dataset):
    def __init__(self, df: pd.DataFrame, img_dir: Path, aug: A.Compose):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.aug = aug
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / row['image']
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            im = np.array(im)
        if self.aug is not None:
            img = self.aug(image=im)['image']
        else:
            img = T.ToTensor()(Image.fromarray(im))
        target = np.array(row['target_vec'], dtype=np.float32) if 'target_vec' in row else None
        return img, torch.from_numpy(target) if target is not None else torch.zeros(num_classes, dtype=torch.float32)

class TestDataset(Dataset):
    def __init__(self, img_names: List[str], img_dir: Path, aug: A.Compose):
        self.names = img_names
        self.img_dir = img_dir
        self.aug = aug
    def __len__(self): return len(self.names)
    def __getitem__(self, idx):
        name = self.names[idx]
        img_path = self.img_dir / name
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            im = np.array(im)
        img = self.aug(image=im)['image'] if self.aug is not None else T.ToTensor()(Image.fromarray(im))
        return img, name

# Albumentations transforms (aligned with current API requiring size tuple for RRC)
train_tfms = A.Compose([
    A.RandomResizedCrop(size=(IMG_SIZE, IMG_SIZE), scale=(0.85, 1.0), ratio=(0.9, 1.1)),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=20, p=0.7),
    A.RandomBrightnessContrast(0.2, 0.2, p=0.5),
    A.HueSaturationValue(10, 15, 10, p=0.5),
    A.CoarseDropout(max_holes=6, max_height=IMG_SIZE//16, max_width=IMG_SIZE//16, p=0.3),
    A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
    ToTensorV2(),
])
val_tfms = A.Compose([
    A.Resize(height=IMG_SIZE, width=IMG_SIZE),
    A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
    ToTensorV2(),
])

train_ds = PlantDataset(train_df, IMG_DIR, train_tfms)
valid_ds = PlantDataset(valid_df, IMG_DIR, val_tfms)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True, drop_last=True)
valid_loader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

# Model
model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)
num_train_steps = EPOCHS * math.ceil(len(train_loader))
warmup_steps = max(1, len(train_loader))
def lr_lambda(step):
    if step < warmup_steps:
        return float(step + 1) / float(warmup_steps)
    # cosine decay
    progress = (step - warmup_steps) / max(1, (num_train_steps - warmup_steps))
    return 0.5 * (1 + math.cos(math.pi * progress))
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
criterion = nn.BCEWithLogitsLoss()
scaler = torch.cuda.amp.GradScaler(enabled=(device.type=='cuda'))

def sigmoid_np(x):
    return 1.0 / (1.0 + np.exp(-x))

def eval_micro_f1_from_logits(logits: np.ndarray, targets: np.ndarray, thr: float=0.5) -> float:
    probs = sigmoid_np(logits)
    preds = (probs >= thr).astype(int)
    return f1_score(targets.reshape(-1), preds.reshape(-1), average='micro')

best_f1 = -1.0
oof_logits = np.zeros((len(valid_df), num_classes), dtype=np.float32)
val_targets = np.stack(valid_df['target_vec'].values, axis=0).astype(np.float32)

global_step = 0
t_start = time.time()
for epoch in range(EPOCHS):
    model.train()
    ep_loss = 0.0
    t0 = time.time()
    for it, (imgs, targets) in enumerate(train_loader):
        imgs = imgs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):
            logits = model(imgs)
            loss = criterion(logits, targets)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        ep_loss += loss.item()
        global_step += 1
        if (it+1) % 50 == 0:
            elapsed = time.time() - t0
            print(f'Epoch {epoch+1}/{EPOCHS} it {it+1}/{len(train_loader)} loss {ep_loss/(it+1):.4f} lr {scheduler.get_last_lr()[0]:.6f} elapsed {elapsed:.1f}s', flush=True)
    # Validate
    model.eval()
    val_logits_epoch = []
    with torch.no_grad():
        for imgs, targets in valid_loader:
            imgs = imgs.to(device, non_blocking=True)
            with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):
                logits = model(imgs)
            val_logits_epoch.append(logits.float().cpu().numpy())
    val_logits_epoch = np.concatenate(val_logits_epoch, axis=0)
    # Save as current oof
    oof_logits = val_logits_epoch
    micro_f1_050 = eval_micro_f1_from_logits(oof_logits, val_targets, thr=0.5)
    print(f'Epoch {epoch+1}: val micro-F1@0.50 = {micro_f1_050:.4f} | epoch time {(time.time()-t0):.1f}s | total {(time.time()-t_start)/60:.1f}m')
    if micro_f1_050 > best_f1:
        best_f1 = micro_f1_050
        torch.save({'model': model.state_dict(), 'epoch': epoch, 'f1': best_f1}, OUTPUT_DIR / f'best_fold{FOLD}.pth')

# Threshold optimization (global then per-class)
def optimize_thresholds(logits: np.ndarray, targets: np.ndarray) -> Tuple[np.ndarray, float, float]:
    probs = sigmoid_np(logits)
    # global search
    global_thrs = np.arange(0.1, 0.6, 0.01)
    best_global, best_score = 0.5, -1.0
    for th in global_thrs:
        s = f1_score(targets.reshape(-1), (probs >= th).astype(int).reshape(-1), average='micro')
        if s > best_score: best_score, best_global = s, th
    # per-class refine
    thrs = np.full(num_classes, best_global, dtype=np.float32)
    for c in range(num_classes):
        best_c, best_s = thrs[c], -1.0
        for th in np.arange(max(0.05, best_global-0.2), min(0.7, best_global+0.2), 0.01):
            preds = (probs >= thrs)  # start from current
            preds[:, c] = (probs[:, c] >= th)
            s = f1_score(targets.reshape(-1), preds.astype(int).reshape(-1), average='micro')
            if s > best_s: best_s, best_c = s, th
        thrs[c] = best_c
    final_preds = (probs >= thrs).astype(int)
    final_score = f1_score(targets.reshape(-1), final_preds.reshape(-1), average='micro')
    return thrs, best_global, final_score

thrs, best_global, oof_score = optimize_thresholds(oof_logits, val_targets)
print('OOF micro-F1 (per-class optimized):', f'{oof_score:.4f}', 'global@', f'{best_global:.2f}')
np.save(OUTPUT_DIR / f'oof_logits_fold{FOLD}.npy', oof_logits)
np.save(OUTPUT_DIR / f'val_targets_fold{FOLD}.npy', val_targets)
np.save(OUTPUT_DIR / f'thresholds_fold{FOLD}.npy', thrs)

# Train on train folds already done; proceed to test inference with best checkpoint
ckpt = torch.load(OUTPUT_DIR / f'best_fold{FOLD}.pth', map_location='cpu')
model.load_state_dict(ckpt['model'])
model.to(device); model.eval()

# Prepare test loader
test_names = sorted([p.name for p in TEST_DIR.glob('*.jpg')])
test_ds = TestDataset(test_names, TEST_DIR, val_tfms)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_logits = []
t0 = time.time()
with torch.no_grad():
    for it, (imgs, names) in enumerate(test_loader):
        imgs = imgs.to(device, non_blocking=True)
        with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):
            logits = model(imgs)
        test_logits.append(logits.float().cpu().numpy())
        if (it+1) % 50 == 0:
            print(f'Test infer {it+1}/{len(test_loader)} elapsed {time.time()-t0:.1f}s', flush=True)
test_logits = np.concatenate(test_logits, axis=0)
np.save(OUTPUT_DIR / f'test_logits_fold{FOLD}.npy', test_logits)

# Post-processing for submission: per-class thresholds, healthy exclusivity, at-least-one
probs_test = sigmoid_np(test_logits)
preds_bin = (probs_test >= thrs).astype(int)

# healthy exclusivity
if 'healthy' in all_labels:
    h_idx = all_labels.index('healthy')
    disease_idx = [i for i,l in enumerate(all_labels) if l != 'healthy']
    for i in range(preds_bin.shape[0]):
        if preds_bin[i, disease_idx].max() == 1:
            preds_bin[i, h_idx] = 0
        # at-least-one
        if preds_bin[i].sum() == 0:
            top1 = probs_test[i].argmax()
            preds_bin[i, top1] = 1
else:
    # at-least-one
    for i in range(preds_bin.shape[0]):
        if preds_bin[i].sum() == 0:
            top1 = probs_test[i].argmax()
            preds_bin[i, top1] = 1

pred_labels = []
for i in range(preds_bin.shape[0]):
    labs = [all_labels[j] for j in range(num_classes) if preds_bin[i, j] == 1]
    pred_labels.append(' '.join(labs) if len(labs)>0 else all_labels[int(probs_test[i].argmax())])

sub = pd.DataFrame({'image': test_names, 'labels': pred_labels})
sub_path = Path('submission.csv')
sub.to_csv(sub_path, index=False)
print('Saved submission to', sub_path.resolve())
print('Smoke baseline complete. Elapsed total: {:.1f}m'.format((time.time()-t_start)/60.0))

gc.collect(); torch.cuda.empty_cache()

Using device: cuda
Labels: ['complex', 'frog_eye_leaf_spot', 'healthy', 'powdery_mildew', 'rust', 'scab']
Using folds from folds_grouped.csv with fold column fold_grouped
Train size: 11918 Valid size: 2987


  original_init(self, **validated_kwargs)
  A.CoarseDropout(max_holes=6, max_height=IMG_SIZE//16, max_width=IMG_SIZE//16, p=0.3),


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=='cuda'))


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):
