# Plant Pathology 2020 - FGVC7 | Plan and Experiment Log

## Plan
- Goal: Win a medal by achieving a strong mean column-wise ROC AUC on multi-label leaf disease classification.
- Steps:
  1) Load train/test CSVs, inspect columns and label structure (confirm if multi-label: healthy, multiple_diseases, rust, scab).
  2) Verify image paths and counts, basic EDA: label prevalence, leakage checks.
  3) Baseline model: timm CNN (e.g., tf_efficientnet_b3/b4) with cross-validation, BCEWithLogitsLoss, AUC metrics.
  4) Strong aug: flips, rotations, color jitter, slight blur; image size 512 to start.
  5) 5-fold StratifiedKFold (multilabel stratification via iterative stratification if needed).
  6) Train with early stopping; log fold metrics, times; produce out-of-fold AUC and test predictions.
  7) Iterate: try larger img size (576/640), CutMix/Mixup, label-smoothing, TTA, model ensembling (b3+b4+nfnet), and balanced sampling.
  8) Generate submission.csv; target medal thresholds.

## Experiment Log
- [T0] Init notebook, inspect data and labels.
- [T1] Baseline EDA and CV plan.
- [T2] Implement dataset/dataloader and baseline model with timm; train 5-fold @size=512.
- [T3] Evaluate OOF AUC; refine aug/hparams.
- [T4] TTA and ensemble; finalize submission.

## Environment/Runtime Notes
- HW: T4 16GB GPU, plenty of RAM. Use mixed precision and tune the DataLoader `num_workers` setting.
- Always print progress and timing per fold/epoch.

In [None]:
# T0: Load data, inspect schema, check files, basic EDA
import os, sys, time, json, random, math, gc
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Input locations, all relative to the notebook's working directory
DATA_DIR = Path('.')
IMG_DIR = DATA_DIR / 'images'
TRAIN_CSV, TEST_CSV = DATA_DIR / 'train.csv', DATA_DIR / 'test.csv'

print('CWD:', os.getcwd())
print('Files present:', os.listdir())
print('Images dir exists:', IMG_DIR.exists())

train, test = pd.read_csv(TRAIN_CSV), pd.read_csv(TEST_CSV)
print('\nTrain shape:', train.shape)
print('Test shape:', test.shape)
print('\nTrain head:')
print(train.head())
print('\nColumns:', list(train.columns))

# Every column other than the image identifier is treated as a label
# (expect 4 classes: healthy, multiple_diseases, rust, scab)
label_cols = [c for c in train.columns if c != 'image_id']
print('\nLabel columns detected:', label_cols)

# Mean of each label column, most frequent first
print('\nLabel prevalence:')
print(train[label_cols].mean().sort_values(ascending=False))

# Spot-check that the first 10 training images exist on disk
sample_paths = (IMG_DIR / f"{img_id}.jpg" for img_id in train['image_id'].head(10).tolist())
missing = [str(path) for path in sample_paths if not path.exists()]
print(f"\nSample path checks missing={len(missing)}")
if missing:
    print('Missing examples (first 5):', missing[:5])

# Show a small grid of sample images with labels
def show_samples(df, n=9):
    """Plot a near-square grid of up to ``n`` randomly sampled images from ``df``.

    Args:
        df: DataFrame with an ``image_id`` column. When it also contains every
            column listed in the module-level ``label_cols``, each image is
            titled with its own label values.
        n: Maximum number of images to draw (sampled with ``random_state=42``).

    Images that cannot be opened are reported with a message and skipped; their
    grid slot is left empty.
    """
    sample = df.sample(n=min(n, len(df)), random_state=42)
    if len(sample) == 0:
        # Nothing to draw; avoid creating a zero-height figure.
        print('No rows to display')
        return
    # Labels are read from the sampled rows themselves (not from the global
    # ``train`` frame), so the function works for whichever frame is passed in.
    has_labels = all(col in sample.columns for col in label_cols)
    cols = int(math.sqrt(len(sample))) or 1
    rows = math.ceil(len(sample) / cols)
    plt.figure(figsize=(3*cols, 3*rows))
    for i, (_, row) in enumerate(sample.iterrows(), 1):
        img_path = IMG_DIR / f"{row['image_id']}.jpg"
        try:
            # The context manager closes the underlying file once the pixels
            # have been decoded by ``convert``.
            with Image.open(img_path) as handle:
                img = handle.convert('RGB')
        except Exception as e:
            print('Error opening', img_path, e)
            continue
        ax = plt.subplot(rows, cols, i)
        ax.imshow(img)
        if has_labels:
            ax.set_title('\n'.join(f"{k}:{row[k]}" for k in label_cols))
        ax.axis('off')
    plt.tight_layout()
    plt.show()

show_samples(train, n=9)

# Persist a few dataset facts for downstream steps
meta = dict(
    n_train=int(len(train)),
    n_test=int(len(test)),
    label_cols=label_cols,
    img_dir=str(IMG_DIR.resolve()),
)
with open('data_meta.json', 'w') as meta_file:
    meta_file.write(json.dumps(meta, indent=2))
print('\nSaved data_meta.json:', meta)

In [None]:
# T1: Install required packages (PyTorch CUDA 12.1, timm, albumentations, iterstrat)
import sys, subprocess, time
def pip_install(pkgs):
    """Run ``python -m pip install -q`` with the arguments in ``pkgs`` and print the elapsed time.

    Args:
        pkgs: List of pip arguments (requirement specifiers and/or options).

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    print('Installing:', pkgs)
    sys.stdout.flush()
    began = time.time()
    # Use the running interpreter so packages land in this kernel's environment
    command = [sys.executable, '-m', 'pip', 'install', '-q'] + pkgs
    subprocess.check_call(command)
    print(f'Done in {time.time()-began:.1f}s')
    sys.stdout.flush()

# Check torch first
# Informational only: the install below runs regardless of the outcome here.
try:
    import torch
    print('Torch version:', torch.__version__)
except Exception as e:
    print('Torch not installed or import error:', e)

# Pinned CUDA 12.1 builds of the torch stack, resolved through the extra index URL
pkgs = [
    'torch==2.4.0+cu121',
    'torchvision==0.19.0+cu121',
    'torchaudio==2.4.0+cu121',
    '--extra-index-url', 'https://download.pytorch.org/whl/cu121'
]
pip_install(pkgs)

# Modelling, augmentation, multilabel CV splitting and metrics dependencies
pip_install(['timm>=1.0.7', 'albumentations>=1.4.8', 'iterative-stratification', 'scikit-learn>=1.4.0'])
# NOTE(review): if torch was already imported by the check above, this import
# reuses the already-loaded module; a kernel restart may be needed before a
# newly installed build takes effect — confirm.
import torch, torchvision, timm, albumentations as A
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import sklearn
print('Installed versions -> torch:', torch.__version__, '| torchvision:', torchvision.__version__, '| timm:', timm.__version__, '| albumentations:', A.__version__, '| sklearn:', sklearn.__version__)

In [None]:
# T2: Create 5-fold Multilabel Stratified CV splits and save
import pandas as pd
import numpy as np
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import time

train = pd.read_csv('train.csv')
label_cols = ['healthy','multiple_diseases','rust','scab']

# Single source of truth for the number of folds used below
N_SPLITS = 5

mskf = MultilabelStratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
# -1 marks rows that have not been assigned to a validation fold yet
folds = np.full(len(train), -1, dtype=int)
X = train[['image_id']].values
Y = train[label_cols].values

t0=time.time()
for i, (trn_idx, val_idx) in enumerate(mskf.split(X, Y)):
    # Each row is tagged with the index of the split in which it is validation data
    folds[val_idx] = i
    print(f'Assigned fold {i}: val size={len(val_idx)}; elapsed {time.time()-t0:.1f}s', flush=True)

train['fold'] = folds
assert (train['fold']>=0).all(), 'Unassigned fold entries exist!'

# Show distribution per fold
summary = []
for f in range(N_SPLITS):
    df = train[train.fold==f]
    cnt = len(df)
    means = df[label_cols].mean().to_dict()
    summary.append({'fold': f, 'count': cnt, **{f'mean_{k}': v for k,v in means.items()}})
    stats = ', '.join(f'{k}:{v:.3f}' for k, v in means.items())
    # One f-string with escaped braces prints exactly one "{" and one "}";
    # the earlier concatenation appended a literal '}}' and doubled the brace.
    print(f"Fold {f}: n={cnt}, means={{{stats}}}")

train.to_csv('train_folds.csv', index=False)
print('\nSaved train_folds.csv with fold assignments.')

In [None]:
# T3: Dataset, model, training loop (5-fold) and inference to submission (with Mixup, LS, RRCrop, Warmup+Cosine, 8-way TTA logit-avg)
import os, time, math, gc, random
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
import albumentations as A
from albumentations.pytorch import ToTensorV2
import timm
import cv2

# Allocator option exported through the environment for PyTorch's CUDA allocator
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

SEED = 42


def set_seed(seed=SEED):
    """Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) random generators."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)


set_seed()
torch.backends.cudnn.benchmark = False  # reduce workspace usage
torch.backends.cuda.matmul.allow_tf32 = True
torch.set_float32_matmul_precision('high')

# Image folder and label column order shared by the dataset and training code
IMG_DIR = Path('images')
LABEL_COLS = ['healthy','multiple_diseases','rust','scab']

class LeafDataset(Dataset):
    """Image dataset backed by a DataFrame that has an ``image_id`` column.

    Each item is a pair ``(image, second)``:

    * ``image`` is the RGB image read from ``<img_dir>/<image_id>.jpg`` as a
      NumPy array, or whatever ``transforms(image=...)['image']`` returns when
      ``transforms`` is given.
    * ``second`` is a float32 tensor with the values of ``LABEL_COLS`` when the
      frame contains all of those columns, otherwise the row's ``image_id``.
    """

    def __init__(self, df, img_dir, transforms=None):
        # Positional indexing in __getitem__ relies on a clean 0..n-1 index
        self.df = df.reset_index(drop=True)
        self.img_dir = Path(img_dir)
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / f"{row.image_id}.jpg"
        # Close the file as soon as the pixels are decoded instead of leaving
        # the handle to the garbage collector.
        with Image.open(img_path) as handle:
            img = np.array(handle.convert('RGB'))
        if self.transforms:
            img = self.transforms(image=img)['image']
        # Decide on labels from the label columns themselves rather than from
        # the presence of the unrelated 'fold' column.
        has_labels = all(col in self.df.columns for col in LABEL_COLS)
        if not has_labels:
            return img, row.image_id
        target = row[LABEL_COLS].values.astype('float32')
        return img, torch.from_numpy(target)

def get_transforms(img_size=512):
    """Build the albumentations pipelines for training and validation.

    Args:
        img_size: Side length of the square output image.

    Returns:
        ``(train_tfms, valid_tfms)``. The training pipeline applies a random
        resized crop, flips, shift/scale/rotate, colour jitter, CLAHE and blur
        before normalisation; the validation pipeline only resizes and
        normalises. Both end with ``ToTensorV2``.
    """
    norm_mean = (0.485,0.456,0.406)
    norm_std = (0.229,0.224,0.225)

    def _finalize():
        # Fresh normalisation + tensor-conversion steps for each pipeline
        return [A.Normalize(mean=norm_mean, std=norm_std), ToTensorV2()]

    augmentations = [
        A.RandomResizedCrop(size=(img_size, img_size), scale=(0.7, 1.0), ratio=(0.95, 1.05)),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.ShiftScaleRotate(
            shift_limit=0.05,
            scale_limit=0.1,
            rotate_limit=15,
            p=0.6,
            border_mode=cv2.BORDER_REFLECT_101,
        ),
        A.ColorJitter(0.2,0.2,0.2,0.05, p=0.7),
        A.CLAHE(clip_limit=2.0, p=0.3),
        A.GaussianBlur(blur_limit=(3,3), p=0.2),
    ]
    train_tfms = A.Compose(augmentations + _finalize())
    valid_tfms = A.Compose([A.Resize(height=img_size, width=img_size)] + _finalize())
    return train_tfms, valid_tfms

class Model(nn.Module):
    """Wrapper around a timm model created with ``n_out`` outputs and 3 input channels."""

    def __init__(self, backbone='tf_efficientnet_b2_ns', pretrained=True, n_out=4):
        super().__init__()
        timm_kwargs = dict(pretrained=pretrained, num_classes=n_out, in_chans=3)
        self.net = timm.create_model(backbone, **timm_kwargs)
        # Switch on gradient checkpointing when the created model exposes the hook
        if hasattr(self.net, 'set_grad_checkpointing'):
            self.net.set_grad_checkpointing(True)

    def forward(self, x):
        """Return the wrapped network's output for ``x``."""
        out = self.net(x)
        return out

def compute_auc(y_true, y_pred):
    """Return ``(mean_auc, per_column_aucs)`` for 2-D targets and scores.

    Each column is scored independently with ``roc_auc_score``; a column whose
    score cannot be computed contributes ``NaN`` and is ignored by the mean.
    """
    def _column_auc(col):
        try:
            return roc_auc_score(y_true[:, col], y_pred[:, col])
        except Exception:
            return np.nan

    per_column = [_column_auc(col) for col in range(y_true.shape[1])]
    return float(np.nanmean(per_column)), per_column

# Mixup utilities
def sample_beta_distribution(alpha, size):
    """Draw ``size`` samples from a symmetric Beta(alpha, alpha) as a float32 array."""
    draws = np.random.beta(alpha, alpha, size)
    return draws.astype('float32')

def mixup_batch(x, y, alpha=0.3):
    """Blend every sample of a batch with a randomly chosen partner from the same batch.

    Args:
        x: Batch of inputs, indexed along dimension 0.
        y: Targets aligned with ``x`` along dimension 0.
        alpha: Parameter of the symmetric Beta distribution the mixing weight is drawn from.

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where ``mixed_x = lam * x + (1 - lam) * x[perm]``,
        ``y_a`` is ``y`` unchanged, ``y_b`` is ``y[perm]`` and ``lam`` is the single
        mixing weight used for the whole batch.
    """
    # One mixing weight per batch, not per sample
    lam = sample_beta_distribution(alpha, 1)[0]
    perm = torch.randperm(x.size(0), device=x.device)
    partners = x[perm, :]
    mixed_x = lam * x + (1 - lam) * partners
    return mixed_x, y, y[perm], lam

def smooth_targets(y, eps=0.1):
    """Shrink multilabel targets towards 0.5 by a fraction ``eps``.

    With ``eps=0.1`` a target of 1 becomes 0.95 and a target of 0 becomes 0.05.
    """
    keep = 1.0 - eps
    midpoint_share = 0.5 * eps
    return y * keep + midpoint_share

def train_one_fold(fold, df_folds, img_size=320, epochs=18, batch_size=2, lr=2e-4, weight_decay=1e-4, device='cuda', mixup_p=0.7, mixup_alpha=0.3, label_smoothing=0.1, accum_steps=4):
    """Train one cross-validation fold and return predictions from its best checkpoint.

    Rows of ``df_folds`` whose ``fold`` column equals ``fold`` form the validation
    set; all remaining rows are used for training. Training uses AdamW, a linear
    warmup followed by cosine annealing (stepped once per epoch), CUDA autocast
    with gradient scaling, gradient accumulation, random Mixup and label
    smoothing. Whenever the validation mean AUC improves, the model state dict is
    saved to ``fold{fold}_best.pt``; training stops early after ``patience``
    consecutive epochs without improvement.

    Args:
        fold: Index of the fold held out for validation.
        df_folds: DataFrame with ``image_id``, the label columns and ``fold``.
        img_size: Square image size passed to ``get_transforms``.
        epochs: Maximum number of epochs.
        batch_size: Batch size for both loaders.
        lr: AdamW learning rate.
        weight_decay: AdamW weight decay.
        device: Device the model and batches are moved to.
        mixup_p: Probability of applying Mixup to a batch.
        mixup_alpha: Beta parameter passed to ``mixup_batch``.
        label_smoothing: ``eps`` passed to ``smooth_targets``.
        accum_steps: Number of batches accumulated per optimizer step.

    Returns:
        ``(model, v_preds, v_targets, best_auc, optimizer, scheduler)`` where
        ``model`` holds the reloaded best weights, ``v_preds`` are sigmoid
        outputs on the validation loader and ``v_targets`` the matching targets.
    """
    t0=time.time()
    trn_df = df_folds[df_folds.fold!=fold].reset_index(drop=True)
    val_df = df_folds[df_folds.fold==fold].reset_index(drop=True)
    train_tfms, valid_tfms = get_transforms(img_size)
    trn_ds = LeafDataset(trn_df, IMG_DIR, train_tfms)
    val_ds = LeafDataset(val_df, IMG_DIR, valid_tfms)
    # drop_last=True: an incomplete final training batch is discarded
    trn_dl = DataLoader(trn_ds, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False, drop_last=True, persistent_workers=False)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False, persistent_workers=False)

    # Release memory left over from a previous fold before building a new model
    gc.collect(); torch.cuda.empty_cache()
    model = Model().to(device)
    model = model.to(memory_format=torch.channels_last)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    # Warmup (2 epochs) + Cosine
    warmup_epochs = 2
    main_epochs = max(1, epochs - warmup_epochs)
    sched_warmup = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, total_iters=warmup_epochs)
    sched_cos = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=main_epochs)
    scheduler = torch.optim.lr_scheduler.SequentialLR(optimizer, schedulers=[sched_warmup, sched_cos], milestones=[warmup_epochs])
    loss_fn = nn.BCEWithLogitsLoss()
    scaler = torch.amp.GradScaler('cuda', enabled=True)

    best_auc = -1.0
    best_path = f'fold{fold}_best.pt'
    # wait counts consecutive epochs without a new best validation AUC
    patience, wait = 4, 0

    for epoch in range(1, epochs+1):
        model.train()
        tr_loss = 0.0
        start = time.time()
        optimizer.zero_grad(set_to_none=True)
        for bi, (imgs, targets) in enumerate(trn_dl):
            imgs = imgs.to(device, non_blocking=True).to(memory_format=torch.channels_last)
            targets = targets.to(device)
            # Mixup is decided per batch
            use_mix = (random.random() < mixup_p)
            with torch.amp.autocast('cuda'):
                if use_mix:
                    mix_imgs, y_a, y_b, lam = mixup_batch(imgs, targets, alpha=mixup_alpha)
                    y_a_s = smooth_targets(y_a, label_smoothing)
                    y_b_s = smooth_targets(y_b, label_smoothing)
                    logits = model(mix_imgs)
                    # Loss is the lam-weighted mix of the losses against both target sets
                    loss = lam * loss_fn(logits, y_a_s) + (1 - lam) * loss_fn(logits, y_b_s)
                else:
                    logits = model(imgs)
                    targets_s = smooth_targets(targets, label_smoothing)
                    loss = loss_fn(logits, targets_s)
                # Scale down so gradients summed over accum_steps batches average out
                loss = loss / accum_steps
            scaler.scale(loss).backward()
            # Step every accum_steps batches, and on the last batch of the epoch
            if (bi + 1) % accum_steps == 0 or (bi + 1) == len(trn_dl):
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad(set_to_none=True)
            # Undo the accumulation scaling so the logged loss is per batch
            tr_loss += loss.item() * accum_steps
            if (bi+1) % 20 == 0:
                print(f'[Fold {fold}] Epoch {epoch} Batch {bi+1}/{len(trn_dl)} loss={tr_loss/(bi+1):.4f}', flush=True)
        # Learning-rate schedule advances once per epoch
        scheduler.step()
        # Validate
        model.eval()
        v_preds = []; v_targets = []
        with torch.no_grad():
            # No autocast context is entered here, unlike the training loop
            for imgs, targets in val_dl:
                imgs = imgs.to(device, non_blocking=True).to(memory_format=torch.channels_last)
                logits = model(imgs)
                v_preds.append(torch.sigmoid(logits).cpu().numpy())
                v_targets.append(targets.numpy())
        v_preds = np.concatenate(v_preds); v_targets = np.concatenate(v_targets)
        val_auc, per_col = compute_auc(v_targets, v_preds)
        print(f'[Fold {fold}] Epoch {epoch} done in {time.time()-start:.1f}s | tr_loss={tr_loss/max(1,len(trn_dl)):.4f} | val_auc={val_auc:.5f} | cols={per_col}', flush=True)
        if val_auc > best_auc:
            # New best: reset the patience counter and checkpoint the weights
            best_auc = val_auc; wait = 0
            torch.save({'state_dict': model.state_dict(), 'auc': best_auc}, best_path)
        else:
            wait += 1
            if wait >= patience:
                print(f'[Fold {fold}] Early stopping at epoch {epoch}', flush=True)
                break
    # Load best and create OOF preds
    # NOTE(review): if no epoch ever improved on best_auc (e.g. val_auc is NaN every
    # epoch), nothing is saved in this call and this load depends on a pre-existing file.
    ckpt = torch.load(best_path, map_location=device)
    model.load_state_dict(ckpt['state_dict'])
    model.eval()
    v_preds = []; v_targets = []
    with torch.no_grad():
        for imgs, targets in val_dl:
            imgs = imgs.to(device, non_blocking=True).to(memory_format=torch.channels_last)
            logits = model(imgs)
            v_preds.append(torch.sigmoid(logits).cpu().numpy())
            v_targets.append(targets.numpy())
    v_preds = np.concatenate(v_preds); v_targets = np.concatenate(v_targets)
    fold_time = time.time()-t0
    print(f'[Fold {fold}] Best AUC={best_auc:.5f} | time={fold_time/60:.1f} min', flush=True)
    return model, v_preds, v_targets, best_auc, optimizer, scheduler

def tta_logits(model, imgs):
    """Average the model's raw outputs over eight flipped/rotated views of ``imgs``.

    The views are: the input itself, flips along dims 3 and 2, rotations by
    90/180/270 degrees in the (2, 3) plane, and the 90-degree rotation flipped
    along dims 3 and 2. Outputs are averaged before any sigmoid is applied.
    """
    def _views():
        # Views are produced one at a time, in the order they are averaged
        yield imgs
        yield torch.flip(imgs, dims=[3])
        yield torch.flip(imgs, dims=[2])
        for quarter_turns in (1, 2, 3):
            yield torch.rot90(imgs, k=quarter_turns, dims=(2,3))
        rotated = torch.rot90(imgs, k=1, dims=(2,3))
        yield torch.flip(rotated, dims=[3])
        yield torch.flip(rotated, dims=[2])

    with torch.no_grad():
        outs = [model(view) for view in _views()]
    return torch.stack(outs, dim=0).mean(0)

def run_training():
    """Train all 5 folds, save out-of-fold predictions and write a TTA test submission.

    Reads ``train_folds.csv``, ``test.csv`` and ``sample_submission.csv`` from the
    working directory. Writes ``oof_preds.npy``, ``oof_preds.csv`` and
    ``submission.csv``; ``train_one_fold`` additionally writes ``fold{k}_best.pt``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    df = pd.read_csv('train_folds.csv')
    test_df = pd.read_csv('test.csv')
    # Hyper-parameters for this run
    IMG_SIZE = 320
    EPOCHS = 18
    BATCH_SIZE = 2
    LR = 2e-4
    WD = 1e-4
    ACCUM_STEPS = 4

    # One row per training image, one column per label
    oof_preds = np.zeros((len(df), 4), dtype=float)
    oof_targets = df[LABEL_COLS].values.astype(float)
    fold_aucs = []

    for fold in range(5):
        print(f'==== Training fold {fold} ====', flush=True)
        model, v_pred, v_true, best_auc, optimizer, scheduler = train_one_fold(
            fold, df, img_size=IMG_SIZE, epochs=EPOCHS, batch_size=BATCH_SIZE, lr=LR, weight_decay=WD, device=device, accum_steps=ACCUM_STEPS)
        # Validation predictions come back in the row order of df[df.fold == fold]
        val_idx = df.index[df.fold==fold].to_numpy()
        oof_preds[val_idx] = v_pred
        fold_aucs.append(best_auc)
        # Drop references so GPU memory can be reclaimed before the next fold
        del model, optimizer, scheduler; gc.collect(); torch.cuda.empty_cache()

    oof_auc, per_col = compute_auc(oof_targets, oof_preds)
    print(f'OOF mean AUC: {oof_auc:.5f} | per-col {per_col}', flush=True)
    np.save('oof_preds.npy', oof_preds)
    pd.DataFrame(oof_preds, columns=LABEL_COLS).to_csv('oof_preds.csv', index=False)

    # Inference on test with TTA and fold-averaged logits
    _, valid_tfms = get_transforms(IMG_SIZE)
    test_ds = LeafDataset(test_df, IMG_DIR, valid_tfms)
    test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=False, persistent_workers=False)
    all_logits = []
    for fold in range(5):
        print(f'Loading best model fold {fold} for test inference', flush=True)
        model = Model().to(device)
        ckpt = torch.load(f'fold{fold}_best.pt', map_location=device)
        model.load_state_dict(ckpt['state_dict'])
        model.eval()
        fold_logits = []
        with torch.no_grad():
            for imgs, ids in test_dl:
                imgs = imgs.to(device, non_blocking=True).to(memory_format=torch.channels_last)
                logits = tta_logits(model, imgs)  # average logits across TTA
                fold_logits.append(logits.cpu().numpy())
        all_logits.append(np.concatenate(fold_logits))
        del model; gc.collect(); torch.cuda.empty_cache()
    # Average logits over folds first, then convert to probabilities once
    mean_logits = np.mean(np.stack(all_logits, axis=0), axis=0)
    test_pred = 1.0 / (1.0 + np.exp(-mean_logits))  # sigmoid
    sub = pd.read_csv('sample_submission.csv')
    # NOTE(review): predictions are assigned positionally — assumes
    # sample_submission.csv lists images in the same order as test.csv; confirm.
    sub[LABEL_COLS] = test_pred
    sub.to_csv('submission.csv', index=False)
    print('Saved submission.csv. OOF AUC:', oof_auc)

run_training()

In [None]:
# T4: EMA fine-tune existing fold checkpoints at higher resolution and re-infer
import gc, time, random, numpy as np, pandas as pd, torch, torch.nn as nn
from torch.utils.data import DataLoader
from timm.utils import ModelEmaV2

def fine_tune_one_fold_with_ema(fold, df_folds, img_size=352, epochs=3, batch_size=2, lr=1e-4, weight_decay=1e-4, device='cuda',
                                mixup_p=0.7, mixup_alpha=0.3, label_smoothing=0.1, accum_steps=4, ema_decay=0.9998):
    """Fine-tune a fold's saved checkpoint while tracking an EMA copy of the weights.

    Loads ``fold{fold}_best.pt``, continues training on the same train/validation
    split at ``img_size`` with AdamW, 1-epoch linear warmup + cosine schedule,
    CUDA autocast, gradient accumulation, Mixup and label smoothing. After every
    optimizer step the EMA model is updated; validation and checkpointing use the
    EMA weights only. The best EMA state dict (by validation mean AUC) is saved to
    ``fold{fold}_best_ft.pt``.

    Args:
        fold: Index of the fold held out for validation.
        df_folds: DataFrame with ``image_id``, the label columns and ``fold``.
        img_size: Square image size passed to ``get_transforms``.
        epochs: Maximum number of fine-tuning epochs.
        batch_size: Batch size for both loaders.
        lr: AdamW learning rate.
        weight_decay: AdamW weight decay.
        device: Device the model and batches are moved to.
        mixup_p: Probability of applying Mixup to a batch.
        mixup_alpha: Beta parameter for the Mixup weight.
        label_smoothing: ``eps`` used when smoothing targets towards 0.5.
        accum_steps: Number of batches accumulated per optimizer step.
        ema_decay: Decay passed to ``ModelEmaV2``.

    Returns:
        ``(best_auc, v_preds, v_targets)`` computed with the reloaded best EMA weights.
    """
    # NOTE(review): these local imports are not referenced in this function body
    from albumentations.pytorch import ToTensorV2
    import albumentations as A, cv2
    # Local label-smoothing helper (towards 0.5); shadows any module-level one
    def smooth_targets(y, eps=0.1):
        return y * (1.0 - eps) + 0.5 * eps

    trn_df = df_folds[df_folds.fold!=fold].reset_index(drop=True)
    val_df = df_folds[df_folds.fold==fold].reset_index(drop=True)
    train_tfms, valid_tfms = get_transforms(img_size)
    trn_ds = LeafDataset(trn_df, IMG_DIR, train_tfms)
    val_ds = LeafDataset(val_df, IMG_DIR, valid_tfms)
    trn_dl = DataLoader(trn_ds, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False, drop_last=True, persistent_workers=False)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False, persistent_workers=False)

    gc.collect(); torch.cuda.empty_cache()
    model = Model().to(device)
    # Load previous best weights
    ckpt = torch.load(f'fold{fold}_best.pt', map_location=device)
    model.load_state_dict(ckpt['state_dict'], strict=True)
    model = model.to(memory_format=torch.channels_last)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    warmup_epochs = 1
    main_epochs = max(1, epochs - warmup_epochs)
    sched_warm = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, total_iters=warmup_epochs)
    sched_cos = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=main_epochs)
    scheduler = torch.optim.lr_scheduler.SequentialLR(optimizer, [sched_warm, sched_cos], milestones=[warmup_epochs])
    loss_fn = nn.BCEWithLogitsLoss()
    scaler = torch.amp.GradScaler('cuda', enabled=True)
    # NOTE(review): with a decay this close to 1 and only a few epochs of optimizer
    # steps, the EMA weights may stay close to the loaded checkpoint — confirm.
    ema = ModelEmaV2(model, decay=ema_decay)  # track EMA

    best_auc = -1.0
    best_path = f'fold{fold}_best_ft.pt'
    # wait counts consecutive epochs without a new best validation AUC
    patience, wait = 2, 0

    for epoch in range(1, epochs+1):
        model.train()
        tr_loss = 0.0; t0 = time.time()
        optimizer.zero_grad(set_to_none=True)
        for bi, (imgs, targets) in enumerate(trn_dl):
            imgs = imgs.to(device, non_blocking=True).to(memory_format=torch.channels_last)
            targets = targets.to(device)
            use_mix = (random.random() < mixup_p)
            with torch.amp.autocast('cuda'):
                if use_mix:
                    # Inline Mixup: one weight per batch, partners chosen by a random permutation
                    bs = imgs.size(0)
                    index = torch.randperm(bs, device=imgs.device)
                    lam = float(np.random.beta(mixup_alpha, mixup_alpha))
                    mix_imgs = lam * imgs + (1 - lam) * imgs[index]
                    y_a, y_b = targets, targets[index]
                    y_a_s = smooth_targets(y_a, label_smoothing)
                    y_b_s = smooth_targets(y_b, label_smoothing)
                    logits = model(mix_imgs)
                    loss = lam * loss_fn(logits, y_a_s) + (1 - lam) * loss_fn(logits, y_b_s)
                else:
                    logits = model(imgs)
                    targets_s = smooth_targets(targets, label_smoothing)
                    loss = loss_fn(logits, targets_s)
                # Scale down so gradients summed over accum_steps batches average out
                loss = loss / accum_steps
            scaler.scale(loss).backward()
            # Step every accum_steps batches, and on the last batch of the epoch
            if (bi + 1) % accum_steps == 0 or (bi + 1) == len(trn_dl):
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad(set_to_none=True)
                ema.update(model)  # update EMA after optimizer step
            # Undo the accumulation scaling so the logged loss is per batch
            tr_loss += loss.item() * accum_steps
            if (bi+1) % 20 == 0:
                print(f'[FT Fold {fold}] Epoch {epoch} Batch {bi+1}/{len(trn_dl)} loss={tr_loss/(bi+1):.4f}', flush=True)
        scheduler.step()

        # Validate with EMA weights
        model.eval()
        v_preds = []; v_targets = []
        with torch.no_grad():
            for imgs, targets in val_dl:
                imgs = imgs.to(device, non_blocking=True).to(memory_format=torch.channels_last)
                logits = ema.module(imgs)
                v_preds.append(torch.sigmoid(logits).cpu().numpy())
                v_targets.append(targets.numpy())
        v_preds = np.concatenate(v_preds); v_targets = np.concatenate(v_targets)
        val_auc, per_col = compute_auc(v_targets, v_preds)
        print(f'[FT Fold {fold}] Epoch {epoch} time={time.time()-t0:.1f}s | tr_loss={tr_loss/max(1,len(trn_dl)):.4f} | val_auc={val_auc:.5f}', flush=True)
        if val_auc > best_auc:
            best_auc = val_auc; wait = 0
            # The checkpoint stores the EMA weights, not the raw trained weights
            torch.save({'state_dict': ema.module.state_dict(), 'auc': best_auc}, best_path)
        else:
            wait += 1
            if wait >= patience:
                print(f'[FT Fold {fold}] Early stopping at epoch {epoch}', flush=True)
                break

    # Load best ft and return val preds for OOF
    ckpt = torch.load(best_path, map_location=device)
    ema.module.load_state_dict(ckpt['state_dict'], strict=True)
    ema.module.eval()
    v_preds = []; v_targets = []
    with torch.no_grad():
        for imgs, targets in val_dl:
            imgs = imgs.to(device, non_blocking=True).to(memory_format=torch.channels_last)
            logits = ema.module(imgs)
            v_preds.append(torch.sigmoid(logits).cpu().numpy())
            v_targets.append(targets.numpy())
    v_preds = np.concatenate(v_preds); v_targets = np.concatenate(v_targets)
    return best_auc, v_preds, v_targets

def run_finetune_and_infer():
    """Fine-tune all 5 fold checkpoints with EMA, save OOF predictions and rewrite the submission.

    Reads ``train_folds.csv``, ``test.csv`` and ``sample_submission.csv``. Writes
    ``oof_preds_ft.npy``, ``oof_preds_ft.csv`` and overwrites ``submission.csv``;
    ``fine_tune_one_fold_with_ema`` additionally writes ``fold{k}_best_ft.pt``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    df = pd.read_csv('train_folds.csv')
    # Fine-tuning hyper-parameters
    IMG_SIZE_FT = 352
    EPOCHS_FT = 3
    BATCH_SIZE = 2
    LR = 1e-4
    WD = 1e-4
    ACCUM_STEPS = 4
    EMA_DECAY = 0.9998

    print('=== Fine-tuning existing fold checkpoints with EMA ===', flush=True)
    oof_preds = np.zeros((len(df), 4), dtype=float)
    oof_targets = df[LABEL_COLS].values.astype(float)
    fold_aucs = []
    for fold in range(5):
        print(f'-- Fine-tune fold {fold} --', flush=True)
        best_auc, v_pred, v_true = fine_tune_one_fold_with_ema(
            fold, df, img_size=IMG_SIZE_FT, epochs=EPOCHS_FT, batch_size=BATCH_SIZE, lr=LR, weight_decay=WD,
            device=device, accum_steps=ACCUM_STEPS, ema_decay=EMA_DECAY)
        # Validation predictions come back in the row order of df[df.fold == fold]
        val_idx = df.index[df.fold==fold].to_numpy()
        oof_preds[val_idx] = v_pred
        fold_aucs.append(best_auc)
        gc.collect(); torch.cuda.empty_cache()

    oof_auc, per_col = compute_auc(oof_targets, oof_preds)
    print(f'[FT] OOF mean AUC: {oof_auc:.5f} | per-col {per_col}', flush=True)
    np.save('oof_preds_ft.npy', oof_preds)
    pd.DataFrame(oof_preds, columns=LABEL_COLS).to_csv('oof_preds_ft.csv', index=False)

    # Inference on test using EMA fine-tuned checkpoints with TTA
    _, valid_tfms = get_transforms(IMG_SIZE_FT)
    test_df = pd.read_csv('test.csv')
    test_ds = LeafDataset(test_df, IMG_DIR, valid_tfms)
    test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=False, persistent_workers=False)
    all_logits = []
    for fold in range(5):
        print(f'Loading fine-tuned EMA model fold {fold} for test inference', flush=True)
        model = Model().to(device)
        ckpt = torch.load(f'fold{fold}_best_ft.pt', map_location=device)
        model.load_state_dict(ckpt['state_dict'], strict=True)
        model.eval()
        fold_logits = []
        with torch.no_grad():
            for imgs, ids in test_dl:
                imgs = imgs.to(device, non_blocking=True).to(memory_format=torch.channels_last)
                logits = tta_logits(model, imgs)
                fold_logits.append(logits.cpu().numpy())
        all_logits.append(np.concatenate(fold_logits))
        del model; gc.collect(); torch.cuda.empty_cache()
    # Average logits over folds first, then apply the sigmoid once
    mean_logits = np.mean(np.stack(all_logits, axis=0), axis=0)
    test_pred = 1.0 / (1.0 + np.exp(-mean_logits))
    sub = pd.read_csv('sample_submission.csv')
    # NOTE(review): predictions are assigned positionally — assumes
    # sample_submission.csv lists images in the same order as test.csv; confirm.
    sub[LABEL_COLS] = test_pred
    sub.to_csv('submission.csv', index=False)
    print('[FT] Saved submission.csv | OOF AUC:', oof_auc)

run_finetune_and_infer()

In [None]:
# T5: Evaluate fine-tuned OOF AUC and sanity-check submission
import pandas as pd, numpy as np
from sklearn.metrics import roc_auc_score

LABEL_COLS = ['healthy','multiple_diseases','rust','scab']
df = pd.read_csv('train_folds.csv')
oof = pd.read_csv('oof_preds_ft.csv')
assert len(df)==len(oof), f'Length mismatch: {len(df)} vs {len(oof)}'
y_true = df[LABEL_COLS].values.astype(float)
y_pred = oof[LABEL_COLS].values.astype(float)
# One AUC per label column, in LABEL_COLS order
per_col = [
    roc_auc_score(y_true[:, col], y_pred[:, col])
    for col in range(len(LABEL_COLS))
]
mean_auc = float(np.mean(per_col))
print(f'[CHECK] FT OOF mean AUC: {mean_auc:.6f} | per-col: {per_col}')

# Submission sanity checks
sub = pd.read_csv('submission.csv')
print('[CHECK] submission.csv shape:', sub.shape)
print('[CHECK] submission head:')
print(sub.head())
print('[CHECK] value ranges per column:')
for c in LABEL_COLS:
    column = sub[c]
    lo, hi, avg = float(column.min()), float(column.max()), float(column.mean())
    print(c, 'min', lo, 'max', hi, 'mean', avg)

In [None]:
# T6: Mega-Ensemble (pre-FT + FT) with multi-scale TTA; OOF validation and test submission with optional post-processing
import gc, time, numpy as np, pandas as pd, torch
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score

# Device used by the helpers in this cell: CUDA when available, otherwise CPU
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Label column names, in the order used for predictions and CSV outputs in this cell
LABEL_COLS = ['healthy','multiple_diseases','rust','scab']

def get_valid_tfms_size(img_size):
    """Return the deterministic resize + normalise + to-tensor pipeline for ``img_size``."""
    import albumentations as A
    from albumentations.pytorch import ToTensorV2
    steps = [
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
        ToTensorV2(),
    ]
    return A.Compose(steps)

def predict_logits_with_tta(model, dl):
    """Run ``tta_logits`` over every batch of ``dl`` and return the concatenated NumPy logits.

    Each batch of ``dl`` must be a pair whose first element is the image tensor;
    the second element (targets or ids) is ignored.
    """
    model.eval()
    per_batch = []
    with torch.no_grad():
        for batch in dl:
            imgs, _ = batch
            imgs = imgs.to(DEVICE, non_blocking=True)
            imgs = imgs.to(memory_format=torch.channels_last)
            # Relies on the tta_logits helper defined in an earlier cell
            averaged = tta_logits(model, imgs)
            per_batch.append(averaged.cpu().numpy())
    return np.concatenate(per_batch)

def load_model_for_fold(fold, use_ft):
    """Build a ``Model`` on ``DEVICE`` and load the checkpoint saved for ``fold``.

    ``use_ft`` selects ``fold{fold}_best_ft.pt`` over ``fold{fold}_best.pt``.
    The returned model is in channels-last memory format and eval mode.
    """
    net = Model().to(DEVICE)
    suffix = '_best_ft.pt' if use_ft else '_best.pt'
    ckpt_path = f'fold{fold}{suffix}'
    state = torch.load(ckpt_path, map_location=DEVICE)['state_dict']
    net.load_state_dict(state, strict=True)
    net = net.to(memory_format=torch.channels_last)
    net.eval()
    return net

def compute_auc(y_true, y_pred):
    """Score each column with ``roc_auc_score`` and return ``(nan-aware mean, per-column list)``.

    Columns whose score cannot be computed keep a ``NaN`` entry.
    """
    n_cols = y_true.shape[1]
    aucs = [np.nan] * n_cols
    for col in range(n_cols):
        try:
            aucs[col] = roc_auc_score(y_true[:, col], y_pred[:, col])
        except Exception:
            # Leave the NaN placeholder for this column
            continue
    mean_auc = float(np.nanmean(aucs))
    return mean_auc, aucs

def make_oof_ensemble(scales=(320,352), ft_weight=0.7, batch_size=2):
    """Compute leak-free out-of-fold ensemble predictions and their mean column-wise AUC.

    For every fold ``f`` the validation rows are scored ONLY with the two
    checkpoints that were trained with fold ``f`` held out (``fold{f}_best.pt``
    and ``fold{f}_best_ft.pt``). Checkpoints of the other folds were trained on
    these rows, so using them here would leak training data into the OOF score.

    For each scale the pre-fine-tune and fine-tuned TTA logits are blended as
    ``ft_weight * ft + (1 - ft_weight) * pre``; blended logits are averaged over
    the scales that succeeded and passed through a sigmoid.

    Args:
        scales: Square image sizes to evaluate at.
        ft_weight: Blend weight of the fine-tuned checkpoint's logits.
        batch_size: Batch size of the validation loaders.

    Returns:
        ``(oof_probs, y_true, oof_auc, per_col)``. The probabilities are also
        written to ``oof_preds_ens.csv``.

    Raises:
        RuntimeError: If every scale failed for some fold.
    """
    df = pd.read_csv('train_folds.csv')
    oof_logits = np.zeros((len(df), len(LABEL_COLS)), dtype=np.float32)
    y_true = df[LABEL_COLS].values.astype(float)
    for f in range(5):
        val_df = df[df.fold==f].reset_index(drop=True)
        print(f'[ENS OOF] Fold {f} | val_n={len(val_df)}')
        logits_scales = []
        for sz in scales:
            try:
                t_sz = time.time()
                valid_tfms = get_valid_tfms_size(sz)
                val_ds = LeafDataset(val_df, 'images', valid_tfms)
                val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False)
                # Pre-FT checkpoint that never saw fold f during training
                m = load_model_for_fold(f, use_ft=False)
                logits_pre = predict_logits_with_tta(m, val_dl)
                del m; gc.collect(); torch.cuda.empty_cache()
                # Fine-tuned checkpoint for the same held-out fold
                m = load_model_for_fold(f, use_ft=True)
                logits_ft = predict_logits_with_tta(m, val_dl)
                del m; gc.collect(); torch.cuda.empty_cache()
                logits_blend = ft_weight * logits_ft + (1.0 - ft_weight) * logits_pre
                logits_scales.append(logits_blend.astype(np.float32))
                print(f'[ENS OOF] Fold {f} size {sz} done in {time.time()-t_sz:.1f}s')
                del val_dl, val_ds; gc.collect(); torch.cuda.empty_cache()
            except RuntimeError as e:
                # Best effort: a scale that fails (e.g. out of memory) is skipped
                print(f'[ENS OOF] Skipping size {sz} due to error: {e}')
                gc.collect(); torch.cuda.empty_cache()
                continue
        if len(logits_scales)==0:
            raise RuntimeError('All scales failed for OOF ensemble')
        logits_avg = np.mean(np.stack(logits_scales, axis=0), axis=0)
        # Predictions are in the row order of df[df.fold == f]
        val_idx = df.index[df.fold==f].to_numpy()
        oof_logits[val_idx] = logits_avg
    oof_probs = 1.0 / (1.0 + np.exp(-oof_logits))
    oof_auc, per_col = compute_auc(y_true, oof_probs)
    pd.DataFrame(oof_probs, columns=LABEL_COLS).to_csv('oof_preds_ens.csv', index=False)
    print(f'[ENS OOF] mean AUC: {oof_auc:.6f} | per-col: {per_col}')
    return oof_probs, y_true, oof_auc, per_col

def make_test_ensemble(scales=(320,352), ft_weight=0.7, batch_size=2):
    """Build a multi-scale, 5-fold test-set ensemble and return it as a submission frame.

    For every image size in ``scales`` the test set is scored by all five
    pre-FT checkpoints and all five FT checkpoints (via ``load_model_for_fold``
    and ``predict_logits_with_tta``). The two fold-averaged logit arrays are
    blended as ``ft_weight * ft + (1 - ft_weight) * pre``. Blended logits are
    then averaged across the scales that succeeded and passed through a sigmoid.

    Args:
        scales: iterable of image sizes handed to ``get_valid_tfms_size``.
        ft_weight: weight of the FT logits in the blend; pre-FT gets the rest.
        batch_size: DataLoader batch size used for inference.

    Returns:
        The ``sample_submission.csv`` DataFrame with ``LABEL_COLS`` replaced by
        the ensembled probabilities.

    Raises:
        RuntimeError: if every scale failed with a RuntimeError.
    """
    n_folds = 5

    def _mean_fold_logits(loader, use_ft):
        # Score the loader with each fold's checkpoint in turn, freeing the model
        # before loading the next one so only one model is resident at a time.
        total = 0.0
        for mfold in range(n_folds):
            m = load_model_for_fold(mfold, use_ft=use_ft)
            total = total + predict_logits_with_tta(m, loader)
            del m; gc.collect(); torch.cuda.empty_cache()
        return total / float(n_folds)

    test_df = pd.read_csv('test.csv')
    logits_scales_all = []
    for sz in scales:
        try:
            t_sz = time.time()
            valid_tfms = get_valid_tfms_size(sz)
            test_ds = LeafDataset(test_df, 'images', valid_tfms)
            test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False)
            # Pre-FT across all 5 folds
            logits_pre = _mean_fold_logits(test_dl, use_ft=False)
            # FT across all 5 folds
            logits_ft = _mean_fold_logits(test_dl, use_ft=True)
            logits_blend = ft_weight * logits_ft + (1.0 - ft_weight) * logits_pre
            logits_scales_all.append(logits_blend.astype(np.float32))
            print(f'[ENS TEST] size {sz} done in {time.time()-t_sz:.1f}s')
            del test_dl, test_ds; gc.collect(); torch.cuda.empty_cache()
        except RuntimeError as e:
            # Best-effort: a scale that raises RuntimeError is dropped so the
            # remaining scales can still produce a submission.
            print(f'[ENS TEST] Skipping size {sz} due to error: {e}')
            gc.collect(); torch.cuda.empty_cache()
            continue
    if len(logits_scales_all)==0:
        raise RuntimeError('All scales failed for TEST ensemble')
    logits_avg = np.mean(np.stack(logits_scales_all, axis=0), axis=0)
    probs = 1.0 / (1.0 + np.exp(-logits_avg))
    # NOTE(review): assumes sample_submission.csv rows are in the same order as test.csv - confirm.
    sub = pd.read_csv('sample_submission.csv')
    sub[LABEL_COLS] = probs
    return sub

def apply_md_rule(probs, k):
    """Return a copy of ``probs`` whose multiple_diseases column is floored at k * rust * scab.

    probs: numpy array shape (N,4) with order LABEL_COLS. The input is not modified.
    """
    boosted = probs.copy()
    col_md, col_rust, col_scab = (
        LABEL_COLS.index(name) for name in ('multiple_diseases', 'rust', 'scab')
    )
    # Lower bound derived from the joint rust/scab evidence, scaled by k.
    floor = k * boosted[:, col_rust] * boosted[:, col_scab]
    boosted[:, col_md] = np.maximum(boosted[:, col_md], floor)
    return boosted

def run_mega_ensemble():
    """Run the OOF ensemble, pick an optional MD-rule multiplier, and write the test submission.

    Side effects: writes ``oof_preds_ens_pp.csv`` (selected OOF probabilities)
    and ``submission.csv``. The MD rule is applied to the test predictions only
    if some multiplier strictly improved the OOF mean AUC.
    """
    scale_tuple = (320, 352)  # reduced for speed
    blend_w = 0.7

    print('[RUN] Building OOF ensemble ...')
    oof_probs, y_true, base_auc, base_cols = make_oof_ensemble(scale_tuple, ft_weight=blend_w, batch_size=2)
    print(f'[RUN] Base ENS OOF AUC: {base_auc:.6f} | per-col: {base_cols}')

    # Try MD heuristic: start from the plain ensemble and only switch on a strict improvement.
    best_auc = base_auc
    best_k = None
    best_probs = oof_probs
    for k in (0.7, 0.8, 0.9):
        candidate = apply_md_rule(oof_probs, k)
        cand_auc, cand_cols = compute_auc(y_true, candidate)
        print(f'[RUN] k={k} -> AUC={cand_auc:.6f} | per-col={cand_cols}')
        if cand_auc > best_auc:
            best_auc = cand_auc
            best_k = k
            best_probs = candidate
    selected_oof = pd.DataFrame(best_probs, columns=LABEL_COLS)
    selected_oof.to_csv('oof_preds_ens_pp.csv', index=False)
    print(f'[RUN] Selected OOF AUC: {best_auc:.6f} (k={best_k})')

    print('[RUN] Building TEST ensemble ...')
    sub = make_test_ensemble(scale_tuple, ft_weight=blend_w, batch_size=2)
    if best_k is not None:
        # Mirror the OOF-selected post-processing on the test predictions.
        raw_test = sub[LABEL_COLS].values.astype(float)
        sub[LABEL_COLS] = apply_md_rule(raw_test, best_k)
    sub.to_csv('submission.csv', index=False)
    print('[RUN] Saved submission.csv | ENS OOF AUC:', best_auc, '| k:', best_k)

# Cell entry point: runs the OOF + test ensemble pipeline, which writes submission.csv.
run_mega_ensemble()

In [6]:
# T7: Fast blend using existing OOF preds (pre-FT vs FT) via logit-avg; build test blend using pre-FT inference + FT submission logits
import numpy as np, pandas as pd, torch, gc, time, os
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score

# Target columns; prediction arrays in this cell are indexed in this column order.
LABEL_COLS = ['healthy','multiple_diseases','rust','scab']
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

def logit(p, eps=1e-6):
    """Inverse sigmoid of ``p``, with ``p`` clipped to ``[eps, 1 - eps]`` so the result stays finite."""
    lower, upper = eps, 1 - eps
    bounded = np.clip(p, lower, upper)
    odds = bounded / (1 - bounded)
    return np.log(odds)

def compute_auc(y_true, y_pred):
    """Return ``(mean_auc, per_column_aucs)`` for column-wise ROC AUC.

    A column whose score cannot be computed contributes NaN to the list and is
    ignored by the NaN-aware mean.
    """
    def _column_auc(col):
        # Any failure while scoring a column is recorded as NaN rather than raised.
        try:
            return roc_auc_score(y_true[:, col], y_pred[:, col])
        except Exception:
            return np.nan

    per_col = [_column_auc(col) for col in range(y_true.shape[1])]
    return float(np.nanmean(per_col)), per_col

def apply_md_rule_np(probs, k):
    """Return a copy of ``probs`` with multiple_diseases raised to at least k * rust * scab.

    Columns are located through LABEL_COLS; the input array is left untouched.
    """
    adjusted = probs.copy()
    md_col = LABEL_COLS.index('multiple_diseases')
    rust_col = LABEL_COLS.index('rust')
    scab_col = LABEL_COLS.index('scab')
    joint_floor = k * adjusted[:, rust_col] * adjusted[:, scab_col]
    adjusted[:, md_col] = np.maximum(adjusted[:, md_col], joint_floor)
    return adjusted

def infer_test_preft_logits(img_size=320, batch_size=8, use_tta=False):
    """Return test-set logits averaged over the five ``fold{n}_best.pt`` checkpoints.

    Per-fold logits are cached on disk as ``.npy`` files and reused on later
    calls. The cache key covers the fold, the image size and whether TTA was
    used. Non-TTA file names are unchanged, so existing caches remain valid;
    TTA runs get a ``_tta`` suffix so the two modes never share a file.

    Args:
        img_size: square resize applied to every test image.
        batch_size: DataLoader batch size.
        use_tta: when True, score each batch with ``tta_logits`` instead of a
            plain forward pass.

    Returns:
        The element-wise mean of the per-fold logit arrays.
    """
    test_df = pd.read_csv('test.csv')
    # Build DL
    import albumentations as A
    from albumentations.pytorch import ToTensorV2
    valid_tfms = A.Compose([A.Resize(height=img_size, width=img_size), A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)), ToTensorV2()])
    ds = LeafDataset(test_df, 'images', valid_tfms)
    dl = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False)
    # TTA and non-TTA logits differ, so they must not be served from the same cache file.
    cache_tag = '_tta' if use_tta else ''
    fold_logits_all = []
    for fold in range(5):
        cache_path = f'test_logits_preft_fold{fold}_{img_size}{cache_tag}.npy'
        if os.path.exists(cache_path):
            print(f'[BLEND TEST] Using cached pre-FT logits fold {fold} @ {img_size}')
            fold_logits = np.load(cache_path)
            fold_logits_all.append(fold_logits)
            continue
        print(f'[BLEND TEST] Pre-FT fold {fold} inference @ {img_size} | bs={batch_size} | TTA={use_tta}')
        t_fold = time.time()
        m = Model().to(DEVICE)
        ckpt = torch.load(f'fold{fold}_best.pt', map_location=DEVICE)
        m.load_state_dict(ckpt['state_dict'], strict=True)
        m = m.to(memory_format=torch.channels_last)
        m.eval()
        fold_batches = []
        with torch.no_grad():
            for bi, (imgs, ids) in enumerate(dl):
                imgs = imgs.to(DEVICE, non_blocking=True).to(memory_format=torch.channels_last)
                if use_tta:
                    logits = tta_logits(m, imgs)
                else:
                    logits = m(imgs)
                fold_batches.append(logits.cpu().numpy())
                if (bi + 1) % 10 == 0:
                    print(f'  - fold {fold} batch {bi+1}/{len(dl)}')
        fold_logits = np.concatenate(fold_batches)
        np.save(cache_path, fold_logits)
        print(f'[BLEND TEST] Fold {fold} done in {time.time()-t_fold:.1f}s | saved {cache_path}')
        fold_logits_all.append(fold_logits)
        # Release the model before the next fold is loaded.
        del m; gc.collect(); torch.cuda.empty_cache()
    mean_logits = np.mean(np.stack(fold_logits_all, axis=0), axis=0)
    return mean_logits

def run_fast_blend(ft_weight=0.7):
    """Blend pre-FT and FT predictions in logit space for both OOF and test.

    OOF: reads ``train_folds.csv``, ``oof_preds.csv`` and ``oof_preds_ft.csv``,
    blends them, tries the MD rule for a few multipliers and writes the best
    result to ``oof_preds_blend.csv``.

    Test: treats the current ``submission.csv`` as the FT probabilities, blends
    them with freshly inferred (or cached) pre-FT logits at size 320 without
    TTA, and overwrites ``submission.csv`` with the blend. Because the file it
    reads is the file it writes, a second run blends an already-blended
    submission.
    """
    def _read_label_probs(csv_path):
        return pd.read_csv(csv_path)[LABEL_COLS].values.astype(float)

    def _sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    # OOF blend
    y_true = _read_label_probs('train_folds.csv')
    oof_pre_logits = logit(_read_label_probs('oof_preds.csv'))
    oof_ft_logits = logit(_read_label_probs('oof_preds_ft.csv'))
    oof_blend = _sigmoid(ft_weight * oof_ft_logits + (1.0 - ft_weight) * oof_pre_logits)
    base_auc, base_cols = compute_auc(y_true, oof_blend)
    print(f'[BLEND OOF] base AUC={base_auc:.6f} | per-col={base_cols}')

    # Keep the plain blend unless a multiplier strictly improves the mean AUC.
    best_auc = base_auc
    best_k = None
    best_probs = oof_blend
    for k in (0.7, 0.8, 0.9):
        candidate = apply_md_rule_np(oof_blend, k)
        cand_auc, cand_cols = compute_auc(y_true, candidate)
        print(f'[BLEND OOF] k={k} -> AUC={cand_auc:.6f} | per-col={cand_cols}')
        if cand_auc > best_auc:
            best_auc = cand_auc
            best_k = k
            best_probs = candidate
    pd.DataFrame(best_probs, columns=LABEL_COLS).to_csv('oof_preds_blend.csv', index=False)
    print(f'[BLEND OOF] selected AUC={best_auc:.6f} (k={best_k})')

    # Test blend: use FT submission probs as FT logits via inverse sigmoid; compute pre-FT logits via fast inference @320 (no TTA) and blend
    ft_logits = logit(_read_label_probs('submission.csv'))  # current file from FT run
    started = time.time()
    pre_logits = infer_test_preft_logits(img_size=320, batch_size=8, use_tta=False)
    print(f'[BLEND TEST] Pre-FT inference done in {time.time()-started:.1f}s')
    blend_probs = _sigmoid(ft_weight * ft_logits + (1.0 - ft_weight) * pre_logits)
    if best_k is not None:
        blend_probs = apply_md_rule_np(blend_probs, best_k)
    sub = pd.read_csv('sample_submission.csv')
    sub[LABEL_COLS] = blend_probs
    sub.to_csv('submission.csv', index=False)
    print('[BLEND TEST] Saved submission.csv | OOF AUC:', best_auc, '| k:', best_k)

# Cell entry point: blend with 70% weight on the FT logits; reads and then overwrites submission.csv.
run_fast_blend(ft_weight=0.7)

[BLEND OOF] base AUC=0.964111 | per-col=[0.9929541968003507, 0.8920116662247641, 0.9916535461993498, 0.9798231151278294]
[BLEND OOF] k=0.7 -> AUC=0.960181 | per-col=[0.9929541968003507, 0.8762925646755805, 0.9916535461993498, 0.9798231151278294]
[BLEND OOF] k=0.8 -> AUC=0.958683 | per-col=[0.9929541968003507, 0.8703003674103255, 0.9916535461993498, 0.9798231151278294]
[BLEND OOF] k=0.9 -> AUC=0.957092 | per-col=[0.9929541968003507, 0.8639369720843906, 0.9916535461993498, 0.9798231151278294]
[BLEND OOF] selected AUC=0.964111 (k=None)
[BLEND TEST] Pre-FT fold 0 inference @ 320 | bs=8 | TTA=False


  model = create_fn(


  ckpt = torch.load(f'fold{fold}_best.pt', map_location=DEVICE)


  - fold 0 batch 10/23


  - fold 0 batch 20/23


[BLEND TEST] Fold 0 done in 5.7s | saved test_logits_preft_fold0_320.npy


[BLEND TEST] Pre-FT fold 1 inference @ 320 | bs=8 | TTA=False


  - fold 1 batch 10/23


  - fold 1 batch 20/23


[BLEND TEST] Fold 1 done in 4.7s | saved test_logits_preft_fold1_320.npy


[BLEND TEST] Pre-FT fold 2 inference @ 320 | bs=8 | TTA=False


  - fold 2 batch 10/23


  - fold 2 batch 20/23


[BLEND TEST] Fold 2 done in 4.8s | saved test_logits_preft_fold2_320.npy
[BLEND TEST] Pre-FT fold 3 inference @ 320 | bs=8 | TTA=False


  - fold 3 batch 10/23


  - fold 3 batch 20/23


[BLEND TEST] Fold 3 done in 4.8s | saved test_logits_preft_fold3_320.npy
[BLEND TEST] Pre-FT fold 4 inference @ 320 | bs=8 | TTA=False


  - fold 4 batch 10/23


  - fold 4 batch 20/23


[BLEND TEST] Fold 4 done in 4.9s | saved test_logits_preft_fold4_320.npy


[BLEND TEST] Pre-FT inference done in 25.9s
[BLEND TEST] Saved submission.csv | OOF AUC: 0.9641106310880734 | k: None


In [1]:
# T8: Quick OOF-only blend check (no inference) to validate uplift
import pandas as pd, numpy as np
from sklearn.metrics import roc_auc_score

# Target columns; every probability array in this cell is selected in this column order.
LABEL_COLS = ['healthy','multiple_diseases','rust','scab']

def logit(p, eps=1e-6):
    """Map probabilities to log-odds after clipping them into ``[eps, 1 - eps]``."""
    clipped = np.clip(p, eps, 1 - eps)
    complement = 1 - clipped
    return np.log(clipped / complement)

def compute_auc(y_true, y_pred):
    """Compute per-column ROC AUC and its NaN-ignoring mean.

    Returns ``(mean_auc, per_column_aucs)``; a column that cannot be scored is
    reported as NaN in the list.
    """
    def _score(col):
        # Scoring errors are swallowed and surfaced as NaN for that column.
        try:
            return roc_auc_score(y_true[:, col], y_pred[:, col])
        except Exception:
            return np.nan

    n_cols = y_true.shape[1]
    scores = [_score(col) for col in range(n_cols)]
    return float(np.nanmean(scores)), scores

def apply_md_rule_np(probs, k):
    """Floor the multiple_diseases column at k * rust * scab and return the result as a new array."""
    out = probs.copy()
    idx = {name: LABEL_COLS.index(name) for name in ('multiple_diseases', 'rust', 'scab')}
    md_col = idx['multiple_diseases']
    floor = k * out[:, idx['rust']] * out[:, idx['scab']]
    out[:, md_col] = np.maximum(out[:, md_col], floor)
    return out

# Load the fold labels and both sets of OOF probabilities.
# NOTE(review): the three CSVs are combined row by row, so they are assumed to share row order - confirm.
df = pd.read_csv('train_folds.csv')
y_true = df[LABEL_COLS].values.astype(float)
oof_pre = pd.read_csv('oof_preds.csv')[LABEL_COLS].values.astype(float)
oof_ft = pd.read_csv('oof_preds_ft.csv')[LABEL_COLS].values.astype(float)
# Blending is done in logit space, so convert both probability sets once up front.
logits_pre = logit(oof_pre)
logits_ft = logit(oof_ft)
# Sweep the FT weight; the pre-FT model gets the remaining (1 - w).
for w in [0.5, 0.6, 0.7, 0.8]:
    logits_blend = w * logits_ft + (1.0 - w) * logits_pre
    # Sigmoid maps the blended logits back to probabilities.
    oof_blend = 1.0 / (1.0 + np.exp(-logits_blend))
    base_auc, base_cols = compute_auc(y_true, oof_blend)
    print(f'[OOF BLEND] w_ft={w:.1f} -> AUC={base_auc:.6f} | per-col={base_cols}')
    # Check whether any MD-rule multiplier strictly beats the plain blend at this weight;
    # best_k stays None when none does.
    best_auc, best_k = base_auc, None
    for k in [0.7, 0.8, 0.9]:
        mod = apply_md_rule_np(oof_blend, k)
        auc, cols = compute_auc(y_true, mod)
        if auc > best_auc:
            best_auc, best_k = auc, k
    print(f'   -> with MD rule best AUC={best_auc:.6f} (k={best_k})')

[OOF BLEND] w_ft=0.5 -> AUC=0.962927 | per-col=[0.9930911680911682, 0.8889436006211887, 0.9914314861162359, 0.9782422108234272]
   -> with MD rule best AUC=0.962927 (k=None)
[OOF BLEND] w_ft=0.6 -> AUC=0.963633 | per-col=[0.9930637738330046, 0.8907541381008295, 0.991603457458798, 0.9791106904412962]
   -> with MD rule best AUC=0.963633 (k=None)
[OOF BLEND] w_ft=0.7 -> AUC=0.964111 | per-col=[0.9929541968003507, 0.8920116662247641, 0.9916535461993498, 0.9798231151278294]
   -> with MD rule best AUC=0.964111 (k=None)
[OOF BLEND] w_ft=0.8 -> AUC=0.964442 | per-col=[0.9928044415223902, 0.8929813264649066, 0.9915917700860024, 0.9803913586278021]
   -> with MD rule best AUC=0.964442 (k=None)


In [2]:
# T9: Heuristic post-processing search on multiple_diseases to boost OOF; apply best rule to submission.csv
import pandas as pd, numpy as np
from sklearn.metrics import roc_auc_score

# Target columns; probability arrays in this cell are selected and indexed in this order.
LABEL_COLS = ['healthy','multiple_diseases','rust','scab']

def compute_auc(y_true, y_pred):
    """Return the NaN-ignoring mean of column-wise ROC AUC together with the per-column list.

    Columns for which scoring fails are recorded as NaN.
    """
    def _auc_or_nan(col):
        # Scoring failures are deliberately absorbed so one bad column does not abort the run.
        try:
            return roc_auc_score(y_true[:, col], y_pred[:, col])
        except Exception:
            return np.nan

    column_aucs = [_auc_or_nan(col) for col in range(y_true.shape[1])]
    mean_auc = float(np.nanmean(column_aucs))
    return mean_auc, column_aucs

# Ground-truth labels and FT out-of-fold probabilities used by the rule search below.
# NOTE(review): the two files are compared row by row, so they are assumed to share row order - confirm.
df = pd.read_csv('train_folds.csv')
y_true = df[LABEL_COLS].values.astype(float)
oof_ft = pd.read_csv('oof_preds_ft.csv')[LABEL_COLS].values.astype(float)

def apply_rule(probs, rule, k):
    """Return a copy of ``probs`` with multiple_diseases floored by the named heuristic.

    Each rule builds a lower bound from the rust, scab and healthy columns,
    scaled by ``k``. The multiple_diseases column becomes the element-wise
    maximum of its current value and that bound. An unrecognised ``rule``
    leaves the copy unchanged.
    """
    out = probs.copy()
    col = {name: LABEL_COLS.index(name) for name in ('healthy', 'multiple_diseases', 'rust', 'scab')}
    healthy = out[:, col['healthy']]
    current_md = out[:, col['multiple_diseases']]
    rust = out[:, col['rust']]
    scab = out[:, col['scab']]

    # Lazy builders so that only the selected rule's bound is evaluated.
    bound_builders = {
        'k_r_s': lambda: k * rust * scab,
        'k_max_rs_1mh': lambda: k * np.maximum(rust, scab) * (1.0 - healthy),
        'k_mean_rs_1mh': lambda: k * (0.5 * (rust + scab)) * (1.0 - healthy),
        'k_rs_1mh': lambda: k * rust * scab * (1.0 - healthy),
        'k_sum_rs_min1': lambda: k * np.minimum(1.0, rust + scab) * (1.0 - healthy),
    }
    build_bound = bound_builders.get(rule)
    if build_bound is not None:
        out[:, col['multiple_diseases']] = np.maximum(current_md, build_bound())
    return out

# Baseline: FT OOF predictions without any post-processing.
base_auc, base_cols = compute_auc(y_true, oof_ft)
print(f'[PP SEARCH] Base FT OOF AUC={base_auc:.6f} | per-col={base_cols}')
# Grid of heuristic rules (see apply_rule) and multipliers to evaluate on OOF.
rules = ['k_r_s', 'k_max_rs_1mh', 'k_mean_rs_1mh', 'k_rs_1mh', 'k_sum_rs_min1']
ks = [0.3, 0.5, 0.7, 0.8, 0.9, 1.0, 1.2]
# best_rule/best_k stay None unless some combination strictly beats the baseline mean AUC.
best_auc = base_auc; best_rule=None; best_k=None; best_probs = oof_ft
for rule in rules:
    for k in ks:
        mod = apply_rule(oof_ft, rule, k)
        auc, cols = compute_auc(y_true, mod)
        print(f'[PP SEARCH] rule={rule} k={k} -> AUC={auc:.6f} | per-col={cols}')
        if auc > best_auc:
            best_auc, best_rule, best_k, best_probs = auc, rule, k, mod
print(f'[PP SEARCH] Selected AUC={best_auc:.6f} | rule={best_rule} | k={best_k}')
# Persist the selected OOF probabilities (the unmodified FT OOF when no rule won).
pd.DataFrame(best_probs, columns=LABEL_COLS).to_csv('oof_preds_ft_pp.csv', index=False)

# Apply best rule to submission.csv if any improvement found
sub = pd.read_csv('submission.csv')
test_probs = sub[LABEL_COLS].values.astype(float)
if best_rule is not None:
    test_probs_pp = apply_rule(test_probs, best_rule, best_k)
    # Clip to [1e-6, 1 - 1e-6] before writing; rules with k > 1 can push values above 1.
    sub[LABEL_COLS] = np.clip(test_probs_pp, 1e-6, 1-1e-6)
    sub.to_csv('submission.csv', index=False)
    print('[PP APPLY] Applied post-processing to submission.csv with', best_rule, best_k)
else:
    print('[PP APPLY] No OOF improvement from rules; submission.csv left unchanged.')

[PP SEARCH] Base FT OOF AUC=0.964544 | per-col=[0.992190810139528, 0.8935646377031172, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_r_s k=0.3 -> AUC=0.964292 | per-col=[0.992190810139528, 0.8925571001098443, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_r_s k=0.5 -> AUC=0.962635 | per-col=[0.992190810139528, 0.8859285633119958, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_r_s k=0.7 -> AUC=0.960187 | per-col=[0.992190810139528, 0.8761334797924322, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_r_s k=0.8 -> AUC=0.958779 | per-col=[0.992190810139528, 0.8705049051172304, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_r_s k=0.9 -> AUC=0.957408 | per-col=[0.992190810139528, 0.8650202643839249, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_r_s k=1.0 -> AUC=0.956041 | per-col=[0.992190810139528, 0.8595507745918717, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_r_s k=1.2 -> AUC=0.952778 | per-col=[0.99219

[PP SEARCH] rule=k_mean_rs_1mh k=1.2 -> AUC=0.907484 | per-col=[0.992190810139528, 0.6653232832089694, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_rs_1mh k=0.3 -> AUC=0.964249 | per-col=[0.992190810139528, 0.8923828642854438, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_rs_1mh k=0.5 -> AUC=0.963088 | per-col=[0.992190810139528, 0.8877391007916366, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_rs_1mh k=0.7 -> AUC=0.961012 | per-col=[0.992190810139528, 0.8794363849854172, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_rs_1mh k=0.8 -> AUC=0.959874 | per-col=[0.992190810139528, 0.8748835271391237, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_rs_1mh k=0.9 -> AUC=0.958649 | per-col=[0.992190810139528, 0.8699821976440287, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_rs_1mh k=1.0 -> AUC=0.957439 | per-col=[0.992190810139528, 0.8651414719139428, 0.9911960690356414, 0.9812259132605982]
[PP SEARCH] rule=k_rs_1mh k=1

In [4]:
# T10: Minimal helpers for inference (LeafDataset, Model) to support fast blend without re-running training cell
import numpy as np, torch
from torch.utils.data import Dataset
from pathlib import Path
from PIL import Image
import timm

# Default image folder; the helpers below take their directory as an argument instead of reading this.
IMG_DIR = Path('images')
# Target columns, in the order used for prediction arrays.
LABEL_COLS = ['healthy','multiple_diseases','rust','scab']

class LeafDataset(Dataset):
    """Inference dataset yielding ``(image, image_id)`` pairs.

    Args:
        df: DataFrame with an ``image_id`` column; its index is reset so
            positional lookups match dataset indices.
        img_dir: directory containing ``<image_id>.jpg`` files.
        transforms: optional callable invoked as ``transforms(image=img)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, img_dir, transforms=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = Path(img_dir)
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / f"{row.image_id}.jpg"
        # Close the file handle deterministically instead of relying on Pillow's
        # implicit close or on garbage collection.
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if self.transforms:
            img = self.transforms(image=img)['image']
        return img, row.image_id

class Model(torch.nn.Module):
    """Thin wrapper around a timm backbone created without pretrained weights.

    The backbone is built with ``n_out`` output classes and 3 input channels;
    gradient checkpointing is switched on when the backbone exposes
    ``set_grad_checkpointing``.
    """

    def __init__(self, backbone='tf_efficientnet_b2_ns', n_out=4):
        super().__init__()
        net = timm.create_model(backbone, pretrained=False, num_classes=n_out, in_chans=3)
        if hasattr(net, 'set_grad_checkpointing'):
            net.set_grad_checkpointing(True)
        self.net = net

    def forward(self, x):
        out = self.net(x)
        return out