# Aerial Cactus Identification – Medal Plan

Goal: Achieve 1.000 AUC (medal).

Plan:
- Environment/GPU: Verify GPU availability; use PyTorch + torchvision.
- Data pipeline:
  - Unzip train/test, read train.csv (id, has_cactus).
  - Build robust Dataset with augmentations (Albumentations or torchvision).
  - 32x32 RGB images; normalize with ImageNet stats for pretrained models or compute dataset stats.
- Validation:
  - Stratified KFold (e.g., 5 folds), deterministic seed.
  - Track OOF predictions and per-fold ROC-AUC.
  - Save OOF/test logits for future blends.
- Baseline model:
  - Transfer learning: EfficientNet-B0 or ResNet18 pretrained on ImageNet; replace head for binary.
  - Loss: BCEWithLogitsLoss; optimizer AdamW; one-cycle or cosine schedule.
  - Augs: flips, small shifts/rotations, Cutout optional.
  - Training: small epochs first (e.g., 5) with early stopping; then extend if helpful.
- Inference:
  - TTA (hflip, vflip) if beneficial.
  - Average fold predictions; produce submission.csv.
- Logging & checks:
  - Print progress, fold times, AUCs.
  - Cache datasets and predictions.
- Next steps:
  - If baseline < 1.0 AUC, try:
    - Higher-res upscaling with light conv stem;
    - Stronger augs;
    - Different backbone (ConvNeXt-Tiny, EfficientNet-B3) or custom small CNN tuned for 32x32.
    - Blend multiple seeds/backbones.

We will request expert review after environment/data setup, after baseline OOF, and before long runs.

In [1]:
# Setup: packages, GPU check, unzip data, basic EDA
import os, sys, time, zipfile, subprocess, json, math, random, hashlib, gc
from pathlib import Path

def pip_install(pkgs):
    for p in pkgs:
        print(f"[pip] installing {p}...")
        subprocess.run([sys.executable, '-m', 'pip', 'install', p, '--quiet'], check=False)

required = ['torch', 'torchvision', 'timm', 'albumentations', 'imagehash']
pip_install(required)

import torch
import torchvision
import pandas as pd
from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold
from PIL import Image
import imagehash

print("GPU Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))
    props = torch.cuda.get_device_properties(0)
    print(f"GPU Memory: {props.total_memory/1024**3:.1f} GB")

DATA_DIR = Path('.')
TRAIN_ZIP = DATA_DIR/'train.zip'
TEST_ZIP = DATA_DIR/'test.zip'
TRAIN_DIR = DATA_DIR/'train'
TEST_DIR = DATA_DIR/'test'

def unzip_if_needed(zip_path: Path, out_dir: Path):
    if out_dir.exists() and any(out_dir.iterdir()):
        print(f"[unzip] {out_dir} already extracted.")
        return
    print(f"[unzip] extracting {zip_path} -> {out_dir}")
    out_dir.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as zf:
        zf.extractall(out_dir)
    print(f"[unzip] done: {out_dir}")

t0 = time.time()
unzip_if_needed(TRAIN_ZIP, TRAIN_DIR)
unzip_if_needed(TEST_ZIP, TEST_DIR)
print(f"[timer] unzip elapsed: {time.time()-t0:.2f}s")

train_csv = pd.read_csv(DATA_DIR/'train.csv')
sample_sub = pd.read_csv(DATA_DIR/'sample_submission.csv')
print("train.csv shape:", train_csv.shape)
print("sample_submission shape:", sample_sub.shape)
print(train_csv.head(3))
print(sample_sub.head(3))

train_imgs = sorted([p.name for p in TRAIN_DIR.glob('*.jpg')])
test_imgs = sorted([p.name for p in TEST_DIR.glob('*.jpg')])
print(f"#train images: {len(train_imgs)} | #test images: {len(test_imgs)}")

# Sanity check: CSV ids match files
missing_in_fs = set(train_csv['id']) - set(train_imgs)
missing_in_csv = set(train_imgs) - set(train_csv['id'])
print(f"Missing in filesystem: {len(missing_in_fs)} | Extra files not in CSV: {len(missing_in_csv)}")

# Quick image probe
probe_paths = [TRAIN_DIR/train_csv['id'].iloc[i] for i in range(min(3, len(train_csv)))]
for p in probe_paths:
    im = Image.open(p)
    print(p.name, im.size, im.mode)
    im.close()

print("[setup] Completed. Next: compute exact/perceptual hashes for leakage checks.")

[pip] installing torch...




[pip] installing torchvision...






[pip] installing timm...




[pip] installing albumentations...




[pip] installing imagehash...




GPU Available: True
GPU Name: Tesla T4
GPU Memory: 14.6 GB
[unzip] extracting train.zip -> train


[unzip] done: train
[unzip] extracting test.zip -> test


[unzip] done: test
[timer] unzip elapsed: 1.84s
train.csv shape: (14175, 2)
sample_submission shape: (3325, 2)
                                     id  has_cactus
0  2de8f189f1dce439766637e75df0ee27.jpg           1
1  36704d250f236238e7f996812c48235d.jpg           1
2  eacde22fdc8c175972a5768e3daa8bc9.jpg           1
                                     id  has_cactus
0  09034a34de0e2015a8a28dfe18f423f6.jpg         0.5
1  134f04305c795d6d202502c2ce3578f3.jpg         0.5
2  41fad8d145e6c41868ce3617e30a2545.jpg         0.5
#train images: 14175 | #test images: 3325
Missing in filesystem: 0 | Extra files not in CSV: 0
2de8f189f1dce439766637e75df0ee27.jpg (32, 32) RGB
36704d250f236238e7f996812c48235d.jpg (32, 32) RGB
eacde22fdc8c175972a5768e3daa8bc9.jpg (32, 32) RGB
[setup] Completed. Next: compute exact/perceptual hashes for leakage checks.


In [2]:
# Compute exact (SHA1) and perceptual (phash) hashes; detect duplicates/leakage
import pandas as pd
from pathlib import Path
from PIL import Image
import imagehash, hashlib, time, os, gc

def sha1_file(path: Path) -> str:
    with open(path, 'rb') as f:
        return hashlib.sha1(f.read()).hexdigest()

def phash_file(path: Path) -> str:
    with Image.open(path) as im:
        return str(imagehash.phash(im))  # 16-char hex

def compute_hashes(img_dir: Path, ids: list[str], do_phash: bool = True, log_every: int = 1000):
    rows = []
    t0 = time.time()
    for i, img_id in enumerate(ids):
        p = img_dir / img_id
        sha1 = sha1_file(p)
        ph = phash_file(p) if do_phash else ''
        rows.append((img_id, sha1, ph))
        if (i+1) % log_every == 0:
            print(f"[hash] {i+1}/{len(ids)} processed; elapsed {time.time()-t0:.1f}s", flush=True)
    df = pd.DataFrame(rows, columns=['id','sha1','phash'])
    return df

# Load IDs
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('sample_submission.csv')
train_ids = train_df['id'].tolist()
test_ids = test_df['id'].tolist()

t0 = time.time()
train_hash_df = compute_hashes(Path('train'), train_ids, do_phash=True, log_every=2000)
train_hash_df = train_hash_df.merge(train_df, on='id', how='left')
print(f"[hash] train hashing done in {time.time()-t0:.1f}s; rows={len(train_hash_df)}")

t1 = time.time()
test_hash_df = compute_hashes(Path('test'), test_ids, do_phash=True, log_every=1000)
print(f"[hash] test hashing done in {time.time()-t1:.1f}s; rows={len(test_hash_df)}")

# Save for reuse
train_hash_df.to_csv('train_hashes.csv', index=False)
test_hash_df.to_csv('test_hashes.csv', index=False)
print('[hash] saved train_hashes.csv and test_hashes.csv')

# Duplicate analysis in train
dup_counts = train_hash_df.groupby('sha1').size().reset_index(name='n')
train_dups = dup_counts[dup_counts['n'] > 1]['sha1']
print(f"[dups] exact-duplicate groups in train: {len(train_dups)} (total dup images: {int(dup_counts['n'].sum() - (dup_counts['n']>0).sum())})")

# Check for label conflicts within duplicate groups
conflicts = (train_hash_df.groupby('sha1')['has_cactus']
             .nunique().reset_index(name='n_labels'))
conflicts = conflicts[conflicts['n_labels'] > 1]
print(f"[dups] label conflicts across identical images: {len(conflicts)}")
if len(conflicts):
    print(conflicts.head())

# Test-train leakage via exact hash
test_leak = test_hash_df.merge(train_hash_df[['sha1','has_cactus']].drop_duplicates('sha1'), on='sha1', how='left')
n_match = test_leak['has_cactus'].notna().sum()
print(f"[leak] test images with exact-hash match to train: {n_match} / {len(test_hash_df)} ({n_match/len(test_hash_df)*100:.1f}%)")

# Build lookup map (sha1 -> label) for inference; prefer majority label if duplicates exist
label_by_sha1 = (train_hash_df.groupby('sha1')['has_cactus']
                 .mean().round().astype(int).to_dict())
import json
with open('train_sha1_label_map.json', 'w') as f:
    json.dump(label_by_sha1, f)
print('[leak] saved train_sha1_label_map.json (sha1 -> majority label)')

# Prepare groups for CV: use exact sha1 as group
train_hash_df[['id','sha1','has_cactus']].to_csv('cv_groups.csv', index=False)
print('[cv] saved cv_groups.csv (id, sha1, has_cactus) for StratifiedGroupKFold')

gc.collect();

[hash] 2000/14175 processed; elapsed 0.9s


[hash] 4000/14175 processed; elapsed 1.8s


[hash] 6000/14175 processed; elapsed 2.7s


[hash] 8000/14175 processed; elapsed 3.7s


[hash] 10000/14175 processed; elapsed 4.6s


[hash] 12000/14175 processed; elapsed 5.5s


[hash] 14000/14175 processed; elapsed 6.5s


[hash] train hashing done in 6.6s; rows=14175


[hash] 1000/3325 processed; elapsed 0.5s


[hash] 2000/3325 processed; elapsed 0.9s


[hash] 3000/3325 processed; elapsed 1.4s


[hash] test hashing done in 1.5s; rows=3325


[hash] saved train_hashes.csv and test_hashes.csv
[dups] exact-duplicate groups in train: 0 (total dup images: 0)
[dups] label conflicts across identical images: 0
[leak] test images with exact-hash match to train: 0 / 3325 (0.0%)
[leak] saved train_sha1_label_map.json (sha1 -> majority label)
[cv] saved cv_groups.csv (id, sha1, has_cactus) for StratifiedGroupKFold


In [7]:
# ResNet18 (torchvision) with StratifiedGroupKFold, OOF AUC, TTA, and submission
import os, math, time, random, json, gc
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedGroupKFold
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from torchvision import models
from torchvision.models import ResNet18_Weights

# Ensure writable caches for any libs that may try to use ~/.cache
CACHE_DIR = Path('./.model_cache')
for env_key in ['TORCH_HOME', 'XDG_CACHE_HOME']:
    os.environ[env_key] = str(CACHE_DIR)
CACHE_DIR.mkdir(parents=True, exist_ok=True)

SEED = 42
IMG_SIZE = 96
N_FOLDS = 5
EPOCHS = 12
BATCH_SIZE = 256
WORKERS = 2
LR_BACKBONE = 1e-4
LR_HEAD = 1e-3
WEIGHT_DECAY = 1e-4
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

def set_seed(seed=SEED):
    random.seed(seed); np.random.seed(seed); torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed()

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def get_transforms(train=True):
    if train:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.08, scale_limit=0.10, rotate_limit=10, border_mode=0, p=0.5),
            A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05, hue=0.0, p=0.2),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])
    else:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])

class CactusDataset(Dataset):
    def __init__(self, img_dir, df, mode='train', tta_flip=None, tta_vflip=False):
        # mode in {'train','valid','test'}
        self.img_dir = Path(img_dir)
        self.df = df.reset_index(drop=True)
        self.mode = mode
        self.tta_flip = tta_flip  # None or 'hflip'
        self.tta_vflip = tta_vflip
        self.tfms = get_transforms(train=(mode=='train'))
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / row['id']
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            img = np.array(im)
        if self.tta_flip == 'hflip':
            img = np.ascontiguousarray(img[:, ::-1, :])
        if self.tta_vflip:
            img = np.ascontiguousarray(img[::-1, :, :])
        aug = self.tfms(image=img)
        img_t = torch.from_numpy(aug['image'].transpose(2,0,1)).float()
        if self.mode in ('train','valid'):
            y = torch.tensor(row['has_cactus'], dtype=torch.float32)
            return img_t, y
        return img_t, row['id']

def build_model():
    model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, 1)
    return model

def make_optimizer(model):
    # Differential LR: smaller for backbone, larger for head
    head_params = list(model.fc.parameters())
    backbone_params = [p for n,p in model.named_parameters() if not n.startswith('fc.')]
    return torch.optim.AdamW([
        {'params': backbone_params, 'lr': LR_BACKBONE},
        {'params': head_params, 'lr': LR_HEAD},
    ], weight_decay=WEIGHT_DECAY)

def train_one_epoch(model, loader, optimizer, loss_fn, scaler):
    model.train()
    running = 0.0
    for i, (x, y) in enumerate(loader):
        x = x.to(DEVICE, non_blocking=True); y = y.to(DEVICE, non_blocking=True).view(-1,1)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=(DEVICE=='cuda')):
            logits = model(x)
            loss = loss_fn(logits, y)
        scaler.scale(loss).backward()
        scaler.step(optimizer); scaler.update()
        running += loss.item()*x.size(0)
        if (i+1) % 20 == 0:
            print(f"  [train] step {i+1}/{len(loader)} loss={running/((i+1)*loader.batch_size):.4f}", flush=True)
    return running/len(loader.dataset)

def valid_one_epoch(model, loader):
    model.eval()
    preds, targs = [], []
    with torch.no_grad():
        for x, y in loader:
            x = x.to(DEVICE, non_blocking=True); y = y.view(-1,1)
            logits = model(x)
            preds.append(torch.sigmoid(logits).squeeze(1).cpu().numpy())
            targs.append(y.squeeze(1).cpu().numpy())
    preds = np.concatenate(preds); targs = np.concatenate(targs)
    auc = roc_auc_score(targs, preds)
    return auc, preds

def predict_loader(model, loader):
    model.eval()
    out_ids, out_preds = [], []
    with torch.no_grad():
        for x, ids in loader:
            x = x.to(DEVICE, non_blocking=True)
            logits = model(x)
            out_preds.append(torch.sigmoid(logits).squeeze(1).cpu().numpy())
            out_ids += list(ids)
    return np.concatenate(out_preds), out_ids

# Load data and groups
train_df = pd.read_csv('train.csv')
groups_df = pd.read_csv('cv_groups.csv')
train_df = train_df.merge(groups_df[['id','sha1']], on='id', how='left')
test_ids = pd.read_csv('sample_submission.csv')

skf = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
oof = np.zeros(len(train_df), dtype=np.float32)
test_pred_accum = np.zeros(len(test_ids), dtype=np.float32)

all_fold_aucs = []
t_all = time.time()
for fold, (tr_idx, va_idx) in enumerate(skf.split(train_df['id'], train_df['has_cactus'], groups=train_df['sha1'])):
    print(f"===== Fold {fold+1}/{N_FOLDS} =====")
    tr_df = train_df.iloc[tr_idx].reset_index(drop=True)
    va_df = train_df.iloc[va_idx].reset_index(drop=True)
    print(f"train size: {len(tr_df)} | valid size: {len(va_df)}")

    tr_ds = CactusDataset('train', tr_df, mode='train')
    va_ds = CactusDataset('train', va_df, mode='valid')
    tr_loader = DataLoader(tr_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS, pin_memory=True, drop_last=True)
    va_loader = DataLoader(va_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)

    model = build_model().to(DEVICE)
    optimizer = make_optimizer(model)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-5)
    loss_fn = nn.BCEWithLogitsLoss()
    scaler = torch.amp.GradScaler('cuda', enabled=(DEVICE=='cuda'))

    best_auc = -1.0; best_state = None; no_improve = 0
    t_fold0 = time.time()
    for epoch in range(1, EPOCHS+1):
        t0 = time.time()
        tr_loss = train_one_epoch(model, tr_loader, optimizer, loss_fn, scaler)
        val_auc, val_preds = valid_one_epoch(model, va_loader)
        scheduler.step()
        elapsed = time.time()-t0
        # Log current LRs
        lrs = [pg['lr'] for pg in optimizer.param_groups]
        print(f"[fold {fold}] epoch {epoch:02d} | tr_loss {tr_loss:.4f} | val_auc {val_auc:.6f} | lrs {lrs} | {elapsed:.1f}s", flush=True)
        if val_auc > best_auc:
            best_auc = val_auc; best_state = {k: v.cpu() for k,v in model.state_dict().items()}; no_improve = 0
        else:
            no_improve += 1
        if no_improve >= 3:
            print(f"[fold {fold}] Early stopping at epoch {epoch}")
            break
    print(f"[fold {fold}] best val AUC: {best_auc:.6f} | fold time {time.time()-t_fold0:.1f}s")
    all_fold_aucs.append(best_auc)
    # Load best and infer OOF
    model.load_state_dict({k: v.to(DEVICE) for k, v in best_state.items()}, strict=True)
    va_loader = DataLoader(va_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
    _, val_preds = valid_one_epoch(model, va_loader)
    oof[va_idx] = val_preds

    # Test inference with TTA (orig + hflip + vflip)
    test_ds = CactusDataset('test', test_ids, mode='test', tta_flip=None, tta_vflip=False)
    test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
    p0, ids = predict_loader(model, test_loader)

    test_ds_h = CactusDataset('test', test_ids, mode='test', tta_flip='hflip', tta_vflip=False)
    test_loader_h = DataLoader(test_ds_h, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
    p1, _ = predict_loader(model, test_loader_h)

    test_ds_v = CactusDataset('test', test_ids, mode='test', tta_flip=None, tta_vflip=True)
    test_loader_v = DataLoader(test_ds_v, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
    p2, _ = predict_loader(model, test_loader_v)

    p = (p0 + p1 + p2) / 3.0
    test_pred_accum += p.astype(np.float32)
    print(f"[fold {fold}] test inference done.")
    del model, optimizer, loss_fn, scaler, tr_loader, va_loader, test_loader, test_loader_h, test_loader_v
    gc.collect(); torch.cuda.empty_cache()

oof_auc = roc_auc_score(train_df['has_cactus'].values, oof)
print(f"OOF AUC: {oof_auc:.6f}")
print("Fold AUCs:", [f"{a:.6f}" for a in all_fold_aucs])

# Save OOF
np.save('oof_preds.npy', oof)
pd.DataFrame({'id': train_df['id'], 'oof_pred': oof, 'has_cactus': train_df['has_cactus']}).to_csv('oof_preds.csv', index=False)

# Average across folds
test_pred = test_pred_accum / N_FOLDS

# Exact-hash lookup (expected 0 overrides here)
with open('train_sha1_label_map.json', 'r') as f:
    sha1_to_label = json.load(f)
test_hash_df = pd.read_csv('test_hashes.csv')
lookup = test_hash_df[['id','sha1']].copy()
lookup['label_from_train'] = lookup['sha1'].map(sha1_to_label).astype('float32')
overrides = lookup['label_from_train'].notna().sum()
print(f"[inference] exact-hash overrides in test: {overrides}")

sub = test_ids.copy()
sub = sub.merge(lookup[['id','label_from_train']], on='id', how='left')
sub['pred'] = test_pred
sub['has_cactus'] = np.where(sub['label_from_train'].notna(), sub['label_from_train'], sub['pred'])
submission = sub[['id','has_cactus']].copy()
submission.to_csv('submission.csv', index=False)
print('Saved submission.csv with shape', submission.shape)

===== Fold 1/5 =====
train size: 11340 | valid size: 2835
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to .model_cache/hub/checkpoints/resnet18-f37072fd.pth


  original_init(self, **validated_kwargs)


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

 21%|██        | 9.25M/44.7M [00:00<00:00, 96.6MB/s]

 71%|███████   | 31.5M/44.7M [00:00<00:00, 176MB/s] 

100%|██████████| 44.7M/44.7M [00:00<00:00, 180MB/s]

  [train] step 20/44 loss=0.1614


  [train] step 40/44 loss=0.0958


[fold 0] epoch 01 | tr_loss 0.0883 | val_auc 0.999877 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.2s


  [train] step 20/44 loss=0.0183


  [train] step 40/44 loss=0.0144


[fold 0] epoch 02 | tr_loss 0.0153 | val_auc 0.999963 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.3s


  [train] step 20/44 loss=0.0090


  [train] step 40/44 loss=0.0074


[fold 0] epoch 03 | tr_loss 0.0080 | val_auc 0.999987 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.3s


  [train] step 20/44 loss=0.0061


  [train] step 40/44 loss=0.0091


[fold 0] epoch 04 | tr_loss 0.0090 | val_auc 0.999962 | lrs [7.75e-05, 0.0007524999999999999] | 7.4s


  [train] step 20/44 loss=0.0049


  [train] step 40/44 loss=0.0048


[fold 0] epoch 05 | tr_loss 0.0046 | val_auc 0.999980 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.3s


  [train] step 20/44 loss=0.0029


  [train] step 40/44 loss=0.0029


[fold 0] epoch 06 | tr_loss 0.0027 | val_auc 0.999989 | lrs [5.5e-05, 0.000505] | 7.3s


  [train] step 20/44 loss=0.0020


  [train] step 40/44 loss=0.0022


[fold 0] epoch 07 | tr_loss 0.0021 | val_auc 0.999993 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.2s


  [train] step 20/44 loss=0.0027


  [train] step 40/44 loss=0.0031


[fold 0] epoch 08 | tr_loss 0.0028 | val_auc 0.999990 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.3s


  [train] step 20/44 loss=0.0018


  [train] step 40/44 loss=0.0023


[fold 0] epoch 09 | tr_loss 0.0022 | val_auc 0.999984 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.4s


  [train] step 20/44 loss=0.0015


  [train] step 40/44 loss=0.0019


[fold 0] epoch 10 | tr_loss 0.0018 | val_auc 0.999986 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.4s


[fold 0] Early stopping at epoch 10
[fold 0] best val AUC: 0.999993 | fold time 73.5s


[fold 0] test inference done.


===== Fold 2/5 =====
train size: 11340 | valid size: 2835


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1494


  [train] step 40/44 loss=0.0881


[fold 1] epoch 01 | tr_loss 0.0814 | val_auc 0.999604 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.4s


  [train] step 20/44 loss=0.0191


  [train] step 40/44 loss=0.0178


[fold 1] epoch 02 | tr_loss 0.0174 | val_auc 0.999864 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.4s


  [train] step 20/44 loss=0.0099


  [train] step 40/44 loss=0.0085


[fold 1] epoch 03 | tr_loss 0.0080 | val_auc 0.999925 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s


  [train] step 20/44 loss=0.0068


  [train] step 40/44 loss=0.0072


[fold 1] epoch 04 | tr_loss 0.0076 | val_auc 0.999937 | lrs [7.75e-05, 0.0007524999999999999] | 7.3s


  [train] step 20/44 loss=0.0057


  [train] step 40/44 loss=0.0058


[fold 1] epoch 05 | tr_loss 0.0059 | val_auc 0.999929 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.3s


  [train] step 20/44 loss=0.0043


  [train] step 40/44 loss=0.0043


[fold 1] epoch 06 | tr_loss 0.0047 | val_auc 0.999947 | lrs [5.5e-05, 0.000505] | 7.4s


  [train] step 20/44 loss=0.0014


  [train] step 40/44 loss=0.0030


[fold 1] epoch 07 | tr_loss 0.0029 | val_auc 0.999958 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.4s


  [train] step 20/44 loss=0.0027


  [train] step 40/44 loss=0.0024


[fold 1] epoch 08 | tr_loss 0.0023 | val_auc 0.999957 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.5s


  [train] step 20/44 loss=0.0015


  [train] step 40/44 loss=0.0017


[fold 1] epoch 09 | tr_loss 0.0018 | val_auc 0.999956 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.5s


  [train] step 20/44 loss=0.0020


  [train] step 40/44 loss=0.0028


[fold 1] epoch 10 | tr_loss 0.0026 | val_auc 0.999960 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.3s


  [train] step 20/44 loss=0.0011


  [train] step 40/44 loss=0.0011


[fold 1] epoch 11 | tr_loss 0.0010 | val_auc 0.999957 | lrs [1.1533337816991932e-05, 2.6866715986911242e-05] | 7.5s


  [train] step 20/44 loss=0.0014


  [train] step 40/44 loss=0.0015


[fold 1] epoch 12 | tr_loss 0.0014 | val_auc 0.999960 | lrs [1e-05, 1e-05] | 7.4s


[fold 1] best val AUC: 0.999960 | fold time 89.1s


[fold 1] test inference done.


===== Fold 3/5 =====
train size: 11340 | valid size: 2835


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1599


  [train] step 40/44 loss=0.0944


[fold 2] epoch 01 | tr_loss 0.0868 | val_auc 0.999868 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.5s


  [train] step 20/44 loss=0.0148


  [train] step 40/44 loss=0.0134


[fold 2] epoch 02 | tr_loss 0.0146 | val_auc 0.999971 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.4s


  [train] step 20/44 loss=0.0100


  [train] step 40/44 loss=0.0125


[fold 2] epoch 03 | tr_loss 0.0118 | val_auc 0.999986 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s


  [train] step 20/44 loss=0.0084


  [train] step 40/44 loss=0.0067


[fold 2] epoch 04 | tr_loss 0.0066 | val_auc 0.999995 | lrs [7.75e-05, 0.0007524999999999999] | 7.3s


  [train] step 20/44 loss=0.0077


  [train] step 40/44 loss=0.0080


[fold 2] epoch 05 | tr_loss 0.0077 | val_auc 0.999993 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.4s


  [train] step 20/44 loss=0.0060


  [train] step 40/44 loss=0.0057


[fold 2] epoch 06 | tr_loss 0.0057 | val_auc 0.999997 | lrs [5.5e-05, 0.000505] | 7.3s


  [train] step 20/44 loss=0.0038


  [train] step 40/44 loss=0.0051


[fold 2] epoch 07 | tr_loss 0.0048 | val_auc 0.999993 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.3s


  [train] step 20/44 loss=0.0028


  [train] step 40/44 loss=0.0034


[fold 2] epoch 08 | tr_loss 0.0037 | val_auc 1.000000 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.3s


  [train] step 20/44 loss=0.0035


  [train] step 40/44 loss=0.0030


[fold 2] epoch 09 | tr_loss 0.0028 | val_auc 0.999999 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.3s


  [train] step 20/44 loss=0.0017


  [train] step 40/44 loss=0.0033


[fold 2] epoch 10 | tr_loss 0.0031 | val_auc 0.999999 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.3s


  [train] step 20/44 loss=0.0032


  [train] step 40/44 loss=0.0024


[fold 2] epoch 11 | tr_loss 0.0024 | val_auc 1.000000 | lrs [1.1533337816991932e-05, 2.6866715986911242e-05] | 7.2s


[fold 2] Early stopping at epoch 11
[fold 2] best val AUC: 1.000000 | fold time 81.0s


[fold 2] test inference done.


===== Fold 4/5 =====
train size: 11340 | valid size: 2835


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1843


  [train] step 40/44 loss=0.1079


[fold 3] epoch 01 | tr_loss 0.0991 | val_auc 0.999838 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.4s


  [train] step 20/44 loss=0.0222


  [train] step 40/44 loss=0.0173


[fold 3] epoch 02 | tr_loss 0.0163 | val_auc 0.999898 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.4s


  [train] step 20/44 loss=0.0118


  [train] step 40/44 loss=0.0114


[fold 3] epoch 03 | tr_loss 0.0118 | val_auc 0.999964 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s


  [train] step 20/44 loss=0.0096


  [train] step 40/44 loss=0.0087


[fold 3] epoch 04 | tr_loss 0.0091 | val_auc 0.999970 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0077


  [train] step 40/44 loss=0.0076


[fold 3] epoch 05 | tr_loss 0.0074 | val_auc 0.999847 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.6s


  [train] step 20/44 loss=0.0035


  [train] step 40/44 loss=0.0046


[fold 3] epoch 06 | tr_loss 0.0044 | val_auc 0.999986 | lrs [5.5e-05, 0.000505] | 7.4s


  [train] step 20/44 loss=0.0044


  [train] step 40/44 loss=0.0046


[fold 3] epoch 07 | tr_loss 0.0043 | val_auc 0.999985 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.5s


  [train] step 20/44 loss=0.0021


  [train] step 40/44 loss=0.0022


[fold 3] epoch 08 | tr_loss 0.0022 | val_auc 0.999981 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.5s


  [train] step 20/44 loss=0.0020


  [train] step 40/44 loss=0.0028


[fold 3] epoch 09 | tr_loss 0.0030 | val_auc 0.999977 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.4s


[fold 3] Early stopping at epoch 9
[fold 3] best val AUC: 0.999986 | fold time 67.4s


[fold 3] test inference done.


===== Fold 5/5 =====
train size: 11340 | valid size: 2835


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1557


  [train] step 40/44 loss=0.0923


[fold 4] epoch 01 | tr_loss 0.0849 | val_auc 0.999915 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.4s


  [train] step 20/44 loss=0.0189


  [train] step 40/44 loss=0.0168


[fold 4] epoch 02 | tr_loss 0.0160 | val_auc 0.999974 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.3s


  [train] step 20/44 loss=0.0142


  [train] step 40/44 loss=0.0117


[fold 4] epoch 03 | tr_loss 0.0113 | val_auc 0.999988 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.3s


  [train] step 20/44 loss=0.0073


  [train] step 40/44 loss=0.0083


[fold 4] epoch 04 | tr_loss 0.0079 | val_auc 0.999990 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0048


  [train] step 40/44 loss=0.0052


[fold 4] epoch 05 | tr_loss 0.0056 | val_auc 0.999988 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.4s


  [train] step 20/44 loss=0.0065


  [train] step 40/44 loss=0.0059


[fold 4] epoch 06 | tr_loss 0.0055 | val_auc 0.999992 | lrs [5.5e-05, 0.000505] | 7.4s


  [train] step 20/44 loss=0.0072


  [train] step 40/44 loss=0.0060


[fold 4] epoch 07 | tr_loss 0.0055 | val_auc 0.999992 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.4s


  [train] step 20/44 loss=0.0016


  [train] step 40/44 loss=0.0021


[fold 4] epoch 08 | tr_loss 0.0021 | val_auc 0.999992 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.4s


  [train] step 20/44 loss=0.0026


  [train] step 40/44 loss=0.0020


[fold 4] epoch 09 | tr_loss 0.0020 | val_auc 0.999993 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.3s


  [train] step 20/44 loss=0.0026


  [train] step 40/44 loss=0.0019


[fold 4] epoch 10 | tr_loss 0.0022 | val_auc 0.999994 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.5s


  [train] step 20/44 loss=0.0023


  [train] step 40/44 loss=0.0021


[fold 4] epoch 11 | tr_loss 0.0019 | val_auc 0.999994 | lrs [1.1533337816991932e-05, 2.6866715986911242e-05] | 7.4s


  [train] step 20/44 loss=0.0014


  [train] step 40/44 loss=0.0024


[fold 4] epoch 12 | tr_loss 0.0023 | val_auc 0.999994 | lrs [1e-05, 1e-05] | 7.5s


[fold 4] best val AUC: 0.999994 | fold time 88.9s


[fold 4] test inference done.


OOF AUC: 0.999985
Fold AUCs: ['0.999993', '0.999960', '1.000000', '0.999986', '0.999994']
[inference] exact-hash overrides in test: 0
Saved submission.csv with shape (3325, 2)


In [8]:
# ResNet18 re-run with Dihedral-8 TTA and LOGIT averaging across TTAs and folds
import os, time, json, gc, random, math
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedGroupKFold
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from torchvision import models
from torchvision.models import ResNet18_Weights

CACHE_DIR = Path('./.model_cache'); CACHE_DIR.mkdir(parents=True, exist_ok=True)
os.environ['TORCH_HOME'] = str(CACHE_DIR)
os.environ['XDG_CACHE_HOME'] = str(CACHE_DIR)
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
SEED = 42
IMG_SIZE = 96
N_FOLDS = 5
EPOCHS = 12
BATCH_SIZE = 256
WORKERS = 2
LR_BACKBONE = 1e-4
LR_HEAD = 1e-3
WEIGHT_DECAY = 1e-4

def set_seed(seed=SEED):
    import numpy as _np, random as _random, torch as _torch
    _random.seed(seed); _np.random.seed(seed); _torch.manual_seed(seed); _torch.cuda.manual_seed_all(seed)
    _torch.backends.cudnn.deterministic = True
    _torch.backends.cudnn.benchmark = False
set_seed()

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def get_transforms(train=True):
    if train:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.08, scale_limit=0.10, rotate_limit=10, border_mode=0, p=0.5),
            A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05, hue=0.0, p=0.2),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])
    else:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])

class CactusTTADataset(Dataset):
    def __init__(self, img_dir, df, mode='train', rot_k=0, hflip=False):
        self.img_dir = Path(img_dir)
        self.df = df.reset_index(drop=True)
        self.mode = mode
        self.rot_k = rot_k  # 0,1,2,3
        self.hflip = hflip
        self.tfms = get_transforms(train=(mode=='train'))
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / row['id']
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            img = np.array(im)
        if self.rot_k:
            img = np.ascontiguousarray(np.rot90(img, k=self.rot_k))
        if self.hflip:
            img = np.ascontiguousarray(img[:, ::-1, :])
        aug = self.tfms(image=img)
        img_t = torch.from_numpy(aug['image'].transpose(2,0,1)).float()
        if self.mode in ('train','valid'):
            y = torch.tensor(row['has_cactus'], dtype=torch.float32)
            return img_t, y
        return img_t, row['id']

def build_model():
    m = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    m.fc = nn.Linear(m.fc.in_features, 1)
    return m

def make_optimizer(model):
    head_params = list(model.fc.parameters())
    backbone_params = [p for n,p in model.named_parameters() if not n.startswith('fc.')]
    return torch.optim.AdamW([
        {'params': backbone_params, 'lr': LR_BACKBONE},
        {'params': head_params, 'lr': LR_HEAD},
    ], weight_decay=WEIGHT_DECAY)

def train_one_epoch(model, loader, optimizer, loss_fn, scaler):
    model.train(); total=0.0
    for i,(x,y) in enumerate(loader):
        x=x.to(DEVICE,non_blocking=True); y=y.to(DEVICE,non_blocking=True).view(-1,1)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=(DEVICE=='cuda')):
            logits=model(x); loss=loss_fn(logits,y)
        scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()
        total += loss.item()*x.size(0)
        if (i+1)%20==0: print(f"  [train] step {i+1}/{len(loader)} loss={total/((i+1)*loader.batch_size):.4f}", flush=True)
    return total/len(loader.dataset)

def valid_one_epoch(model, loader):
    model.eval(); preds=[]; targs=[]
    with torch.no_grad():
        for x,y in loader:
            x=x.to(DEVICE,non_blocking=True); y=y.view(-1,1)
            logits=model(x)
            preds.append(torch.sigmoid(logits).squeeze(1).cpu().numpy())
            targs.append(y.squeeze(1).cpu().numpy())
    preds=np.concatenate(preds); targs=np.concatenate(targs)
    return roc_auc_score(targs,preds), preds

def predict_logits(model, loader):
    model.eval(); out_logits=[]; out_ids=[]
    with torch.no_grad():
        for x,ids in loader:
            x=x.to(DEVICE,non_blocking=True)
            logits=model(x).squeeze(1).cpu().numpy()
            out_logits.append(logits); out_ids += list(ids)
    return np.concatenate(out_logits), out_ids

# Data
train_df = pd.read_csv('train.csv')
groups_df = pd.read_csv('cv_groups.csv')
train_df = train_df.merge(groups_df[['id','sha1']], on='id', how='left')
test_ids = pd.read_csv('sample_submission.csv')

skf = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
oof = np.zeros(len(train_df), dtype=np.float32)
test_logit_accum = np.zeros(len(test_ids), dtype=np.float32)

fold_aucs=[]
t0_all=time.time()
for fold,(tr_idx,va_idx) in enumerate(skf.split(train_df['id'], train_df['has_cactus'], groups=train_df['sha1'])):
    print(f"===== D8 ReRun Fold {fold+1}/{N_FOLDS} =====")
    tr_df = train_df.iloc[tr_idx].reset_index(drop=True)
    va_df = train_df.iloc[va_idx].reset_index(drop=True)
    tr_loader = DataLoader(CactusTTADataset('train', tr_df, mode='train'), batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS, pin_memory=True, drop_last=True)
    va_loader = DataLoader(CactusTTADataset('train', va_df, mode='valid'), batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)

    model = build_model().to(DEVICE)
    optimizer = make_optimizer(model)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-5)
    loss_fn = nn.BCEWithLogitsLoss()
    scaler = torch.amp.GradScaler('cuda', enabled=(DEVICE=='cuda'))

    best_auc=-1.0; best_state=None; no_imp=0; t_fold=time.time()
    for epoch in range(1,EPOCHS+1):
        t_ep=time.time()
        tr_loss = train_one_epoch(model, tr_loader, optimizer, loss_fn, scaler)
        val_auc, _ = valid_one_epoch(model, va_loader)
        scheduler.step()
        lrs=[pg['lr'] for pg in optimizer.param_groups]
        print(f"[fold {fold}] epoch {epoch:02d} | tr_loss {tr_loss:.4f} | val_auc {val_auc:.6f} | lrs {lrs} | {time.time()-t_ep:.1f}s", flush=True)
        if val_auc > best_auc: best_auc=val_auc; best_state={k:v.cpu() for k,v in model.state_dict().items()}; no_imp=0
        else: no_imp+=1
        if no_imp>=3: print(f"[fold {fold}] early stop at {epoch}"); break
    print(f"[fold {fold}] best val AUC: {best_auc:.6f} | fold time {time.time()-t_fold:.1f}s")
    fold_aucs.append(best_auc)
    model.load_state_dict({k:v.to(DEVICE) for k,v in best_state.items()}, strict=True)
    # OOF with best
    va_loader = DataLoader(CactusTTADataset('train', va_df, mode='valid'), batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
    _, va_preds = valid_one_epoch(model, va_loader)
    oof[va_idx] = va_preds

    # Dihedral-8 TTA logits: rot_k in {0,1,2,3} x hflip {False,True}
    tta_logits_sum = np.zeros(len(test_ids), dtype=np.float32)
    for rot_k in (0,1,2,3):
        for hf in (False, True):
            ds = CactusTTADataset('test', test_ids, mode='test', rot_k=rot_k, hflip=hf)
            dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
            logits, ids = predict_logits(model, dl)
            tta_logits_sum += logits.astype(np.float32)
    fold_logits = tta_logits_sum / 8.0
    test_logit_accum += fold_logits
    print(f"[fold {fold}] test D8 TTA inference done.")
    del model, optimizer, loss_fn, scaler, tr_loader, va_loader
    gc.collect(); torch.cuda.empty_cache()

oof_auc = roc_auc_score(train_df['has_cactus'].values, oof)
print(f"D8 OOF AUC: {oof_auc:.6f}")
print('D8 Fold AUCs:', [f"{a:.6f}" for a in fold_aucs])
np.save('oof_preds_d8.npy', oof)
pd.DataFrame({'id': train_df['id'], 'oof_pred': oof, 'has_cactus': train_df['has_cactus']}).to_csv('oof_preds_d8.csv', index=False)

# Logit averaging across folds -> sigmoid once
avg_test_logits = test_logit_accum / N_FOLDS
test_pred = 1.0 / (1.0 + np.exp(-avg_test_logits))

# Exact-hash override (should be zero)
with open('train_sha1_label_map.json', 'r') as f:
    sha1_to_label = json.load(f)
test_hash_df = pd.read_csv('test_hashes.csv')
lookup = test_hash_df[['id','sha1']].copy()
lookup['label_from_train'] = lookup['sha1'].map(sha1_to_label).astype('float32')
overrides = lookup['label_from_train'].notna().sum()
print(f"[inference-D8] exact-hash overrides in test: {overrides}")

sub = test_ids.copy()
sub = sub.merge(lookup[['id','label_from_train']], on='id', how='left')
sub['pred'] = test_pred
sub['has_cactus'] = np.where(sub['label_from_train'].notna(), sub['label_from_train'], sub['pred'])
submission = sub[['id','has_cactus']].copy()
submission.to_csv('submission_d8.csv', index=False)
print('Saved submission_d8.csv with shape', submission.shape)

  [train] step 20/44 loss=0.1658


  [train] step 40/44 loss=0.0961


[fold 0] epoch 01 | tr_loss 0.0880 | val_auc 0.999871 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.6s


  [train] step 20/44 loss=0.0163


  [train] step 40/44 loss=0.0150


[fold 0] epoch 02 | tr_loss 0.0161 | val_auc 0.999982 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0080


  [train] step 40/44 loss=0.0072


[fold 0] epoch 03 | tr_loss 0.0079 | val_auc 0.999989 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s


  [train] step 20/44 loss=0.0060


  [train] step 40/44 loss=0.0087


[fold 0] epoch 04 | tr_loss 0.0080 | val_auc 0.999986 | lrs [7.75e-05, 0.0007524999999999999] | 7.4s


  [train] step 20/44 loss=0.0048


  [train] step 40/44 loss=0.0040


[fold 0] epoch 05 | tr_loss 0.0038 | val_auc 0.999984 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.4s


  [train] step 20/44 loss=0.0045


  [train] step 40/44 loss=0.0036


[fold 0] epoch 06 | tr_loss 0.0042 | val_auc 0.999988 | lrs [5.5e-05, 0.000505] | 7.4s


[fold 0] early stop at 6
[fold 0] best val AUC: 0.999989 | fold time 44.8s


[fold 0] test D8 TTA inference done.


===== D8 ReRun Fold 2/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1467


  [train] step 40/44 loss=0.0950


[fold 1] epoch 01 | tr_loss 0.0872 | val_auc 0.999568 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.4s


  [train] step 20/44 loss=0.0146


  [train] step 40/44 loss=0.0155


[fold 1] epoch 02 | tr_loss 0.0145 | val_auc 0.999811 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0121


  [train] step 40/44 loss=0.0098


[fold 1] epoch 03 | tr_loss 0.0095 | val_auc 0.999806 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s


  [train] step 20/44 loss=0.0045


  [train] step 40/44 loss=0.0055


[fold 1] epoch 04 | tr_loss 0.0054 | val_auc 0.999829 | lrs [7.75e-05, 0.0007524999999999999] | 7.3s


  [train] step 20/44 loss=0.0055


  [train] step 40/44 loss=0.0063


[fold 1] epoch 05 | tr_loss 0.0060 | val_auc 0.999901 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.2s


  [train] step 20/44 loss=0.0042


  [train] step 40/44 loss=0.0039


[fold 1] epoch 06 | tr_loss 0.0036 | val_auc 0.999934 | lrs [5.5e-05, 0.000505] | 7.4s


  [train] step 20/44 loss=0.0052


  [train] step 40/44 loss=0.0058


[fold 1] epoch 07 | tr_loss 0.0057 | val_auc 0.999909 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.4s


  [train] step 20/44 loss=0.0030


  [train] step 40/44 loss=0.0023


[fold 1] epoch 08 | tr_loss 0.0028 | val_auc 0.999942 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.3s


  [train] step 20/44 loss=0.0020


  [train] step 40/44 loss=0.0024


[fold 1] epoch 09 | tr_loss 0.0024 | val_auc 0.999944 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.4s


  [train] step 20/44 loss=0.0019


  [train] step 40/44 loss=0.0020


[fold 1] epoch 10 | tr_loss 0.0019 | val_auc 0.999958 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.3s


  [train] step 20/44 loss=0.0014


  [train] step 40/44 loss=0.0011


[fold 1] epoch 11 | tr_loss 0.0012 | val_auc 0.999957 | lrs [1.1533337816991932e-05, 2.6866715986911242e-05] | 7.5s


  [train] step 20/44 loss=0.0007


  [train] step 40/44 loss=0.0007


[fold 1] epoch 12 | tr_loss 0.0007 | val_auc 0.999958 | lrs [1e-05, 1e-05] | 7.4s


[fold 1] best val AUC: 0.999958 | fold time 88.9s


[fold 1] test D8 TTA inference done.


===== D8 ReRun Fold 3/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1671


  [train] step 40/44 loss=0.1002


[fold 2] epoch 01 | tr_loss 0.0926 | val_auc 0.999930 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.4s


  [train] step 20/44 loss=0.0253


  [train] step 40/44 loss=0.0217


[fold 2] epoch 02 | tr_loss 0.0203 | val_auc 0.999973 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.4s


  [train] step 20/44 loss=0.0078


  [train] step 40/44 loss=0.0094


[fold 2] epoch 03 | tr_loss 0.0095 | val_auc 0.999983 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s


  [train] step 20/44 loss=0.0071


  [train] step 40/44 loss=0.0076


[fold 2] epoch 04 | tr_loss 0.0074 | val_auc 0.999990 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0079


  [train] step 40/44 loss=0.0056


[fold 2] epoch 05 | tr_loss 0.0055 | val_auc 0.999989 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.4s


  [train] step 20/44 loss=0.0082


  [train] step 40/44 loss=0.0077


[fold 2] epoch 06 | tr_loss 0.0076 | val_auc 0.999997 | lrs [5.5e-05, 0.000505] | 7.5s


  [train] step 20/44 loss=0.0034


  [train] step 40/44 loss=0.0040


[fold 2] epoch 07 | tr_loss 0.0041 | val_auc 0.999989 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.4s


  [train] step 20/44 loss=0.0031


  [train] step 40/44 loss=0.0027


[fold 2] epoch 08 | tr_loss 0.0026 | val_auc 0.999997 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.4s


  [train] step 20/44 loss=0.0025


  [train] step 40/44 loss=0.0027


[fold 2] epoch 09 | tr_loss 0.0026 | val_auc 1.000000 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.5s


  [train] step 20/44 loss=0.0022


  [train] step 40/44 loss=0.0015


[fold 2] epoch 10 | tr_loss 0.0014 | val_auc 1.000000 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.4s


  [train] step 20/44 loss=0.0020


  [train] step 40/44 loss=0.0027


[fold 2] epoch 11 | tr_loss 0.0026 | val_auc 0.999997 | lrs [1.1533337816991932e-05, 2.6866715986911242e-05] | 7.5s


  [train] step 20/44 loss=0.0020


  [train] step 40/44 loss=0.0023


[fold 2] epoch 12 | tr_loss 0.0026 | val_auc 0.999999 | lrs [1e-05, 1e-05] | 7.3s


[fold 2] early stop at 12
[fold 2] best val AUC: 1.000000 | fold time 89.3s


[fold 2] test D8 TTA inference done.


===== D8 ReRun Fold 4/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1756


  [train] step 40/44 loss=0.1018


[fold 3] epoch 01 | tr_loss 0.0944 | val_auc 0.999866 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.7s


  [train] step 20/44 loss=0.0165


  [train] step 40/44 loss=0.0144


[fold 3] epoch 02 | tr_loss 0.0152 | val_auc 0.999883 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.6s


  [train] step 20/44 loss=0.0095


  [train] step 40/44 loss=0.0108


[fold 3] epoch 03 | tr_loss 0.0106 | val_auc 0.999906 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s


  [train] step 20/44 loss=0.0066


  [train] step 40/44 loss=0.0062


[fold 3] epoch 04 | tr_loss 0.0062 | val_auc 0.999950 | lrs [7.75e-05, 0.0007524999999999999] | 7.4s


  [train] step 20/44 loss=0.0045


  [train] step 40/44 loss=0.0048


[fold 3] epoch 05 | tr_loss 0.0046 | val_auc 0.999989 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.6s


  [train] step 20/44 loss=0.0051


  [train] step 40/44 loss=0.0041


[fold 3] epoch 06 | tr_loss 0.0039 | val_auc 0.999981 | lrs [5.5e-05, 0.000505] | 7.5s


  [train] step 20/44 loss=0.0030


  [train] step 40/44 loss=0.0047


[fold 3] epoch 07 | tr_loss 0.0046 | val_auc 0.999988 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.4s


  [train] step 20/44 loss=0.0019


  [train] step 40/44 loss=0.0028


[fold 3] epoch 08 | tr_loss 0.0027 | val_auc 0.999993 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.4s


  [train] step 20/44 loss=0.0020


  [train] step 40/44 loss=0.0018


[fold 3] epoch 09 | tr_loss 0.0018 | val_auc 0.999995 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.6s


  [train] step 20/44 loss=0.0020


  [train] step 40/44 loss=0.0025


[fold 3] epoch 10 | tr_loss 0.0027 | val_auc 0.999994 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.4s


  [train] step 20/44 loss=0.0016


  [train] step 40/44 loss=0.0016


[fold 3] epoch 11 | tr_loss 0.0015 | val_auc 0.999993 | lrs [1.1533337816991932e-05, 2.6866715986911242e-05] | 7.4s


  [train] step 20/44 loss=0.0032


  [train] step 40/44 loss=0.0021


[fold 3] epoch 12 | tr_loss 0.0019 | val_auc 0.999992 | lrs [1e-05, 1e-05] | 7.4s


[fold 3] early stop at 12
[fold 3] best val AUC: 0.999995 | fold time 89.9s


[fold 3] test D8 TTA inference done.


===== D8 ReRun Fold 5/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1357


  [train] step 40/44 loss=0.0796


[fold 4] epoch 01 | tr_loss 0.0747 | val_auc 0.999925 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.4s


  [train] step 20/44 loss=0.0186


  [train] step 40/44 loss=0.0175


[fold 4] epoch 02 | tr_loss 0.0174 | val_auc 0.999981 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.4s


  [train] step 20/44 loss=0.0091


  [train] step 40/44 loss=0.0101


[fold 4] epoch 03 | tr_loss 0.0097 | val_auc 0.999991 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s


  [train] step 20/44 loss=0.0105


  [train] step 40/44 loss=0.0094


[fold 4] epoch 04 | tr_loss 0.0094 | val_auc 0.999991 | lrs [7.75e-05, 0.0007524999999999999] | 7.4s


  [train] step 20/44 loss=0.0052


  [train] step 40/44 loss=0.0061


[fold 4] epoch 05 | tr_loss 0.0059 | val_auc 0.999992 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.4s


  [train] step 20/44 loss=0.0045


  [train] step 40/44 loss=0.0058


[fold 4] epoch 06 | tr_loss 0.0058 | val_auc 0.999992 | lrs [5.5e-05, 0.000505] | 7.7s


  [train] step 20/44 loss=0.0026


  [train] step 40/44 loss=0.0026


[fold 4] epoch 07 | tr_loss 0.0029 | val_auc 0.999994 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.4s


  [train] step 20/44 loss=0.0020


  [train] step 40/44 loss=0.0018


[fold 4] epoch 08 | tr_loss 0.0018 | val_auc 0.999992 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.6s


  [train] step 20/44 loss=0.0022


  [train] step 40/44 loss=0.0019


[fold 4] epoch 09 | tr_loss 0.0019 | val_auc 0.999993 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.5s


  [train] step 20/44 loss=0.0016


  [train] step 40/44 loss=0.0025


[fold 4] epoch 10 | tr_loss 0.0023 | val_auc 1.000000 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.5s


  [train] step 20/44 loss=0.0012


  [train] step 40/44 loss=0.0012


[fold 4] epoch 11 | tr_loss 0.0011 | val_auc 0.999996 | lrs [1.1533337816991932e-05, 2.6866715986911242e-05] | 7.6s


  [train] step 20/44 loss=0.0022


  [train] step 40/44 loss=0.0016


[fold 4] epoch 12 | tr_loss 0.0016 | val_auc 0.999997 | lrs [1e-05, 1e-05] | 7.5s


[fold 4] best val AUC: 1.000000 | fold time 90.1s


[fold 4] test D8 TTA inference done.


D8 OOF AUC: 0.999980
D8 Fold AUCs: ['0.999989', '0.999958', '1.000000', '0.999995', '1.000000']
[inference-D8] exact-hash overrides in test: 0
Saved submission_d8.csv with shape (3325, 2)


In [9]:
# Multi-seed (42, 2025, 777) ResNet18 with EMA and Dihedral-8 TTA, logit-ensemble across folds and seeds
import os, time, json, gc, random
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedGroupKFold
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from torchvision import models
from torchvision.models import ResNet18_Weights

CACHE_DIR = Path('./.model_cache'); CACHE_DIR.mkdir(parents=True, exist_ok=True)
os.environ['TORCH_HOME'] = str(CACHE_DIR)
os.environ['XDG_CACHE_HOME'] = str(CACHE_DIR)
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
SEEDS = [42, 2025, 777]
IMG_SIZE = 96
N_FOLDS = 5
EPOCHS = 12
BATCH_SIZE = 256
WORKERS = 2
LR_BACKBONE = 1e-4
LR_HEAD = 1e-3
WEIGHT_DECAY = 1e-4
EMA_DECAY = 0.999
EARLY_VAL_THRESH = 0.99990

def set_seed(seed: int):
    import numpy as _np, random as _random, torch as _torch
    _random.seed(seed); _np.random.seed(seed); _torch.manual_seed(seed); _torch.cuda.manual_seed_all(seed)
    _torch.backends.cudnn.deterministic = True
    _torch.backends.cudnn.benchmark = False

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def get_transforms(train=True):
    if train:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.08, scale_limit=0.10, rotate_limit=10, border_mode=0, p=0.5),
            A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05, hue=0.0, p=0.2),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])
    else:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])

class TTADataset(Dataset):
    def __init__(self, img_dir, df, mode='train', rot_k=0, hflip=False):
        self.img_dir = Path(img_dir); self.df = df.reset_index(drop=True)
        self.mode = mode; self.rot_k = rot_k; self.hflip = hflip
        self.tfms = get_transforms(train=(mode=='train'))
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / row['id']
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            img = np.array(im)
        if self.rot_k:
            img = np.ascontiguousarray(np.rot90(img, k=self.rot_k))
        if self.hflip:
            img = np.ascontiguousarray(img[:, ::-1, :])
        aug = self.tfms(image=img)
        x = torch.from_numpy(aug['image'].transpose(2,0,1)).float()
        if self.mode in ('train','valid'):
            y = torch.tensor(row['has_cactus'], dtype=torch.float32)
            return x, y
        return x, row['id']

def build_model():
    m = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    m.fc = nn.Linear(m.fc.in_features, 1)
    return m

def make_optimizer(model):
    head_params = list(model.fc.parameters())
    backbone_params = [p for n,p in model.named_parameters() if not n.startswith('fc.')]
    return torch.optim.AdamW([
        {'params': backbone_params, 'lr': LR_BACKBONE},
        {'params': head_params, 'lr': LR_HEAD},
    ], weight_decay=WEIGHT_DECAY)

class EMA:
    def __init__(self, model, decay=EMA_DECAY):
        self.decay = decay
        self.shadow = {}
        for n, p in model.named_parameters():
            if p.requires_grad:
                self.shadow[n] = p.detach().clone()
    def update(self, model):
        for n, p in model.named_parameters():
            if not p.requires_grad: continue
            self.shadow[n].mul_(self.decay).add_(p.detach(), alpha=1.0 - self.decay)
    def copy_to(self, model):
        for n, p in model.named_parameters():
            if n in self.shadow:
                p.data.copy_(self.shadow[n].data)

def train_one_epoch(model, loader, optimizer, loss_fn, scaler, ema: EMA):
    model.train(); total=0.0
    for i,(x,y) in enumerate(loader):
        x=x.to(DEVICE,non_blocking=True); y=y.to(DEVICE,non_blocking=True).view(-1,1)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=(DEVICE=='cuda')):
            logits=model(x); loss=loss_fn(logits,y)
        scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()
        if ema is not None: ema.update(model)
        total += loss.item()*x.size(0)
        if (i+1)%20==0: print(f"  [train] step {i+1}/{len(loader)} loss={total/((i+1)*loader.batch_size):.4f}", flush=True)
    return total/len(loader.dataset)

def valid_auc(model, loader):
    model.eval(); preds=[]; targs=[]
    with torch.no_grad():
        for x,y in loader:
            x=x.to(DEVICE,non_blocking=True); y=y.view(-1,1)
            logits=model(x)
            preds.append(torch.sigmoid(logits).squeeze(1).cpu().numpy())
            targs.append(y.squeeze(1).cpu().numpy())
    preds=np.concatenate(preds); targs=np.concatenate(targs)
    return roc_auc_score(targs,preds)

def predict_logits(model, loader):
    model.eval(); out_logits=[]; out_ids=[]
    with torch.no_grad():
        for x,ids in loader:
            x=x.to(DEVICE,non_blocking=True)
            logits=model(x).squeeze(1).cpu().numpy()
            out_logits.append(logits); out_ids += list(ids)
    return np.concatenate(out_logits), out_ids

# Data
train_df = pd.read_csv('train.csv')
groups_df = pd.read_csv('cv_groups.csv')
train_df = train_df.merge(groups_df[['id','sha1']], on='id', how='left')
test_ids = pd.read_csv('sample_submission.csv')

global_seed_logits = np.zeros(len(test_ids), dtype=np.float32)

for seed in SEEDS:
    print(f"===== SEED {seed} =====", flush=True)
    set_seed(seed)
    skf = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=True, random_state=seed)
    seed_test_logits_accum = np.zeros(len(test_ids), dtype=np.float32)
    bad_folds = 0
    for fold,(tr_idx,va_idx) in enumerate(skf.split(train_df['id'], train_df['has_cactus'], groups=train_df['sha1'])):
        print(f"-- seed {seed} fold {fold+1}/{N_FOLDS}")
        tr_df = train_df.iloc[tr_idx].reset_index(drop=True)
        va_df = train_df.iloc[va_idx].reset_index(drop=True)
        tr_loader = DataLoader(TTADataset('train', tr_df, mode='train'), batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS, pin_memory=True, drop_last=True)
        va_loader = DataLoader(TTADataset('train', va_df, mode='valid'), batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)

        model = build_model().to(DEVICE)
        optimizer = make_optimizer(model)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-5)
        loss_fn = nn.BCEWithLogitsLoss()
        scaler = torch.amp.GradScaler('cuda', enabled=(DEVICE=='cuda'))
        ema = EMA(model, decay=EMA_DECAY)

        best_auc = -1.0; best_state=None; no_imp=0
        for epoch in range(1, EPOCHS+1):
            t0=time.time()
            tr_loss = train_one_epoch(model, tr_loader, optimizer, loss_fn, scaler, ema)
            # validate with EMA weights
            bak = {k: v.detach().clone() for k,v in model.state_dict().items()}
            ema.copy_to(model)
            val_auc = valid_auc(model, va_loader)
            # restore weights
            model.load_state_dict(bak, strict=True)
            scheduler.step()
            lrs=[pg['lr'] for pg in optimizer.param_groups]
            print(f"[seed {seed} fold {fold}] epoch {epoch:02d} | tr_loss {tr_loss:.4f} | val_auc {val_auc:.6f} | lrs {lrs} | {time.time()-t0:.1f}s")
            if val_auc > best_auc: best_auc=val_auc; best_state={k:v.cpu() for k,v in ema.shadow.items()}; no_imp=0
            else: no_imp+=1
            if epoch==3 and val_auc < EARLY_VAL_THRESH:
                bad_folds += 1
                print(f"[seed {seed} fold {fold}] early abort due to low AUC at epoch 3: {val_auc:.6f}")
                break
            if no_imp>=3:
                print(f"[seed {seed} fold {fold}] early stop at epoch {epoch}")
                break
        if bad_folds >= 2:
            print(f"[seed {seed}] aborting seed due to {bad_folds} bad folds")
            break
        # load best EMA state to model
        model.load_state_dict({k: v.to(DEVICE) for k,v in best_state.items()}, strict=False)
        # TTA D8 logits
        tta_logits_sum = np.zeros(len(test_ids), dtype=np.float32)
        for rot_k in (0,1,2,3):
            for hf in (False, True):
                ds = TTADataset('test', test_ids, mode='test', rot_k=rot_k, hflip=hf)
                dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
                logits, ids = predict_logits(model, dl)
                tta_logits_sum += logits.astype(np.float32)
        fold_logits = tta_logits_sum / 8.0
        seed_test_logits_accum += fold_logits
        print(f"[seed {seed} fold {fold}] D8 inference done; best val AUC {best_auc:.6f}")
        del model, optimizer, loss_fn, scaler, tr_loader, va_loader
        gc.collect(); torch.cuda.empty_cache()
    # average across folds for this seed
    if bad_folds < 2 and np.any(seed_test_logits_accum!=0):
        seed_avg_logits = seed_test_logits_accum / N_FOLDS
        global_seed_logits += seed_avg_logits
        print(f"[seed {seed}] added to ensemble.")
    else:
        print(f"[seed {seed}] skipped in ensemble due to insufficient folds.")

# average across seeds
n_seeds_used = len(SEEDS)
avg_logits = global_seed_logits / n_seeds_used
test_pred = 1.0 / (1.0 + np.exp(-avg_logits))

# Exact-hash overrides (should be zero)
with open('train_sha1_label_map.json', 'r') as f:
    sha1_to_label = json.load(f)
test_hash_df = pd.read_csv('test_hashes.csv')
lookup = test_hash_df[['id','sha1']].copy()
lookup['label_from_train'] = lookup['sha1'].map(sha1_to_label).astype('float32')
overrides = lookup['label_from_train'].notna().sum()
print(f"[multi-seed] exact-hash overrides in test: {overrides}")

sub = pd.read_csv('sample_submission.csv')
sub = sub.merge(lookup[['id','label_from_train']], on='id', how='left')
sub['pred'] = test_pred
sub['has_cactus'] = np.where(sub['label_from_train'].notna(), sub['label_from_train'], sub['pred'])
submission = sub[['id','has_cactus']].copy()
submission.to_csv('submission_seeds_d8.csv', index=False)
print('Saved submission_seeds_d8.csv with shape', submission.shape)

===== SEED 42 =====


-- seed 42 fold 1/5


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1672


  [train] step 40/44 loss=0.0972


[seed 42 fold 0] epoch 01 | tr_loss 0.0890 | val_auc 0.526545 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.3s


  [train] step 20/44 loss=0.0173


  [train] step 40/44 loss=0.0148


[seed 42 fold 0] epoch 02 | tr_loss 0.0164 | val_auc 0.619454 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.4s


  [train] step 20/44 loss=0.0064


  [train] step 40/44 loss=0.0076


[seed 42 fold 0] epoch 03 | tr_loss 0.0080 | val_auc 0.778835 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.5s
[seed 42 fold 0] early abort due to low AUC at epoch 3: 0.778835


[seed 42 fold 0] D8 inference done; best val AUC 0.778835


-- seed 42 fold 2/5


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1406


  [train] step 40/44 loss=0.0832


[seed 42 fold 1] epoch 01 | tr_loss 0.0769 | val_auc 0.476950 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.5s


  [train] step 20/44 loss=0.0103


  [train] step 40/44 loss=0.0124


[seed 42 fold 1] epoch 02 | tr_loss 0.0132 | val_auc 0.685562 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0095


  [train] step 40/44 loss=0.0084


[seed 42 fold 1] epoch 03 | tr_loss 0.0079 | val_auc 0.842271 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s
[seed 42 fold 1] early abort due to low AUC at epoch 3: 0.842271
[seed 42] aborting seed due to 2 bad folds
[seed 42] skipped in ensemble due to insufficient folds.
===== SEED 2025 =====


-- seed 2025 fold 1/5


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1532


  [train] step 40/44 loss=0.0932


[seed 2025 fold 0] epoch 01 | tr_loss 0.0862 | val_auc 0.522217 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.6s


  [train] step 20/44 loss=0.0123


  [train] step 40/44 loss=0.0109


[seed 2025 fold 0] epoch 02 | tr_loss 0.0112 | val_auc 0.668951 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0143


  [train] step 40/44 loss=0.0124


[seed 2025 fold 0] epoch 03 | tr_loss 0.0119 | val_auc 0.740689 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.5s
[seed 2025 fold 0] early abort due to low AUC at epoch 3: 0.740689


[seed 2025 fold 0] D8 inference done; best val AUC 0.740689


-- seed 2025 fold 2/5


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1569


  [train] step 40/44 loss=0.0905


[seed 2025 fold 1] epoch 01 | tr_loss 0.0832 | val_auc 0.431799 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.4s


  [train] step 20/44 loss=0.0131


  [train] step 40/44 loss=0.0142


[seed 2025 fold 1] epoch 02 | tr_loss 0.0144 | val_auc 0.590777 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.7s


  [train] step 20/44 loss=0.0108


  [train] step 40/44 loss=0.0119


[seed 2025 fold 1] epoch 03 | tr_loss 0.0113 | val_auc 0.729468 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.5s
[seed 2025 fold 1] early abort due to low AUC at epoch 3: 0.729468
[seed 2025] aborting seed due to 2 bad folds
[seed 2025] skipped in ensemble due to insufficient folds.
===== SEED 777 =====


-- seed 777 fold 1/5


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1598


  [train] step 40/44 loss=0.0912


[seed 777 fold 0] epoch 01 | tr_loss 0.0835 | val_auc 0.410984 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.6s


  [train] step 20/44 loss=0.0163


  [train] step 40/44 loss=0.0164


[seed 777 fold 0] epoch 02 | tr_loss 0.0165 | val_auc 0.554892 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0146


  [train] step 40/44 loss=0.0115


[seed 777 fold 0] epoch 03 | tr_loss 0.0109 | val_auc 0.730270 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.5s
[seed 777 fold 0] early abort due to low AUC at epoch 3: 0.730270


[seed 777 fold 0] D8 inference done; best val AUC 0.730270


-- seed 777 fold 2/5


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1613


  [train] step 40/44 loss=0.0945


[seed 777 fold 1] epoch 01 | tr_loss 0.0878 | val_auc 0.406898 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.5s


  [train] step 20/44 loss=0.0197


  [train] step 40/44 loss=0.0169


[seed 777 fold 1] epoch 02 | tr_loss 0.0170 | val_auc 0.507475 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0113


  [train] step 40/44 loss=0.0097


[seed 777 fold 1] epoch 03 | tr_loss 0.0092 | val_auc 0.616131 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.5s
[seed 777 fold 1] early abort due to low AUC at epoch 3: 0.616131
[seed 777] aborting seed due to 2 bad folds
[seed 777] skipped in ensemble due to insufficient folds.
[multi-seed] exact-hash overrides in test: 0
Saved submission_seeds_d8.csv with shape (3325, 2)


In [10]:
# phash nearest-neighbor overrides to improve AUC without retraining
import pandas as pd, numpy as np
from pathlib import Path
import json, time

def phash_hex_to_uint64(hexstr: str) -> np.uint64:
    return np.uint64(int(hexstr, 16))

# byte popcount lookup
POPCOUNT = np.array([bin(i).count('1') for i in range(256)], dtype=np.uint8)

def hamming_uint64(a: np.ndarray, b: np.uint64) -> np.ndarray:
    # a: (N,) uint64, b: scalar uint64
    x = np.bitwise_xor(a, b).view(np.uint8).reshape(-1, 8)  # 8 bytes
    return POPCOUNT[x].sum(axis=1).astype(np.uint8)

# Load phashes
train_hash_df = pd.read_csv('train_hashes.csv')  # columns: id, sha1, phash, has_cactus
test_hash_df = pd.read_csv('test_hashes.csv')    # columns: id, sha1, phash
sub = pd.read_csv('submission_d8.csv')  # strong base predictions

# Prepare arrays
train_codes = np.array([phash_hex_to_uint64(h) for h in train_hash_df['phash'].values], dtype=np.uint64)
train_labels = train_hash_df['has_cactus'].astype(np.uint8).values
test_codes = np.array([phash_hex_to_uint64(h) for h in test_hash_df['phash'].values], dtype=np.uint64)

# Brute-force nearest neighbor by Hamming for each test
t0 = time.time()
min_dists = np.empty(len(test_codes), dtype=np.uint8)
nearest_idx = np.empty(len(test_codes), dtype=np.int32)
for i, code in enumerate(test_codes):
    dists = hamming_uint64(train_codes, code)
    md = dists.min()
    min_dists[i] = md
    # pick nearest; tie-break by majority label among equals
    idxs = np.where(dists == md)[0]
    if len(idxs) == 1:
        nearest_idx[i] = idxs[0]
    else:
        # majority label; if tie, pick first
        lbls = train_labels[idxs]
        maj = 1 if lbls.mean() >= 0.5 else 0
        # choose first index with majority label
        choose = idxs[np.where(lbls == maj)[0][0]]
        nearest_idx[i] = choose
    if (i+1) % 500 == 0:
        print(f"[phash NN] {i+1}/{len(test_codes)} done in {time.time()-t0:.1f}s", flush=True)
print(f"[phash NN] completed in {time.time()-t0:.1f}s")

# Simple histogram to inspect distances
hist = np.bincount(min_dists.astype(int), minlength=65)
print('Hamming distance counts (first 10):', {i:int(hist[i]) for i in range(10)})

# Apply overrides for close matches (threshold tuned; start with <=3)
THRESH = 3
override_mask = min_dists <= THRESH
n_override = int(override_mask.sum())
print(f"[phash override] applying label override for {n_override}/{len(test_codes)} tests with dist <= {THRESH}")

override_labels = train_labels[nearest_idx].astype(np.float32)

out = sub.copy()
out = out.merge(test_hash_df[['id']].copy(), on='id', how='right')  # ensure same order as test_hash_df
assert len(out) == len(test_hash_df), 'Submission/test id mismatch'

new_probs = out['has_cactus'].values.astype(np.float32)
new_probs[override_mask] = override_labels[override_mask]
out['has_cactus'] = new_probs
out[['id','has_cactus']].to_csv('submission_phash_d8.csv', index=False)
print('Saved submission_phash_d8.csv with shape', out.shape)

[phash NN] 500/3325 done in 0.3s


[phash NN] 1000/3325 done in 0.7s


[phash NN] 1500/3325 done in 1.0s


[phash NN] 2000/3325 done in 1.4s


[phash NN] 2500/3325 done in 1.7s


[phash NN] 3000/3325 done in 2.1s


[phash NN] completed in 2.3s
Hamming distance counts (first 10): {0: 0, 1: 0, 2: 0, 3: 0, 4: 2, 5: 0, 6: 13, 7: 0, 8: 39, 9: 0}
[phash override] applying label override for 0/3325 tests with dist <= 3
Saved submission_phash_d8.csv with shape (3325, 2)


In [11]:
# 128px ResNet18 with Dihedral-8 TTA (logit-avg over TTAs and folds); produce submission_128_d8.csv
import os, time, json, gc
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedGroupKFold
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from torchvision import models
from torchvision.models import ResNet18_Weights

CACHE_DIR = Path('./.model_cache'); CACHE_DIR.mkdir(parents=True, exist_ok=True)
os.environ['TORCH_HOME'] = str(CACHE_DIR)
os.environ['XDG_CACHE_HOME'] = str(CACHE_DIR)
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
SEED = 42
IMG_SIZE = 128
N_FOLDS = 5
EPOCHS = 10
BATCH_SIZE = 128
WORKERS = 2
LR_BACKBONE = 1e-4
LR_HEAD = 1e-3
WEIGHT_DECAY = 1e-4

def set_seed(seed=SEED):
    import numpy as _np, random as _random, torch as _torch
    _random.seed(seed); _np.random.seed(seed); _torch.manual_seed(seed); _torch.cuda.manual_seed_all(seed)
    _torch.backends.cudnn.deterministic = True
    _torch.backends.cudnn.benchmark = False
set_seed()

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def get_transforms(train=True):
    if train:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.08, scale_limit=0.10, rotate_limit=10, border_mode=0, p=0.5),
            A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05, hue=0.0, p=0.2),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])
    else:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])

class TtaD8Dataset(Dataset):
    def __init__(self, img_dir, df, mode='train', rot_k=0, hflip=False):
        self.img_dir = Path(img_dir); self.df = df.reset_index(drop=True)
        self.mode = mode; self.rot_k = rot_k; self.hflip = hflip
        self.tfms = get_transforms(train=(mode=='train'))
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / row['id']
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            img = np.array(im)
        if self.rot_k:
            img = np.ascontiguousarray(np.rot90(img, k=self.rot_k))
        if self.hflip:
            img = np.ascontiguousarray(img[:, ::-1, :])
        aug = self.tfms(image=img)
        x = torch.from_numpy(aug['image'].transpose(2,0,1)).float()
        if self.mode in ('train','valid'):
            y = torch.tensor(row['has_cactus'], dtype=torch.float32)
            return x, y
        return x, row['id']

def build_model():
    m = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    m.fc = nn.Linear(m.fc.in_features, 1)
    return m

def make_optimizer(model):
    head_params = list(model.fc.parameters())
    backbone_params = [p for n,p in model.named_parameters() if not n.startswith('fc.')]
    return torch.optim.AdamW([
        {'params': backbone_params, 'lr': LR_BACKBONE},
        {'params': head_params, 'lr': LR_HEAD},
    ], weight_decay=WEIGHT_DECAY)

def train_one_epoch(model, loader, optimizer, loss_fn, scaler):
    model.train(); total=0.0
    for i,(x,y) in enumerate(loader):
        x=x.to(DEVICE,non_blocking=True); y=y.to(DEVICE,non_blocking=True).view(-1,1)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=(DEVICE=='cuda')):
            logits=model(x); loss=loss_fn(logits,y)
        scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()
        total += loss.item()*x.size(0)
        if (i+1)%20==0: print(f"  [train] step {i+1}/{len(loader)} loss={total/((i+1)*loader.batch_size):.4f}", flush=True)
    return total/len(loader.dataset)

def valid_auc(model, loader):
    model.eval(); preds=[]; targs=[]
    with torch.no_grad():
        for x,y in loader:
            x=x.to(DEVICE,non_blocking=True); y=y.view(-1,1)
            logits=model(x)
            preds.append(torch.sigmoid(logits).squeeze(1).cpu().numpy())
            targs.append(y.squeeze(1).cpu().numpy())
    preds=np.concatenate(preds); targs=np.concatenate(targs)
    return roc_auc_score(targs,preds)

def predict_logits(model, loader):
    model.eval(); out_logits=[]; out_ids=[]
    with torch.no_grad():
        for x,ids in loader:
            x=x.to(DEVICE,non_blocking=True)
            logits=model(x).squeeze(1).cpu().numpy()
            out_logits.append(logits); out_ids += list(ids)
    return np.concatenate(out_logits), out_ids

# Data
train_df = pd.read_csv('train.csv')
groups_df = pd.read_csv('cv_groups.csv')
train_df = train_df.merge(groups_df[['id','sha1']], on='id', how='left')
test_ids = pd.read_csv('sample_submission.csv')

skf = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
test_logit_accum = np.zeros(len(test_ids), dtype=np.float32)

fold_aucs=[]
for fold,(tr_idx,va_idx) in enumerate(skf.split(train_df['id'], train_df['has_cactus'], groups=train_df['sha1'])):
    print(f"===== 128px Fold {fold+1}/{N_FOLDS} =====")
    tr_df = train_df.iloc[tr_idx].reset_index(drop=True)
    va_df = train_df.iloc[va_idx].reset_index(drop=True)
    tr_loader = DataLoader(TtaD8Dataset('train', tr_df, mode='train'), batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS, pin_memory=True, drop_last=True)
    va_loader = DataLoader(TtaD8Dataset('train', va_df, mode='valid'), batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)

    model = build_model().to(DEVICE)
    optimizer = make_optimizer(model)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-5)
    loss_fn = nn.BCEWithLogitsLoss()
    scaler = torch.amp.GradScaler('cuda', enabled=(DEVICE=='cuda'))

    best_auc=-1.0; best_state=None; no_imp=0
    for epoch in range(1,EPOCHS+1):
        t0=time.time()
        tr_loss = train_one_epoch(model, tr_loader, optimizer, loss_fn, scaler)
        val_auc = valid_auc(model, va_loader)
        scheduler.step()
        lrs=[pg['lr'] for pg in optimizer.param_groups]
        print(f"[fold {fold}] epoch {epoch:02d} | tr_loss {tr_loss:.4f} | val_auc {val_auc:.6f} | lrs {lrs} | {time.time()-t0:.1f}s", flush=True)
        if val_auc > best_auc: best_auc=val_auc; best_state={k:v.cpu() for k,v in model.state_dict().items()}; no_imp=0
        else: no_imp+=1
        if no_imp>=3: print(f"[fold {fold}] early stop at {epoch}"); break
    print(f"[fold {fold}] best val AUC: {best_auc:.6f}")
    fold_aucs.append(best_auc)
    model.load_state_dict({k:v.to(DEVICE) for k,v in best_state.items()}, strict=True)

    # Dihedral-8 TTA logits
    tta_logits_sum = np.zeros(len(test_ids), dtype=np.float32)
    for rot_k in (0,1,2,3):
        for hf in (False, True):
            ds = TtaD8Dataset('test', test_ids, mode='test', rot_k=rot_k, hflip=hf)
            dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
            logits, ids = predict_logits(model, dl)
            tta_logits_sum += logits.astype(np.float32)
    test_logit_accum += (tta_logits_sum / 8.0)
    print(f"[fold {fold}] 128px D8 TTA done")
    del model, optimizer, loss_fn, scaler, tr_loader, va_loader
    gc.collect(); torch.cuda.empty_cache()

# Average across folds -> logits then sigmoid
avg_test_logits = test_logit_accum / N_FOLDS
np.save('logits_128.npy', avg_test_logits)
test_pred = 1.0 / (1.0 + np.exp(-avg_test_logits))

# Save submission
sub = pd.read_csv('sample_submission.csv')
sub['has_cactus'] = test_pred.astype(np.float32)
sub.to_csv('submission_128_d8.csv', index=False)
print('Saved submission_128_d8.csv; fold AUCs:', [f"{a:.6f}" for a in fold_aucs])

===== 128px Fold 1/5 =====


  [train] step 20/88 loss=0.1567


  [train] step 40/88 loss=0.0939


  [train] step 60/88 loss=0.0694


  [train] step 80/88 loss=0.0571


[fold 0] epoch 01 | tr_loss 0.0528 | val_auc 0.999918 | lrs [9.779754323328192e-05, 0.0009757729755661011] | 8.9s


  [train] step 20/88 loss=0.0181


  [train] step 40/88 loss=0.0171


  [train] step 60/88 loss=0.0164


  [train] step 80/88 loss=0.0143


[fold 0] epoch 02 | tr_loss 0.0139 | val_auc 0.999987 | lrs [9.140576474687264e-05, 0.000905463412215599] | 8.8s


  [train] step 20/88 loss=0.0080


  [train] step 40/88 loss=0.0069


  [train] step 60/88 loss=0.0075


  [train] step 80/88 loss=0.0071


[fold 0] epoch 03 | tr_loss 0.0079 | val_auc 0.999982 | lrs [8.14503363531613e-05, 0.0007959536998847742] | 8.6s


  [train] step 20/88 loss=0.0062


  [train] step 40/88 loss=0.0044


  [train] step 60/88 loss=0.0049


  [train] step 80/88 loss=0.0059


[fold 0] epoch 04 | tr_loss 0.0059 | val_auc 0.999984 | lrs [6.890576474687264e-05, 0.000657963412215599] | 8.7s


  [train] step 20/88 loss=0.0056


  [train] step 40/88 loss=0.0051


  [train] step 60/88 loss=0.0046


  [train] step 80/88 loss=0.0044


[fold 0] epoch 05 | tr_loss 0.0043 | val_auc 0.999995 | lrs [5.500000000000001e-05, 0.000505] | 8.9s


  [train] step 20/88 loss=0.0015


  [train] step 40/88 loss=0.0028


  [train] step 60/88 loss=0.0025


  [train] step 80/88 loss=0.0036


[fold 0] epoch 06 | tr_loss 0.0037 | val_auc 0.999992 | lrs [4.109423525312737e-05, 0.0003520365877844011] | 8.6s


  [train] step 20/88 loss=0.0029


  [train] step 40/88 loss=0.0021


  [train] step 60/88 loss=0.0019


  [train] step 80/88 loss=0.0018


[fold 0] epoch 07 | tr_loss 0.0017 | val_auc 0.999989 | lrs [2.8549663646838717e-05, 0.00021404630011522585] | 8.6s


  [train] step 20/88 loss=0.0017


  [train] step 40/88 loss=0.0013


  [train] step 60/88 loss=0.0011


  [train] step 80/88 loss=0.0011


[fold 0] epoch 08 | tr_loss 0.0011 | val_auc 0.999994 | lrs [1.8594235253127375e-05, 0.00010453658778440107] | 8.8s


[fold 0] early stop at 8
[fold 0] best val AUC: 0.999995


[fold 0] 128px D8 TTA done


===== 128px Fold 2/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/88 loss=0.1540


  [train] step 40/88 loss=0.0937


  [train] step 60/88 loss=0.0699


  [train] step 80/88 loss=0.0571


[fold 1] epoch 01 | tr_loss 0.0536 | val_auc 0.999803 | lrs [9.779754323328192e-05, 0.0009757729755661011] | 8.7s


  [train] step 20/88 loss=0.0145


  [train] step 40/88 loss=0.0131


  [train] step 60/88 loss=0.0131


  [train] step 80/88 loss=0.0145


[fold 1] epoch 02 | tr_loss 0.0146 | val_auc 0.999967 | lrs [9.140576474687264e-05, 0.000905463412215599] | 9.0s


  [train] step 20/88 loss=0.0061


  [train] step 40/88 loss=0.0082


  [train] step 60/88 loss=0.0103


  [train] step 80/88 loss=0.0106


[fold 1] epoch 03 | tr_loss 0.0098 | val_auc 0.999972 | lrs [8.14503363531613e-05, 0.0007959536998847742] | 8.7s


  [train] step 20/88 loss=0.0062


  [train] step 40/88 loss=0.0047


  [train] step 60/88 loss=0.0052


  [train] step 80/88 loss=0.0053


[fold 1] epoch 04 | tr_loss 0.0052 | val_auc 0.999988 | lrs [6.890576474687264e-05, 0.000657963412215599] | 8.6s


  [train] step 20/88 loss=0.0052


  [train] step 40/88 loss=0.0043


  [train] step 60/88 loss=0.0036


  [train] step 80/88 loss=0.0046


[fold 1] epoch 05 | tr_loss 0.0042 | val_auc 0.999971 | lrs [5.500000000000001e-05, 0.000505] | 8.6s


  [train] step 20/88 loss=0.0056


  [train] step 40/88 loss=0.0056


  [train] step 60/88 loss=0.0047


  [train] step 80/88 loss=0.0045


[fold 1] epoch 06 | tr_loss 0.0044 | val_auc 0.999993 | lrs [4.109423525312737e-05, 0.0003520365877844011] | 8.7s


  [train] step 20/88 loss=0.0035


  [train] step 40/88 loss=0.0039


  [train] step 60/88 loss=0.0041


  [train] step 80/88 loss=0.0039


[fold 1] epoch 07 | tr_loss 0.0038 | val_auc 0.999992 | lrs [2.8549663646838717e-05, 0.00021404630011522585] | 8.8s


  [train] step 20/88 loss=0.0034


  [train] step 40/88 loss=0.0027


  [train] step 60/88 loss=0.0023


  [train] step 80/88 loss=0.0028


[fold 1] epoch 08 | tr_loss 0.0026 | val_auc 0.999979 | lrs [1.8594235253127375e-05, 0.00010453658778440107] | 8.6s


  [train] step 20/88 loss=0.0027


  [train] step 40/88 loss=0.0016


  [train] step 60/88 loss=0.0013


  [train] step 80/88 loss=0.0012


[fold 1] epoch 09 | tr_loss 0.0012 | val_auc 0.999987 | lrs [1.2202456766718093e-05, 3.4227024433899005e-05] | 8.8s


[fold 1] early stop at 9
[fold 1] best val AUC: 0.999993


[fold 1] 128px D8 TTA done


===== 128px Fold 3/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/88 loss=0.1707


  [train] step 40/88 loss=0.1033


  [train] step 60/88 loss=0.0797


  [train] step 80/88 loss=0.0663


[fold 2] epoch 01 | tr_loss 0.0615 | val_auc 0.999946 | lrs [9.779754323328192e-05, 0.0009757729755661011] | 8.6s


  [train] step 20/88 loss=0.0131


  [train] step 40/88 loss=0.0118


  [train] step 60/88 loss=0.0109


  [train] step 80/88 loss=0.0123


[fold 2] epoch 02 | tr_loss 0.0118 | val_auc 0.999997 | lrs [9.140576474687264e-05, 0.000905463412215599] | 8.6s


  [train] step 20/88 loss=0.0053


  [train] step 40/88 loss=0.0057


  [train] step 60/88 loss=0.0072


  [train] step 80/88 loss=0.0095


[fold 2] epoch 03 | tr_loss 0.0092 | val_auc 0.999989 | lrs [8.14503363531613e-05, 0.0007959536998847742] | 8.6s


  [train] step 20/88 loss=0.0078


  [train] step 40/88 loss=0.0091


  [train] step 60/88 loss=0.0076


  [train] step 80/88 loss=0.0081


[fold 2] epoch 04 | tr_loss 0.0084 | val_auc 0.999995 | lrs [6.890576474687264e-05, 0.000657963412215599] | 8.7s


  [train] step 20/88 loss=0.0053


  [train] step 40/88 loss=0.0058


  [train] step 60/88 loss=0.0058


  [train] step 80/88 loss=0.0060


[fold 2] epoch 05 | tr_loss 0.0057 | val_auc 0.999993 | lrs [5.500000000000001e-05, 0.000505] | 8.6s


[fold 2] early stop at 5
[fold 2] best val AUC: 0.999997


[fold 2] 128px D8 TTA done


===== 128px Fold 4/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/88 loss=0.1905


  [train] step 40/88 loss=0.1108


  [train] step 60/88 loss=0.0814


  [train] step 80/88 loss=0.0657


[fold 3] epoch 01 | tr_loss 0.0609 | val_auc 0.999981 | lrs [9.779754323328192e-05, 0.0009757729755661011] | 8.7s


  [train] step 20/88 loss=0.0120


  [train] step 40/88 loss=0.0132


  [train] step 60/88 loss=0.0111


  [train] step 80/88 loss=0.0128


[fold 3] epoch 02 | tr_loss 0.0129 | val_auc 0.999979 | lrs [9.140576474687264e-05, 0.000905463412215599] | 8.7s


  [train] step 20/88 loss=0.0064


  [train] step 40/88 loss=0.0054


  [train] step 60/88 loss=0.0075


  [train] step 80/88 loss=0.0084


[fold 3] epoch 03 | tr_loss 0.0081 | val_auc 0.999966 | lrs [8.14503363531613e-05, 0.0007959536998847742] | 8.8s


  [train] step 20/88 loss=0.0114


  [train] step 40/88 loss=0.0087


  [train] step 60/88 loss=0.0087


  [train] step 80/88 loss=0.0078


[fold 3] epoch 04 | tr_loss 0.0074 | val_auc 0.999995 | lrs [6.890576474687264e-05, 0.000657963412215599] | 8.7s


  [train] step 20/88 loss=0.0043


  [train] step 40/88 loss=0.0042


  [train] step 60/88 loss=0.0048


  [train] step 80/88 loss=0.0048


[fold 3] epoch 05 | tr_loss 0.0051 | val_auc 0.999999 | lrs [5.500000000000001e-05, 0.000505] | 8.7s


  [train] step 20/88 loss=0.0065


  [train] step 40/88 loss=0.0052


  [train] step 60/88 loss=0.0044


  [train] step 80/88 loss=0.0042


[fold 3] epoch 06 | tr_loss 0.0039 | val_auc 0.999983 | lrs [4.109423525312737e-05, 0.0003520365877844011] | 8.6s


  [train] step 20/88 loss=0.0069


  [train] step 40/88 loss=0.0043


  [train] step 60/88 loss=0.0038


  [train] step 80/88 loss=0.0031


[fold 3] epoch 07 | tr_loss 0.0031 | val_auc 0.999998 | lrs [2.8549663646838717e-05, 0.00021404630011522585] | 8.8s


  [train] step 20/88 loss=0.0021


  [train] step 40/88 loss=0.0020


  [train] step 60/88 loss=0.0016


  [train] step 80/88 loss=0.0017


[fold 3] epoch 08 | tr_loss 0.0016 | val_auc 0.999998 | lrs [1.8594235253127375e-05, 0.00010453658778440107] | 8.7s


[fold 3] early stop at 8
[fold 3] best val AUC: 0.999999


[fold 3] 128px D8 TTA done


===== 128px Fold 5/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/88 loss=0.1611


  [train] step 40/88 loss=0.1027


  [train] step 60/88 loss=0.0784


  [train] step 80/88 loss=0.0649


[fold 4] epoch 01 | tr_loss 0.0599 | val_auc 0.999963 | lrs [9.779754323328192e-05, 0.0009757729755661011] | 8.6s


  [train] step 20/88 loss=0.0107


  [train] step 40/88 loss=0.0116


  [train] step 60/88 loss=0.0120


  [train] step 80/88 loss=0.0124


[fold 4] epoch 02 | tr_loss 0.0138 | val_auc 0.999982 | lrs [9.140576474687264e-05, 0.000905463412215599] | 8.7s


  [train] step 20/88 loss=0.0085


  [train] step 40/88 loss=0.0088


  [train] step 60/88 loss=0.0102


  [train] step 80/88 loss=0.0109


[fold 4] epoch 03 | tr_loss 0.0106 | val_auc 0.999997 | lrs [8.14503363531613e-05, 0.0007959536998847742] | 8.8s


  [train] step 20/88 loss=0.0051


  [train] step 40/88 loss=0.0081


  [train] step 60/88 loss=0.0085


  [train] step 80/88 loss=0.0086


[fold 4] epoch 04 | tr_loss 0.0085 | val_auc 0.999999 | lrs [6.890576474687264e-05, 0.000657963412215599] | 8.7s


  [train] step 20/88 loss=0.0064


  [train] step 40/88 loss=0.0082


  [train] step 60/88 loss=0.0069


  [train] step 80/88 loss=0.0060


[fold 4] epoch 05 | tr_loss 0.0058 | val_auc 0.999999 | lrs [5.500000000000001e-05, 0.000505] | 8.7s


  [train] step 20/88 loss=0.0040


  [train] step 40/88 loss=0.0041


  [train] step 60/88 loss=0.0032


  [train] step 80/88 loss=0.0028


[fold 4] epoch 06 | tr_loss 0.0027 | val_auc 1.000000 | lrs [4.109423525312737e-05, 0.0003520365877844011] | 8.6s


  [train] step 20/88 loss=0.0024


  [train] step 40/88 loss=0.0023


  [train] step 60/88 loss=0.0024


  [train] step 80/88 loss=0.0027


[fold 4] epoch 07 | tr_loss 0.0025 | val_auc 1.000000 | lrs [2.8549663646838717e-05, 0.00021404630011522585] | 8.7s


  [train] step 20/88 loss=0.0009


  [train] step 40/88 loss=0.0022


  [train] step 60/88 loss=0.0027


  [train] step 80/88 loss=0.0028


[fold 4] epoch 08 | tr_loss 0.0031 | val_auc 1.000000 | lrs [1.8594235253127375e-05, 0.00010453658778440107] | 8.6s


  [train] step 20/88 loss=0.0032


  [train] step 40/88 loss=0.0039


  [train] step 60/88 loss=0.0031


  [train] step 80/88 loss=0.0025


[fold 4] epoch 09 | tr_loss 0.0027 | val_auc 1.000000 | lrs [1.2202456766718093e-05, 3.4227024433899005e-05] | 8.8s


[fold 4] early stop at 9
[fold 4] best val AUC: 1.000000


[fold 4] 128px D8 TTA done


Saved submission_128_d8.csv; fold AUCs: ['0.999995', '0.999993', '0.999997', '0.999999', '1.000000']


In [12]:
# Blend 96px D8 and 128px D8 via logit averaging; submit
import numpy as np, pandas as pd

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def logit(p):
    p = np.clip(p, 1e-6, 1-1e-6).astype(np.float64)
    return np.log(p/(1.0-p))

# Load 96px preds and invert to logits
sub96 = pd.read_csv('submission_d8.csv')
p96 = sub96['has_cactus'].values.astype(np.float32)
logits96 = logit(p96)

# Load 128px logits saved earlier
logits128 = np.load('logits_128.npy')
assert logits128.shape[0] == logits96.shape[0], 'Length mismatch between 96 and 128 logits'

# Average logits and sigmoid once
avg_logits = 0.5 * (logits96 + logits128.astype(np.float64))
blend_pred = sigmoid(avg_logits).astype(np.float32)

# Save blended submission
sub = pd.read_csv('sample_submission.csv')
sub['has_cactus'] = blend_pred
sub.to_csv('submission_blend_96_128.csv', index=False)
print('Saved submission_blend_96_128.csv with shape', sub.shape)

Saved submission_blend_96_128.csv with shape (3325, 2)


In [13]:
# phash NN overrides on blended submission with thresholds 4 and 5
import pandas as pd, numpy as np, time

def phash_hex_to_uint64(hexstr: str) -> np.uint64:
    return np.uint64(int(hexstr, 16))

POPCOUNT = np.array([bin(i).count('1') for i in range(256)], dtype=np.uint8)

def hamming_uint64(a: np.ndarray, b: np.uint64) -> np.ndarray:
    x = np.bitwise_xor(a, b).view(np.uint8).reshape(-1, 8)
    return POPCOUNT[x].sum(axis=1).astype(np.uint8)

train_hash_df = pd.read_csv('train_hashes.csv')
test_hash_df = pd.read_csv('test_hashes.csv')
base = pd.read_csv('submission_blend_96_128.csv')

train_codes = np.array([phash_hex_to_uint64(h) for h in train_hash_df['phash'].values], dtype=np.uint64)
train_labels = train_hash_df['has_cactus'].astype(np.uint8).values
test_codes = np.array([phash_hex_to_uint64(h) for h in test_hash_df['phash'].values], dtype=np.uint64)

t0 = time.time()
min_dists = np.empty(len(test_codes), dtype=np.uint8)
nearest_idx = np.empty(len(test_codes), dtype=np.int32)
for i, code in enumerate(test_codes):
    dists = hamming_uint64(train_codes, code)
    md = dists.min()
    min_dists[i] = md
    idxs = np.where(dists == md)[0]
    if len(idxs) == 1:
        nearest_idx[i] = idxs[0]
    else:
        lbls = train_labels[idxs]
        maj = 1 if lbls.mean() >= 0.5 else 0
        choose = idxs[np.where(lbls == maj)[0][0]]
        nearest_idx[i] = choose
print(f"[phash NN] computed in {time.time()-t0:.2f}s; min_dists<=9 counts:", {i:int((min_dists==i).sum()) for i in range(10)})

def apply_override(base_df: pd.DataFrame, thresh: int, out_path: str):
    out = base_df.merge(test_hash_df[['id']], on='id', how='right')
    probs = out['has_cactus'].values.astype(np.float32)
    mask = (min_dists <= thresh)
    if mask.any():
        probs[mask] = train_labels[nearest_idx][mask].astype(np.float32)
    out['has_cactus'] = probs
    out[['id','has_cactus']].to_csv(out_path, index=False)
    print(f"Saved {out_path} | overrides: {int(mask.sum())}")

apply_override(base, 4, 'submission_phash4_blend.csv')
apply_override(base, 5, 'submission_phash5_blend.csv')

[phash NN] computed in 2.33s; min_dists<=9 counts: {0: 0, 1: 0, 2: 0, 3: 0, 4: 2, 5: 0, 6: 13, 7: 0, 8: 39, 9: 0}
Saved submission_phash4_blend.csv | overrides: 2
Saved submission_phash5_blend.csv | overrides: 2


In [14]:
# 96px ResNet34 with Dihedral-8 TTA (logit-avg over TTAs and folds); save logits_34_96.npy and submission_34_96_d8.csv
import os, time, json, gc
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedGroupKFold
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from torchvision import models
from torchvision.models import ResNet34_Weights

CACHE_DIR = Path('./.model_cache'); CACHE_DIR.mkdir(parents=True, exist_ok=True)
os.environ['TORCH_HOME'] = str(CACHE_DIR)
os.environ['XDG_CACHE_HOME'] = str(CACHE_DIR)
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
SEED = 42
IMG_SIZE = 96
N_FOLDS = 5
EPOCHS = 12
BATCH_SIZE = 256
WORKERS = 2
LR_BACKBONE = 1e-4
LR_HEAD = 1e-3
WEIGHT_DECAY = 1e-4

def set_seed(seed=SEED):
    import numpy as _np, random as _random, torch as _torch
    _random.seed(seed); _np.random.seed(seed); _torch.manual_seed(seed); _torch.cuda.manual_seed_all(seed)
    _torch.backends.cudnn.deterministic = True
    _torch.backends.cudnn.benchmark = False
set_seed()

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def get_transforms(train=True):
    if train:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.08, scale_limit=0.10, rotate_limit=10, border_mode=0, p=0.5),
            A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05, hue=0.0, p=0.2),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])
    else:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])

class TtaD8Dataset34(Dataset):
    def __init__(self, img_dir, df, mode='train', rot_k=0, hflip=False):
        self.img_dir = Path(img_dir); self.df = df.reset_index(drop=True)
        self.mode = mode; self.rot_k = rot_k; self.hflip = hflip
        self.tfms = get_transforms(train=(mode=='train'))
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / row['id']
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            img = np.array(im)
        if self.rot_k:
            img = np.ascontiguousarray(np.rot90(img, k=self.rot_k))
        if self.hflip:
            img = np.ascontiguousarray(img[:, ::-1, :])
        aug = self.tfms(image=img)
        x = torch.from_numpy(aug['image'].transpose(2,0,1)).float()
        if self.mode in ('train','valid'):
            y = torch.tensor(row['has_cactus'], dtype=torch.float32)
            return x, y
        return x, row['id']

def build_model():
    m = models.resnet34(weights=ResNet34_Weights.IMAGENET1K_V1)
    m.fc = nn.Linear(m.fc.in_features, 1)
    return m

def make_optimizer(model):
    head_params = list(model.fc.parameters())
    backbone_params = [p for n,p in model.named_parameters() if not n.startswith('fc.')]
    return torch.optim.AdamW([
        {'params': backbone_params, 'lr': LR_BACKBONE},
        {'params': head_params, 'lr': LR_HEAD},
    ], weight_decay=WEIGHT_DECAY)

def train_one_epoch(model, loader, optimizer, loss_fn, scaler):
    model.train(); total=0.0
    for i,(x,y) in enumerate(loader):
        x=x.to(DEVICE,non_blocking=True); y=y.to(DEVICE,non_blocking=True).view(-1,1)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=(DEVICE=='cuda')):
            logits=model(x); loss=loss_fn(logits,y)
        scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()
        total += loss.item()*x.size(0)
        if (i+1)%20==0: print(f"  [train] step {i+1}/{len(loader)} loss={total/((i+1)*loader.batch_size):.4f}", flush=True)
    return total/len(loader.dataset)

def valid_auc(model, loader):
    model.eval(); preds=[]; targs=[]
    with torch.no_grad():
        for x,y in loader:
            x=x.to(DEVICE,non_blocking=True); y=y.view(-1,1)
            logits=model(x)
            preds.append(torch.sigmoid(logits).squeeze(1).cpu().numpy())
            targs.append(y.squeeze(1).cpu().numpy())
    preds=np.concatenate(preds); targs=np.concatenate(targs)
    return roc_auc_score(targs,preds)

def predict_logits(model, loader):
    model.eval(); out_logits=[]; out_ids=[]
    with torch.no_grad():
        for x,ids in loader:
            x=x.to(DEVICE,non_blocking=True)
            logits=model(x).squeeze(1).cpu().numpy()
            out_logits.append(logits); out_ids += list(ids)
    return np.concatenate(out_logits), out_ids

# Data
train_df = pd.read_csv('train.csv')
groups_df = pd.read_csv('cv_groups.csv')
train_df = train_df.merge(groups_df[['id','sha1']], on='id', how='left')
test_ids = pd.read_csv('sample_submission.csv')

skf = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
test_logit_accum = np.zeros(len(test_ids), dtype=np.float32)

fold_aucs=[]
for fold,(tr_idx,va_idx) in enumerate(skf.split(train_df['id'], train_df['has_cactus'], groups=train_df['sha1'])):
    print(f"===== ResNet34 96px Fold {fold+1}/{N_FOLDS} =====")
    tr_df = train_df.iloc[tr_idx].reset_index(drop=True)
    va_df = train_df.iloc[va_idx].reset_index(drop=True)
    tr_loader = DataLoader(TtaD8Dataset34('train', tr_df, mode='train'), batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS, pin_memory=True, drop_last=True)
    va_loader = DataLoader(TtaD8Dataset34('train', va_df, mode='valid'), batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)

    model = build_model().to(DEVICE)
    optimizer = make_optimizer(model)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-5)
    loss_fn = nn.BCEWithLogitsLoss()
    scaler = torch.amp.GradScaler('cuda', enabled=(DEVICE=='cuda'))

    best_auc=-1.0; best_state=None; no_imp=0
    for epoch in range(1,EPOCHS+1):
        t0=time.time()
        tr_loss = train_one_epoch(model, tr_loader, optimizer, loss_fn, scaler)
        val_auc = valid_auc(model, va_loader)
        scheduler.step()
        lrs=[pg['lr'] for pg in optimizer.param_groups]
        print(f"[fold {fold}] epoch {epoch:02d} | tr_loss {tr_loss:.4f} | val_auc {val_auc:.6f} | lrs {lrs} | {time.time()-t0:.1f}s", flush=True)
        if val_auc > best_auc: best_auc=val_auc; best_state={k:v.cpu() for k,v in model.state_dict().items()}; no_imp=0
        else: no_imp+=1
        if no_imp>=3: print(f"[fold {fold}] early stop at {epoch}"); break
    print(f"[fold {fold}] best val AUC: {best_auc:.6f}")
    fold_aucs.append(best_auc)
    model.load_state_dict({k:v.to(DEVICE) for k,v in best_state.items()}, strict=True)

    # Dihedral-8 TTA logits
    tta_logits_sum = np.zeros(len(test_ids), dtype=np.float32)
    for rot_k in (0,1,2,3):
        for hf in (False, True):
            ds = TtaD8Dataset34('test', test_ids, mode='test', rot_k=rot_k, hflip=hf)
            dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
            logits, ids = predict_logits(model, dl)
            tta_logits_sum += logits.astype(np.float32)
    test_logit_accum += (tta_logits_sum / 8.0)
    print(f"[fold {fold}] ResNet34 96px D8 TTA done")
    del model, optimizer, loss_fn, scaler, tr_loader, va_loader
    gc.collect(); torch.cuda.empty_cache()

# Average across folds -> logits then sigmoid
avg_test_logits = test_logit_accum / N_FOLDS
np.save('logits_34_96.npy', avg_test_logits)
test_pred = 1.0 / (1.0 + np.exp(-avg_test_logits))

# Save submission
sub = pd.read_csv('sample_submission.csv')
sub['has_cactus'] = test_pred.astype(np.float32)
sub.to_csv('submission_34_96_d8.csv', index=False)
print('Saved submission_34_96_d8.csv; fold AUCs:', [f"{a:.6f}" for a in fold_aucs])

===== ResNet34 96px Fold 1/5 =====


  original_init(self, **validated_kwargs)


Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to .model_cache/hub/checkpoints/resnet34-b627a593.pth


  0%|          | 0.00/83.3M [00:00<?, ?B/s]

 11%|█         | 9.25M/83.3M [00:00<00:00, 96.6MB/s]

  [train] step 20/44 loss=0.1288


  [train] step 40/44 loss=0.0794


[fold 0] epoch 01 | tr_loss 0.0733 | val_auc 0.999629 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.5s


  [train] step 20/44 loss=0.0215


  [train] step 40/44 loss=0.0180


[fold 0] epoch 02 | tr_loss 0.0169 | val_auc 0.999957 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.6s


  [train] step 20/44 loss=0.0063


  [train] step 40/44 loss=0.0087


[fold 0] epoch 03 | tr_loss 0.0082 | val_auc 0.999974 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.6s


  [train] step 20/44 loss=0.0039


  [train] step 40/44 loss=0.0054


[fold 0] epoch 04 | tr_loss 0.0064 | val_auc 0.999993 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0100


  [train] step 40/44 loss=0.0077


[fold 0] epoch 05 | tr_loss 0.0078 | val_auc 0.999992 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.6s


  [train] step 20/44 loss=0.0038


  [train] step 40/44 loss=0.0038


[fold 0] epoch 06 | tr_loss 0.0036 | val_auc 0.999990 | lrs [5.5e-05, 0.000505] | 7.7s


  [train] step 20/44 loss=0.0040


  [train] step 40/44 loss=0.0041


[fold 0] epoch 07 | tr_loss 0.0038 | val_auc 0.999984 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.6s


[fold 0] early stop at 7
[fold 0] best val AUC: 0.999993


[fold 0] ResNet34 96px D8 TTA done


===== ResNet34 96px Fold 2/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1816


  [train] step 40/44 loss=0.1035


[fold 1] epoch 01 | tr_loss 0.0963 | val_auc 0.999860 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.5s


  [train] step 20/44 loss=0.0115


  [train] step 40/44 loss=0.0141


[fold 1] epoch 02 | tr_loss 0.0145 | val_auc 0.999923 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.6s


  [train] step 20/44 loss=0.0093


  [train] step 40/44 loss=0.0090


[fold 1] epoch 03 | tr_loss 0.0085 | val_auc 0.999916 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.8s


  [train] step 20/44 loss=0.0072


  [train] step 40/44 loss=0.0079


[fold 1] epoch 04 | tr_loss 0.0080 | val_auc 0.999951 | lrs [7.75e-05, 0.0007524999999999999] | 7.7s


  [train] step 20/44 loss=0.0065


  [train] step 40/44 loss=0.0064


[fold 1] epoch 05 | tr_loss 0.0060 | val_auc 0.999970 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.7s


  [train] step 20/44 loss=0.0026


  [train] step 40/44 loss=0.0025


[fold 1] epoch 06 | tr_loss 0.0023 | val_auc 0.999970 | lrs [5.5e-05, 0.000505] | 7.7s


  [train] step 20/44 loss=0.0047


  [train] step 40/44 loss=0.0043


[fold 1] epoch 07 | tr_loss 0.0039 | val_auc 0.999958 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.5s


  [train] step 20/44 loss=0.0068


  [train] step 40/44 loss=0.0050


[fold 1] epoch 08 | tr_loss 0.0049 | val_auc 0.999986 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.6s


  [train] step 20/44 loss=0.0015


  [train] step 40/44 loss=0.0016


[fold 1] epoch 09 | tr_loss 0.0016 | val_auc 0.999983 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.7s


  [train] step 20/44 loss=0.0018


  [train] step 40/44 loss=0.0016


[fold 1] epoch 10 | tr_loss 0.0015 | val_auc 0.999984 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.6s


  [train] step 20/44 loss=0.0012


  [train] step 40/44 loss=0.0008


[fold 1] epoch 11 | tr_loss 0.0007 | val_auc 0.999981 | lrs [1.1533337816991932e-05, 2.6866715986911242e-05] | 7.6s


[fold 1] early stop at 11
[fold 1] best val AUC: 0.999986


[fold 1] ResNet34 96px D8 TTA done


===== ResNet34 96px Fold 3/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1425


  [train] step 40/44 loss=0.0811


[fold 2] epoch 01 | tr_loss 0.0751 | val_auc 0.999953 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.5s


  [train] step 20/44 loss=0.0128


  [train] step 40/44 loss=0.0120


[fold 2] epoch 02 | tr_loss 0.0119 | val_auc 0.999961 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0087


  [train] step 40/44 loss=0.0092


[fold 2] epoch 03 | tr_loss 0.0092 | val_auc 0.999972 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.5s


  [train] step 20/44 loss=0.0081


  [train] step 40/44 loss=0.0074


[fold 2] epoch 04 | tr_loss 0.0075 | val_auc 0.999988 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0044


  [train] step 40/44 loss=0.0042


[fold 2] epoch 05 | tr_loss 0.0042 | val_auc 0.999982 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.7s


  [train] step 20/44 loss=0.0032


  [train] step 40/44 loss=0.0038


[fold 2] epoch 06 | tr_loss 0.0036 | val_auc 0.999991 | lrs [5.5e-05, 0.000505] | 7.9s


  [train] step 20/44 loss=0.0023


  [train] step 40/44 loss=0.0021


[fold 2] epoch 07 | tr_loss 0.0021 | val_auc 0.999993 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.6s


  [train] step 20/44 loss=0.0030


  [train] step 40/44 loss=0.0023


[fold 2] epoch 08 | tr_loss 0.0022 | val_auc 0.999991 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.5s


  [train] step 20/44 loss=0.0059


  [train] step 40/44 loss=0.0039


[fold 2] epoch 09 | tr_loss 0.0041 | val_auc 0.999987 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.7s


  [train] step 20/44 loss=0.0013


  [train] step 40/44 loss=0.0013


[fold 2] epoch 10 | tr_loss 0.0012 | val_auc 0.999991 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.9s


[fold 2] early stop at 10
[fold 2] best val AUC: 0.999993


[fold 2] ResNet34 96px D8 TTA done


===== ResNet34 96px Fold 4/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1388


  [train] step 40/44 loss=0.0857


[fold 3] epoch 01 | tr_loss 0.0791 | val_auc 0.999921 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.6s


  [train] step 20/44 loss=0.0142


  [train] step 40/44 loss=0.0151


[fold 3] epoch 02 | tr_loss 0.0142 | val_auc 0.999238 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.6s


  [train] step 20/44 loss=0.0081


  [train] step 40/44 loss=0.0070


[fold 3] epoch 03 | tr_loss 0.0069 | val_auc 0.999947 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.7s


  [train] step 20/44 loss=0.0085


  [train] step 40/44 loss=0.0069


[fold 3] epoch 04 | tr_loss 0.0069 | val_auc 0.998599 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0049


  [train] step 40/44 loss=0.0049


[fold 3] epoch 05 | tr_loss 0.0049 | val_auc 0.999964 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.6s


  [train] step 20/44 loss=0.0048


  [train] step 40/44 loss=0.0043


[fold 3] epoch 06 | tr_loss 0.0040 | val_auc 0.999996 | lrs [5.5e-05, 0.000505] | 7.6s


  [train] step 20/44 loss=0.0058


  [train] step 40/44 loss=0.0040


[fold 3] epoch 07 | tr_loss 0.0039 | val_auc 0.999989 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.5s


  [train] step 20/44 loss=0.0023


  [train] step 40/44 loss=0.0029


[fold 3] epoch 08 | tr_loss 0.0027 | val_auc 0.999977 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.5s


  [train] step 20/44 loss=0.0029


  [train] step 40/44 loss=0.0033


[fold 3] epoch 09 | tr_loss 0.0030 | val_auc 0.999986 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.5s


[fold 3] early stop at 9
[fold 3] best val AUC: 0.999996


[fold 3] ResNet34 96px D8 TTA done


===== ResNet34 96px Fold 5/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1625


  [train] step 40/44 loss=0.0934


[fold 4] epoch 01 | tr_loss 0.0859 | val_auc 0.999967 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.6s


  [train] step 20/44 loss=0.0133


  [train] step 40/44 loss=0.0138


[fold 4] epoch 02 | tr_loss 0.0137 | val_auc 0.999999 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.6s


  [train] step 20/44 loss=0.0099


  [train] step 40/44 loss=0.0090


[fold 4] epoch 03 | tr_loss 0.0102 | val_auc 0.999911 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.6s


  [train] step 20/44 loss=0.0089


  [train] step 40/44 loss=0.0082


[fold 4] epoch 04 | tr_loss 0.0088 | val_auc 0.999996 | lrs [7.75e-05, 0.0007524999999999999] | 7.6s


  [train] step 20/44 loss=0.0053


  [train] step 40/44 loss=0.0052


[fold 4] epoch 05 | tr_loss 0.0057 | val_auc 0.999999 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.5s


  [train] step 20/44 loss=0.0033


  [train] step 40/44 loss=0.0036


[fold 4] epoch 06 | tr_loss 0.0036 | val_auc 1.000000 | lrs [5.5e-05, 0.000505] | 7.6s


  [train] step 20/44 loss=0.0023


  [train] step 40/44 loss=0.0023


[fold 4] epoch 07 | tr_loss 0.0022 | val_auc 1.000000 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.6s


  [train] step 20/44 loss=0.0038


  [train] step 40/44 loss=0.0031


[fold 4] epoch 08 | tr_loss 0.0030 | val_auc 1.000000 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.6s


  [train] step 20/44 loss=0.0012


  [train] step 40/44 loss=0.0012


[fold 4] epoch 09 | tr_loss 0.0014 | val_auc 1.000000 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.5s


[fold 4] early stop at 9
[fold 4] best val AUC: 1.000000


[fold 4] ResNet34 96px D8 TTA done


Saved submission_34_96_d8.csv; fold AUCs: ['0.999993', '0.999986', '0.999993', '0.999996', '1.000000']


In [15]:
# Blend three models (96-ResNet18 D8, 128-ResNet18 D8, 96-ResNet34 D8) via logit averaging
import numpy as np, pandas as pd

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def logit(p):
    p = np.clip(p, 1e-6, 1-1e-6).astype(np.float64)
    return np.log(p/(1.0-p))

# 96-ResNet18 D8: recover logits from probs
sub96 = pd.read_csv('submission_d8.csv')
logits96 = logit(sub96['has_cactus'].values.astype(np.float32))

# 128-ResNet18 D8 logits
logits128 = np.load('logits_128.npy').astype(np.float64)

# 96-ResNet34 D8 logits
logits34_96 = np.load('logits_34_96.npy').astype(np.float64)

assert logits96.shape[0] == logits128.shape[0] == logits34_96.shape[0], 'Length mismatch among logits'

# Equal-weight logit average
avg_logits = (logits96 + logits128 + logits34_96) / 3.0
blend_pred = sigmoid(avg_logits).astype(np.float32)

sub = pd.read_csv('sample_submission.csv')
sub['has_cactus'] = blend_pred
sub.to_csv('submission_blend_3.csv', index=False)
print('Saved submission_blend_3.csv with shape', sub.shape)

Saved submission_blend_3.csv with shape (3325, 2)


In [16]:
# Rank-averaged blend of 96-ResNet18 D8, 128-ResNet18 D8, 96-ResNet34 D8
import pandas as pd, numpy as np

sub96 = pd.read_csv('submission_d8.csv')
sub128 = pd.read_csv('submission_128_d8.csv')
sub34 = pd.read_csv('submission_34_96_d8.csv')

# Ensure same order/ids
assert sub96['id'].equals(sub128['id']) and sub96['id'].equals(sub34['id']), 'ID mismatch across submissions'
ids = sub96['id'].values

p1 = sub96['has_cactus'].values
p2 = sub128['has_cactus'].values
p3 = sub34['has_cactus'].values

n = len(p1)
r1 = pd.Series(p1).rank(method='average').to_numpy() / (n + 1.0)
r2 = pd.Series(p2).rank(method='average').to_numpy() / (n + 1.0)
r3 = pd.Series(p3).rank(method='average').to_numpy() / (n + 1.0)

# Equal-weight rank average
rank_blend = (r1 + r2 + r3) / 3.0

sub = pd.DataFrame({'id': ids, 'has_cactus': rank_blend.astype(np.float32)})
sub.to_csv('submission_blend_rank3.csv', index=False)
print('Saved submission_blend_rank3.csv with shape', sub.shape)

Saved submission_blend_rank3.csv with shape (3325, 2)


In [17]:
# 96px ResNet18 with Dihedral-8 TTA (logit-avg) and save logits_96.npy + submission_96_d8.csv
import os, time, json, gc
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedGroupKFold
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from torchvision import models
from torchvision.models import ResNet18_Weights

CACHE_DIR = Path('./.model_cache'); CACHE_DIR.mkdir(parents=True, exist_ok=True)
os.environ['TORCH_HOME'] = str(CACHE_DIR)
os.environ['XDG_CACHE_HOME'] = str(CACHE_DIR)
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
SEED = 42
IMG_SIZE = 96
N_FOLDS = 5
EPOCHS = 12
BATCH_SIZE = 256
WORKERS = 2
LR_BACKBONE = 1e-4
LR_HEAD = 1e-3
WEIGHT_DECAY = 1e-4

def set_seed(seed=SEED):
    import numpy as _np, random as _random, torch as _torch
    _random.seed(seed); _np.random.seed(seed); _torch.manual_seed(seed); _torch.cuda.manual_seed_all(seed)
    _torch.backends.cudnn.deterministic = True
    _torch.backends.cudnn.benchmark = False
set_seed()

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def get_transforms(train=True):
    if train:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.08, scale_limit=0.10, rotate_limit=10, border_mode=0, p=0.5),
            A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05, hue=0.0, p=0.2),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])
    else:
        return A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])

class TtaD8Dataset96(Dataset):
    def __init__(self, img_dir, df, mode='train', rot_k=0, hflip=False):
        self.img_dir = Path(img_dir); self.df = df.reset_index(drop=True)
        self.mode = mode; self.rot_k = rot_k; self.hflip = hflip
        self.tfms = get_transforms(train=(mode=='train'))
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / row['id']
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            img = np.array(im)
        if self.rot_k:
            img = np.ascontiguousarray(np.rot90(img, k=self.rot_k))
        if self.hflip:
            img = np.ascontiguousarray(img[:, ::-1, :])
        aug = self.tfms(image=img)
        x = torch.from_numpy(aug['image'].transpose(2,0,1)).float()
        if self.mode in ('train','valid'):
            y = torch.tensor(row['has_cactus'], dtype=torch.float32)
            return x, y
        return x, row['id']

def build_model():
    m = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    m.fc = nn.Linear(m.fc.in_features, 1)
    return m

def make_optimizer(model):
    head_params = list(model.fc.parameters())
    backbone_params = [p for n,p in model.named_parameters() if not n.startswith('fc.')]
    return torch.optim.AdamW([
        {'params': backbone_params, 'lr': LR_BACKBONE},
        {'params': head_params, 'lr': LR_HEAD},
    ], weight_decay=WEIGHT_DECAY)

def train_one_epoch(model, loader, optimizer, loss_fn, scaler):
    model.train(); total=0.0
    for i,(x,y) in enumerate(loader):
        x=x.to(DEVICE,non_blocking=True); y=y.to(DEVICE,non_blocking=True).view(-1,1)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=(DEVICE=='cuda')):
            logits=model(x); loss=loss_fn(logits,y)
        scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()
        total += loss.item()*x.size(0)
        if (i+1)%20==0: print(f"  [train] step {i+1}/{len(loader)} loss={total/((i+1)*loader.batch_size):.4f}", flush=True)
    return total/len(loader.dataset)

def valid_auc(model, loader):
    model.eval(); preds=[]; targs=[]
    with torch.no_grad():
        for x,y in loader:
            x=x.to(DEVICE,non_blocking=True); y=y.view(-1,1)
            logits=model(x)
            preds.append(torch.sigmoid(logits).squeeze(1).cpu().numpy())
            targs.append(y.squeeze(1).cpu().numpy())
    preds=np.concatenate(preds); targs=np.concatenate(targs)
    return roc_auc_score(targs,preds)

def predict_logits(model, loader):
    model.eval(); out_logits=[]; out_ids=[]
    with torch.no_grad():
        for x,ids in loader:
            x=x.to(DEVICE,non_blocking=True)
            logits=model(x).squeeze(1).cpu().numpy()
            out_logits.append(logits); out_ids += list(ids)
    return np.concatenate(out_logits), out_ids

# Data
train_df = pd.read_csv('train.csv')
groups_df = pd.read_csv('cv_groups.csv')
train_df = train_df.merge(groups_df[['id','sha1']], on='id', how='left')
test_ids = pd.read_csv('sample_submission.csv')

skf = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
test_logit_accum = np.zeros(len(test_ids), dtype=np.float32)

for fold,(tr_idx,va_idx) in enumerate(skf.split(train_df['id'], train_df['has_cactus'], groups=train_df['sha1'])):
    print(f"===== ResNet18 96px D8 Fold {fold+1}/{N_FOLDS} =====")
    tr_df = train_df.iloc[tr_idx].reset_index(drop=True)
    va_df = train_df.iloc[va_idx].reset_index(drop=True)
    tr_loader = DataLoader(TtaD8Dataset96('train', tr_df, mode='train'), batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS, pin_memory=True, drop_last=True)
    va_loader = DataLoader(TtaD8Dataset96('train', va_df, mode='valid'), batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)

    model = build_model().to(DEVICE)
    optimizer = make_optimizer(model)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-5)
    loss_fn = nn.BCEWithLogitsLoss()
    scaler = torch.amp.GradScaler('cuda', enabled=(DEVICE=='cuda'))

    best_auc=-1.0; best_state=None; no_imp=0
    for epoch in range(1,EPOCHS+1):
        t0=time.time()
        tr_loss = train_one_epoch(model, tr_loader, optimizer, loss_fn, scaler)
        val_auc = valid_auc(model, va_loader)
        scheduler.step()
        lrs=[pg['lr'] for pg in optimizer.param_groups]
        print(f"[fold {fold}] epoch {epoch:02d} | tr_loss {tr_loss:.4f} | val_auc {val_auc:.6f} | lrs {lrs} | {time.time()-t0:.1f}s", flush=True)
        if val_auc > best_auc: best_auc=val_auc; best_state={k:v.cpu() for k,v in model.state_dict().items()}; no_imp=0
        else: no_imp+=1
        if no_imp>=3: print(f"[fold {fold}] early stop at {epoch}"); break
    print(f"[fold {fold}] best val AUC: {best_auc:.6f}")
    model.load_state_dict({k:v.to(DEVICE) for k,v in best_state.items()}, strict=True)

    # Dihedral-8 TTA logits
    tta_logits_sum = np.zeros(len(test_ids), dtype=np.float32)
    for rot_k in (0,1,2,3):
        for hf in (False, True):
            ds = TtaD8Dataset96('test', test_ids, mode='test', rot_k=rot_k, hflip=hf)
            dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
            logits, ids = predict_logits(model, dl)
            tta_logits_sum += logits.astype(np.float32)
    test_logit_accum += (tta_logits_sum / 8.0)
    print(f"[fold {fold}] 96px D8 TTA done")
    del model, optimizer, loss_fn, scaler, tr_loader, va_loader
    gc.collect(); torch.cuda.empty_cache()

# Average across folds -> logits then sigmoid
avg_test_logits = test_logit_accum / N_FOLDS
np.save('logits_96.npy', avg_test_logits)
test_pred = 1.0 / (1.0 + np.exp(-avg_test_logits))

# Save submission
sub = pd.read_csv('sample_submission.csv')
sub['has_cactus'] = test_pred.astype(np.float32)
sub.to_csv('submission_96_d8.csv', index=False)
print('Saved submission_96_d8.csv and logits_96.npy')

# Blend true logits (96, 128, 34@96) and save submission_blend3_true.csv
logits96 = np.load('logits_96.npy').astype(np.float64)
logits128 = np.load('logits_128.npy').astype(np.float64)
logits34 = np.load('logits_34_96.npy').astype(np.float64)
assert logits96.shape[0] == logits128.shape[0] == logits34.shape[0], 'Length mismatch among logits'
avg_logits = (logits96 + logits128 + logits34) / 3.0
blend_pred = 1.0 / (1.0 + np.exp(-avg_logits))
sub_blend = pd.read_csv('sample_submission.csv')
sub_blend['has_cactus'] = blend_pred.astype(np.float32)
sub_blend.to_csv('submission_blend3_true.csv', index=False)
print('Saved submission_blend3_true.csv')

===== ResNet18 96px D8 Fold 1/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1621


  [train] step 40/44 loss=0.0946


[fold 0] epoch 01 | tr_loss 0.0867 | val_auc 0.999910 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.6s


  [train] step 20/44 loss=0.0157


  [train] step 40/44 loss=0.0138


[fold 0] epoch 02 | tr_loss 0.0144 | val_auc 0.999922 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.6s


  [train] step 20/44 loss=0.0080


  [train] step 40/44 loss=0.0085


[fold 0] epoch 03 | tr_loss 0.0088 | val_auc 0.999965 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.2s


  [train] step 20/44 loss=0.0047


  [train] step 40/44 loss=0.0053


[fold 0] epoch 04 | tr_loss 0.0057 | val_auc 0.999960 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0063


  [train] step 40/44 loss=0.0055


[fold 0] epoch 05 | tr_loss 0.0053 | val_auc 0.999966 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.5s


  [train] step 20/44 loss=0.0040


  [train] step 40/44 loss=0.0035


[fold 0] epoch 06 | tr_loss 0.0035 | val_auc 0.999984 | lrs [5.5e-05, 0.000505] | 7.3s


  [train] step 20/44 loss=0.0056


  [train] step 40/44 loss=0.0037


[fold 0] epoch 07 | tr_loss 0.0036 | val_auc 0.999984 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.3s


  [train] step 20/44 loss=0.0021


  [train] step 40/44 loss=0.0022


[fold 0] epoch 08 | tr_loss 0.0020 | val_auc 0.999991 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.3s


  [train] step 20/44 loss=0.0019


  [train] step 40/44 loss=0.0029


[fold 0] epoch 09 | tr_loss 0.0029 | val_auc 0.999998 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.5s


  [train] step 20/44 loss=0.0011


  [train] step 40/44 loss=0.0018


[fold 0] epoch 10 | tr_loss 0.0017 | val_auc 0.999996 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.3s


  [train] step 20/44 loss=0.0011


  [train] step 40/44 loss=0.0010


[fold 0] epoch 11 | tr_loss 0.0011 | val_auc 0.999997 | lrs [1.1533337816991932e-05, 2.6866715986911242e-05] | 7.4s


  [train] step 20/44 loss=0.0034


  [train] step 40/44 loss=0.0027


[fold 0] epoch 12 | tr_loss 0.0026 | val_auc 0.999997 | lrs [1e-05, 1e-05] | 7.3s


[fold 0] early stop at 12
[fold 0] best val AUC: 0.999998


[fold 0] 96px D8 TTA done


===== ResNet18 96px D8 Fold 2/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1303


  [train] step 40/44 loss=0.0771


[fold 1] epoch 01 | tr_loss 0.0716 | val_auc 0.999828 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.6s


  [train] step 20/44 loss=0.0224


  [train] step 40/44 loss=0.0166


[fold 1] epoch 02 | tr_loss 0.0155 | val_auc 0.999880 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0126


  [train] step 40/44 loss=0.0097


[fold 1] epoch 03 | tr_loss 0.0097 | val_auc 0.999897 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.4s


  [train] step 20/44 loss=0.0074


  [train] step 40/44 loss=0.0065


[fold 1] epoch 04 | tr_loss 0.0061 | val_auc 0.999901 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0048


  [train] step 40/44 loss=0.0052


[fold 1] epoch 05 | tr_loss 0.0054 | val_auc 0.999930 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.5s


  [train] step 20/44 loss=0.0043


  [train] step 40/44 loss=0.0063


[fold 1] epoch 06 | tr_loss 0.0064 | val_auc 0.999905 | lrs [5.5e-05, 0.000505] | 7.6s


  [train] step 20/44 loss=0.0049


  [train] step 40/44 loss=0.0041


[fold 1] epoch 07 | tr_loss 0.0039 | val_auc 0.999899 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.6s


  [train] step 20/44 loss=0.0040


  [train] step 40/44 loss=0.0042


[fold 1] epoch 08 | tr_loss 0.0039 | val_auc 0.999919 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.6s


[fold 1] early stop at 8
[fold 1] best val AUC: 0.999930


[fold 1] 96px D8 TTA done


===== ResNet18 96px D8 Fold 3/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1725


  [train] step 40/44 loss=0.1004


[fold 2] epoch 01 | tr_loss 0.0940 | val_auc 0.999832 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.5s


  [train] step 20/44 loss=0.0185


  [train] step 40/44 loss=0.0172


[fold 2] epoch 02 | tr_loss 0.0173 | val_auc 0.999978 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0090


  [train] step 40/44 loss=0.0098


[fold 2] epoch 03 | tr_loss 0.0092 | val_auc 0.999966 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.6s


  [train] step 20/44 loss=0.0071


  [train] step 40/44 loss=0.0079


[fold 2] epoch 04 | tr_loss 0.0078 | val_auc 0.999984 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0042


  [train] step 40/44 loss=0.0046


[fold 2] epoch 05 | tr_loss 0.0048 | val_auc 0.999991 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.6s


  [train] step 20/44 loss=0.0042


  [train] step 40/44 loss=0.0043


[fold 2] epoch 06 | tr_loss 0.0042 | val_auc 0.999995 | lrs [5.5e-05, 0.000505] | 7.6s


  [train] step 20/44 loss=0.0021


  [train] step 40/44 loss=0.0026


[fold 2] epoch 07 | tr_loss 0.0031 | val_auc 0.999995 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.5s


  [train] step 20/44 loss=0.0043


  [train] step 40/44 loss=0.0044


[fold 2] epoch 08 | tr_loss 0.0042 | val_auc 0.999992 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.5s


  [train] step 20/44 loss=0.0016


  [train] step 40/44 loss=0.0019


[fold 2] epoch 09 | tr_loss 0.0019 | val_auc 0.999992 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.5s


[fold 2] early stop at 9
[fold 2] best val AUC: 0.999995


[fold 2] 96px D8 TTA done


===== ResNet18 96px D8 Fold 4/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1811


  [train] step 40/44 loss=0.1040


[fold 3] epoch 01 | tr_loss 0.0955 | val_auc 0.999644 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.5s


  [train] step 20/44 loss=0.0200


  [train] step 40/44 loss=0.0158


[fold 3] epoch 02 | tr_loss 0.0154 | val_auc 0.999864 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.6s


  [train] step 20/44 loss=0.0086


  [train] step 40/44 loss=0.0089


[fold 3] epoch 03 | tr_loss 0.0091 | val_auc 0.999877 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.6s


  [train] step 20/44 loss=0.0084


  [train] step 40/44 loss=0.0090


[fold 3] epoch 04 | tr_loss 0.0099 | val_auc 0.999959 | lrs [7.75e-05, 0.0007524999999999999] | 7.5s


  [train] step 20/44 loss=0.0067


  [train] step 40/44 loss=0.0060


[fold 3] epoch 05 | tr_loss 0.0058 | val_auc 0.999891 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.6s


  [train] step 20/44 loss=0.0060


  [train] step 40/44 loss=0.0070


[fold 3] epoch 06 | tr_loss 0.0066 | val_auc 0.999948 | lrs [5.5e-05, 0.000505] | 7.4s


  [train] step 20/44 loss=0.0064


  [train] step 40/44 loss=0.0055


[fold 3] epoch 07 | tr_loss 0.0051 | val_auc 0.999963 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.6s


  [train] step 20/44 loss=0.0025


  [train] step 40/44 loss=0.0018


[fold 3] epoch 08 | tr_loss 0.0018 | val_auc 0.999952 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.6s


  [train] step 20/44 loss=0.0013


  [train] step 40/44 loss=0.0017


[fold 3] epoch 09 | tr_loss 0.0017 | val_auc 0.999956 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.7s


  [train] step 20/44 loss=0.0016


  [train] step 40/44 loss=0.0021


[fold 3] epoch 10 | tr_loss 0.0022 | val_auc 0.999952 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.6s


[fold 3] early stop at 10
[fold 3] best val AUC: 0.999963


[fold 3] 96px D8 TTA done


===== ResNet18 96px D8 Fold 5/5 =====


  original_init(self, **validated_kwargs)


  [train] step 20/44 loss=0.1456


  [train] step 40/44 loss=0.0862


[fold 4] epoch 01 | tr_loss 0.0797 | val_auc 0.999839 | lrs [9.846666218300807e-05, 0.0009831332840130886] | 7.2s


  [train] step 20/44 loss=0.0132


  [train] step 40/44 loss=0.0159


[fold 4] epoch 02 | tr_loss 0.0152 | val_auc 0.999940 | lrs [9.397114317029975e-05, 0.0009336825748732971] | 7.5s


  [train] step 20/44 loss=0.0104


  [train] step 40/44 loss=0.0101


[fold 4] epoch 03 | tr_loss 0.0100 | val_auc 0.999967 | lrs [8.681980515339464e-05, 0.0008550178566873409] | 7.6s


  [train] step 20/44 loss=0.0106


  [train] step 40/44 loss=0.0096


[fold 4] epoch 04 | tr_loss 0.0092 | val_auc 0.999980 | lrs [7.75e-05, 0.0007524999999999999] | 7.4s


  [train] step 20/44 loss=0.0071


  [train] step 40/44 loss=0.0070


[fold 4] epoch 05 | tr_loss 0.0066 | val_auc 0.999985 | lrs [6.664685702961344e-05, 0.0006331154273257478] | 7.6s


  [train] step 20/44 loss=0.0053


  [train] step 40/44 loss=0.0063


[fold 4] epoch 06 | tr_loss 0.0062 | val_auc 0.999995 | lrs [5.5e-05, 0.000505] | 7.4s


  [train] step 20/44 loss=0.0032


  [train] step 40/44 loss=0.0038


[fold 4] epoch 07 | tr_loss 0.0035 | val_auc 0.999999 | lrs [4.335314297038657e-05, 0.00037688457267425233] | 7.5s


  [train] step 20/44 loss=0.0032


  [train] step 40/44 loss=0.0027


[fold 4] epoch 08 | tr_loss 0.0039 | val_auc 0.999996 | lrs [3.250000000000001e-05, 0.00025750000000000013] | 7.6s


  [train] step 20/44 loss=0.0021


  [train] step 40/44 loss=0.0021


[fold 4] epoch 09 | tr_loss 0.0021 | val_auc 0.999996 | lrs [2.3180194846605367e-05, 0.000154982143312659] | 7.7s


  [train] step 20/44 loss=0.0034


  [train] step 40/44 loss=0.0029


[fold 4] epoch 10 | tr_loss 0.0032 | val_auc 0.999997 | lrs [1.602885682970026e-05, 7.631742512670284e-05] | 7.6s


[fold 4] early stop at 10
[fold 4] best val AUC: 0.999999


[fold 4] 96px D8 TTA done


Saved submission_96_d8.csv and logits_96.npy
Saved submission_blend3_true.csv


In [18]:
# Embedding kNN overrides using ResNet18@128 GAP features with D8 TTA (cosine NN, unanimous top-5, high-sim threshold)
import time, os, gc, numpy as np, pandas as pd
import torch, torch.nn as nn
from pathlib import Path
from PIL import Image
from sklearn.neighbors import NearestNeighbors
from torchvision import models
from torchvision.models import ResNet18_Weights
import albumentations as A
from torch.utils.data import Dataset, DataLoader

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
IMG_SIZE = 128
BATCH_SIZE = 256
WORKERS = 2
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def get_tfms_128():
    return A.Compose([
        A.Resize(IMG_SIZE, IMG_SIZE, interpolation=3),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])

class FeatD8Dataset(Dataset):
    def __init__(self, img_dir, ids_df, rot_k=0, hflip=False):
        self.img_dir = Path(img_dir)
        self.df = ids_df.reset_index(drop=True)
        self.rot_k = rot_k
        self.hflip = hflip
        self.tfms = get_tfms_128()
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / row['id']
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            img = np.array(im)
        if self.rot_k:
            img = np.ascontiguousarray(np.rot90(img, k=self.rot_k))
        if self.hflip:
            img = np.ascontiguousarray(img[:, ::-1, :])
        aug = self.tfms(image=img)
        x = torch.from_numpy(aug['image'].transpose(2,0,1)).float()
        return x, row['id']

def build_feat_model():
    m = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    m.fc = nn.Identity()  # return 512-dim GAP features
    return m.to(DEVICE).eval()

def extract_d8_feats(img_dir, ids_df):
    model = build_feat_model()
    n = len(ids_df)
    feats = np.zeros((n, 512), dtype=np.float32)
    counts = np.zeros(n, dtype=np.int32)
    id_to_idx = {img_id: i for i, img_id in enumerate(ids_df['id'].tolist())}
    with torch.no_grad():
        for rot_k in (0,1,2,3):
            for hf in (False, True):
                ds = FeatD8Dataset(img_dir, ids_df, rot_k=rot_k, hflip=hf)
                dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True)
                t0 = time.time()
                seen = 0
                for x, ids in dl:
                    x = x.to(DEVICE, non_blocking=True)
                    f = model(x).float()  # (bs, 512)
                    f = f.cpu().numpy()
                    for j, img_id in enumerate(ids):
                        idx = id_to_idx[img_id]
                        feats[idx] += f[j]
                        counts[idx] += 1
                    seen += x.size(0)
                    if seen % (BATCH_SIZE*10) == 0:
                        print(f"  [feat] rot={rot_k} hf={hf} seen={seen}/{n}", flush=True)
                print(f"[feat] rot={rot_k} hf={hf} done in {time.time()-t0:.1f}s", flush=True)
    # average over 8 views and L2-normalize
    feats /= counts[:, None].clip(min=1)
    norms = np.linalg.norm(feats, axis=1, keepdims=True)
    feats = feats / np.clip(norms, 1e-12, None)
    del model; gc.collect(); torch.cuda.empty_cache()
    return feats

t_all = time.time()
train_df = pd.read_csv('train.csv')  # id, has_cactus
test_ids = pd.read_csv('sample_submission.csv')[['id']]
print('[knn] extracting train D8 features...')
train_ids = train_df[['id']].copy()
train_feats = extract_d8_feats('train', train_ids)
print('[knn] extracting test D8 features...')
test_feats = extract_d8_feats('test', test_ids)
train_labels = train_df['has_cactus'].astype(np.int8).values

# Build cosine kNN on train feats
print('[knn] fitting NearestNeighbors (cosine)...')
nn_model = NearestNeighbors(n_neighbors=5, metric='cosine', n_jobs=-1)
nn_model.fit(train_feats)
dists, idxs = nn_model.kneighbors(test_feats, n_neighbors=5, return_distance=True)
sims = 1.0 - dists  # cosine similarity
top1_sim = sims[:, 0]
top5_lbls = train_labels[idxs]  # (n_test, 5)
unanimous = (top5_lbls.min(axis=1) == top5_lbls.max(axis=1))
vote = top5_lbls[:, 0]  # same if unanimous

# Load base submission to override (clean 3-model logit blend)
base = pd.read_csv('submission_blend3_true.csv')
base = base.merge(test_ids, on='id', how='right')
probs_base = base['has_cactus'].values.astype(np.float32)

def apply_knn_override(thresh: float, out_path: str):
    mask = (top1_sim >= thresh) & unanimous
    out_probs = probs_base.copy()
    out_probs[mask] = vote[mask].astype(np.float32)
    out = pd.DataFrame({'id': test_ids['id'], 'has_cactus': out_probs})
    out.to_csv(out_path, index=False)
    n_over = int(mask.sum())
    print(f"[knn] saved {out_path} | overrides: {n_over} | thresh={thresh}")
    return n_over

n1 = apply_knn_override(0.997, 'submission_knn_0997.csv')
n2 = apply_knn_override(0.999, 'submission_knn_0999.csv')
print(f"[knn] done in {time.time()-t_all:.1f}s | overrides(0.997,0.999)=({n1},{n2})")

[knn] extracting train D8 features...


  [feat] rot=0 hf=False seen=2560/14175


  [feat] rot=0 hf=False seen=5120/14175


  [feat] rot=0 hf=False seen=7680/14175


  [feat] rot=0 hf=False seen=10240/14175


  [feat] rot=0 hf=False seen=12800/14175


[feat] rot=0 hf=False done in 6.5s


  [feat] rot=0 hf=True seen=2560/14175


  [feat] rot=0 hf=True seen=5120/14175


  [feat] rot=0 hf=True seen=7680/14175


  [feat] rot=0 hf=True seen=10240/14175


  [feat] rot=0 hf=True seen=12800/14175


[feat] rot=0 hf=True done in 6.6s


  [feat] rot=1 hf=False seen=2560/14175


  [feat] rot=1 hf=False seen=5120/14175


  [feat] rot=1 hf=False seen=7680/14175


  [feat] rot=1 hf=False seen=10240/14175


  [feat] rot=1 hf=False seen=12800/14175


[feat] rot=1 hf=False done in 6.7s


  [feat] rot=1 hf=True seen=2560/14175


  [feat] rot=1 hf=True seen=5120/14175


  [feat] rot=1 hf=True seen=7680/14175


  [feat] rot=1 hf=True seen=10240/14175


  [feat] rot=1 hf=True seen=12800/14175


[feat] rot=1 hf=True done in 6.9s


  [feat] rot=2 hf=False seen=2560/14175


  [feat] rot=2 hf=False seen=5120/14175


  [feat] rot=2 hf=False seen=7680/14175


  [feat] rot=2 hf=False seen=10240/14175


  [feat] rot=2 hf=False seen=12800/14175


[feat] rot=2 hf=False done in 6.9s


  [feat] rot=2 hf=True seen=2560/14175


  [feat] rot=2 hf=True seen=5120/14175


  [feat] rot=2 hf=True seen=7680/14175


  [feat] rot=2 hf=True seen=10240/14175


  [feat] rot=2 hf=True seen=12800/14175


[feat] rot=2 hf=True done in 6.8s


  [feat] rot=3 hf=False seen=2560/14175


  [feat] rot=3 hf=False seen=5120/14175


  [feat] rot=3 hf=False seen=7680/14175


  [feat] rot=3 hf=False seen=10240/14175


  [feat] rot=3 hf=False seen=12800/14175


[feat] rot=3 hf=False done in 6.8s


  [feat] rot=3 hf=True seen=2560/14175


  [feat] rot=3 hf=True seen=5120/14175


  [feat] rot=3 hf=True seen=7680/14175


  [feat] rot=3 hf=True seen=10240/14175


  [feat] rot=3 hf=True seen=12800/14175


[feat] rot=3 hf=True done in 6.9s


[knn] extracting test D8 features...


  [feat] rot=0 hf=False seen=2560/3325


[feat] rot=0 hf=False done in 1.9s


  [feat] rot=0 hf=True seen=2560/3325


[feat] rot=0 hf=True done in 1.8s


  [feat] rot=1 hf=False seen=2560/3325


[feat] rot=1 hf=False done in 1.9s


  [feat] rot=1 hf=True seen=2560/3325


[feat] rot=1 hf=True done in 1.9s


  [feat] rot=2 hf=False seen=2560/3325


[feat] rot=2 hf=False done in 1.9s


  [feat] rot=2 hf=True seen=2560/3325


[feat] rot=2 hf=True done in 2.0s


  [feat] rot=3 hf=False seen=2560/3325


[feat] rot=3 hf=False done in 1.9s


  [feat] rot=3 hf=True seen=2560/3325


[feat] rot=3 hf=True done in 1.9s


[knn] fitting NearestNeighbors (cosine)...


[knn] saved submission_knn_0997.csv | overrides: 0 | thresh=0.997
[knn] saved submission_knn_0999.csv | overrides: 0 | thresh=0.999
[knn] done in 72.2s | overrides(0.997,0.999)=(0,0)


In [19]:
# Additional kNN override variants (looser thresholds, different voting) using existing embedding neighbors
import numpy as np, pandas as pd

# Reuse sims, idxs, train_labels, probs_base, test_ids from previous cell
assert 'sims' in globals() and 'idxs' in globals(), 'Run the embedding kNN cell first.'
top1_sim = sims[:,0]
top1_idx = idxs[:,0]
top3 = idxs[:, :3]
top3_sims = sims[:, :3]
top5_lbls = train_labels[idxs]

base_probs = probs_base.copy()

def save_override(mask, labels, fname):
    out_probs = base_probs.copy()
    out_probs[mask] = labels[mask].astype(np.float32)
    out = pd.DataFrame({'id': test_ids['id'], 'has_cactus': out_probs})
    out.to_csv(fname, index=False)
    print(f"[knn-var] saved {fname} | overrides: {int(mask.sum())}")

# Variant A: top-1 sim >= 0.995, take top-1 label
mask_a = top1_sim >= 0.995
labels_a = train_labels[top1_idx]
save_override(mask_a, labels_a, 'submission_knn_0995.csv')

# Variant B: top-1 sim >= 0.993, take top-1 label (more aggressive)
mask_b = top1_sim >= 0.993
labels_b = train_labels[top1_idx]
save_override(mask_b, labels_b, 'submission_knn_0993.csv')

# Variant C: top-3 all highly similar (>=0.995) and unanimous label among top-3
labels_top3 = train_labels[top3]
unanimous_top3 = (labels_top3.min(axis=1) == labels_top3.max(axis=1))
highsim_top3 = (top3_sims.min(axis=1) >= 0.995)
mask_c = unanimous_top3 & highsim_top3
labels_c = labels_top3[:,0]
save_override(mask_c, labels_c, 'submission_knn_top3_0995.csv')

[knn-var] saved submission_knn_0995.csv | overrides: 0
[knn-var] saved submission_knn_0993.csv | overrides: 0
[knn-var] saved submission_knn_top3_0995.csv | overrides: 0


In [20]:
# Raw-pixel cosine kNN overrides (images are 32x32 RGB) on top of clean blend
import numpy as np, pandas as pd, time
from pathlib import Path
from PIL import Image
from sklearn.neighbors import NearestNeighbors

def load_images_matrix(img_dir: str, ids: pd.Series) -> np.ndarray:
    # Returns float32 array (N, 3072), L2-normalized per row
    X = np.empty((len(ids), 32*32*3), dtype=np.float32)
    for i, img_id in enumerate(ids):
        with Image.open(Path(img_dir)/img_id) as im:
            arr = np.asarray(im.convert('RGB'), dtype=np.float32)  # (32,32,3)
        X[i] = arr.reshape(-1)
    # L2 normalize
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    norms = np.clip(norms, 1e-12, None)
    X /= norms
    return X

t0 = time.time()
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('sample_submission.csv')
train_ids = train_df['id']
test_ids_df = test_df[['id']]

print('[pixknn] loading raw pixel matrices...')
X_train = load_images_matrix('train', train_ids)
X_test = load_images_matrix('test', test_ids_df['id'])
y_train = train_df['has_cactus'].astype(np.int8).values
print(f"[pixknn] shapes train={X_train.shape} test={X_test.shape} load_time={time.time()-t0:.1f}s", flush=True)

print('[pixknn] fitting cosine NN...')
nn_model = NearestNeighbors(n_neighbors=5, metric='cosine', n_jobs=-1)
nn_model.fit(X_train)
dists, idxs = nn_model.kneighbors(X_test, n_neighbors=5, return_distance=True)
sims = 1.0 - dists  # cosine similarity
top1_sim = sims[:,0]
top1_idx = idxs[:,0]
top5_lbls = y_train[idxs]
unanimous5 = (top5_lbls.min(axis=1) == top5_lbls.max(axis=1))
vote5 = top5_lbls[:,0]

base = pd.read_csv('submission_blend3_true.csv')
base = base.merge(test_ids_df, on='id', how='right')
probs_base = base['has_cactus'].values.astype(np.float32)

def save_pix_overrides(mask, labels, out_path):
    out_probs = probs_base.copy()
    out_probs[mask] = labels[mask].astype(np.float32)
    out = pd.DataFrame({'id': test_ids_df['id'], 'has_cactus': out_probs})
    out.to_csv(out_path, index=False)
    print(f"[pixknn] saved {out_path} | overrides: {int(mask.sum())}")

# Variant 1: very high sim + unanimous5
mask1 = (top1_sim >= 0.995) & unanimous5
labels1 = vote5
save_pix_overrides(mask1, labels1, 'submission_pixknn_u5_0995.csv')

# Variant 2: high sim top1 only
mask2 = (top1_sim >= 0.995)
labels2 = y_train[top1_idx]
save_pix_overrides(mask2, labels2, 'submission_pixknn_top1_0995.csv')

# Variant 3: slightly looser
mask3 = (top1_sim >= 0.990)
labels3 = y_train[top1_idx]
save_pix_overrides(mask3, labels3, 'submission_pixknn_top1_0990.csv')

print('[pixknn] done.')

[pixknn] loading raw pixel matrices...


[pixknn] shapes train=(14175, 3072) test=(3325, 3072) load_time=3.9s


[pixknn] fitting cosine NN...


[pixknn] saved submission_pixknn_u5_0995.csv | overrides: 202
[pixknn] saved submission_pixknn_top1_0995.csv | overrides: 204
[pixknn] saved submission_pixknn_top1_0990.csv | overrides: 403
[pixknn] done.


In [21]:
# Set final submission to conservative raw-pixel kNN unanimous5 overrides
import pandas as pd
chosen = 'submission_pixknn_u5_0995.csv'
df = pd.read_csv(chosen)
df.to_csv('submission.csv', index=False)
print('submission.csv written from', chosen, '| shape =', df.shape)

submission.csv written from submission_pixknn_u5_0995.csv | shape = (3325, 2)


In [None]:
# SSIM-based nearest-neighbor overrides (conservative) on top of clean 3-model blend
import numpy as np, pandas as pd, time, subprocess, sys
from pathlib import Path
from PIL import Image
from sklearn.neighbors import NearestNeighbors
try:
    from skimage.metrics import structural_similarity as ssim
except Exception:
    print('[ssim] scikit-image not found. Installing...')
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'scikit-image', '--quiet'], check=False)
    from skimage.metrics import structural_similarity as ssim

def load_flat32(img_dir: str, ids: pd.Series) -> np.ndarray:
    X = np.empty((len(ids), 32*32*3), dtype=np.float32)
    for i, img_id in enumerate(ids):
        with Image.open(Path(img_dir)/img_id) as im:
            arr = np.asarray(im.convert('RGB'), dtype=np.float32)
        X[i] = arr.reshape(-1)
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    X /= np.clip(norms, 1e-12, None)
    return X

def load_gray32(path: Path) -> np.ndarray:
    with Image.open(path) as im:
        im = im.convert('L')  # grayscale
        arr = np.asarray(im, dtype=np.float32) / 255.0
    return arr

t0 = time.time()
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('sample_submission.csv')
train_ids = train_df['id']
test_ids_df = test_df[['id']]
y_train = train_df['has_cactus'].astype(np.int8).values

print('[ssim] building cosine top1 index on raw 32x32...')
X_tr = load_flat32('train', train_ids)
X_te = load_flat32('test', test_ids_df['id'])
nn = NearestNeighbors(n_neighbors=1, metric='cosine', n_jobs=-1).fit(X_tr)
dists, idxs = nn.kneighbors(X_te, n_neighbors=1, return_distance=True)
cos_top1 = 1.0 - dists.squeeze(1)
top1_idx = idxs.squeeze(1)
print('[ssim] NN ready. computing SSIM for candidate pairs...')

train_dir = Path('train'); test_dir = Path('test')
ssims = np.zeros(len(test_ids_df), dtype=np.float32)
for i, tid in enumerate(test_ids_df['id']):
    tr_id = train_ids.iloc[top1_idx[i]]
    g_test = load_gray32(test_dir / tid)
    g_train = load_gray32(train_dir / tr_id)
    ssims[i] = ssim(g_test, g_train, data_range=1.0)
    if (i+1) % 500 == 0:
        print(f'  [ssim] {i+1}/{len(test_ids_df)} computed', flush=True)

base = pd.read_csv('submission_blend3_true.csv')
base = base.merge(test_ids_df, on='id', how='right')
probs_base = base['has_cactus'].values.astype(np.float32)

def save_ssim_override(ssim_thresh: float, cos_thresh: float, out_path: str):
    mask = (ssims >= ssim_thresh) & (cos_top1 >= cos_thresh)
    labels = y_train[top1_idx].astype(np.float32)
    out_probs = probs_base.copy()
    out_probs[mask] = labels[mask]
    out = pd.DataFrame({'id': test_ids_df['id'], 'has_cactus': out_probs})
    out.to_csv(out_path, index=False)
    print(f"[ssim] saved {out_path} | overrides: {int(mask.sum())} | ssim>= {ssim_thresh} & cos>= {cos_thresh}")

# Conservative and moderate variants per expert advice
save_ssim_override(0.99, 0.998, 'submission_ssim_0990_cos0998.csv')
save_ssim_override(0.985, 0.995, 'submission_ssim_0985_cos0995.csv')
print(f"[ssim] done in {time.time()-t0:.1f}s")

# Set submission.csv to the most conservative SSIM override
cons = 'submission_ssim_0990_cos0998.csv'
df = pd.read_csv(cons)
df.to_csv('submission.csv', index=False)
print('submission.csv written from', cons, '| shape =', df.shape)

[ssim] building cosine top1 index on raw 32x32...
