# Plan: Aerial Cactus Identification (Kernels-Only Emulation)

Objectives:
- Produce submission.csv with test predictions.
- Target AUC-ROC ~1.0 (medal-level).

Data:
- train.csv: id,label
- train.zip: training images
- test.zip: test images
- sample_submission.csv: required format

High-Level Approach:
1) Setup & Unzip: Extract train.zip and test.zip. Verify counts.
2) EDA/Checks: Class balance, image size, basic sanity-check.
3) Cross-Validation: Stratified KFold (e.g., 5 folds).
4) Model: Simple but strong image classifier.
   - Preferred: PyTorch + timm (efficientnet_b0 or resnet18) with pretrained weights.
   - If timm unavailable: torchvision resnet18 pretrained.
   - Augmentations via Albumentations.
5) Training:
   - Input size: 128 or 224.
   - Loss: BCEWithLogitsLoss.
   - Optimizer: AdamW, OneCycle or cosine schedule.
   - Early stopping on AUC; 5–10 epochs per fold (dataset is small).
6) Inference:
   - TTA (horizontal/vertical flips) if time allows.
   - Average fold predictions.
7) Submission: Create submission.csv with id, has_cactus.

Efficiency/Logging:
- Print fold indices, times, and epoch logs with AUC.
- Keep training light; interrupt if diminishing returns.

Checkpoints for Expert Review:
- After this plan.
- After data load/unzip & EDA.
- After baseline CV model and initial AUC.
- Before long trainings or grid searches.
- After validation AUC and before final inference.

Risk Mitigation:
- If installs heavy, fall back to torchvision-only resnet18.
- If torch unavailable, quick Keras/TF CNN.
- If time-critical, train single model no-CV and submit.

Next:
- Get expert feedback on plan; then implement unzip + data loaders.

In [3]:
# Setup: imports, seed, unzip, quick EDA
import os, sys, time, random, zipfile, math, gc
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

SEED = 42
def set_seed(seed=SEED):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed()

DATA_DIR = Path('.')
TRAIN_ZIP = DATA_DIR / 'train.zip'
TEST_ZIP = DATA_DIR / 'test.zip'
TRAIN_DIR = DATA_DIR / 'train'
TEST_DIR = DATA_DIR / 'test'

def safe_unzip(zip_path, out_dir):
    if out_dir.exists() and any(out_dir.iterdir()):
        print(f"Exists: {out_dir}, skipping unzip.")
        return
    print(f"Unzipping {zip_path} -> {out_dir}")
    with zipfile.ZipFile(zip_path, 'r') as z:
        z.extractall(out_dir)
    print(f"Done unzip: {out_dir}")

safe_unzip(TRAIN_ZIP, TRAIN_DIR)
safe_unzip(TEST_ZIP, TEST_DIR)

# Load CSVs
train_df = pd.read_csv(DATA_DIR / 'train.csv')
sub_df = pd.read_csv(DATA_DIR / 'sample_submission.csv')
print('train_df shape:', train_df.shape)
print('sub_df shape:', sub_df.shape)
print('train head:\n', train_df.head())

# Basic checks
print('Unique labels:', train_df['has_cactus'].value_counts().to_dict())
pos_rate = train_df['has_cactus'].mean()
print(f'Pos rate: {pos_rate:.4f}')

# Verify a few images and size
sample_ids = train_df['id'].head(3).tolist()
sizes = []
for sid in sample_ids:
    p = TRAIN_DIR / sid
    with Image.open(p) as im:
        sizes.append(im.size)
print('Sample image sizes:', sizes)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)

Unzipping train.zip -> train


Done unzip: train
Unzipping test.zip -> test


Done unzip: test
train_df shape: (14175, 2)
sub_df shape: (3325, 2)
train head:
                                      id  has_cactus
0  2de8f189f1dce439766637e75df0ee27.jpg           1
1  36704d250f236238e7f996812c48235d.jpg           1
2  eacde22fdc8c175972a5768e3daa8bc9.jpg           1
3  5d442f834da5e57d22b24802c32a8ca8.jpg           1
4  152491e0daf75c0e669400300ff7e645.jpg           1
Unique labels: {1: 10628, 0: 3547}
Pos rate: 0.7498
Sample image sizes: [(32, 32), (32, 32), (32, 32)]
Device: cuda


In [2]:
# Install PyTorch (CUDA 12.1 wheels) and torchvision
import sys, subprocess, time
start = time.time()
print('Installing torch/torchvision...')
cmd = [sys.executable, '-m', 'pip', 'install', '--quiet', '--upgrade', 'pip']
subprocess.run(cmd, check=True)
cmd = [sys.executable, '-m', 'pip', 'install', '--quiet', '--index-url', 'https://download.pytorch.org/whl/cu121', 'torch', 'torchvision']
subprocess.run(cmd, check=True)
print(f'Install done in {time.time()-start:.1f}s')

# Quick import test
import torch, torchvision
print('torch version:', torch.__version__)
print('torchvision version:', torchvision.__version__)
print('CUDA available:', torch.cuda.is_available())

Installing torch/torchvision...




[0m

Install done in 106.9s


torch version: 2.5.1+cu121
torchvision version: 0.20.1+cu121
CUDA available: True


In [4]:
# Dataset, Model, 5-fold Training, Inference, Submission
import time, copy
from collections import defaultdict

class CactusDataset(Dataset):
    def __init__(self, df, img_dir, transforms=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = Path(img_dir)
        self.transforms = transforms
        self.has_label = 'has_cactus' in self.df.columns
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.img_dir / row['id']
        with Image.open(img_path) as im:
            im = im.convert('RGB')
            img = im
        if self.transforms is not None:
            img = self.transforms(img)
        if self.has_label:
            label = torch.tensor(row['has_cactus'], dtype=torch.float32)
            return img, label
        else:
            return img, row['id']

# Transforms
train_tfms = T.Compose([
    T.RandomHorizontalFlip(p=0.5),
    T.RandomVerticalFlip(p=0.5),
    T.ColorJitter(brightness=0.1, contrast=0.1),
    T.ToTensor(),
    T.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])
valid_tfms = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])

class SmallCNN(nn.Module):
    def __init__(self):
        super().__init__()
        def block(in_c, out_c):
            return nn.Sequential(
                nn.Conv2d(in_c, out_c, 3, padding=1),
                nn.BatchNorm2d(out_c),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_c, out_c, 3, padding=1),
                nn.BatchNorm2d(out_c),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2)
            )
        self.features = nn.Sequential(
            block(3, 32),   # 32->16
            block(32, 64),  # 16->8
            block(64, 128)  # 8->4
        )
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(0.2)
        self.head = nn.Linear(128, 1)
    def forward(self, x):
        x = self.features(x)
        x = self.gap(x).flatten(1)
        x = self.dropout(x)
        x = self.head(x)
        return x

def compute_auc(y_true, y_pred):
    try:
        return roc_auc_score(y_true, y_pred)
    except ValueError:
        return float('nan')

def train_one_fold(fold, trn_df, val_df, epochs=12, batch_size=256, lr=1e-3, wd=1e-4, min_epochs=5, patience=3):
    model = SmallCNN().to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    scaler = torch.cuda.amp.GradScaler(enabled=(device.type=='cuda'))
    loss_fn = nn.BCEWithLogitsLoss()

    train_ds = CactusDataset(trn_df, TRAIN_DIR, transforms=train_tfms)
    val_ds = CactusDataset(val_df, TRAIN_DIR, transforms=valid_tfms)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, drop_last=False)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, drop_last=False)

    best_auc = -1.0
    best_state = None
    no_improve = 0
    start_time = time.time()

    for epoch in range(1, epochs+1):
        ep_start = time.time()
        # Warmup (simple): linearly scale lr for first 2 epochs
        if epoch <= 2:
            for pg in optimizer.param_groups:
                pg['lr'] = lr * epoch / 2.0
        model.train()
        trn_loss = 0.0
        n_batches = 0
        for xb, yb in train_loader:
            xb = xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True).view(-1,1)
            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):
                logits = model(xb)
                loss = loss_fn(logits, yb)
            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            trn_loss += loss.item()
            n_batches += 1
        scheduler.step()
        trn_loss /= max(1, n_batches)

        # Validation
        model.eval()
        val_probs = []
        val_targets = []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb = xb.to(device, non_blocking=True)
                logits = model(xb)
                probs = torch.sigmoid(logits).squeeze(1).detach().cpu().numpy()
                val_probs.append(probs)
                val_targets.append(yb.numpy())
        val_probs = np.concatenate(val_probs)
        val_targets = np.concatenate(val_targets)
        val_auc = compute_auc(val_targets, val_probs)
        elapsed_ep = time.time() - ep_start
        print(f"Fold {fold} Epoch {epoch}/{epochs} - trn_loss {trn_loss:.4f} val_auc {val_auc:.6f} time {elapsed_ep:.1f}s")
        # Early stopping
        improved = val_auc > best_auc + 1e-6
        if improved:
            best_auc = val_auc
            best_state = copy.deepcopy(model.state_dict())
            no_improve = 0
        else:
            no_improve += 1
        if epoch >= min_epochs and no_improve >= patience:
            print(f"Early stopping at epoch {epoch}. Best val_auc {best_auc:.6f}")
            break

    total_time = time.time() - start_time
    print(f"Fold {fold} training done in {total_time/60:.2f} min. Best AUC {best_auc:.6f}")
    if best_state is not None:
        model.load_state_dict(best_state)
    # OOF for this fold
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
    model.eval()
    oof_probs = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device, non_blocking=True)
            logits = model(xb)
            probs = torch.sigmoid(logits).squeeze(1).cpu().numpy()
            oof_probs.append(probs)
    oof_probs = np.concatenate(oof_probs)
    return model, oof_probs, best_auc

# Prepare folds
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
train_df = train_df.sample(frac=1.0, random_state=SEED).reset_index(drop=True)  # shuffle once
oof = np.zeros(len(train_df), dtype=np.float32)
fold_models = []
fold_aucs = []

for fold, (trn_idx, val_idx) in enumerate(skf.split(train_df['id'], train_df['has_cactus']), start=1):
    trn_df = train_df.iloc[trn_idx].reset_index(drop=True)
    val_df = train_df.iloc[val_idx].reset_index(drop=True)
    print(f"========== Fold {fold} (n_trn={len(trn_df)} n_val={len(val_df)}) ==========")
    model, val_probs, best_auc = train_one_fold(
        fold=fold, trn_df=trn_df, val_df=val_df,
        epochs=12, batch_size=256, lr=1e-3, wd=1e-4, min_epochs=5, patience=3
    )
    oof[val_idx] = val_probs
    fold_models.append(model)
    fold_aucs.append(best_auc)
    gc.collect()
    torch.cuda.empty_cache()

oof_auc = compute_auc(train_df['has_cactus'].values, oof)
print(f"OOF AUC: {oof_auc:.6f}")
print("Fold AUCs:", [float(f"{a:.6f}") for a in fold_aucs])

# Inference with TTA (original, hflip, vflip, rot90) averaged across folds
def tta_preds(models, img_tensor):
    # img_tensor: [B,3,32,32] normalized
    with torch.no_grad():
        logits_agg = 0.0
        for m in models:
            m.eval()
            # original
            logits = m(img_tensor)
            logits_agg += logits
            # hflip
            logits_agg += m(torch.flip(img_tensor, dims=[3]))
            # vflip
            logits_agg += m(torch.flip(img_tensor, dims=[2]))
            # rot90 (k=1)
            logits_agg += m(torch.rot90(img_tensor, k=1, dims=[2,3]))
        logits_agg = logits_agg / (len(models) * 4.0)
        probs = torch.sigmoid(logits_agg).squeeze(1)
    return probs

test_ds = CactusDataset(sub_df, TEST_DIR, transforms=valid_tfms)
test_loader = DataLoader(test_ds, batch_size=512, shuffle=False, num_workers=4, pin_memory=True)
all_probs = []
start_inf = time.time()
with torch.no_grad():
    for xb, ids in test_loader:
        xb = xb.to(device, non_blocking=True)
        probs = tta_preds(fold_models, xb).cpu().numpy()
        all_probs.append(probs)
all_probs = np.concatenate(all_probs)
print(f"Test inference done in {time.time()-start_inf:.1f}s")

# Create submission in sample order
submission = sub_df.copy()
submission['has_cactus'] = all_probs.astype(float)
submission.to_csv('submission.csv', index=False)
print('Saved submission.csv with shape:', submission.shape)
submission.head()



  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=='cuda'))


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 1/12 - trn_loss 0.1824 val_auc 0.996398 time 4.1s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 2/12 - trn_loss 0.0816 val_auc 0.997705 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 3/12 - trn_loss 0.0592 val_auc 0.999294 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 4/12 - trn_loss 0.0516 val_auc 0.998784 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 5/12 - trn_loss 0.0339 val_auc 0.999463 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 6/12 - trn_loss 0.0275 val_auc 0.999093 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 7/12 - trn_loss 0.0246 val_auc 0.998875 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 8/12 - trn_loss 0.0198 val_auc 0.999626 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 9/12 - trn_loss 0.0170 val_auc 0.999701 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 10/12 - trn_loss 0.0113 val_auc 0.999647 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 11/12 - trn_loss 0.0095 val_auc 0.999619 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 1 Epoch 12/12 - trn_loss 0.0076 val_auc 0.999792 time 3.6s
Fold 1 training done in 0.74 min. Best AUC 0.999792




  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=='cuda'))


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 1/12 - trn_loss 0.1983 val_auc 0.994566 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 2/12 - trn_loss 0.0906 val_auc 0.998635 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 3/12 - trn_loss 0.0554 val_auc 0.997253 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 4/12 - trn_loss 0.0467 val_auc 0.998887 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 5/12 - trn_loss 0.0357 val_auc 0.999668 time 3.4s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 6/12 - trn_loss 0.0298 val_auc 0.999813 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 7/12 - trn_loss 0.0250 val_auc 0.999550 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 8/12 - trn_loss 0.0233 val_auc 0.999854 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 9/12 - trn_loss 0.0176 val_auc 0.999877 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 10/12 - trn_loss 0.0143 val_auc 0.999862 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 11/12 - trn_loss 0.0125 val_auc 0.999876 time 3.5s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 2 Epoch 12/12 - trn_loss 0.0117 val_auc 0.999932 time 3.5s
Fold 2 training done in 0.72 min. Best AUC 0.999932




  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=='cuda'))


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 1/12 - trn_loss 0.1665 val_auc 0.993516 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 2/12 - trn_loss 0.0800 val_auc 0.997751 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 3/12 - trn_loss 0.0518 val_auc 0.993817 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 4/12 - trn_loss 0.0518 val_auc 0.998712 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 5/12 - trn_loss 0.0418 val_auc 0.998805 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 6/12 - trn_loss 0.0282 val_auc 0.999496 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 7/12 - trn_loss 0.0247 val_auc 0.999402 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 8/12 - trn_loss 0.0177 val_auc 0.999464 time 3.5s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 9/12 - trn_loss 0.0163 val_auc 0.999720 time 3.5s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 10/12 - trn_loss 0.0130 val_auc 0.999574 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 11/12 - trn_loss 0.0107 val_auc 0.999759 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 3 Epoch 12/12 - trn_loss 0.0111 val_auc 0.999794 time 3.6s
Fold 3 training done in 0.72 min. Best AUC 0.999794




  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=='cuda'))


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 1/12 - trn_loss 0.1859 val_auc 0.991696 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 2/12 - trn_loss 0.0853 val_auc 0.996551 time 3.8s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 3/12 - trn_loss 0.0664 val_auc 0.998479 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 4/12 - trn_loss 0.0487 val_auc 0.999221 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 5/12 - trn_loss 0.0333 val_auc 0.995456 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 6/12 - trn_loss 0.0290 val_auc 0.998598 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 7/12 - trn_loss 0.0229 val_auc 0.999611 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 8/12 - trn_loss 0.0181 val_auc 0.999665 time 3.5s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 9/12 - trn_loss 0.0144 val_auc 0.999791 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 10/12 - trn_loss 0.0119 val_auc 0.999818 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 11/12 - trn_loss 0.0108 val_auc 0.999779 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 4 Epoch 12/12 - trn_loss 0.0096 val_auc 0.999810 time 3.5s
Fold 4 training done in 0.72 min. Best AUC 0.999818




  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=='cuda'))


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 1/12 - trn_loss 0.1689 val_auc 0.995291 time 3.5s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 2/12 - trn_loss 0.0859 val_auc 0.992425 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 3/12 - trn_loss 0.0573 val_auc 0.998798 time 3.8s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 4/12 - trn_loss 0.0420 val_auc 0.999245 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 5/12 - trn_loss 0.0357 val_auc 0.999356 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 6/12 - trn_loss 0.0340 val_auc 0.999779 time 3.5s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 7/12 - trn_loss 0.0215 val_auc 0.999344 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 8/12 - trn_loss 0.0202 val_auc 0.999807 time 3.7s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 9/12 - trn_loss 0.0135 val_auc 0.999895 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 10/12 - trn_loss 0.0129 val_auc 0.999922 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 11/12 - trn_loss 0.0095 val_auc 0.999955 time 3.6s


  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):


Fold 5 Epoch 12/12 - trn_loss 0.0083 val_auc 0.999961 time 3.5s
Fold 5 training done in 0.72 min. Best AUC 0.999961


OOF AUC: 0.999864
Fold AUCs: [0.999792, 0.999932, 0.999794, 0.999818, 0.999961]


Test inference done in 2.4s
Saved submission.csv with shape: (3325, 2)


Unnamed: 0,id,has_cactus
0,09034a34de0e2015a8a28dfe18f423f6.jpg,0.999596
1,134f04305c795d6d202502c2ce3578f3.jpg,0.999914
2,41fad8d145e6c41868ce3617e30a2545.jpg,0.999906
3,35f8a11352c8d41b6231bb33d8d09f7e.jpg,0.999976
4,b77dc902b035887cbbc01920ce0e3151.jpg,0.999909


In [6]:
# Backup Track B: ResNet18 pretrained 5-fold + Ensemble with SmallCNN
import os
import torchvision.models as models
from torchvision.models import ResNet18_Weights

# Ensure a writable cache directory for pretrained weights (avoid read-only /app/.cache)
TORCH_CACHE = Path('./torch_cache')
TORCH_CACHE.mkdir(exist_ok=True, parents=True)
os.environ['TORCH_HOME'] = str(TORCH_CACHE.resolve())
os.environ['XDG_CACHE_HOME'] = str(TORCH_CACHE.resolve())  # force torch.hub to use ./torch_cache/torch
import torch.hub as hub
hub.set_dir(str(TORCH_CACHE.resolve()))
print('TORCH_HOME:', os.environ.get('TORCH_HOME'))
print('XDG_CACHE_HOME:', os.environ.get('XDG_CACHE_HOME'))

IMG_SIZE = 128
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

train_tfms_res = T.Compose([
    T.Resize((IMG_SIZE, IMG_SIZE), interpolation=T.InterpolationMode.BICUBIC),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomVerticalFlip(p=0.5),
    T.ColorJitter(brightness=0.1, contrast=0.1),
    T.ToTensor(),
    T.Normalize(mean=imagenet_mean, std=imagenet_std)
])
valid_tfms_res = T.Compose([
    T.Resize((IMG_SIZE, IMG_SIZE), interpolation=T.InterpolationMode.BICUBIC),
    T.ToTensor(),
    T.Normalize(mean=imagenet_mean, std=imagenet_std)
])

class ResNet18Binary(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
        in_feats = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_feats, 1)
    def forward(self, x):
        return self.backbone(x)

def train_one_fold_resnet(fold, trn_df, val_df, epochs=8, batch_size=128, lr=1e-3, wd=1e-4, min_epochs=4, patience=2):
    model = ResNet18Binary().to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    scaler = torch.amp.GradScaler('cuda', enabled=(device.type=='cuda'))
    loss_fn = nn.BCEWithLogitsLoss()

    train_ds = CactusDataset(trn_df, TRAIN_DIR, transforms=train_tfms_res)
    val_ds = CactusDataset(val_df, TRAIN_DIR, transforms=valid_tfms_res)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, drop_last=False)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, drop_last=False)

    best_auc = -1.0
    best_state = None
    no_improve = 0

    for epoch in range(1, epochs+1):
        if epoch <= 2:
            for pg in optimizer.param_groups:
                pg['lr'] = lr * epoch / 2.0
        model.train()
        trn_loss = 0.0
        nb = 0
        ep_start = time.time()
        for xb, yb in train_loader:
            xb = xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True).view(-1,1)
            optimizer.zero_grad(set_to_none=True)
            with torch.amp.autocast('cuda', enabled=(device.type=='cuda')):
                logits = model(xb)
                loss = loss_fn(logits, yb)
            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            trn_loss += loss.item()
            nb += 1
        scheduler.step()
        trn_loss /= max(1, nb)

        # Validation
        model.eval()
        val_probs, val_tgts = [], []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb = xb.to(device, non_blocking=True)
                logits = model(xb)
                probs = torch.sigmoid(logits).squeeze(1).cpu().numpy()
                val_probs.append(probs)
                val_tgts.append(yb.numpy())
        val_probs = np.concatenate(val_probs)
        val_tgts = np.concatenate(val_tgts)
        val_auc = compute_auc(val_tgts, val_probs)
        print(f"[ResNet] Fold {fold} Epoch {epoch}/{epochs} - trn_loss {trn_loss:.4f} val_auc {val_auc:.6f} time {time.time()-ep_start:.1f}s")
        if val_auc > best_auc + 1e-6:
            best_auc = val_auc
            best_state = copy.deepcopy(model.state_dict())
            no_improve = 0
        else:
            no_improve += 1
        if epoch >= min_epochs and no_improve >= patience:
            print(f"[ResNet] Early stopping at epoch {epoch}. Best AUC {best_auc:.6f}")
            break

    if best_state is not None:
        model.load_state_dict(best_state)
    # Return OOF predictions for this fold
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
    model.eval()
    oof_probs = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device, non_blocking=True)
            logits = model(xb)
            probs = torch.sigmoid(logits).squeeze(1).cpu().numpy()
            oof_probs.append(probs)
    return model, np.concatenate(oof_probs), best_auc

def tta_preds_resnet(models, img_tensor):
    with torch.no_grad():
        logits_agg = 0.0
        for m in models:
            m.eval()
            logits_agg += m(img_tensor)
            logits_agg += m(torch.flip(img_tensor, dims=[3]))
            logits_agg += m(torch.flip(img_tensor, dims=[2]))
            logits_agg += m(torch.rot90(img_tensor, k=1, dims=[2,3]))
        logits_agg = logits_agg / (len(models) * 4.0)
        probs = torch.sigmoid(logits_agg).squeeze(1)
    return probs

# Train 5-fold ResNet18
skf2 = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
oof_res = np.zeros(len(train_df), dtype=np.float32)
res_models = []
res_fold_aucs = []
for fold, (trn_idx, val_idx) in enumerate(skf2.split(train_df['id'], train_df['has_cactus']), start=1):
    trn_df = train_df.iloc[trn_idx].reset_index(drop=True)
    val_df = train_df.iloc[val_idx].reset_index(drop=True)
    print(f"[ResNet] ===== Fold {fold} (n_trn={len(trn_df)} n_val={len(val_df)}) =====")
    model, val_probs, best_auc = train_one_fold_resnet(
        fold=fold, trn_df=trn_df, val_df=val_df,
        epochs=8, batch_size=128, lr=1e-3, wd=1e-4, min_epochs=4, patience=2
    )
    oof_res[val_idx] = val_probs
    res_models.append(model)
    res_fold_aucs.append(best_auc)
    gc.collect(); torch.cuda.empty_cache()

oof_auc_res = compute_auc(train_df['has_cactus'].values, oof_res)
print(f"[ResNet] OOF AUC: {oof_auc_res:.6f}")
print("[ResNet] Fold AUCs:", [float(f"{a:.6f}") for a in res_fold_aucs])

# Test inference for ResNet
test_ds_res = CactusDataset(sub_df, TEST_DIR, transforms=valid_tfms_res)
test_loader_res = DataLoader(test_ds_res, batch_size=256, shuffle=False, num_workers=4, pin_memory=True)
all_probs_res = []
with torch.no_grad():
    for xb, ids in test_loader_res:
        xb = xb.to(device, non_blocking=True)
        probs = tta_preds_resnet(res_models, xb).cpu().numpy()
        all_probs_res.append(probs)
all_probs_res = np.concatenate(all_probs_res)

# Recompute SmallCNN test preds (ensure fresh loader) and ensemble
test_ds_small = CactusDataset(sub_df, TEST_DIR, transforms=valid_tfms)
test_loader_small = DataLoader(test_ds_small, batch_size=512, shuffle=False, num_workers=4, pin_memory=True)
all_probs_small = []
with torch.no_grad():
    for xb, ids in test_loader_small:
        xb = xb.to(device, non_blocking=True)
        probs = tta_preds(fold_models, xb).cpu().numpy()
        all_probs_small.append(probs)
all_probs_small = np.concatenate(all_probs_small)

final_probs = (all_probs_small + all_probs_res) / 2.0
submission2 = sub_df.copy()
submission2['has_cactus'] = final_probs.astype(float)
submission2.to_csv('submission.csv', index=False)
print('Ensembled submission saved. Shape:', submission2.shape)
submission2.head()

TORCH_HOME: /app/agent_run_states/aerial-cactus-identification-spray-20250910-034735/torch_cache
XDG_CACHE_HOME: /app/agent_run_states/aerial-cactus-identification-spray-20250910-034735/torch_cache
[ResNet] ===== Fold 1 (n_trn=11340 n_val=2835) =====


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /app/agent_run_states/aerial-cactus-identification-spray-20250910-034735/torch_cache/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

 36%|███▌      | 16.1M/44.7M [00:00<00:00, 168MB/s]

 88%|████████▊ | 39.2M/44.7M [00:00<00:00, 212MB/s]

100%|██████████| 44.7M/44.7M [00:00<00:00, 209MB/s]




[ResNet] Fold 1 Epoch 1/8 - trn_loss 0.0536 val_auc 0.999820 time 7.2s


[ResNet] Fold 1 Epoch 2/8 - trn_loss 0.0319 val_auc 0.999303 time 6.9s


[ResNet] Fold 1 Epoch 3/8 - trn_loss 0.0401 val_auc 0.999626 time 6.9s


[ResNet] Fold 1 Epoch 4/8 - trn_loss 0.0220 val_auc 0.999976 time 7.0s


[ResNet] Fold 1 Epoch 5/8 - trn_loss 0.0145 val_auc 0.999969 time 6.9s


[ResNet] Fold 1 Epoch 6/8 - trn_loss 0.0032 val_auc 0.999997 time 6.9s


[ResNet] Fold 1 Epoch 7/8 - trn_loss 0.0015 val_auc 1.000000 time 6.9s


[ResNet] Fold 1 Epoch 8/8 - trn_loss 0.0004 val_auc 1.000000 time 6.8s


[ResNet] ===== Fold 2 (n_trn=11340 n_val=2835) =====


[ResNet] Fold 2 Epoch 1/8 - trn_loss 0.0557 val_auc 0.999903 time 6.9s


[ResNet] Fold 2 Epoch 2/8 - trn_loss 0.0336 val_auc 0.999625 time 6.9s


[ResNet] Fold 2 Epoch 3/8 - trn_loss 0.0189 val_auc 0.999662 time 6.9s


[ResNet] Fold 2 Epoch 4/8 - trn_loss 0.0235 val_auc 0.999122 time 6.9s
[ResNet] Early stopping at epoch 4. Best AUC 0.999903


[ResNet] ===== Fold 3 (n_trn=11340 n_val=2835) =====


[ResNet] Fold 3 Epoch 1/8 - trn_loss 0.0533 val_auc 0.999916 time 7.0s


[ResNet] Fold 3 Epoch 2/8 - trn_loss 0.0287 val_auc 0.999064 time 6.9s


[ResNet] Fold 3 Epoch 3/8 - trn_loss 0.0273 val_auc 0.999699 time 7.0s


[ResNet] Fold 3 Epoch 4/8 - trn_loss 0.0196 val_auc 0.999203 time 6.9s
[ResNet] Early stopping at epoch 4. Best AUC 0.999916


[ResNet] ===== Fold 4 (n_trn=11340 n_val=2835) =====


[ResNet] Fold 4 Epoch 1/8 - trn_loss 0.0520 val_auc 0.999900 time 6.9s


[ResNet] Fold 4 Epoch 2/8 - trn_loss 0.0422 val_auc 0.999239 time 6.9s


[ResNet] Fold 4 Epoch 3/8 - trn_loss 0.0289 val_auc 0.999856 time 6.9s


[ResNet] Fold 4 Epoch 4/8 - trn_loss 0.0286 val_auc 0.999834 time 6.9s
[ResNet] Early stopping at epoch 4. Best AUC 0.999900


[ResNet] ===== Fold 5 (n_trn=11340 n_val=2835) =====


[ResNet] Fold 5 Epoch 1/8 - trn_loss 0.0885 val_auc 0.999910 time 6.9s


[ResNet] Fold 5 Epoch 2/8 - trn_loss 0.0441 val_auc 0.999200 time 6.9s


[ResNet] Fold 5 Epoch 3/8 - trn_loss 0.0258 val_auc 0.999645 time 6.9s


[ResNet] Fold 5 Epoch 4/8 - trn_loss 0.0210 val_auc 0.999992 time 6.9s


[ResNet] Fold 5 Epoch 5/8 - trn_loss 0.0164 val_auc 1.000000 time 6.9s


[ResNet] Fold 5 Epoch 6/8 - trn_loss 0.0042 val_auc 1.000000 time 6.8s


[ResNet] Fold 5 Epoch 7/8 - trn_loss 0.0029 val_auc 1.000000 time 7.1s
[ResNet] Early stopping at epoch 7. Best AUC 1.000000


[ResNet] OOF AUC: 0.999919
[ResNet] Fold AUCs: [1.0, 0.999903, 0.999916, 0.9999, 1.0]


Ensembled submission saved. Shape: (3325, 2)


Unnamed: 0,id,has_cactus
0,09034a34de0e2015a8a28dfe18f423f6.jpg,0.999798
1,134f04305c795d6d202502c2ce3578f3.jpg,0.999956
2,41fad8d145e6c41868ce3617e30a2545.jpg,0.999952
3,35f8a11352c8d41b6231bb33d8d09f7e.jpg,0.999987
4,b77dc902b035887cbbc01920ce0e3151.jpg,0.999953


In [7]:
# Recompute test predictions with stronger 8-way TTA and re-save submission
import torch
def tta8_preds(models, x):
    # x: [B,3,H,W]
    outs = 0.0
    trans = []
    trans.append(x)  # 0 deg
    trans.append(torch.flip(x, dims=[3]))  # hflip
    trans.append(torch.flip(x, dims=[2]))  # vflip
    trans.append(torch.rot90(x, k=1, dims=[2,3]))  # 90
    trans.append(torch.rot90(x, k=2, dims=[2,3]))  # 180
    trans.append(torch.rot90(x, k=3, dims=[2,3]))  # 270
    trans.append(torch.flip(torch.rot90(x, k=1, dims=[2,3]), dims=[3]))  # 90 + hflip
    trans.append(torch.flip(torch.rot90(x, k=1, dims=[2,3]), dims=[2]))  # 90 + vflip
    with torch.no_grad():
        for m in models:
            m.eval()
            logits_sum = 0.0
            for t in trans:
                logits_sum = logits_sum + m(t)
            outs = outs + logits_sum / len(trans)
    outs = outs / len(models)
    probs = torch.sigmoid(outs).squeeze(1)
    return probs

# SmallCNN test preds with 8-way TTA
test_ds_small8 = CactusDataset(sub_df, TEST_DIR, transforms=valid_tfms)
test_loader_small8 = DataLoader(test_ds_small8, batch_size=512, shuffle=False, num_workers=4, pin_memory=True)
probs_small8 = []
with torch.no_grad():
    for xb, ids in test_loader_small8:
        xb = xb.to(device, non_blocking=True)
        probs_small8.append(tta8_preds(fold_models, xb).cpu().numpy())
probs_small8 = np.concatenate(probs_small8)

# ResNet18 test preds with 8-way TTA
test_ds_res8 = CactusDataset(sub_df, TEST_DIR, transforms=valid_tfms_res)
test_loader_res8 = DataLoader(test_ds_res8, batch_size=256, shuffle=False, num_workers=4, pin_memory=True)
probs_res8 = []
with torch.no_grad():
    for xb, ids in test_loader_res8:
        xb = xb.to(device, non_blocking=True)
        probs_res8.append(tta8_preds(res_models, xb).cpu().numpy())
probs_res8 = np.concatenate(probs_res8)

# Weighted ensemble (lean slightly on ResNet)
final_probs8 = (0.6 * probs_res8 + 0.4 * probs_small8).astype(float)
submission8 = sub_df.copy()
submission8['has_cactus'] = final_probs8
submission8.to_csv('submission.csv', index=False)
print('Saved enhanced TTA submission.csv. Shape:', submission8.shape)
submission8.head()

Saved enhanced TTA submission.csv. Shape: (3325, 2)


Unnamed: 0,id,has_cactus
0,09034a34de0e2015a8a28dfe18f423f6.jpg,0.999739
1,134f04305c795d6d202502c2ce3578f3.jpg,0.999962
2,41fad8d145e6c41868ce3617e30a2545.jpg,0.999972
3,35f8a11352c8d41b6231bb33d8d09f7e.jpg,0.999991
4,b77dc902b035887cbbc01920ce0e3151.jpg,0.999964


In [8]:
# Add pixel-level Logistic Regression CV model and ensemble with current submission
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

def load_flat_pixels(df, img_dir):
    X = np.zeros((len(df), 32*32*3), dtype=np.float32)
    for i, img_id in enumerate(df['id'].values):
        with Image.open(Path(img_dir)/img_id) as im:
            im = im.convert('RGB')
            arr = np.asarray(im, dtype=np.float32) / 255.0
        X[i] = arr.reshape(-1)
        if (i+1) % 5000 == 0:
            print(f"Loaded {i+1}/{len(df)} images...", flush=True)
    return X

print('Loading flat pixel features for train/test...')
X = load_flat_pixels(train_df, TRAIN_DIR)
X_test = load_flat_pixels(sub_df, TEST_DIR)
y = train_df['has_cactus'].values.astype(np.int32)
print('Shapes:', X.shape, X_test.shape)

skf_lr = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
oof_lr = np.zeros(len(train_df), dtype=np.float32)
test_preds_lr = np.zeros(len(sub_df), dtype=np.float32)

fold = 1
for trn_idx, val_idx in skf_lr.split(X, y):
    print(f"[LR] Fold {fold}: train {len(trn_idx)} val {len(val_idx)}")
    X_tr, X_va = X[trn_idx], X[val_idx]
    y_tr, y_va = y[trn_idx], y[val_idx]
    scaler = StandardScaler(with_mean=True, with_std=True)
    X_trs = scaler.fit_transform(X_tr)
    X_vas = scaler.transform(X_va)
    X_tes = scaler.transform(X_test)
    clf = LogisticRegression(max_iter=500, C=1.0, solver='lbfgs')
    clf.fit(X_trs, y_tr)
    oof_lr[val_idx] = clf.predict_proba(X_vas)[:,1]
    test_preds_lr += clf.predict_proba(X_tes)[:,1] / skf_lr.n_splits
    fold += 1

auc_lr = roc_auc_score(y, oof_lr)
print(f"[LR] OOF AUC: {auc_lr:.6f}")

# Ensemble with current submission.csv predictions
prev_sub = pd.read_csv('submission.csv')
assert 'has_cactus' in prev_sub.columns and len(prev_sub)==len(sub_df)
ens_probs = 0.5 * prev_sub['has_cactus'].values.astype(float) + 0.5 * test_preds_lr.astype(float)
final_sub = sub_df.copy()
final_sub['has_cactus'] = ens_probs
final_sub.to_csv('submission.csv', index=False)
print('Saved LR-ensembled submission.csv. Shape:', final_sub.shape)
final_sub.head()

Loading flat pixel features for train/test...


Loaded 5000/14175 images...


Loaded 10000/14175 images...


Shapes: (14175, 3072) (3325, 3072)
[LR] Fold 1: train 11340 val 2835


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[LR] Fold 2: train 11340 val 2835


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[LR] Fold 3: train 11340 val 2835


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[LR] Fold 4: train 11340 val 2835


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[LR] Fold 5: train 11340 val 2835


[LR] OOF AUC: 0.950838
Saved LR-ensembled submission.csv. Shape: (3325, 2)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Unnamed: 0,id,has_cactus
0,09034a34de0e2015a8a28dfe18f423f6.jpg,0.692693
1,134f04305c795d6d202502c2ce3578f3.jpg,0.999976
2,41fad8d145e6c41868ce3617e30a2545.jpg,0.999677
3,35f8a11352c8d41b6231bb33d8d09f7e.jpg,0.972646
4,b77dc902b035887cbbc01920ce0e3151.jpg,0.922241


In [9]:
# Overwrite submission with ResNet18-only 8-way TTA (no LR, no SmallCNN)
import torch

def tta8_preds_res_only(models, x):
    outs = 0.0
    trans = []
    trans.append(x)
    trans.append(torch.flip(x, dims=[3]))
    trans.append(torch.flip(x, dims=[2]))
    trans.append(torch.rot90(x, k=1, dims=[2,3]))
    trans.append(torch.rot90(x, k=2, dims=[2,3]))
    trans.append(torch.rot90(x, k=3, dims=[2,3]))
    trans.append(torch.flip(torch.rot90(x, k=1, dims=[2,3]), dims=[3]))
    trans.append(torch.flip(torch.rot90(x, k=1, dims=[2,3]), dims=[2]))
    with torch.no_grad():
        for m in models:
            m.eval()
            logits_sum = 0.0
            for t in trans:
                logits_sum = logits_sum + m(t)
            outs = outs + logits_sum / len(trans)
    outs = outs / len(models)
    probs = torch.sigmoid(outs).squeeze(1)
    return probs

# DataLoader for ResNet transforms already defined as valid_tfms_res
test_ds_res8_only = CactusDataset(sub_df, TEST_DIR, transforms=valid_tfms_res)
test_loader_res8_only = DataLoader(test_ds_res8_only, batch_size=256, shuffle=False, num_workers=4, pin_memory=True)
probs_res8_only = []
with torch.no_grad():
    for xb, ids in test_loader_res8_only:
        xb = xb.to(device, non_blocking=True)
        probs_res8_only.append(tta8_preds_res_only(res_models, xb).cpu().numpy())
probs_res8_only = np.concatenate(probs_res8_only).astype(float)

sub_res_only = sub_df.copy()
sub_res_only['has_cactus'] = probs_res8_only
sub_res_only.to_csv('submission.csv', index=False)
print('Saved ResNet-only 8-way TTA submission.csv. Shape:', sub_res_only.shape)
sub_res_only.head()

Saved ResNet-only 8-way TTA submission.csv. Shape: (3325, 2)


Unnamed: 0,id,has_cactus
0,09034a34de0e2015a8a28dfe18f423f6.jpg,0.999999
1,134f04305c795d6d202502c2ce3578f3.jpg,0.999998
2,41fad8d145e6c41868ce3617e30a2545.jpg,0.999998
3,35f8a11352c8d41b6231bb33d8d09f7e.jpg,0.999999
4,b77dc902b035887cbbc01920ce0e3151.jpg,0.999998
