In [9]:
import os
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torchvision.transforms import RandAugment
from torch.utils.data import DataLoader, random_split
import timm
import wandb
import numpy as np
import random
import hashlib
import pandas as pd

In [10]:
def find_project_root():
    current = os.path.abspath(os.curdir)
    while os.path.basename(current) != "cifar-week3":
        parent = os.path.dirname(current)
        if parent == current:  # reached filesystem root
            raise RuntimeError("Could not find 'cifar-week3' folder! Make sure your project is named exactly 'cifar-week3'")
        current = parent
    return current

ROOT_DIR = find_project_root()
DATA_DIR = os.path.join(ROOT_DIR, "data")
ARTIFACTS_DIR = os.path.join(ROOT_DIR, "artifacts")

os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(ARTIFACTS_DIR, exist_ok=True)

print(f"Project root found: {ROOT_DIR}")
print(f"Data folder: {DATA_DIR}")
print(f"Models saved to: {ARTIFACTS_DIR}")

Project root found: c:\cifar-week3
Data folder: c:\cifar-week3\data
Models saved to: c:\cifar-week3\artifacts


In [11]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device → {device}")

Device → cuda


In [12]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33musansrita[0m ([33musansrita-kathmandu-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [13]:
def md5(path):
    if not os.path.exists(path): return "MISSING"
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""): h.update(chunk)
    return h.hexdigest()

print("MD5 checksums:")
for name in ["data_batch_1", "test_batch"]:
    p = os.path.join(DATA_DIR, "cifar-10-batches-py", name)
    print(f"  {p}: {md5(p)}")

MD5 checksums:
  c:\cifar-week3\data\cifar-10-batches-py\data_batch_1: c99cafc152244af753f735de768cd75f
  c:\cifar-week3\data\cifar-10-batches-py\test_batch: 40351d587109b95175f43aff81a1287e


In [14]:
MEAN = [0.4914, 0.4822, 0.4465]
STD  = [0.2470, 0.2430, 0.2610]

def get_loaders(aug_name):
    if aug_name == "baseline":
        train_tf = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(MEAN, STD),
        ])
    elif aug_name == "randaug":
        train_tf = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(p=0.5),
            RandAugment(num_ops=2, magnitude=9),
            transforms.ToTensor(),
            transforms.Normalize(MEAN, STD),
        ])
    else:
        raise ValueError(aug_name)

    test_tf = transforms.Compose([transforms.ToTensor(), transforms.Normalize(MEAN, STD)])

    full_train = datasets.CIFAR10(root=DATA_DIR, train=True, download=True, transform=train_tf)
    test_ds    = datasets.CIFAR10(root=DATA_DIR, train=False, download=True, transform=test_tf)

    train_ds, val_ds = random_split(full_train, [45000, 5000], generator=torch.Generator().manual_seed(42))

    return (
        DataLoader(train_ds, batch_size=128, shuffle=True,  num_workers=4, pin_memory=True),
        DataLoader(val_ds,   batch_size=256, shuffle=False, num_workers=4, pin_memory=True),
        DataLoader(test_ds,  batch_size=256, shuffle=False, num_workers=4, pin_memory=True)
    )

    # Pandera (quick check)
    try:
        import pandera.pandas as pa
        labels = [int(train_ds[i][1]) for i in range(500)]
        pa.DataFrameSchema({"label": pa.Column(int, pa.Check(lambda s: s.between(0,9).all()))})(pd.DataFrame({"label": labels}))
        print("Pandera validation PASSED")
    except Exception as e:
        print("Pandera skipped or failed (still ok):", e)

In [15]:
def mixup_cutmix(x, y, alpha=1.0, use_cutmix=False):
    if alpha <= 0: return x, y, y, 1.0
    lam = np.random.beta(alpha, alpha)
    idx = torch.randperm(x.size(0), device=x.device)
    if use_cutmix and np.random.rand() < 0.5:
        _, _, H, W = x.shape
        r = np.sqrt(1 - lam)
        cut_w = cut_h = int(32 * r)
        cx = cy = np.random.randint(32)
        bbx1 = np.clip(cx - cut_w//2, 0, 32)
        bby1 = np.clip(cy - cut_h//2, 0, 32)
        bbx2 = np.clip(cx + cut_w//2, 0, 32)
        bby2 = np.clip(cy + cut_h//2, 0, 32)
        x[:, :, bbx1:bbx2, bby1:bby2] = x[idx, :, bbx1:bbx2, bby1:bby2]
        lam = 1 - ((bbx2-bbx1)*(bby2-bby1)/(32*32))
        return x, y, y[idx], lam
    else:
        x = lam * x + (1-lam) * x[idx]
        return x, y, y[idx], lam

In [None]:
experiments = [
    {"name": "baseline",        "aug": "baseline", "alpha": 0.0, "cutmix": False},
    {"name": "randaug",         "aug": "randaug",  "alpha": 0.0, "cutmix": False},
    {"name": "mixup",           "aug": "baseline", "alpha": 1.0, "cutmix": False},
    {"name": "cutmix",          "aug": "baseline", "alpha": 1.0, "cutmix": True },
    {"name": "randaug+cutmix",  "aug": "randaug",  "alpha": 1.0, "cutmix": True },
]

for cfg in experiments:
    print("\n" + "="*90)
    print(f"RUNNING → {cfg['name']}")
    print("="*90)

    wandb.init(
        project="cifar10-week3",
        group="Day1-Augmentation-Ablation",
        name=cfg["name"],
        config={**cfg, "epochs": 20, "model": "resnet18", "lr": 0.1}
    )

    train_loader, val_loader, test_loader = get_loaders(cfg["aug"])

    model = timm.create_model("resnet18", pretrained=False, num_classes=10).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
    criterion = nn.CrossEntropyLoss()

    best_val = 0.0

    for epoch in range(1, 21):
        model.train()
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            if cfg["alpha"] > 0:
                x_mix, ya, yb, lam = mixup_cutmix(x, y, cfg["alpha"], cfg["cutmix"])
                loss = lam * criterion(model(x_mix), ya) + (1-lam) * criterion(model(x_mix), yb)
            else:
                loss = criterion(model(x), y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        scheduler.step()

        # Validation
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                correct += (model(x).argmax(1) == y).sum().item()
                total += y.size(0)
        val_acc = correct / total
        wandb.log({"val_accuracy": val_acc, "epoch": epoch, "lr": scheduler.get_last_lr()[0]})
        if val_acc > best_val: best_val = val_acc
        print(f"Epoch {epoch:02d} → Val: {val_acc:.4f} (Best: {best_val:.4f})")

    # Final Test
    correct = total = 0
    with torch.no_grad():
        for x, y in:
            x, y = x.to(device), y.to(device)
            correct += (model(x).argmax(1) == y).sum().item()
            total += y.size(0)
    test_acc = correct / total
    wandb.log({"test_accuracy_final": test_acc, "best_val_accuracy": best_val})

    # Save model
    model_path = os.path.join(ARTIFACTS_DIR, f"{cfg['name']}_final.pth")
    torch.save(model.state_dict(), model_path)

    # FIXED: Replace '+' with '-' or '_' in artifact name
    safe_name = cfg["name"].replace("+", "-")  # ← THIS IS THE FIX
    artifact = wandb.Artifact(f"day1-{safe_name}-model", type="model")
    artifact.add_file(model_path)
    wandb.log_artifact(artifact)

    print(f"FINAL → Test: {test_acc:.4f} | Model saved & uploaded as 'day1-{safe_name}-model'")
    wandb.finish()

print("\nDAY 1 100% COMPLETE — ALL 5 MODELS UPLOADED SUCCESSFULLY!")
print("Check W&B → Group: Day1-Augmentation-Ablation → Artifacts tab")


RUNNING → baseline


Epoch 01 → Val: 0.4624 (Best: 0.4624)
Epoch 02 → Val: 0.5320 (Best: 0.5320)
Epoch 03 → Val: 0.6032 (Best: 0.6032)
Epoch 04 → Val: 0.6346 (Best: 0.6346)
Epoch 05 → Val: 0.6274 (Best: 0.6346)
Epoch 06 → Val: 0.6424 (Best: 0.6424)
Epoch 07 → Val: 0.6908 (Best: 0.6908)
Epoch 08 → Val: 0.7090 (Best: 0.7090)
Epoch 09 → Val: 0.7152 (Best: 0.7152)
Epoch 10 → Val: 0.7222 (Best: 0.7222)
Epoch 11 → Val: 0.7246 (Best: 0.7246)
Epoch 12 → Val: 0.7576 (Best: 0.7576)
Epoch 13 → Val: 0.7598 (Best: 0.7598)
Epoch 14 → Val: 0.7710 (Best: 0.7710)
Epoch 15 → Val: 0.7846 (Best: 0.7846)
Epoch 16 → Val: 0.8002 (Best: 0.8002)
Epoch 17 → Val: 0.8106 (Best: 0.8106)
Epoch 18 → Val: 0.8182 (Best: 0.8182)
Epoch 19 → Val: 0.8302 (Best: 0.8302)
Epoch 20 → Val: 0.8292 (Best: 0.8302)
FINAL → Test: 0.8373 | Model saved & uploaded as 'day1-baseline-model'


0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
lr,███▇▇▇▆▆▅▅▄▃▃▂▂▂▁▁▁▁
test_accuracy_final,▁
val_accuracy,▁▂▄▄▄▄▅▆▆▆▆▇▇▇▇▇████

0,1
best_val_accuracy,0.8302
epoch,20.0
lr,0.0
test_accuracy_final,0.8373
val_accuracy,0.8292



RUNNING → randaug


Epoch 01 → Val: 0.3640 (Best: 0.3640)
Epoch 02 → Val: 0.4028 (Best: 0.4028)
Epoch 03 → Val: 0.4894 (Best: 0.4894)
Epoch 04 → Val: 0.5394 (Best: 0.5394)
Epoch 05 → Val: 0.5702 (Best: 0.5702)
Epoch 06 → Val: 0.5710 (Best: 0.5710)
Epoch 07 → Val: 0.5900 (Best: 0.5900)
Epoch 08 → Val: 0.6032 (Best: 0.6032)
Epoch 09 → Val: 0.6396 (Best: 0.6396)
Epoch 10 → Val: 0.6530 (Best: 0.6530)
Epoch 11 → Val: 0.6598 (Best: 0.6598)
Epoch 12 → Val: 0.6956 (Best: 0.6956)
Epoch 13 → Val: 0.6994 (Best: 0.6994)
Epoch 14 → Val: 0.7044 (Best: 0.7044)
Epoch 15 → Val: 0.7324 (Best: 0.7324)
Epoch 16 → Val: 0.7468 (Best: 0.7468)
Epoch 17 → Val: 0.7636 (Best: 0.7636)
Epoch 18 → Val: 0.7786 (Best: 0.7786)
Epoch 19 → Val: 0.7750 (Best: 0.7786)
Epoch 20 → Val: 0.7818 (Best: 0.7818)
FINAL → Test: 0.8316 | Model saved & uploaded as 'day1-randaug-model'


0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
lr,███▇▇▇▆▆▅▅▄▃▃▂▂▂▁▁▁▁
test_accuracy_final,▁
val_accuracy,▁▂▃▄▄▄▅▅▆▆▆▇▇▇▇▇████

0,1
best_val_accuracy,0.7818
epoch,20.0
lr,0.0
test_accuracy_final,0.8316
val_accuracy,0.7818



RUNNING → mixup


Epoch 01 → Val: 0.3672 (Best: 0.3672)
Epoch 02 → Val: 0.4896 (Best: 0.4896)
Epoch 03 → Val: 0.5630 (Best: 0.5630)
Epoch 04 → Val: 0.5964 (Best: 0.5964)
Epoch 05 → Val: 0.5740 (Best: 0.5964)
Epoch 06 → Val: 0.6222 (Best: 0.6222)
Epoch 07 → Val: 0.6542 (Best: 0.6542)
Epoch 08 → Val: 0.6746 (Best: 0.6746)
Epoch 09 → Val: 0.6768 (Best: 0.6768)
Epoch 10 → Val: 0.6836 (Best: 0.6836)
Epoch 11 → Val: 0.7028 (Best: 0.7028)
Epoch 12 → Val: 0.7096 (Best: 0.7096)
Epoch 13 → Val: 0.7390 (Best: 0.7390)
Epoch 14 → Val: 0.7500 (Best: 0.7500)
Epoch 15 → Val: 0.7684 (Best: 0.7684)
Epoch 16 → Val: 0.7690 (Best: 0.7690)
Epoch 17 → Val: 0.7830 (Best: 0.7830)
Epoch 18 → Val: 0.7930 (Best: 0.7930)
Epoch 19 → Val: 0.8000 (Best: 0.8000)
Epoch 20 → Val: 0.8046 (Best: 0.8046)
FINAL → Test: 0.8090 | Model saved & uploaded as 'day1-mixup-model'


0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
lr,███▇▇▇▆▆▅▅▄▃▃▂▂▂▁▁▁▁
test_accuracy_final,▁
val_accuracy,▁▃▄▅▄▅▆▆▆▆▆▆▇▇▇▇████

0,1
best_val_accuracy,0.8046
epoch,20.0
lr,0.0
test_accuracy_final,0.809
val_accuracy,0.8046



RUNNING → cutmix


Epoch 01 → Val: 0.4040 (Best: 0.4040)
Epoch 02 → Val: 0.4714 (Best: 0.4714)
Epoch 03 → Val: 0.4956 (Best: 0.4956)
Epoch 04 → Val: 0.5556 (Best: 0.5556)
Epoch 05 → Val: 0.6174 (Best: 0.6174)
Epoch 06 → Val: 0.6240 (Best: 0.6240)
Epoch 07 → Val: 0.6304 (Best: 0.6304)
Epoch 08 → Val: 0.6458 (Best: 0.6458)
Epoch 09 → Val: 0.6742 (Best: 0.6742)
Epoch 10 → Val: 0.6660 (Best: 0.6742)
Epoch 11 → Val: 0.6968 (Best: 0.6968)
Epoch 12 → Val: 0.7046 (Best: 0.7046)
Epoch 13 → Val: 0.7204 (Best: 0.7204)
Epoch 14 → Val: 0.7474 (Best: 0.7474)
Epoch 15 → Val: 0.7468 (Best: 0.7474)
Epoch 16 → Val: 0.7654 (Best: 0.7654)
Epoch 17 → Val: 0.7838 (Best: 0.7838)
Epoch 18 → Val: 0.7964 (Best: 0.7964)
Epoch 19 → Val: 0.8026 (Best: 0.8026)
Epoch 20 → Val: 0.8032 (Best: 0.8032)
FINAL → Test: 0.8120 | Model saved & uploaded as 'day1-cutmix-model'


0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
lr,███▇▇▇▆▆▅▅▄▃▃▂▂▂▁▁▁▁
test_accuracy_final,▁
val_accuracy,▁▂▃▄▅▅▅▅▆▆▆▆▇▇▇▇████

0,1
best_val_accuracy,0.8032
epoch,20.0
lr,0.0
test_accuracy_final,0.812
val_accuracy,0.8032



RUNNING → randaug+cutmix


Epoch 01 → Val: 0.3364 (Best: 0.3364)
Epoch 02 → Val: 0.3998 (Best: 0.3998)
Epoch 03 → Val: 0.4314 (Best: 0.4314)
Epoch 04 → Val: 0.4860 (Best: 0.4860)
Epoch 05 → Val: 0.5302 (Best: 0.5302)
Epoch 06 → Val: 0.5382 (Best: 0.5382)
Epoch 07 → Val: 0.5544 (Best: 0.5544)
Epoch 08 → Val: 0.5828 (Best: 0.5828)
Epoch 09 → Val: 0.6014 (Best: 0.6014)
Epoch 10 → Val: 0.5956 (Best: 0.6014)
Epoch 11 → Val: 0.6184 (Best: 0.6184)
Epoch 12 → Val: 0.6560 (Best: 0.6560)
Epoch 13 → Val: 0.6508 (Best: 0.6560)
Epoch 14 → Val: 0.6802 (Best: 0.6802)
Epoch 15 → Val: 0.6908 (Best: 0.6908)
Epoch 16 → Val: 0.7010 (Best: 0.7010)
Epoch 17 → Val: 0.7204 (Best: 0.7204)
Epoch 18 → Val: 0.7286 (Best: 0.7286)
Epoch 19 → Val: 0.7402 (Best: 0.7402)
Epoch 20 → Val: 0.7446 (Best: 0.7446)
FINAL → Test: 0.7974 | Model saved & uploaded as 'day1-randaug-cutmix-model'


0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
lr,███▇▇▇▆▆▅▅▄▃▃▂▂▂▁▁▁▁
test_accuracy_final,▁
val_accuracy,▁▂▃▄▄▄▅▅▆▅▆▆▆▇▇▇████

0,1
best_val_accuracy,0.7446
epoch,20.0
lr,0.0
test_accuracy_final,0.7974
val_accuracy,0.7446



DAY 1 100% COMPLETE — ALL 5 MODELS UPLOADED SUCCESSFULLY!
Check W&B → Group: Day1-Augmentation-Ablation → Artifacts tab
