In [1]:
import os
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import timm
import wandb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def find_root():
    cur = os.path.abspath(os.curdir)
    while os.path.basename(cur) != "cifar-week3":
        parent = os.path.dirname(cur)
        if parent == cur:
            raise RuntimeError("Rename your main folder to 'cifar-week3'")
        cur = parent
    return cur

ROOT = find_root()
DATA_DIR = os.path.join(ROOT, "data")
ARTIFACTS_DIR = os.path.join(ROOT, "artifacts")
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(ARTIFACTS_DIR, exist_ok=True)

In [3]:
torch.manual_seed(42)
import numpy as np
import random
random.seed(42)
np.random.seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device → {device}")

Device → cuda


In [4]:
MEAN = [0.4914, 0.4822, 0.4465]
STD  = [0.2470, 0.2430, 0.2610]

train_tf = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandAugment(num_ops=2, magnitude=9),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])
test_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])

full_train = datasets.CIFAR10(root=DATA_DIR, train=True,  download=True, transform=train_tf)
test_ds    = datasets.CIFAR10(root=DATA_DIR, train=False, download=True, transform=test_tf)
train_ds, val_ds = random_split(full_train, [45000, 5000], generator=torch.Generator().manual_seed(42))

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True,  num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=256, shuffle=False, num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=256, shuffle=False, num_workers=4, pin_memory=True)

In [5]:
experiments = [
    {"name": "resnet18-frozen",         "unfreeze_layers": []},
    {"name": "resnet18-partial-unfreeze", "unfreeze_layers": ["layer4"]},
    {"name": "resnet18-full-unfreeze",  "unfreeze_layers": ["layer1","layer2","layer3","layer4"]},
]

In [6]:
def run_resnet18(cfg):
    wandb.init(
        project="cifar10-week3",
        group="Day3-Transfer-Learning",
        name=cfg["name"],
        config={"model": "resnet18", "unfreeze": cfg["unfreeze_layers"], "epochs": 25}
    )

    model = timm.create_model("resnet18", pretrained=True, num_classes=10).to(device)

    # === FREEZE / UNFREEZE LOGIC ===
    # Freeze everything first
    for param in model.parameters():
        param.requires_grad = False

    # Always train classifier
    for param in model.get_classifier().parameters():
        param.requires_grad = True

    # Unfreeze requested layers
    for layer_name in cfg["unfreeze_layers"]:
        for param in getattr(model, layer_name).parameters():
            param.requires_grad = True

    # Print what is trainable
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total     = sum(p.numel() for p in model.parameters())
    print(f"Trainable params: {trainable:,} / {total:,} ({100*trainable/total:.1f}%)")

    optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=3e-4, weight_decay=0.05)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=25)
    criterion = nn.CrossEntropyLoss()
    scaler = torch.cuda.amp.GradScaler()

    best_val = 0.0
    for epoch in range(1, 26):
        model.train()
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                loss = criterion(model(x), y)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        scheduler.step()

        # Val
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                correct += (model(x).argmax(1) == y).sum().item()
                total += y.size(0)
        val_acc = correct / total
        wandb.log({"val_accuracy": val_acc, "epoch": epoch})
        if val_acc > best_val: best_val = val_acc
        print(f"Epoch {epoch:02d} → Val: {val_acc:.4f} (best {best_val:.4f})")

    # Test
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            correct += (model(x).argmax(1) == y).sum().item()
            total += y.size(0)
    test_acc = correct / total
    wandb.log({"test_accuracy_final": test_acc, "best_val_accuracy": best_val})
    print(f"\n{cfg['name']} → TEST ACCURACY: {test_acc:.4f}\n")

    # Save + upload .pth
    path = os.path.join(ARTIFACTS_DIR, f"day3_{cfg['name']}.pth")
    torch.save(model.state_dict(), path)
    artifact = wandb.Artifact(f"day3-{cfg['name']}", type="model")
    artifact.add_file(path)
    wandb.log_artifact(artifact)
    print(f"Uploaded → {path}")

    wandb.finish()

In [7]:
for exp in experiments:
    run_resnet18(exp)

[34m[1mwandb[0m: Currently logged in as: [33musansrita[0m ([33musansrita-kathmandu-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
  scaler = torch.cuda.amp.GradScaler()


Trainable params: 5,130 / 11,181,642 (0.0%)


  with torch.cuda.amp.autocast():


Epoch 01 → Val: 0.4148 (best 0.4148)
Epoch 02 → Val: 0.4554 (best 0.4554)
Epoch 03 → Val: 0.4678 (best 0.4678)
Epoch 04 → Val: 0.4786 (best 0.4786)
Epoch 05 → Val: 0.5054 (best 0.5054)
Epoch 06 → Val: 0.4968 (best 0.5054)
Epoch 07 → Val: 0.4996 (best 0.5054)
Epoch 08 → Val: 0.5018 (best 0.5054)
Epoch 09 → Val: 0.5124 (best 0.5124)
Epoch 10 → Val: 0.4990 (best 0.5124)
Epoch 11 → Val: 0.5210 (best 0.5210)
Epoch 12 → Val: 0.5114 (best 0.5210)
Epoch 13 → Val: 0.5092 (best 0.5210)
Epoch 14 → Val: 0.5100 (best 0.5210)
Epoch 15 → Val: 0.5036 (best 0.5210)
Epoch 16 → Val: 0.5106 (best 0.5210)
Epoch 17 → Val: 0.5308 (best 0.5308)
Epoch 18 → Val: 0.5060 (best 0.5308)
Epoch 19 → Val: 0.5064 (best 0.5308)
Epoch 20 → Val: 0.5186 (best 0.5308)
Epoch 21 → Val: 0.5232 (best 0.5308)
Epoch 22 → Val: 0.5078 (best 0.5308)
Epoch 23 → Val: 0.5196 (best 0.5308)
Epoch 24 → Val: 0.5162 (best 0.5308)
Epoch 25 → Val: 0.5190 (best 0.5308)

resnet18-frozen → TEST ACCURACY: 0.7193

Uploaded → c:\cifar-week3\artifac

0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
test_accuracy_final,▁
val_accuracy,▁▃▄▅▆▆▆▆▇▆▇▇▇▇▆▇█▇▇▇█▇▇▇▇

0,1
best_val_accuracy,0.5308
epoch,25.0
test_accuracy_final,0.7193
val_accuracy,0.519


Trainable params: 8,398,858 / 11,181,642 (75.1%)


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 01 → Val: 0.5988 (best 0.5988)
Epoch 02 → Val: 0.6368 (best 0.6368)
Epoch 03 → Val: 0.6558 (best 0.6558)
Epoch 04 → Val: 0.6726 (best 0.6726)
Epoch 05 → Val: 0.6792 (best 0.6792)
Epoch 06 → Val: 0.6878 (best 0.6878)
Epoch 07 → Val: 0.6826 (best 0.6878)
Epoch 08 → Val: 0.6928 (best 0.6928)
Epoch 09 → Val: 0.6972 (best 0.6972)
Epoch 10 → Val: 0.6972 (best 0.6972)
Epoch 11 → Val: 0.7090 (best 0.7090)
Epoch 12 → Val: 0.7086 (best 0.7090)
Epoch 13 → Val: 0.7108 (best 0.7108)
Epoch 14 → Val: 0.7116 (best 0.7116)
Epoch 15 → Val: 0.7150 (best 0.7150)
Epoch 16 → Val: 0.7252 (best 0.7252)
Epoch 17 → Val: 0.7180 (best 0.7252)
Epoch 18 → Val: 0.7192 (best 0.7252)
Epoch 19 → Val: 0.7192 (best 0.7252)
Epoch 20 → Val: 0.7240 (best 0.7252)
Epoch 21 → Val: 0.7158 (best 0.7252)
Epoch 22 → Val: 0.7190 (best 0.7252)
Epoch 23 → Val: 0.7122 (best 0.7252)
Epoch 24 → Val: 0.7142 (best 0.7252)
Epoch 25 → Val: 0.7190 (best 0.7252)

resnet18-partial-unfreeze → TEST ACCURACY: 0.9052

Uploaded → c:\cifar-wee

0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
test_accuracy_final,▁
val_accuracy,▁▃▄▅▅▆▆▆▆▆▇▇▇▇▇█████▇█▇▇█

0,1
best_val_accuracy,0.7252
epoch,25.0
test_accuracy_final,0.9052
val_accuracy,0.719


Trainable params: 11,172,106 / 11,181,642 (99.9%)


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 01 → Val: 0.7160 (best 0.7160)
Epoch 02 → Val: 0.7526 (best 0.7526)
Epoch 03 → Val: 0.7676 (best 0.7676)
Epoch 04 → Val: 0.7872 (best 0.7872)
Epoch 05 → Val: 0.7940 (best 0.7940)
Epoch 06 → Val: 0.8060 (best 0.8060)
Epoch 07 → Val: 0.7962 (best 0.8060)
Epoch 08 → Val: 0.8000 (best 0.8060)
Epoch 09 → Val: 0.8148 (best 0.8148)
Epoch 10 → Val: 0.8104 (best 0.8148)
Epoch 11 → Val: 0.8220 (best 0.8220)
Epoch 12 → Val: 0.8292 (best 0.8292)
Epoch 13 → Val: 0.8274 (best 0.8292)
Epoch 14 → Val: 0.8252 (best 0.8292)
Epoch 15 → Val: 0.8250 (best 0.8292)
Epoch 16 → Val: 0.8252 (best 0.8292)
Epoch 17 → Val: 0.8340 (best 0.8340)
Epoch 18 → Val: 0.8408 (best 0.8408)
Epoch 19 → Val: 0.8344 (best 0.8408)
Epoch 20 → Val: 0.8294 (best 0.8408)
Epoch 21 → Val: 0.8392 (best 0.8408)
Epoch 22 → Val: 0.8302 (best 0.8408)
Epoch 23 → Val: 0.8440 (best 0.8440)
Epoch 24 → Val: 0.8406 (best 0.8440)
Epoch 25 → Val: 0.8338 (best 0.8440)

resnet18-full-unfreeze → TEST ACCURACY: 0.9542

Uploaded → c:\cifar-week3\

0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
test_accuracy_final,▁
val_accuracy,▁▃▄▅▅▆▅▆▆▆▇▇▇▇▇▇▇█▇▇█▇██▇

0,1
best_val_accuracy,0.844
epoch,25.0
test_accuracy_final,0.9542
val_accuracy,0.8338
