In [2]:
# BOOTSTRAP: rebuild project4 structure + files (because /content resets)

import os
from pathlib import Path

ROOT = Path("/content/project4")
SRC = ROOT / "src"
CONFIGS = ROOT / "configs"
OUT = ROOT / "outputs"
DATA = ROOT / "data"

# folders
for p in [
    SRC, CONFIGS, DATA,
    OUT / "checkpoints", OUT / "metrics", OUT / "plots"
]:
    p.mkdir(parents=True, exist_ok=True)

# src/__init__.py
(SRC / "__init__.py").write_text("# package\n")

# src/utils.py
(SRC / "utils.py").write_text(r"""
import os
import random
from datetime import datetime

import numpy as np
import torch


def set_seed(seed: int):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def get_run_name(cfg) -> str:
    if hasattr(cfg, "run") and hasattr(cfg.run, "name") and cfg.run.name:
        return str(cfg.run.name)
    return datetime.now().strftime("run_%Y%m%d_%H%M%S")


def get_outputs_root(cfg) -> str:
    if hasattr(cfg, "outputs") and hasattr(cfg.outputs, "root") and cfg.outputs.root:
        return str(cfg.outputs.root)
    return "/content/project4/outputs"


def ensure_run_dirs(cfg):
    run_name = get_run_name(cfg)
    out_root = get_outputs_root(cfg)

    run_dir = os.path.join(out_root, run_name)
    os.makedirs(os.path.join(run_dir, "checkpoints"), exist_ok=True)
    os.makedirs(os.path.join(run_dir, "metrics"), exist_ok=True)
    os.makedirs(os.path.join(run_dir, "plots"), exist_ok=True)

    return run_dir
""".lstrip())

# src/data_pipeline.py  (CIFAR10 + CIFAR100)
(SRC / "data_pipeline.py").write_text(r"""
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split


class CIFARPipeline:
    def __init__(self, cfg, val_split=0.1):
        self.cfg = cfg
        self.batch_size = cfg.train.batch_size
        self.num_workers = cfg.data.num_workers
        self.download = cfg.data.download
        self.val_split = val_split
        self.mean = cfg.data.mean
        self.std = cfg.data.std
        self.data_root = getattr(cfg.data, "data_root", "./data")

        dataset = cfg.data.dataset.lower().strip()
        if dataset == "cifar10":
            self.dataset_cls = torchvision.datasets.CIFAR10
        elif dataset == "cifar100":
            self.dataset_cls = torchvision.datasets.CIFAR100
        else:
            raise ValueError(f"Unsupported dataset: {cfg.data.dataset}")

        self.train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std),
        ])

        self.eval_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std),
        ])

    def create_loaders(self):
        full_train = self.dataset_cls(
            root=self.data_root,
            train=True,
            download=self.download,
            transform=self.train_transform,
        )

        test_ds = self.dataset_cls(
            root=self.data_root,
            train=False,
            download=self.download,
            transform=self.eval_transform,
        )

        val_size = int(len(full_train) * self.val_split)
        train_size = len(full_train) - val_size
        train_ds, val_ds = random_split(full_train, [train_size, val_size])

        val_ds.dataset.transform = self.eval_transform

        train_loader = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)
        val_loader = DataLoader(val_ds, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)
        test_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)

        return train_loader, val_loader, test_loader
""".lstrip())

# src/classifier.py  (ResNet18/50 from scratch)
(SRC / "classifier.py").write_text(r"""
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18, resnet50
from pathlib import Path


class Classifier:
    def __init__(self, cfg):
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        self.num_classes = cfg.model.num_classes
        self.learning_rate = cfg.train.learning_rate

        self.model = self._build_model(cfg.model.model_name, self.num_classes).to(self.device)

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = self._build_optimizer(cfg.train.optimizer, cfg.train.weight_decay, cfg.train.momentum)

        self.train_losses, self.val_losses = [], []
        self.train_accs, self.val_accs = [], []

    def _build_model(self, model_name, num_classes):
        if model_name.lower() == "resnet18":
            model = resnet18(weights=None)
        elif model_name.lower() == "resnet50":
            model = resnet50(weights=None)
        else:
            raise ValueError(f"Unknown model: {model_name}")

        model.fc = nn.Linear(model.fc.in_features, num_classes)
        return model

    def _build_optimizer(self, optimizer_name, weight_decay, momentum):
        if optimizer_name.lower() == "adam":
            return optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay=weight_decay)
        if optimizer_name.lower() == "sgd":
            return optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=momentum, weight_decay=weight_decay)
        raise ValueError(f"Unknown optimizer: {optimizer_name}")

    def train_epoch(self, train_loader):
        self.model.train()
        running_loss, correct, total = 0.0, 0, 0

        for x, y in train_loader:
            x, y = x.to(self.device), y.to(self.device)

            self.optimizer.zero_grad()
            out = self.model(x)
            loss = self.criterion(out, y)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item() * x.size(0)
            pred = out.argmax(dim=1)
            total += y.size(0)
            correct += (pred == y).sum().item()

        return running_loss / total, 100.0 * correct / total

    @torch.no_grad()
    def evaluate(self, loader):
        self.model.eval()
        running_loss, correct, total = 0.0, 0, 0

        for x, y in loader:
            x, y = x.to(self.device), y.to(self.device)
            out = self.model(x)
            loss = self.criterion(out, y)

            running_loss += loss.item() * x.size(0)
            pred = out.argmax(dim=1)
            total += y.size(0)
            correct += (pred == y).sum().item()

        return running_loss / total, 100.0 * correct / total

    def train(self, train_loader, val_loader, num_epochs):
        for epoch in range(num_epochs):
            tr_loss, tr_acc = self.train_epoch(train_loader)
            va_loss, va_acc = self.evaluate(val_loader)

            self.train_losses.append(tr_loss)
            self.val_losses.append(va_loss)
            self.train_accs.append(tr_acc)
            self.val_accs.append(va_acc)

            print(f"Epoch [{epoch+1}/{num_epochs}] "
                  f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.2f}% | "
                  f"Val Loss: {va_loss:.4f} Acc: {va_acc:.2f}%")

    def save_model(self, path):
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        torch.save({
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "train_losses": self.train_losses,
            "val_losses": self.val_losses,
            "train_accs": self.train_accs,
            "val_accs": self.val_accs,
        }, path)
""".lstrip())

# configs
(CONFIGS / "cifar10_scratch.yaml").write_text(r"""
run:
  name: cifar10_scratch
  seed: 42

model:
  model_name: ResNet18
  num_classes: 10

train:
  batch_size: 128
  learning_rate: 0.001
  num_epochs: 10
  optimizer: adam
  weight_decay: 0.0001
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs
""".lstrip())

(CONFIGS / "cifar100_pretrain.yaml").write_text(r"""
run:
  name: cifar100_pretrain
  seed: 42

model:
  model_name: ResNet18
  num_classes: 100

train:
  batch_size: 128
  learning_rate: 0.001
  num_epochs: 10
  optimizer: adam
  weight_decay: 0.0001
  momentum: 0.9

data:
  dataset: cifar100
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs
""".lstrip())

print("✅ Rebuilt /content/project4")
!find /content/project4 -maxdepth 2 -type d
!ls -la /content/project4/configs
!ls -la /content/project4/src


✅ Rebuilt /content/project4
/content/project4
/content/project4/data
/content/project4/configs
/content/project4/outputs
/content/project4/outputs/plots
/content/project4/outputs/checkpoints
/content/project4/outputs/metrics
/content/project4/src
total 16
drwxr-xr-x 2 root root 4096 Jan  9 19:45 .
drwxr-xr-x 6 root root 4096 Jan  9 19:45 ..
-rw-r--r-- 1 root root  422 Jan  9 19:45 cifar100_pretrain.yaml
-rw-r--r-- 1 root root  418 Jan  9 19:45 cifar10_scratch.yaml
total 24
drwxr-xr-x 2 root root 4096 Jan  9 19:45 .
drwxr-xr-x 6 root root 4096 Jan  9 19:45 ..
-rw-r--r-- 1 root root 3752 Jan  9 19:45 classifier.py
-rw-r--r-- 1 root root 2282 Jan  9 19:45 data_pipeline.py
-rw-r--r-- 1 root root   10 Jan  9 19:45 __init__.py
-rw-r--r-- 1 root root 1020 Jan  9 19:45 utils.py


In [3]:
import importlib.util
print("omegaconf installed?", importlib.util.find_spec("omegaconf") is not None)


omegaconf installed? True


In [4]:
%cd /content/project4

import torch
from omegaconf import OmegaConf

from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import CIFARPipeline
from src.classifier import Classifier

print("cuda available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("gpu:", torch.cuda.get_device_name(0))

cfg = OmegaConf.load("configs/cifar10_scratch.yaml")

set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

# Data
pipe = CIFARPipeline(cfg)
train_loader, val_loader, test_loader = pipe.create_loaders()

# Model
clf = Classifier(cfg)
print("device used by model:", clf.device)

# 1 epoch sanity run
clf.train(train_loader, val_loader, num_epochs=1)

# Test
test_loss, test_acc = clf.evaluate(test_loader)
print(f"Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

# Save checkpoint
ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}_sanity_epoch1.pt"
clf.save_model(ckpt_path)
print("✅ saved checkpoint to:", ckpt_path)
print("✅ sanity test acc:", test_acc)


/content/project4
cuda available: True
gpu: Tesla T4


100%|██████████| 170M/170M [00:04<00:00, 40.6MB/s]


device used by model: cuda
Epoch [1/1] Train Loss: 1.3781 Acc: 50.51% | Val Loss: 1.2394 Acc: 56.78%
Test Loss: 1.2370 Acc: 56.56%
✅ saved checkpoint to: /content/project4/outputs/cifar10_scratch/checkpoints/cifar10_scratch_sanity_epoch1.pt
✅ sanity test acc: 56.56


#PART 1

Custom CNN architectures (Part 1)
We implement two CNN models for CIFAR images: SmallCNN (baseline) and DeepCNN (stronger).
Both support toggles for BatchNorm (use_batchnorm) and Dropout (dropout) so we can run controlled experiments later.

In [6]:
%%writefile /content/project4/src/models.py
import torch
import torch.nn as nn


class SmallCNN(nn.Module):
    """
    A simple CNN baseline for CIFAR-10/100 (32x32 RGB).
    Supports BatchNorm + Dropout toggles for ablation studies.
    """
    def __init__(self, num_classes: int, use_batchnorm: bool = False, dropout: float = 0.0):
        super().__init__()
        self.use_batchnorm = use_batchnorm
        self.dropout_p = float(dropout)

        def conv_block(in_ch, out_ch):
            layers = [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=not use_batchnorm),
            ]
            if use_batchnorm:
                layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU(inplace=True))
            return nn.Sequential(*layers)

        self.features = nn.Sequential(
            conv_block(3, 32),
            conv_block(32, 64),
            nn.MaxPool2d(2),   # 32 -> 16

            conv_block(64, 128),
            nn.MaxPool2d(2),   # 16 -> 8
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 8 * 8, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=self.dropout_p),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x


class DeepCNN(nn.Module):
    """
    A deeper CNN (more capacity than SmallCNN).
    Supports BatchNorm + Dropout toggles for ablation studies.
    """
    def __init__(self, num_classes: int, use_batchnorm: bool = False, dropout: float = 0.0):
        super().__init__()
        self.use_batchnorm = use_batchnorm
        self.dropout_p = float(dropout)

        def conv_block(in_ch, out_ch):
            layers = [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=not use_batchnorm),
            ]
            if use_batchnorm:
                layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU(inplace=True))
            return nn.Sequential(*layers)

        self.features = nn.Sequential(
            # 32x32
            conv_block(3, 64),
            conv_block(64, 64),
            nn.MaxPool2d(2),   # 32 -> 16
            nn.Dropout(p=self.dropout_p),

            # 16x16
            conv_block(64, 128),
            conv_block(128, 128),
            nn.MaxPool2d(2),   # 16 -> 8
            nn.Dropout(p=self.dropout_p),

            # 8x8
            conv_block(128, 256),
            conv_block(256, 256),
            nn.MaxPool2d(2),   # 8 -> 4
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=self.dropout_p),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x


Writing /content/project4/src/models.py


Unify model creation in Classifier
We update the training wrapper so model.model_name can be one of:
SmallCNN, DeepCNN, ResNet18, ResNet50.
For the custom CNNs we read optional config fields like model.use_batchnorm and model.dropout.

In [7]:
%%writefile /content/project4/src/classifier.py
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18, resnet50
from pathlib import Path

from src.models import SmallCNN, DeepCNN


class Classifier:
    def __init__(self, cfg):
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        self.num_classes = cfg.model.num_classes
        self.learning_rate = cfg.train.learning_rate

        self.model = self._build_model(cfg).to(self.device)

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = self._build_optimizer(cfg.train.optimizer, cfg.train.weight_decay, cfg.train.momentum)

        self.train_losses, self.val_losses = [], []
        self.train_accs, self.val_accs = [], []

    def _build_model(self, cfg):
        name = cfg.model.model_name.lower().strip()

        # Optional fields (only used by custom CNNs)
        use_bn = bool(getattr(cfg.model, "use_batchnorm", False))
        dropout = float(getattr(cfg.model, "dropout", 0.0))

        if name == "smallcnn":
            return SmallCNN(num_classes=cfg.model.num_classes, use_batchnorm=use_bn, dropout=dropout)

        if name == "deepcnn":
            return DeepCNN(num_classes=cfg.model.num_classes, use_batchnorm=use_bn, dropout=dropout)

        if name == "resnet18":
            model = resnet18(weights=None)
            model.fc = nn.Linear(model.fc.in_features, cfg.model.num_classes)
            return model

        if name == "resnet50":
            model = resnet50(weights=None)
            model.fc = nn.Linear(model.fc.in_features, cfg.model.num_classes)
            return model

        raise ValueError(f"Unknown model_name: {cfg.model.model_name}")

    def _build_optimizer(self, optimizer_name, weight_decay, momentum):
        if optimizer_name.lower() == "adam":
            return optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay=weight_decay)
        if optimizer_name.lower() == "sgd":
            return optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=momentum, weight_decay=weight_decay)
        raise ValueError(f"Unknown optimizer: {optimizer_name}")

    def train_epoch(self, train_loader):
        self.model.train()
        running_loss, correct, total = 0.0, 0, 0

        for x, y in train_loader:
            x, y = x.to(self.device), y.to(self.device)

            self.optimizer.zero_grad()
            out = self.model(x)
            loss = self.criterion(out, y)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item() * x.size(0)
            pred = out.argmax(dim=1)
            total += y.size(0)
            correct += (pred == y).sum().item()

        return running_loss / total, 100.0 * correct / total

    @torch.no_grad()
    def evaluate(self, loader):
        self.model.eval()
        running_loss, correct, total = 0.0, 0, 0

        for x, y in loader:
            x, y = x.to(self.device), y.to(self.device)
            out = self.model(x)
            loss = self.criterion(out, y)

            running_loss += loss.item() * x.size(0)
            pred = out.argmax(dim=1)
            total += y.size(0)
            correct += (pred == y).sum().item()

        return running_loss / total, 100.0 * correct / total

    def train(self, train_loader, val_loader, num_epochs):
        for epoch in range(num_epochs):
            tr_loss, tr_acc = self.train_epoch(train_loader)
            va_loss, va_acc = self.evaluate(val_loader)

            self.train_losses.append(tr_loss)
            self.val_losses.append(va_loss)
            self.train_accs.append(tr_acc)
            self.val_accs.append(va_acc)

            print(
                f"Epoch [{epoch+1}/{num_epochs}] "
                f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.2f}% | "
                f"Val Loss: {va_loss:.4f} Acc: {va_acc:.2f}%"
            )

    def save_model(self, path):
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        torch.save({
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "train_losses": self.train_losses,
            "val_losses": self.val_losses,
            "train_accs": self.train_accs,
            "val_accs": self.val_accs,
        }, path)
        print(f"Model saved to {path}")


Overwriting /content/project4/src/classifier.py


Smoke test custom models
We load the config, switch model_name to our custom CNNs, and verify the forward pass output shape is [batch_size, num_classes].

In [8]:
%cd /content/project4
import importlib
from omegaconf import OmegaConf
import torch

# reload in case Colab cached old modules
import src.models, src.classifier
importlib.reload(src.models)
importlib.reload(src.classifier)

from src.classifier import Classifier
from src.data_pipeline import CIFARPipeline

cfg = OmegaConf.load("configs/cifar10_scratch.yaml")

pipe = CIFARPipeline(cfg)
train_loader, _, _ = pipe.create_loaders()
x, y = next(iter(train_loader))

for model_name, use_bn, dropout in [
    ("SmallCNN", False, 0.0),
    ("SmallCNN", True, 0.0),
    ("DeepCNN", True, 0.3),
]:
    cfg.model.model_name = model_name
    cfg.model.use_batchnorm = use_bn
    cfg.model.dropout = dropout

    clf = Classifier(cfg)
    clf.model.eval()
    with torch.no_grad():
        out = clf.model(x.to(clf.device))
    print(model_name, "BN=", use_bn, "dropout=", dropout, "-> out shape:", tuple(out.shape))


/content/project4
SmallCNN BN= False dropout= 0.0 -> out shape: (128, 10)
SmallCNN BN= True dropout= 0.0 -> out shape: (128, 10)
DeepCNN BN= True dropout= 0.3 -> out shape: (128, 10)


Fast config for quick iteration
We create a lightweight config to run quick experiments (few epochs) so we can validate code and compare models without long runtimes.
Later we will create “final” configs with more epochs for the report.

In [9]:
%%writefile /content/project4/configs/cifar10_fast_smallcnn.yaml
run:
  name: cifar10_fast_smallcnn
  seed: 42

model:
  model_name: SmallCNN
  num_classes: 10
  use_batchnorm: false
  dropout: 0.0

train:
  batch_size: 128
  learning_rate: 0.001
  num_epochs: 3          # FAST: small number for quick checks
  optimizer: adam
  weight_decay: 0.0      # start with no regularization in baseline
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs


Writing /content/project4/configs/cifar10_fast_smallcnn.yaml


Train baseline SmallCNN (no BN, no dropout)
We train SmallCNN from scratch on CIFAR-10 for a few epochs to establish a baseline.
We save the checkpoint and record validation/test accuracy.

In [10]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import CIFARPipeline
from src.classifier import Classifier

cfg = OmegaConf.load("configs/cifar10_fast_smallcnn.yaml")

set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

pipe = CIFARPipeline(cfg)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("model:", cfg.model.model_name, "BN:", cfg.model.use_batchnorm, "dropout:", cfg.model.dropout)

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved:", ckpt_path)


/content/project4
device: cuda
model: SmallCNN BN: False dropout: 0.0
Epoch [1/3] Train Loss: 1.3521 Acc: 51.31% | Val Loss: 1.0426 Acc: 62.62%
Epoch [2/3] Train Loss: 0.8778 Acc: 69.12% | Val Loss: 0.8683 Acc: 69.16%
Epoch [3/3] Train Loss: 0.6783 Acc: 76.08% | Val Loss: 0.7367 Acc: 73.66%
Test Loss: 0.7388 Acc: 74.19%
Model saved to /content/project4/outputs/cifar10_fast_smallcnn/checkpoints/cifar10_fast_smallcnn.pt
✅ saved: /content/project4/outputs/cifar10_fast_smallcnn/checkpoints/cifar10_fast_smallcnn.pt


BatchNorm ablation (SmallCNN)
We repeat the same training setup as the baseline, but enable Batch Normalization in the CNN.
This lets us compare convergence and test accuracy: SmallCNN vs SmallCNN+BN.

In [11]:
%%writefile /content/project4/configs/cifar10_fast_smallcnn_bn.yaml
run:
  name: cifar10_fast_smallcnn_bn
  seed: 42

model:
  model_name: SmallCNN
  num_classes: 10
  use_batchnorm: true
  dropout: 0.0

train:
  batch_size: 128
  learning_rate: 0.001
  num_epochs: 3
  optimizer: adam
  weight_decay: 0.0
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs


Writing /content/project4/configs/cifar10_fast_smallcnn_bn.yaml


In [12]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import CIFARPipeline
from src.classifier import Classifier

cfg = OmegaConf.load("configs/cifar10_fast_smallcnn_bn.yaml")

set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

pipe = CIFARPipeline(cfg)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("model:", cfg.model.model_name, "BN:", cfg.model.use_batchnorm, "dropout:", cfg.model.dropout)

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved:", ckpt_path)


/content/project4
device: cuda
model: SmallCNN BN: True dropout: 0.0
Epoch [1/3] Train Loss: 1.3545 Acc: 51.63% | Val Loss: 1.2336 Acc: 56.12%
Epoch [2/3] Train Loss: 0.8937 Acc: 68.56% | Val Loss: 0.8355 Acc: 71.30%
Epoch [3/3] Train Loss: 0.7391 Acc: 73.89% | Val Loss: 0.7734 Acc: 72.12%
Test Loss: 0.7724 Acc: 72.98%
Model saved to /content/project4/outputs/cifar10_fast_smallcnn_bn/checkpoints/cifar10_fast_smallcnn_bn.pt
✅ saved: /content/project4/outputs/cifar10_fast_smallcnn_bn/checkpoints/cifar10_fast_smallcnn_bn.pt


Regularization ablation (Dropout)
We add Dropout to the BatchNorm version of SmallCNN and re-train with the same settings.
This tests whether regularization improves generalization compared to SmallCNN and SmallCNN+BN.

In [13]:
%%writefile /content/project4/configs/cifar10_fast_smallcnn_bn_dropout.yaml
run:
  name: cifar10_fast_smallcnn_bn_dropout
  seed: 42

model:
  model_name: SmallCNN
  num_classes: 10
  use_batchnorm: true
  dropout: 0.3

train:
  batch_size: 128
  learning_rate: 0.001
  num_epochs: 3
  optimizer: adam
  weight_decay: 0.0
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs


Writing /content/project4/configs/cifar10_fast_smallcnn_bn_dropout.yaml


In [14]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import CIFARPipeline
from src.classifier import Classifier

cfg = OmegaConf.load("configs/cifar10_fast_smallcnn_bn_dropout.yaml")

set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

pipe = CIFARPipeline(cfg)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("model:", cfg.model.model_name, "BN:", cfg.model.use_batchnorm, "dropout:", cfg.model.dropout)

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved:", ckpt_path)


/content/project4
device: cuda
model: SmallCNN BN: True dropout: 0.3
Epoch [1/3] Train Loss: 1.5517 Acc: 44.23% | Val Loss: 1.2026 Acc: 56.38%
Epoch [2/3] Train Loss: 1.1038 Acc: 60.50% | Val Loss: 0.9732 Acc: 66.16%
Epoch [3/3] Train Loss: 0.9557 Acc: 66.31% | Val Loss: 0.9172 Acc: 67.12%
Test Loss: 0.9088 Acc: 67.56%
Model saved to /content/project4/outputs/cifar10_fast_smallcnn_bn_dropout/checkpoints/cifar10_fast_smallcnn_bn_dropout.pt
✅ saved: /content/project4/outputs/cifar10_fast_smallcnn_bn_dropout/checkpoints/cifar10_fast_smallcnn_bn_dropout.pt


Architecture comparison (DeepCNN)
We train a deeper CNN (DeepCNN) on CIFAR-10 using the same fast settings.
This allows a controlled comparison between architectures: SmallCNN vs DeepCNN.

In [15]:
%%writefile /content/project4/configs/cifar10_fast_deepcnn.yaml
run:
  name: cifar10_fast_deepcnn
  seed: 42

model:
  model_name: DeepCNN
  num_classes: 10
  use_batchnorm: true
  dropout: 0.3

train:
  batch_size: 128
  learning_rate: 0.001
  num_epochs: 3
  optimizer: adam
  weight_decay: 0.0
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs


Writing /content/project4/configs/cifar10_fast_deepcnn.yaml


In [16]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import CIFARPipeline
from src.classifier import Classifier

cfg = OmegaConf.load("configs/cifar10_fast_deepcnn.yaml")

set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

pipe = CIFARPipeline(cfg)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("model:", cfg.model.model_name, "BN:", cfg.model.use_batchnorm, "dropout:", cfg.model.dropout)

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved:", ckpt_path)


/content/project4
device: cuda
model: DeepCNN BN: True dropout: 0.3
Epoch [1/3] Train Loss: 1.8040 Acc: 34.04% | Val Loss: 1.3447 Acc: 51.40%
Epoch [2/3] Train Loss: 1.2366 Acc: 55.11% | Val Loss: 1.0546 Acc: 61.96%
Epoch [3/3] Train Loss: 0.9903 Acc: 64.66% | Val Loss: 0.9483 Acc: 66.42%
Test Loss: 0.9503 Acc: 66.52%
Model saved to /content/project4/outputs/cifar10_fast_deepcnn/checkpoints/cifar10_fast_deepcnn.pt
✅ saved: /content/project4/outputs/cifar10_fast_deepcnn/checkpoints/cifar10_fast_deepcnn.pt


Optimizer ablation (Adam vs SGD)
We compare optimizers while keeping the architecture fixed.
We train the same SmallCNN baseline using SGD + momentum and compare to the Adam baseline.

In [17]:
%%writefile /content/project4/configs/cifar10_fast_smallcnn_sgd.yaml
run:
  name: cifar10_fast_smallcnn_sgd
  seed: 42

model:
  model_name: SmallCNN
  num_classes: 10
  use_batchnorm: false
  dropout: 0.0

train:
  batch_size: 128
  learning_rate: 0.01      # typical SGD LR is higher than Adam
  num_epochs: 3
  optimizer: sgd
  weight_decay: 0.0
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs


Writing /content/project4/configs/cifar10_fast_smallcnn_sgd.yaml


In [18]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import CIFARPipeline
from src.classifier import Classifier

cfg = OmegaConf.load("configs/cifar10_fast_smallcnn_sgd.yaml")

set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

pipe = CIFARPipeline(cfg)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("model:", cfg.model.model_name, "optimizer:", cfg.train.optimizer, "lr:", cfg.train.learning_rate)

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved:", ckpt_path)


/content/project4
device: cuda
model: SmallCNN optimizer: sgd lr: 0.01
Epoch [1/3] Train Loss: 1.6426 Acc: 40.67% | Val Loss: 1.2997 Acc: 52.98%
Epoch [2/3] Train Loss: 1.1317 Acc: 59.88% | Val Loss: 1.0294 Acc: 63.60%
Epoch [3/3] Train Loss: 0.8968 Acc: 68.39% | Val Loss: 0.8849 Acc: 68.26%
Test Loss: 0.8801 Acc: 69.22%
Model saved to /content/project4/outputs/cifar10_fast_smallcnn_sgd/checkpoints/cifar10_fast_smallcnn_sgd.pt
✅ saved: /content/project4/outputs/cifar10_fast_smallcnn_sgd/checkpoints/cifar10_fast_smallcnn_sgd.pt


Summary table (Part 1 fast experiments)
We collect the key results from our fast runs into a compact table for easy comparison.
These are development runs (few epochs) and help us decide which settings to use for longer “final” runs.

In [19]:
import pandas as pd

results = [
    {"run": "cifar10_fast_smallcnn", "model": "SmallCNN", "BN": False, "dropout": 0.0, "opt": "adam", "epochs": 3, "test_acc": 74.19},
    {"run": "cifar10_fast_smallcnn_bn", "model": "SmallCNN", "BN": True,  "dropout": 0.0, "opt": "adam", "epochs": 3, "test_acc": 72.98},
    {"run": "cifar10_fast_smallcnn_bn_dropout", "model": "SmallCNN", "BN": True,  "dropout": 0.3, "opt": "adam", "epochs": 3, "test_acc": 67.56},
    {"run": "cifar10_fast_deepcnn", "model": "DeepCNN",  "BN": True,  "dropout": 0.3, "opt": "adam", "epochs": 3, "test_acc": 66.52},
    {"run": "cifar10_fast_smallcnn_sgd", "model": "SmallCNN", "BN": False, "dropout": 0.0, "opt": "sgd",  "epochs": 3, "test_acc": 69.22},
]

df = pd.DataFrame(results).sort_values("test_acc", ascending=False)
df


Unnamed: 0,run,model,BN,dropout,opt,epochs,test_acc
0,cifar10_fast_smallcnn,SmallCNN,False,0.0,adam,3,74.19
1,cifar10_fast_smallcnn_bn,SmallCNN,True,0.0,adam,3,72.98
4,cifar10_fast_smallcnn_sgd,SmallCNN,False,0.0,sgd,3,69.22
2,cifar10_fast_smallcnn_bn_dropout,SmallCNN,True,0.3,adam,3,67.56
3,cifar10_fast_deepcnn,DeepCNN,True,0.3,adam,3,66.52


#PART 2

Loading checkpoints (for transfer learning)
To support Part 2 and Part 3, we add functions to load checkpoints.
We support:

1. full load (model + optimizer)

2. loading only model weights (useful when changing optimizer or resetting training).

In [20]:
%%writefile /content/project4/src/classifier.py
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18, resnet50
from pathlib import Path

from src.models import SmallCNN, DeepCNN


class Classifier:
    def __init__(self, cfg):
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        self.num_classes = cfg.model.num_classes
        self.learning_rate = cfg.train.learning_rate

        self.model = self._build_model(cfg).to(self.device)

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = self._build_optimizer(cfg.train.optimizer, cfg.train.weight_decay, cfg.train.momentum)

        self.train_losses, self.val_losses = [], []
        self.train_accs, self.val_accs = [], []

    def _build_model(self, cfg):
        name = cfg.model.model_name.lower().strip()

        use_bn = bool(getattr(cfg.model, "use_batchnorm", False))
        dropout = float(getattr(cfg.model, "dropout", 0.0))

        if name == "smallcnn":
            return SmallCNN(num_classes=cfg.model.num_classes, use_batchnorm=use_bn, dropout=dropout)

        if name == "deepcnn":
            return DeepCNN(num_classes=cfg.model.num_classes, use_batchnorm=use_bn, dropout=dropout)

        if name == "resnet18":
            model = resnet18(weights=None)
            model.fc = nn.Linear(model.fc.in_features, cfg.model.num_classes)
            return model

        if name == "resnet50":
            model = resnet50(weights=None)
            model.fc = nn.Linear(model.fc.in_features, cfg.model.num_classes)
            return model

        raise ValueError(f"Unknown model_name: {cfg.model.model_name}")

    def _build_optimizer(self, optimizer_name, weight_decay, momentum):
        if optimizer_name.lower() == "adam":
            return optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay=weight_decay)
        if optimizer_name.lower() == "sgd":
            return optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=momentum, weight_decay=weight_decay)
        raise ValueError(f"Unknown optimizer: {optimizer_name}")

    def train_epoch(self, train_loader):
        self.model.train()
        running_loss, correct, total = 0.0, 0, 0

        for x, y in train_loader:
            x, y = x.to(self.device), y.to(self.device)

            self.optimizer.zero_grad()
            out = self.model(x)
            loss = self.criterion(out, y)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item() * x.size(0)
            pred = out.argmax(dim=1)
            total += y.size(0)
            correct += (pred == y).sum().item()

        return running_loss / total, 100.0 * correct / total

    @torch.no_grad()
    def evaluate(self, loader):
        self.model.eval()
        running_loss, correct, total = 0.0, 0, 0

        for x, y in loader:
            x, y = x.to(self.device), y.to(self.device)
            out = self.model(x)
            loss = self.criterion(out, y)

            running_loss += loss.item() * x.size(0)
            pred = out.argmax(dim=1)
            total += y.size(0)
            correct += (pred == y).sum().item()

        return running_loss / total, 100.0 * correct / total

    def train(self, train_loader, val_loader, num_epochs):
        for epoch in range(num_epochs):
            tr_loss, tr_acc = self.train_epoch(train_loader)
            va_loss, va_acc = self.evaluate(val_loader)

            self.train_losses.append(tr_loss)
            self.val_losses.append(va_loss)
            self.train_accs.append(tr_acc)
            self.val_accs.append(va_acc)

            print(
                f"Epoch [{epoch+1}/{num_epochs}] "
                f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.2f}% | "
                f"Val Loss: {va_loss:.4f} Acc: {va_acc:.2f}%"
            )

    def save_model(self, path):
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        torch.save({
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "train_losses": self.train_losses,
            "val_losses": self.val_losses,
            "train_accs": self.train_accs,
            "val_accs": self.val_accs,
        }, path)
        print(f"Model saved to {path}")

    def load_model(self, path, load_optimizer: bool = True):
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint["model_state_dict"])
        if load_optimizer and "optimizer_state_dict" in checkpoint:
            self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

        self.train_losses = checkpoint.get("train_losses", [])
        self.val_losses = checkpoint.get("val_losses", [])
        self.train_accs = checkpoint.get("train_accs", [])
        self.val_accs = checkpoint.get("val_accs", [])

        print(f"✅ Loaded checkpoint from {path} (optimizer loaded={load_optimizer})")

    def load_weights_only(self, path):
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint["model_state_dict"])
        print(f"✅ Loaded model weights from {path} (optimizer NOT loaded)")


Overwriting /content/project4/src/classifier.py


In [21]:
import importlib, src.classifier
importlib.reload(src.classifier)
from src.classifier import Classifier
print("✅ Classifier reloaded with load_model()")


✅ Classifier reloaded with load_model()


Part 2 sanity: Pretrain on CIFAR-100
We pretrain a model on an external dataset (CIFAR-100) for a few epochs and save a checkpoint.
This checkpoint will be used for fine-tuning on CIFAR-10.

In [22]:
%%writefile /content/project4/configs/cifar100_fast_pretrain_resnet18.yaml
run:
  name: cifar100_fast_pretrain_resnet18
  seed: 42

model:
  model_name: ResNet18
  num_classes: 100

train:
  batch_size: 128
  learning_rate: 0.001
  num_epochs: 3
  optimizer: adam
  weight_decay: 0.0001
  momentum: 0.9

data:
  dataset: cifar100
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs


Writing /content/project4/configs/cifar100_fast_pretrain_resnet18.yaml


Pretraining on CIFAR-100 (external dataset)
We train ResNet18 from scratch on CIFAR-100 for a few epochs and save the checkpoint.
This checkpoint will be used as initialization for fine-tuning on CIFAR-10 in the next step.

In [23]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import CIFARPipeline
from src.classifier import Classifier
import torch

print("cuda available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("gpu:", torch.cuda.get_device_name(0))

cfg = OmegaConf.load("configs/cifar100_fast_pretrain_resnet18.yaml")

set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

pipe = CIFARPipeline(cfg)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("dataset:", cfg.data.dataset, "| model:", cfg.model.model_name, "| classes:", cfg.model.num_classes)

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"[CIFAR-100] Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved pretrain checkpoint:", ckpt_path)


/content/project4
cuda available: True
gpu: Tesla T4


100%|██████████| 169M/169M [00:03<00:00, 43.7MB/s]


device: cuda
dataset: cifar100 | model: ResNet18 | classes: 100




Epoch [1/3] Train Loss: 3.5202 Acc: 16.89% | Val Loss: 3.0709 Acc: 24.96%
Epoch [2/3] Train Loss: 2.8075 Acc: 28.89% | Val Loss: 2.7023 Acc: 30.74%
Epoch [3/3] Train Loss: 2.4137 Acc: 36.84% | Val Loss: 2.4659 Acc: 35.64%
[CIFAR-100] Test Loss: 2.4841 Acc: 35.64%
Model saved to /content/project4/outputs/cifar100_fast_pretrain_resnet18/checkpoints/cifar100_fast_pretrain_resnet18.pt
✅ saved pretrain checkpoint: /content/project4/outputs/cifar100_fast_pretrain_resnet18/checkpoints/cifar100_fast_pretrain_resnet18.pt


Load pretrained backbone (ignore classifier head)
CIFAR-100 pretraining produces a ResNet18 with a 100-class final layer.
For CIFAR-10 fine-tuning we reuse the pretrained backbone weights and ignore the final fc layer so we can train a new 10-class head.

In [24]:
%%writefile /content/project4/src/transfer.py
import torch


def load_resnet_backbone_ignoring_fc(model, checkpoint_path: str, device="cpu"):
    """
    Load ResNet weights from a checkpoint but ignore the final classifier layer (fc.*).
    Works for ResNet18/50 where classifier is named 'fc'.
    """
    ckpt = torch.load(checkpoint_path, map_location=device)
    state = ckpt["model_state_dict"] if "model_state_dict" in ckpt else ckpt

    # Remove classifier head weights
    filtered = {k: v for k, v in state.items() if not k.startswith("fc.")}

    missing, unexpected = model.load_state_dict(filtered, strict=False)

    return missing, unexpected


Writing /content/project4/src/transfer.py


Fine-tune config (CIFAR-10)
We fine-tune ResNet18 on CIFAR-10, initializing from the CIFAR-100 pretrained backbone.
We train for a few epochs and compare against training from scratch.

In [25]:
%%writefile /content/project4/configs/cifar10_fast_finetune_from_cifar100_resnet18.yaml
run:
  name: cifar10_fast_finetune_from_cifar100_resnet18
  seed: 42

model:
  model_name: ResNet18
  num_classes: 10

train:
  batch_size: 128
  learning_rate: 0.001
  num_epochs: 3
  optimizer: adam
  weight_decay: 0.0001
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs


Writing /content/project4/configs/cifar10_fast_finetune_from_cifar100_resnet18.yaml


Fine-tune on CIFAR-10 (starting from CIFAR-100 pretrained weights)
We load the pretrained ResNet18 backbone weights (excluding the classifier head), then fine-tune on CIFAR-10.
We save the fine-tuned checkpoint and report test accuracy.

In [26]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import CIFARPipeline
from src.classifier import Classifier
from src.transfer import load_resnet_backbone_ignoring_fc

pretrain_ckpt = "/content/project4/outputs/cifar100_fast_pretrain_resnet18/checkpoints/cifar100_fast_pretrain_resnet18.pt"

cfg = OmegaConf.load("configs/cifar10_fast_finetune_from_cifar100_resnet18.yaml")

set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

pipe = CIFARPipeline(cfg)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("Finetune model:", cfg.model.model_name, "classes:", cfg.model.num_classes)
print("Loading backbone from:", pretrain_ckpt)

missing, unexpected = load_resnet_backbone_ignoring_fc(clf.model, pretrain_ckpt, device=clf.device)
print("missing keys (expected to include fc.*):", [k for k in missing if k.startswith("fc.")][:5])
print("unexpected keys:", unexpected)

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"[CIFAR-10 finetune] Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved finetuned checkpoint:", ckpt_path)


/content/project4
device: cuda
Finetune model: ResNet18 classes: 10
Loading backbone from: /content/project4/outputs/cifar100_fast_pretrain_resnet18/checkpoints/cifar100_fast_pretrain_resnet18.pt
missing keys (expected to include fc.*): ['fc.weight', 'fc.bias']
unexpected keys: []
Epoch [1/3] Train Loss: 1.0444 Acc: 62.72% | Val Loss: 0.9623 Acc: 65.74%
Epoch [2/3] Train Loss: 0.7800 Acc: 72.62% | Val Loss: 0.8003 Acc: 71.06%
Epoch [3/3] Train Loss: 0.6573 Acc: 76.84% | Val Loss: 0.7697 Acc: 72.78%
[CIFAR-10 finetune] Test Loss: 0.7880 Acc: 72.65%
Model saved to /content/project4/outputs/cifar10_fast_finetune_from_cifar100_resnet18/checkpoints/cifar10_fast_finetune_from_cifar100_resnet18.pt
✅ saved finetuned checkpoint: /content/project4/outputs/cifar10_fast_finetune_from_cifar100_resnet18/checkpoints/cifar10_fast_finetune_from_cifar100_resnet18.pt


Baseline for Part 2: ResNet18 from scratch on CIFAR-10
To measure the benefit of external pretraining, we train the same architecture (ResNet18) on CIFAR-10 from scratch using the same training schedule as the fine-tuning run.
We compare test accuracy: scratch vs pretrain→fine-tune.

In [27]:
%%writefile /content/project4/configs/cifar10_fast_scratch_resnet18.yaml
run:
  name: cifar10_fast_scratch_resnet18
  seed: 42

model:
  model_name: ResNet18
  num_classes: 10

train:
  batch_size: 128
  learning_rate: 0.001
  num_epochs: 3
  optimizer: adam
  weight_decay: 0.0001
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true
  mean: [0.4914, 0.4822, 0.4465]
  std:  [0.2023, 0.1994, 0.2010]

outputs:
  root: /content/project4/outputs


Writing /content/project4/configs/cifar10_fast_scratch_resnet18.yaml


In [28]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import CIFARPipeline
from src.classifier import Classifier

cfg = OmegaConf.load("configs/cifar10_fast_scratch_resnet18.yaml")

set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

pipe = CIFARPipeline(cfg)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("model:", cfg.model.model_name, "| scratch training")

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"[CIFAR-10 scratch ResNet18] Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved scratch checkpoint:", ckpt_path)


/content/project4
device: cuda
model: ResNet18 | scratch training
Epoch [1/3] Train Loss: 1.3851 Acc: 50.28% | Val Loss: 1.2931 Acc: 54.80%
Epoch [2/3] Train Loss: 0.9982 Acc: 64.87% | Val Loss: 1.0205 Acc: 63.92%
Epoch [3/3] Train Loss: 0.8220 Acc: 71.17% | Val Loss: 0.8787 Acc: 68.82%
[CIFAR-10 scratch ResNet18] Test Loss: 0.8920 Acc: 68.98%
Model saved to /content/project4/outputs/cifar10_fast_scratch_resnet18/checkpoints/cifar10_fast_scratch_resnet18.pt
✅ saved scratch checkpoint: /content/project4/outputs/cifar10_fast_scratch_resnet18/checkpoints/cifar10_fast_scratch_resnet18.pt


Part 2 comparison (Transfer Learning)
We compare training ResNet18 on CIFAR-10 from scratch vs fine-tuning ResNet18 initialized from a model pretrained on CIFAR-100.
This directly tests whether external pretraining improves performance on our dataset.

In [29]:
import pandas as pd

part2 = pd.DataFrame([
    {"setup": "Scratch", "model": "ResNet18", "pretrain": "None", "epochs": 3, "test_acc": 68.98},
    {"setup": "Pretrain→Fine-tune", "model": "ResNet18", "pretrain": "CIFAR-100", "epochs": 3, "test_acc": 72.65},
]).sort_values("test_acc", ascending=False)

part2


Unnamed: 0,setup,model,pretrain,epochs,test_acc
1,Pretrain→Fine-tune,ResNet18,CIFAR-100,3,72.65
0,Scratch,ResNet18,,3,68.98


#PART 3

ImageNet transfer pipeline
Pretrained ResNet50 expects ImageNet-style inputs.
We add a dedicated pipeline that resizes CIFAR-10 images to 224×224 and normalizes using ImageNet mean/std.
This pipeline will be used only for Part 3 transfer learning.

In [36]:
%%writefile /content/project4/src/data_pipeline.py
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split


class CIFARPipeline:
    """
    CIFAR-10 / CIFAR-100 pipeline for 32x32 training (Parts 1 & 2).
    """
    def __init__(self, cfg, val_split=0.1):
        self.cfg = cfg
        self.batch_size = cfg.train.batch_size
        self.num_workers = cfg.data.num_workers
        self.download = cfg.data.download
        self.val_split = val_split
        self.mean = cfg.data.mean
        self.std = cfg.data.std
        self.data_root = getattr(cfg.data, "data_root", "./data")

        dataset = cfg.data.dataset.lower().strip()
        if dataset == "cifar10":
            self.dataset_cls = torchvision.datasets.CIFAR10
        elif dataset == "cifar100":
            self.dataset_cls = torchvision.datasets.CIFAR100
        else:
            raise ValueError(f"Unsupported dataset: {cfg.data.dataset}")

        self.train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std),
        ])

        self.eval_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std),
        ])

    def create_loaders(self):
        full_train = self.dataset_cls(
            root=self.data_root, train=True, download=self.download, transform=self.train_transform
        )
        test_ds = self.dataset_cls(
            root=self.data_root, train=False, download=self.download, transform=self.eval_transform
        )

        val_size = int(len(full_train) * self.val_split)
        train_size = len(full_train) - val_size
        train_ds, val_ds = random_split(full_train, [train_size, val_size])

        val_ds.dataset.transform = self.eval_transform

        train_loader = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)
        val_loader = DataLoader(val_ds, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)
        test_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)

        return train_loader, val_loader, test_loader


class ImageNetTransferPipeline:
    """
    CIFAR-10 pipeline adapted for ImageNet-pretrained networks (Part 3).
    - Resize to 224x224
    - ImageNet normalization
    """
    IMAGENET_MEAN = [0.485, 0.456, 0.406]
    IMAGENET_STD  = [0.229, 0.224, 0.225]

    def __init__(self, cfg, val_split=0.1, image_size=224):
        self.cfg = cfg
        self.batch_size = cfg.train.batch_size
        self.num_workers = cfg.data.num_workers
        self.download = cfg.data.download
        self.val_split = val_split
        self.data_root = getattr(cfg.data, "data_root", "./data")
        self.image_size = image_size

        if cfg.data.dataset.lower().strip() != "cifar10":
            raise ValueError("ImageNetTransferPipeline is intended for CIFAR-10 only.")

        self.train_transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(self.IMAGENET_MEAN, self.IMAGENET_STD),
        ])

        self.eval_transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            transforms.Normalize(self.IMAGENET_MEAN, self.IMAGENET_STD),
        ])

    def create_loaders(self):
        full_train = torchvision.datasets.CIFAR10(
            root=self.data_root, train=True, download=self.download, transform=self.train_transform
        )
        test_ds = torchvision.datasets.CIFAR10(
            root=self.data_root, train=False, download=self.download, transform=self.eval_transform
        )

        val_size = int(len(full_train) * self.val_split)
        train_size = len(full_train) - val_size
        train_ds, val_ds = random_split(full_train, [train_size, val_size])

        val_ds.dataset.transform = self.eval_transform

        train_loader = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)
        val_loader = DataLoader(val_ds, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)
        test_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)

        return train_loader, val_loader, test_loader


Overwriting /content/project4/src/data_pipeline.py


Pretrained ResNet50 option
We add a config flag model.pretrained: true/false.
When true and model_name is ResNet50, we load ImageNet pretrained weights and replace the final FC layer with 10 classes.

In [37]:
%%writefile /content/project4/src/classifier.py
import torch
import torch.nn as nn
import torch.optim as optim
from pathlib import Path

from torchvision.models import resnet18, resnet50, ResNet50_Weights
from src.models import SmallCNN, DeepCNN


class Classifier:
    def __init__(self, cfg):
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        self.num_classes = cfg.model.num_classes
        self.learning_rate = cfg.train.learning_rate

        self.model = self._build_model(cfg).to(self.device)

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = self._build_optimizer(cfg.train.optimizer, cfg.train.weight_decay, cfg.train.momentum)

        self.train_losses, self.val_losses = [], []
        self.train_accs, self.val_accs = [], []

    def _build_model(self, cfg):
        name = cfg.model.model_name.lower().strip()
        pretrained = bool(getattr(cfg.model, "pretrained", False))

        # Optional fields for custom CNNs
        use_bn = bool(getattr(cfg.model, "use_batchnorm", False))
        dropout = float(getattr(cfg.model, "dropout", 0.0))

        if name == "smallcnn":
            return SmallCNN(num_classes=cfg.model.num_classes, use_batchnorm=use_bn, dropout=dropout)

        if name == "deepcnn":
            return DeepCNN(num_classes=cfg.model.num_classes, use_batchnorm=use_bn, dropout=dropout)

        if name == "resnet18":
            model = resnet18(weights=None)
            model.fc = nn.Linear(model.fc.in_features, cfg.model.num_classes)
            return model

        if name == "resnet50":
            if pretrained:
                model = resnet50(weights=ResNet50_Weights.DEFAULT)
            else:
                model = resnet50(weights=None)
            model.fc = nn.Linear(model.fc.in_features, cfg.model.num_classes)
            return model

        raise ValueError(f"Unknown model_name: {cfg.model.model_name}")

    def _build_optimizer(self, optimizer_name, weight_decay, momentum):
        if optimizer_name.lower() == "adam":
            return optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay=weight_decay)
        if optimizer_name.lower() == "sgd":
            return optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=momentum, weight_decay=weight_decay)
        raise ValueError(f"Unknown optimizer: {optimizer_name}")

    def train_epoch(self, train_loader):
        self.model.train()
        running_loss, correct, total = 0.0, 0, 0

        for x, y in train_loader:
            x, y = x.to(self.device), y.to(self.device)

            self.optimizer.zero_grad()
            out = self.model(x)
            loss = self.criterion(out, y)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item() * x.size(0)
            pred = out.argmax(dim=1)
            total += y.size(0)
            correct += (pred == y).sum().item()

        return running_loss / total, 100.0 * correct / total

    @torch.no_grad()
    def evaluate(self, loader):
        self.model.eval()
        running_loss, correct, total = 0.0, 0, 0

        for x, y in loader:
            x, y = x.to(self.device), y.to(self.device)
            out = self.model(x)
            loss = self.criterion(out, y)

            running_loss += loss.item() * x.size(0)
            pred = out.argmax(dim=1)
            total += y.size(0)
            correct += (pred == y).sum().item()

        return running_loss / total, 100.0 * correct / total

    def train(self, train_loader, val_loader, num_epochs):
        for epoch in range(num_epochs):
            tr_loss, tr_acc = self.train_epoch(train_loader)
            va_loss, va_acc = self.evaluate(val_loader)

            self.train_losses.append(tr_loss)
            self.val_losses.append(va_loss)
            self.train_accs.append(tr_acc)
            self.val_accs.append(va_acc)

            print(
                f"Epoch [{epoch+1}/{num_epochs}] "
                f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.2f}% | "
                f"Val Loss: {va_loss:.4f} Acc: {va_acc:.2f}%"
            )

    def save_model(self, path):
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        torch.save({
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "train_losses": self.train_losses,
            "val_losses": self.val_losses,
            "train_accs": self.train_accs,
            "val_accs": self.val_accs,
        }, path)
        print(f"Model saved to {path}")

    def load_model(self, path, load_optimizer: bool = True):
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint["model_state_dict"])
        if load_optimizer and "optimizer_state_dict" in checkpoint:
            self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        self.train_losses = checkpoint.get("train_losses", [])
        self.val_losses = checkpoint.get("val_losses", [])
        self.train_accs = checkpoint.get("train_accs", [])
        self.val_accs = checkpoint.get("val_accs", [])
        print(f"✅ Loaded checkpoint from {path} (optimizer loaded={load_optimizer})")


Overwriting /content/project4/src/classifier.py


In [38]:
import importlib, src.data_pipeline, src.classifier
importlib.reload(src.data_pipeline)
importlib.reload(src.classifier)
print("✅ Reloaded data_pipeline + classifier")


✅ Reloaded data_pipeline + classifier


Fast ImageNet transfer experiment (ResNet50)
We fine-tune an ImageNet-pretrained ResNet50 on CIFAR-10 for a few epochs using 224×224 resized inputs.
This validates Part 3 before running longer training.

In [39]:
%%writefile /content/project4/configs/cifar10_fast_resnet50_imagenet.yaml
run:
  name: cifar10_fast_resnet50_imagenet
  seed: 42

model:
  model_name: ResNet50
  num_classes: 10
  pretrained: true

train:
  batch_size: 64          # ResNet50 + 224x224 uses more memory
  learning_rate: 0.0001   # smaller LR for fine-tuning pretrained model
  num_epochs: 3
  optimizer: adam
  weight_decay: 0.0001
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true

outputs:
  root: /content/project4/outputs


Overwriting /content/project4/configs/cifar10_fast_resnet50_imagenet.yaml


Fine-tune ImageNet-pretrained ResNet50 (fast run)
We train the new classification head and fine-tune the network for a few epochs on CIFAR-10 using ImageNet-style preprocessing.
We report test accuracy and save a checkpoint.

#runtime ~24 min

In [40]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import ImageNetTransferPipeline
from src.classifier import Classifier
import torch

cfg = OmegaConf.load("configs/cifar10_fast_resnet50_imagenet.yaml")
set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

print("cuda available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("gpu:", torch.cuda.get_device_name(0))

pipe = ImageNetTransferPipeline(cfg, image_size=224)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("model:", cfg.model.model_name, "| pretrained:", cfg.model.pretrained)

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"[ResNet50 ImageNet FT] Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved:", ckpt_path)


/content/project4
cuda available: True
gpu: Tesla T4
Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 179MB/s]


device: cuda
model: ResNet50 | pretrained: True
Epoch [1/3] Train Loss: 0.3464 Acc: 89.40% | Val Loss: 0.1329 Acc: 95.68%
Epoch [2/3] Train Loss: 0.0893 Acc: 97.15% | Val Loss: 0.1586 Acc: 94.86%
Epoch [3/3] Train Loss: 0.0513 Acc: 98.27% | Val Loss: 0.1258 Acc: 95.94%
[ResNet50 ImageNet FT] Test Loss: 0.1323 Acc: 95.86%
Model saved to /content/project4/outputs/cifar10_fast_resnet50_imagenet/checkpoints/cifar10_fast_resnet50_imagenet.pt
✅ saved: /content/project4/outputs/cifar10_fast_resnet50_imagenet/checkpoints/cifar10_fast_resnet50_imagenet.pt


Baseline for Part 3: ResNet50 from scratch (no ImageNet weights)
We train ResNet50 on CIFAR-10 with the same ImageNet-style preprocessing (224×224 + ImageNet normalization), but without pretrained weights.
This provides a baseline to quantify the benefit of ImageNet pretraining.

In [41]:
%%writefile /content/project4/configs/cifar10_fast_resnet50_scratch_imagenetpipe.yaml
run:
  name: cifar10_fast_resnet50_scratch_imagenetpipe
  seed: 42

model:
  model_name: ResNet50
  num_classes: 10
  pretrained: false

train:
  batch_size: 64
  learning_rate: 0.001
  num_epochs: 1
  optimizer: adam
  weight_decay: 0.0001
  momentum: 0.9

data:
  dataset: cifar10
  data_root: /content/project4/data
  num_workers: 4
  download: true

outputs:
  root: /content/project4/outputs


Writing /content/project4/configs/cifar10_fast_resnet50_scratch_imagenetpipe.yaml


In [42]:
%cd /content/project4
from omegaconf import OmegaConf
from src.utils import set_seed, ensure_run_dirs
from src.data_pipeline import ImageNetTransferPipeline
from src.classifier import Classifier
import torch

cfg = OmegaConf.load("configs/cifar10_fast_resnet50_scratch_imagenetpipe.yaml")
set_seed(cfg.run.seed)
run_dir = ensure_run_dirs(cfg)

pipe = ImageNetTransferPipeline(cfg, image_size=224)
train_loader, val_loader, test_loader = pipe.create_loaders()

clf = Classifier(cfg)
print("device:", clf.device)
print("model:", cfg.model.model_name, "| pretrained:", cfg.model.pretrained)

clf.train(train_loader, val_loader, num_epochs=cfg.train.num_epochs)

test_loss, test_acc = clf.evaluate(test_loader)
print(f"[ResNet50 Scratch] Test Loss: {test_loss:.4f} Acc: {test_acc:.2f}%")

ckpt_path = f"{run_dir}/checkpoints/{cfg.run.name}.pt"
clf.save_model(ckpt_path)
print("✅ saved:", ckpt_path)


/content/project4
device: cuda
model: ResNet50 | pretrained: False
Epoch [1/1] Train Loss: 1.5812 Acc: 42.30% | Val Loss: 1.3433 Acc: 55.52%
[ResNet50 Scratch] Test Loss: 1.3331 Acc: 54.72%
Model saved to /content/project4/outputs/cifar10_fast_resnet50_scratch_imagenetpipe/checkpoints/cifar10_fast_resnet50_scratch_imagenetpipe.pt
✅ saved: /content/project4/outputs/cifar10_fast_resnet50_scratch_imagenetpipe/checkpoints/cifar10_fast_resnet50_scratch_imagenetpipe.pt


Part 3 comparison (ImageNet pretrained ResNet50)
We compare a ResNet50 trained from scratch vs a ResNet50 initialized from ImageNet pretrained weights, using the same ImageNet-style preprocessing.
This highlights the benefit of pretrained representations.

In [43]:
import pandas as pd

part3 = pd.DataFrame([
    {"setup": "Scratch", "model": "ResNet50", "pretrain": "None", "epochs": 1, "image_size": 224, "test_acc": 54.72},
    {"setup": "Fine-tune", "model": "ResNet50", "pretrain": "ImageNet", "epochs": 3, "image_size": 224, "test_acc": 95.86},
]).sort_values("test_acc", ascending=False)

part3


Unnamed: 0,setup,model,pretrain,epochs,image_size,test_acc
1,Fine-tune,ResNet50,ImageNet,3,224,95.86
0,Scratch,ResNet50,,1,224,54.72


Step 27 — Overall summary (Parts 1–3)
We collect representative results across the project:

Part 1: architecture/BN/regularization/optimizer comparisons

Part 2: external pretraining (CIFAR-100 → CIFAR-10)

Part 3: ImageNet pretrained ResNet50 vs scratch

In [44]:
summary = pd.DataFrame([
    # Part 1 (fast)
    {"part": "Part 1", "experiment": "Best fast run", "model": "SmallCNN", "details": "BN=False, dropout=0.0, Adam, 3 epochs", "test_acc": 74.19},

    # Part 2 (fast)
    {"part": "Part 2", "experiment": "Scratch baseline", "model": "ResNet18", "details": "CIFAR-10 scratch, 3 epochs", "test_acc": 68.98},
    {"part": "Part 2", "experiment": "Transfer learning", "model": "ResNet18", "details": "Pretrain CIFAR-100 → finetune CIFAR-10, 3 epochs", "test_acc": 72.65},

    # Part 3
    {"part": "Part 3", "experiment": "Scratch baseline", "model": "ResNet50", "details": "ImageNet-style pipeline (224), scratch, 1 epoch", "test_acc": 54.72},
    {"part": "Part 3", "experiment": "ImageNet transfer", "model": "ResNet50", "details": "ImageNet pretrained → finetune CIFAR-10 (224), 3 epochs", "test_acc": 95.86},
]).sort_values(["part", "test_acc"], ascending=[True, False])

summary


Unnamed: 0,part,experiment,model,details,test_acc
0,Part 1,Best fast run,SmallCNN,"BN=False, dropout=0.0, Adam, 3 epochs",74.19
2,Part 2,Transfer learning,ResNet18,"Pretrain CIFAR-100 → finetune CIFAR-10, 3 epochs",72.65
1,Part 2,Scratch baseline,ResNet18,"CIFAR-10 scratch, 3 epochs",68.98
4,Part 3,ImageNet transfer,ResNet50,"ImageNet pretrained → finetune CIFAR-10 (224),...",95.86
3,Part 3,Scratch baseline,ResNet50,"ImageNet-style pipeline (224), scratch, 1 epoch",54.72
