## Architecture

A VGG-style plain CNN with standard 3×3 convolutions, batch normalisation, and max pooling.

```
Input  (3 × 32 × 32)
  Conv(3→32,  3×3) + BN + ReLU
  Conv(32→32, 3×3) + BN + ReLU + MaxPool(2)   → 32 × 16 × 16
  Conv(32→64, 3×3) + BN + ReLU
  Conv(64→64, 3×3) + BN + ReLU + MaxPool(2)   → 64 × 8 × 8
  Conv(64→96, 3×3) + BN + ReLU + MaxPool(2)   → 96 × 4 × 4
  GlobalAvgPool → Dropout(0.3) → Linear(96→10)
```

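Five convolutional layers across three spatial scales. Each layer uses a 3×3 kernel with
same-padding, followed by batch normalisation and ReLU. Spatial resolution is halved three
times via 2×2 max pooling (32 → 16 → 8 → 4). Global average pooling then reduces the
96 × 4 × 4 feature map to a 96-dimensional vector, so the classifier is a single
Linear(96→10) layer rather than a large fully connected layer.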

In [None]:
import sys, os
sys.path.insert(0, os.path.dirname(os.path.abspath('shared.py')))

import torch
import torch.nn as nn
from source import (
    get_dataloaders, train, evaluate,
    count_parameters, model_size_kb, print_summary,
    SEED, evaluate_pytorch
)

# Seed PyTorch's global RNG with the imported SEED before any model or data
# loader is created, so the statements below start from a fixed RNG state.
torch.manual_seed(SEED)
# Use CUDA when it is available, otherwise run on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'PyTorch {torch.__version__} | Device: {DEVICE}')

# get_dataloaders() is a project helper; its two return values are used as the
# training and test loaders below — presumably CIFAR-10, TODO confirm against
# the helper's source.
trainloader, testloader = get_dataloaders()

  from .autonotebook import tqdm as notebook_tqdm


PyTorch 2.10.0 | Device: cpu


  entry = pickle.load(f, encoding="latin1")


In [3]:
class PlainCNN(nn.Module):
    """Plain VGG-style convolutional classifier sized for CIFAR-10.

    The feature extractor stacks five Conv3×3 → BatchNorm → ReLU units with
    output widths 32, 32, 64, 64 and 96; a 2×2 max pool follows the second,
    fourth and fifth unit. The head is global average pooling, dropout
    (p=0.3) and a single linear layer. There are no skip connections or
    other special modules. With the default ``num_classes=10`` the model
    has 122,218 parameters.

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # One row per conv unit: (in_channels, out_channels, pool_after).
        layout = (
            (3, 32, False),    # scale 1 — 32×32
            (32, 32, True),    # → 16×16
            (32, 64, False),   # scale 2 — 16×16
            (64, 64, True),    # → 8×8
            (64, 96, True),    # scale 3 — 8×8, → 4×4
        )

        stages = []
        for c_in, c_out, pool_after in layout:
            stages.append(self._conv_bn_relu(c_in, c_out))
            if pool_after:
                stages.append(nn.MaxPool2d(2))

        self.features = nn.Sequential(*stages)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(96, num_classes)

    @staticmethod
    def _conv_bn_relu(in_ch, out_ch):
        """Build one 3×3 same-padding conv unit followed by BatchNorm and ReLU."""
        # The conv bias is omitted because BatchNorm supplies its own shift.
        conv = nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False)
        norm = nn.BatchNorm2d(out_ch)
        return nn.Sequential(conv, norm, nn.ReLU(inplace=True))

    def forward(self, x):
        """Map a batch of 3-channel images to ``(batch, num_classes)`` logits."""
        feats = self.features(x)
        pooled = self.pool(feats)
        # (batch, 96, 1, 1) → (batch, 96)
        flat = pooled.view(pooled.shape[0], -1)
        return self.fc(self.dropout(flat))


# Build the network, then move it onto the selected device.
model = PlainCNN()
model = model.to(DEVICE)

# Measure the footprint and enforce the budget (< 125,000 parameters and
# < 500 KB) before any training time is spent.
n, kb = count_parameters(model), model_size_kb(model)
print(f'Parameters: {n:,}')
print(f'Model size: {kb:.1f} KB')
assert n < 125_000, 'Constraint violated before training!'
assert kb < 500, 'Constraint violated before training!'

Parameters: 122,218
Model size: 479.7 KB


In [None]:
# Train the model with the project's train() helper and keep its return value
# (named final_acc; presumably the final test accuracy — confirm against the
# helper) for the summary printed below.
# NOTE(review): save_path here is 'best_plaincnn.pth', while the evaluation
# cell loads "../models/best_plaincnn.pth". Confirm that train() resolves
# save_path to that location; otherwise a stale checkpoint may be evaluated.
final_acc = train(
    model, trainloader, testloader, DEVICE,
    save_path='best_plaincnn.pth',
)
print_summary(model, final_acc, label='PlainCNN')

In [6]:
# Load the best checkpoint and run the final evaluation on the test loader.
# The checkpoint tensors are deserialised onto the CPU (map_location="cpu");
# load_state_dict then copies them into the model's existing parameters.
# weights_only=True restricts unpickling to tensors and plain containers.
# NOTE(review): this path differs from the save_path='best_plaincnn.pth'
# passed to train() in the training cell — confirm both refer to the same file.
model.load_state_dict(torch.load("../models/best_plaincnn.pth", map_location="cpu", weights_only=True))
final_acc = evaluate_pytorch(model, testloader, DEVICE)
# NOTE(review): final_size is not read in this cell — presumably used by a
# later cell; verify.
final_size = model_size_kb(model)

# Print the accuracy between two 50-character '=' rules.
print(f"\n{'='*50}")
print(f"  Test Accuracy: {final_acc:.2f}%")
print(f"{'='*50}")


  Test Accuracy: 87.92%
