In [4]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# HYPERPARAMETERS
num_epochs = 200        # passes over the training loader
batch_size = 128        # mini-batch size shared by both loaders
learning_rate = 0.001   # step size handed to the optimizer
num_classes = 10        # width of the classifier output

# Load DATASET: CIFAR-10
# Per-channel mean / std applied by Normalize to both splits.
_CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
_CIFAR10_STD = (0.2023, 0.1994, 0.2010)
_DATA_ROOT = "./data"

# Training pipeline: random flip and padded random crop, then tensor
# conversion and normalisation.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CIFAR10_MEAN, std=_CIFAR10_STD),
])

# Test pipeline: no augmentation, same normalisation as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=_CIFAR10_MEAN, std=_CIFAR10_STD),
])

# Both splits live under the same root and are downloaded when missing.
train_dataset = torchvision.datasets.CIFAR10(
    _DATA_ROOT, train=True, transform=transform_train, download=True
)
test_dataset = torchvision.datasets.CIFAR10(
    _DATA_ROOT, train=False, transform=transform_test, download=True
)

# Settings common to both loaders; only the training loader shuffles.
_LOADER_KWARGS = dict(batch_size=batch_size, num_workers=2)

train_loader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, **_LOADER_KWARGS
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, shuffle=False, **_LOADER_KWARGS
)


# BASELINE CNN FROM LECTURE NOTES (2 conv layers)
class BaselineCNN(nn.Module):
    """Two-convolution baseline classifier for 3-channel 32x32 images.

    Layout: Conv(3->8, 3x3, pad 1) -> ReLU -> MaxPool(2)
            -> Conv(8->8, 3x3, pad 1) -> ReLU -> MaxPool(2)
            -> Linear(8*8*8 -> n_classes).

    The linear layer is sized for an 8x8 feature map, i.e. a 32x32 input
    halved twice by the pooling layers; other spatial sizes will not match
    `fc1`.

    Args:
        n_classes: Number of output scores. When omitted, the module-level
            `num_classes` hyperparameter is used, so `BaselineCNN()` keeps
            its previous behaviour.
    """

    def __init__(self, n_classes=None):
        super().__init__()
        if n_classes is None:
            # Fall back to the notebook-wide hyperparameter.
            n_classes = num_classes

        # Conv1 -> ReLU -> Pool
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3, padding=1)  # 8×32×32
        self.pool1 = nn.MaxPool2d(2, 2)                         # 8×16×16
        # Conv2 -> ReLU -> Pool
        self.conv2 = nn.Conv2d(8, 8, kernel_size=3, padding=1)  # 8×16×16
        self.pool2 = nn.MaxPool2d(2, 2)                         # 8×8×8
        # Fully connected layer
        self.fc1 = nn.Linear(8 * 8 * 8, n_classes)              # 512 → n_classes

    def forward(self, x):
        """Return raw class scores (no softmax) for a batch of images."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # flatten() copies when the activations are not contiguous (e.g.
        # channels-last inputs), where view() would raise instead.
        x = torch.flatten(x, start_dim=1)
        return self.fc1(x)


# Build the network, move its weights to the selected device, and show
# the layer summary.
model = BaselineCNN()
model.to(device)
print(model)


# LOSS & OPTIMIZER
# Cross-entropy on the raw class scores; Adam updates every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# TRAINING LOOP
def train(model, loader, criterion, optimizer, epochs):
    """Train `model` for `epochs` passes over `loader`.

    Each batch is moved to the device that holds the model's parameters, so
    the function does not require inputs and weights to be placed by the
    caller. Progress is printed on the first epoch, every tenth epoch after
    it, and the last epoch.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable yielding `(images, labels)` batches.
        criterion: Callable mapping `(outputs, labels)` to a scalar loss.
        optimizer: Optimizer already bound to the model's parameters.
        epochs: Number of passes over `loader`.

    Returns:
        `(final_loss, total_time)`: the mean of the per-batch losses in the
        last epoch (`None` when `epochs` is 0) and the elapsed wall-clock
        time in seconds.

    Raises:
        ValueError: If `loader` yields no batches, since no epoch loss can
            be computed.
    """
    model.train()

    # Follow the model's own placement; fall back to the module-level
    # `device` only for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    final_loss = None

    for epoch in range(epochs):
        running_loss = 0.0
        batches = 0

        for images, labels in loader:
            images = images.to(target_device)
            labels = labels.to(target_device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            batches += 1

        if batches == 0:
            raise ValueError("train() received a loader that yielded no batches")

        epoch_loss = running_loss / batches
        final_loss = epoch_loss

        if epoch % 10 == 0 or epoch == epochs - 1:
            print(f"Epoch [{epoch+1}/{epochs}] Loss: {epoch_loss:.4f}")

    total_time = time.perf_counter() - start
    return final_loss, total_time


# Train and Evaluate
def evaluate(model, loader):
    """Return the top-1 accuracy of `model` over `loader` as a percentage.

    The model is put in eval mode (and left there) and gradients are
    disabled for the whole pass. Each batch is moved to the device that
    holds the model's parameters.

    Args:
        model: Module producing one score per class for each sample.
        loader: Iterable yielding `(images, labels)` batches.

    Returns:
        Accuracy in the range 0-100, or 0.0 when `loader` yields no samples.
    """
    model.eval()

    # Follow the model's own placement; fall back to the module-level
    # `device` only for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    correct, total = 0, 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(target_device)
            labels = labels.to(target_device)
            outputs = model(images)
            # Index of the highest score along the class dimension.
            _, predicted = torch.max(outputs, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return 100 * correct / total

# Train the baseline, then measure accuracy on the held-out test split.
print("\nTraining Baseline CNN (Question 1a)...")
final_loss, train_time = train(model, train_loader, criterion, optimizer, num_epochs)

accuracy = evaluate(model, test_loader)

print("\n====== RESULTS: QUESTION 1(a) ======")
print(f"Training Time: {train_time:.2f} seconds")
# Label the loss with the number of epochs actually run, so the text
# cannot drift from the `num_epochs` hyperparameter.
print(f"Final Training Loss (epoch {num_epochs}): {final_loss:.4f}")
print(f"Test Accuracy: {accuracy:.2f}%")
print("====================================")


# MODEL SIZE (PARAMETER COUNT)

# Count every scalar in every parameter tensor of the model.
total_params = sum(p.numel() for p in model.parameters())
print(f"\nTotal Parameters in Baseline CNN: {total_params}")


Using device: cuda
BaselineCNN(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=512, out_features=10, bias=True)
)

Training Baseline CNN (Question 1a)...
Epoch [1/200] Loss: 1.8163
Epoch [11/200] Loss: 1.3123
Epoch [21/200] Loss: 1.2343
Epoch [31/200] Loss: 1.2095
Epoch [41/200] Loss: 1.1877
Epoch [51/200] Loss: 1.1751
Epoch [61/200] Loss: 1.1646
Epoch [71/200] Loss: 1.1566
Epoch [81/200] Loss: 1.1478
Epoch [91/200] Loss: 1.1389
Epoch [101/200] Loss: 1.1420
Epoch [111/200] Loss: 1.1317
Epoch [121/200] Loss: 1.1230
Epoch [131/200] Loss: 1.1201
Epoch [141/200] Loss: 1.1204
Epoch [151/200] Loss: 1.1165
Epoch [161/200] Loss: 1.1146
Epoch [171/200] Loss: 1.1136
Epoch [181/200] Loss: 1.1133
Epo