In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from torch.amp import autocast, GradScaler

#### Section 1: Data loading and augmentation

In [6]:
# Per-channel mean / std handed to Normalize for both the train and test pipelines.
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2470, 0.2435, 0.2616)

# Training images get random augmentation before tensor conversion + normalization.
train_transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]
)

# Test images are only converted and normalized (no augmentation).
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]
)

# Both splits live under ./data and are downloaded on first use.
dataset_kwargs = dict(root='./data', download=True)
trainset = torchvision.datasets.CIFAR10(train=True, transform=train_transform, **dataset_kwargs)
testset = torchvision.datasets.CIFAR10(train=False, transform=test_transform, **dataset_kwargs)

# The loaders share every setting except shuffling (train only).
batch_size = 512
loader_kwargs = dict(batch_size=batch_size, num_workers=4, pin_memory=True)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)


classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Sanity check: pull a single training batch and report its tensor shapes.
data_iter = iter(trainloader)
images, labels = next(data_iter)
print("Batch of images shape:", images.shape)
print("Batch of labels shape:", labels.shape)

Batch of images shape: torch.Size([512, 3, 32, 32])
Batch of labels shape: torch.Size([512])


#### Section 2: Model definition

In [None]:
class IntermediateBlock(nn.Module):
    """Parallel 3x3 conv branches mixed by input-dependent softmax weights.

    Every branch is ``Conv2d(3x3, padding=1, bias=False) -> BatchNorm2d -> ReLU``
    applied to the same input. A linear layer maps the per-channel spatial mean
    of the input to one coefficient per branch (softmax-normalized per sample),
    and the block returns the coefficient-weighted sum of the branch outputs.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by each branch.
        num_convs: number of parallel branches (must be >= 1).

    Raises:
        ValueError: if ``num_convs`` is smaller than 1.
    """

    def __init__(self, in_channels, out_channels, num_convs):
        super().__init__()
        if num_convs < 1:
            # Fail at construction time instead of deep inside forward().
            raise ValueError(f"num_convs must be >= 1, got {num_convs}")

        self.convs = nn.ModuleList()
        for _ in range(num_convs):
            conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            nn.init.kaiming_normal_(conv[0].weight, mode='fan_out', nonlinearity='relu')
            self.convs.append(conv)

        # Maps the (N, in_channels) channel means to one logit per branch.
        self.weight_generator = nn.Linear(in_channels, num_convs)

    def forward(self, x):
        """Return the weighted sum of all branch outputs.

        Args:
            x: feature map of shape (N, in_channels, H, W).

        Returns:
            Tensor of shape (N, out_channels, H, W).
        """
        m = torch.mean(x, dim=[2, 3])
        weights = torch.softmax(self.weight_generator(m), dim=1)  # (N, num_convs)

        # Accumulate branch by branch instead of torch.stack(...) * weights:
        # stacking materializes an extra (N, num_convs, C, H, W) copy of every
        # branch output plus an equally large weighted product, which dominates
        # peak memory for wide, full-resolution feature maps.
        mixed = None
        for idx, conv in enumerate(self.convs):
            branch = conv(x) * weights[:, idx].view(-1, 1, 1, 1)
            mixed = branch if mixed is None else mixed + branch
        return mixed

class OutputBlock(nn.Module):
    """Classification head: spatial average followed by a small MLP.

    The MLP has two hidden stages (512 then 256 units), each made of
    Linear -> BatchNorm1d -> ReLU -> Dropout, and a final Linear layer
    producing 10 outputs.

    Args:
        in_channels: number of channels of the incoming feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        stages = []
        width = in_channels
        # (hidden units, dropout probability) for each hidden stage, in order.
        for hidden, drop_p in ((512, 0.4), (256, 0.3)):
            stages.extend(
                [
                    nn.Linear(width, hidden),
                    nn.BatchNorm1d(hidden),
                    nn.ReLU(),
                    nn.Dropout(drop_p),
                ]
            )
            width = hidden
        stages.append(nn.Linear(width, 10))
        self.fc = nn.Sequential(*stages)

    def forward(self, x):
        """Average ``x`` over its last two (spatial) dims and run the MLP."""
        pooled = x.mean(dim=(2, 3))
        return self.fc(pooled)

# Six IntermediateBlocks (four branches each) widen the features from 3 to
# 1024 channels; the OutputBlock then reduces them to the 10 class outputs.
model = nn.Sequential(
    *(
        IntermediateBlock(c_in, c_out, 4)
        for c_in, c_out in [
            (3, 128),
            (128, 256),
            (256, 512),
            (512, 512),
            (512, 512),
            (512, 1024),
        ]
    ),
    OutputBlock(1024),
)

#### Section 3: Training and evaluation

In [None]:
# Pick the best available accelerator: CUDA first, then Apple MPS, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")
model = model.to(device)

# Mixed precision (autocast + loss scaling) is only switched on for CUDA.
# The check must use `device.type`: a torch.device never compares equal to a
# plain string, so `device == 'cuda'` would leave autocast disabled everywhere.
use_amp = device.type == 'cuda'
scaler = GradScaler(device.type, enabled=use_amp)

num_epochs = 150

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.95, weight_decay=5e-4)
# T_0 / T_mult are counted in epochs: one 50-epoch cycle followed by one
# 100-epoch cycle covers the 150-epoch run exactly.
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=50, T_mult=2, eta_min=1e-4)
criterion = nn.CrossEntropyLoss(label_smoothing=0.2)

# train_loss holds one value per batch; train_acc / test_acc one value per epoch.
train_loss, train_acc, test_acc = [], [], []
best_acc = 0

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    total, correct = 0, 0

    for inputs, labels in tqdm(trainloader, desc=f'Epoch {epoch+1}/{num_epochs}'):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        with autocast(device_type=device.type, enabled=use_amp):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        # Unscale before clipping so the 0.5 threshold applies to true gradient norms.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        scaler.step(optimizer)
        scaler.update()

        train_loss.append(loss.item())
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    # Advance the LR schedule once per epoch (not per batch), so that the
    # warm-restart period T_0=50 really spans 50 epochs.
    scheduler.step()

    train_acc.append(100 * correct / total)

    # ---- evaluation pass on the test loader ----
    model.eval()
    total_test, correct_test = 0, 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total_test += labels.size(0)
            correct_test += predicted.eq(labels).sum().item()

    acc = 100 * correct_test / total_test
    test_acc.append(acc)
    best_acc = max(best_acc, acc)

    print(f"Epoch {epoch+1}: Train Acc: {train_acc[-1]:.2f}% | "
          f"Test Acc: {acc:.2f}% | Best: {best_acc:.2f}%")

Using device: mps


Epoch 1/150:   0%|          | 0/98 [00:18<?, ?it/s]


RuntimeError: MPS backend out of memory (MPS allocated: 34.30 GB, other allocations: 898.80 MB, max allowed: 36.27 GB). Tried to allocate 2.00 GB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

#### Visualizations

In [1]:
# Three side-by-side panels: raw per-batch loss, per-epoch accuracy, smoothed loss.
fig, (ax_loss, ax_acc, ax_smooth) = plt.subplots(1, 3, figsize=(15, 5))

colors = {'train_loss': 'maroon', 'train_acc': 'navy', 'test_acc': 'maroon', 'smoothed_loss': 'maroon'}

# Panel 1: raw training loss, one point per batch.
ax_loss.plot(train_loss, alpha=0.6, color=colors['train_loss'])
ax_loss.set_title('Training Loss')
ax_loss.set_xlabel('Batch')
ax_loss.set_ylabel('Loss')

# Panel 2: train vs. test accuracy, one point per epoch (values are percentages).
ax_acc.plot(train_acc, label='Train', color=colors['train_acc'])
ax_acc.plot(test_acc, label='Test', color=colors['test_acc'])
ax_acc.set_title('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.legend()

# Panel 3: moving average of the loss. The window is capped at the number of
# recorded batches: np.convolve(..., mode='valid') raises on an empty input and
# yields a meaningless result when the kernel is longer than the signal.
window = min(100, len(train_loss))
smoothed = (
    np.convolve(train_loss, np.ones(window) / window, mode='valid')
    if window
    else np.array([])
)
ax_smooth.plot(smoothed, color=colors['smoothed_loss'])
ax_smooth.set_title(f'Smoothed Loss ({window} batches)')
ax_smooth.set_xlabel('Batch')
ax_smooth.set_ylabel('Loss')

fig.tight_layout()

fig.savefig('training_metrics.png')
plt.show()

print("\nFinal Report:")
print(f"Highest Test Accuracy: {best_acc:.2f}%")

NameError: name 'plt' is not defined