# 1. Generalization

In [1]:
# Import necessary libraries and packages
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from typing import Type


In [13]:
# Global parameters
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
num_epochs = 10

## 1.1. Train the base model

In [5]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

    
    
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=1000):
        super(ResNet, self).__init__()
        self.in_planes = 64

        # Modified the kernel size, stride, and padding to match the original ResNet-18
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        # Changed to adaptive average pooling for flexibility
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

In [4]:
# Load and normalize the SVHN dataset
transform_svhn = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset_svhn = torchvision.datasets.SVHN(root='./data', split='train', download=True, transform=transform_svhn)
trainloader_svhn = DataLoader(trainset_svhn, batch_size=64, shuffle=True, num_workers=2)

testset_svhn = torchvision.datasets.SVHN(root='./data', split='test', download=True, transform=transform_svhn)
testloader_svhn = DataLoader(testset_svhn, batch_size=64, shuffle=False, num_workers=2)



# Load and normalize the MNIST dataset
transform_mnist = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize to be compatible with SVHN
    transforms.Grayscale(num_output_channels=3),  # Convert 1 channel grayscale to 3 channel grayscale
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

testset_mnist = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_mnist)
testloader_mnist = DataLoader(testset_mnist, batch_size=64, shuffle=False, num_workers=2)

Downloading http://ufldl.stanford.edu/housenumbers/train_32x32.mat to ./data/train_32x32.mat


100%|██████████| 182040794/182040794 [00:23<00:00, 7711289.32it/s] 


Downloading http://ufldl.stanford.edu/housenumbers/test_32x32.mat to ./data/test_32x32.mat


100%|██████████| 64275384/64275384 [00:11<00:00, 5776874.75it/s] 


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 200731510.78it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 50473205.76it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 68295656.86it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 12322463.63it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [5]:
def train(model, trainloader, criterion, optimizer, device, epochs):
    model.train()
    loss_values = []  # List to store loss values

    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Print the average loss of the entire epoch
        avg_loss = running_loss / len(trainloader)
        print(f'[{epoch + 1}] loss: {avg_loss:.3f}')
        loss_values.append(avg_loss)

    # Plot the training loss
    plt.figure(figsize=(10, 5))
    plt.plot(loss_values, label='Training Loss')
    plt.title('Training Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()




def evaluate(model, testloader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

In [31]:
# Setup and repeat Train
net = ResNet18()
net = net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

train(net, trainloader_svhn, criterion, optimizer, device, num_epochs)

[1,   200] loss: 1.991

[1,   400] loss: 0.685

[1,   600] loss: 0.517

[1,   800] loss: 0.456

[1,  1000] loss: 0.417

Accuracy of the network on the SVHN test images: 88.31%

[2,   200] loss: 0.290

[2,   400] loss: 0.290

[2,   600] loss: 0.294

[2,   800] loss: 0.280

[2,  1000] loss: 0.286

Accuracy of the network on the SVHN test images: 90.16%

[3,   200] loss: 0.183

[3,   400] loss: 0.173

[3,   600] loss: 0.190

[3,   800] loss: 0.184

[3,  1000] loss: 0.189

Accuracy of the network on the SVHN test images: 90.88%

[4,   200] loss: 0.109

[4,   400] loss: 0.112

[4,   600] loss: 0.115

[4,   800] loss: 0.111

[4,  1000] loss: 0.123

Accuracy of the network on the SVHN test images: 90.88%

[5,   200] loss: 0.063

[5,   400] loss: 0.059

[5,   600] loss: 0.068

[5,   800] loss: 0.073

[5,  1000] loss: 0.081

Accuracy of the network on the SVHN test images: 91.03%

[6,   200] loss: 0.039

[6,   400] loss: 0.038

[6,   600] loss: 0.036

[6,   800] loss: 0.036

[6,  1000] loss: 0.

In [32]:
# Test
svhn_accuracy = evaluate(net, testloader_svhn, device)
print(f'Accuracy of the network on the SVHN test images: {svhn_accuracy:.2f}%')

mnist_accuracy = evaluate(net, testloader_mnist, device)
print(f'Accuracy of the network on the MNIST test images: {mnist_accuracy:.2f}%')

Accuracy of the network on the SVHN test images: 91.50%


## 1.2. Enhance the generalization

### 1.2.1 Model architecture modification

### 1.2.2. Loss function

In [37]:
class LabelSmoothingCrossEntropyLoss(nn.Module):
    def __init__(self, smoothing=0.1):
        super(LabelSmoothingCrossEntropyLoss, self).__init__()
        self.smoothing = smoothing

    def forward(self, input, target):
        log_probs = F.log_softmax(input, dim=-1)

        # Compute the smoothed labels
        num_classes = input.size(-1)
        with torch.no_grad():
            true_dist = torch.zeros_like(log_probs)
            true_dist.fill_(self.smoothing / (num_classes - 1))
            true_dist.scatter_(1, target.data.unsqueeze(1), 1.0 - self.smoothing)

        # Return the average loss
        return torch.mean(torch.sum(-true_dist * log_probs, dim=-1))

In [38]:
# Setup and repeat Train
net = ResNet18()
net = net.to(device)
criterion = LabelSmoothingCrossEntropyLoss(smoothing=0.25)
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

train(net, trainloader_svhn, criterion, optimizer, device, num_epochs)

In [40]:
# Test
svhn_accuracy = evaluate(net, testloader_svhn, device)
print(f'Accuracy of the network on the SVHN test images: {svhn_accuracy:.2f}%')

mnist_accuracy = evaluate(net, testloader_mnist, device)
print(f'Accuracy of the network on the MNIST test images: {mnist_accuracy:.2f}%')

Accuracy of the network on the SVHN test images: 92.11%

Accuracy of the network on the MNIST test images: 64.97%


### 1.2.3 Data agumentaiton

In [19]:
from torchvision import transforms

batch_size = 64

# Define the augmentation pipeline
augmentation_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=10),  # Rotate +/- 10 degrees
    # 92.7 , 70.2
    # transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # 10% translation
    # 94.9 , 67.2
    transforms.RandomResizedCrop(size=(32, 32), scale=(0.8, 1.2)),  # Scaling between 80% and 120%
    # 95.1 , 67.9
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # Randomly change brightness, contrast, and saturation
    # 95.8 , 74.9
    # transforms.RandomPerspective(distortion_scale=0.5),  # perspective transformations
    # 95.9 , 67.0
    
    # Top gener: Color + Rotate + Size
    # 95.8 , 81.1
    # Top acc: Perspective + Color + Size
    # 96.2 , 76.4
    # Intersection: Color + Size
    # 96.4 , 72.1
    # All
    # 95.7 , 66.0
    
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Apply the augmentation pipeline only to the training data
trainloader_svhn = torch.utils.data.DataLoader(
    datasets.SVHN(root='./data', split='train', download=True, transform=augmentation_transforms),
    batch_size=batch_size, shuffle=True)

Using downloaded and verified file: ./data/train_32x32.mat


In [None]:
# Setup and repeat Train
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

train(net, trainloader_svhn, criterion, optimizer, device, num_epochs)

[1,   500] loss: 0.354
[1,  1000] loss: 0.329
[2,   500] loss: 0.301
[2,  1000] loss: 0.291
[3,   500] loss: 0.275
[3,  1000] loss: 0.274
[4,   500] loss: 0.251
[4,  1000] loss: 0.261
[5,   500] loss: 0.237
[5,  1000] loss: 0.237
[6,   500] loss: 0.230
[6,  1000] loss: 0.227


In [36]:
# Test
svhn_accuracy = evaluate(net, testloader_svhn, device)
print(f'Accuracy of the network on the SVHN test images: {svhn_accuracy:.2f}%')

mnist_accuracy = evaluate(net, testloader_mnist, device)
print(f'Accuracy of the network on the MNIST test images: {mnist_accuracy:.2f}%')

Accuracy of the network on the SVHN test images: 95.70%
Accuracy of the network on the MNIST test images: 66.05%


### 1.2.4 Transfer learning

Need to run earlier dataloaders to remove the effect of recent data agumentation

In [17]:
# Setup and repeat Train
net = models.resnet18(pretrained=True)
net = net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

train(net, trainloader_svhn, criterion, optimizer, device, num_epochs)


In [None]:
# Test
svhn_accuracy = evaluate(net, testloader_svhn, device)
print(f'Accuracy of the network on the SVHN test images: {svhn_accuracy:.2f}%')

mnist_accuracy = evaluate(net, testloader_mnist, device)
print(f'Accuracy of the network on the MNIST test images: {mnist_accuracy:.2f}%')