In [13]:
import torch
from torchvision import datasets
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision import transforms

In [14]:
# Transform to convert images to PyTorch Tensors and normalize them
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


train_all = datasets.MNIST ('../ data', train =True , download = True, transform=transform) # 60K images
test_data = datasets.MNIST ('../ data', train = False, transform=transform) # test : 10K

train_data, val_data = torch.utils.data.random_split (train_all, [50000, 10000], torch.Generator().manual_seed (0)) # train : 50K ; val : 10K

In [15]:
# epochs
num_epochs = 20

# Data Loader  
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(train_data, batch_size=64, shuffle=True)

In [16]:
'''
Question 1:
    build a 10-class softmax classifier on the images. Train the classifier via storchastic gradient 
    descent, and report test accuracy.
'''

# Define the model
class SoftmaxClassifier(nn.Module):
    def __init__(self):
        super(SoftmaxClassifier, self).__init__()
        self.linear = nn.Linear(784, 10)
    
    def forward(self, x):
        x = x.view(-1, 784)
        x = self.linear(x)
        return x


# Model, Loss, Optimizer
model = SoftmaxClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training Loop
for epoch in range(num_epochs):
    for images, labels in train_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
# Testing the model
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
accuracy = 100 * correct / total
print(f'Accuracy: {accuracy}%')


Accuracy: 92.196%


In [17]:
'''
Question 2:
 Insert one hidden layer with 1024 hidden units before the softmax classifier. And use ReLU as
the activation function at the hidden layer. Train and report test accuracy.
'''

# Define the model
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(784, 1024),
            nn.ReLU(),
            nn.Linear(1024, 10)
        )
        
    
    def forward(self, x):
        x = x.view(-1, 784)
        x = self.linear_relu_stack(x)
        return x


# Model, Loss, Optimizer
model = NeuralNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training Loop
for epoch in range(num_epochs):
    for images, labels in train_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
# Testing the model
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
accuracy = 100 * correct / total
print(f'Accuracy: {accuracy}%')

Accuracy: 96.942%


In [None]:
'''
Question 3:
    Let us count the number of learnable parameters in the above model:
        • input-to-hidden-layer weight matrix: 282 × 1024
        • input-to-hidden-layer bias: 1024
        • softmax classifer weight matrix: 1024 × 10
        • softmax classifier bias: 10
    So the total number of learnable parameter is
    282 × 1024 + 1024 + 1024 × 10 + 10 = 814, 090.
    Now, instead of inserting one hidden layer, we insert L(L ≥ 2) hidden layers, each with equal number
    of hidden units. We keep the total learnable parameters at 814, 090. Derive the number of hidden
    units per layer. Express it as a function of L.
'''

'''
Answer for Question 3:
    784H + LH + H^2 * (L−1) + H* 10 + 10 = 814,090
    H = (-(794 + L) + sqrt(L^2 + 3257908 * L - 2625884)) / (2(L - 1)), 
    when L == 1, the number of hidden units is 1024.

'''

In [25]:
'''
Question 4:
    Train a MLP model with architecture defined in problem 3, where L = 2, 3, . . . , 8. Get
    test accuracy for each L. Note that in problem 2, we already get the accuracy when L = 1. Plot the
    accuracy against L, where L = 1, 2, 3, . . . , 8.
'''

import math

def calculate_hidden_units(L):
    if L == 1:
        return 1024
    numerator = -L + math.sqrt(L ** 2 + 3257908 * L - 2625884) - 794
    denominator = 2 * (L - 1)
    return int(round(numerator / denominator))

class DynamicHiddenLayerNet(nn.Module):
    def __init__(self, input_size, output_size, num_layers):
        super(DynamicHiddenLayerNet, self).__init__()
        hidden_units = calculate_hidden_units(num_layers)
        
        layers = [nn.Linear(input_size, hidden_units), nn.ReLU()]
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(hidden_units, hidden_units))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_units, output_size))
        self.layers = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.layers(x.view(-1, 784)) 
    

for num_layers in range(1, 9):
    model = DynamicHiddenLayerNet(input_size=784, output_size=10, num_layers=num_layers)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Training Loop
    for epoch in range(num_epochs):
        for images, labels in train_loader:
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
    # Testing the model
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
    accuracy = 100 * correct / total
    print(f'The number of layers is: {num_layers}, Accuracy: {accuracy}%')

The number of layers is: 1, Accuracy: 96.968%
The number of layers is: 2, Accuracy: 97.44%
The number of layers is: 3, Accuracy: 98.048%
The number of layers is: 4, Accuracy: 98.388%
The number of layers is: 5, Accuracy: 97.904%
The number of layers is: 6, Accuracy: 97.79%
The number of layers is: 7, Accuracy: 94.184%
The number of layers is: 8, Accuracy: 94.34%
