<span style="font-size: 18px;">Question 1</span>

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

In [9]:
# Fix PyTorch's global RNG seed before any model or DataLoader is created.
torch.manual_seed(42)

# Data loading and preprocessing
# Each image is converted to a tensor and then normalized with mean 0.5 and
# std 0.5 (one value per tuple, i.e. a single channel).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Training split (downloaded into the given root when missing), served in
# shuffled batches of 32.
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

# Test split with the same preprocessing; batches of 32 in fixed order.
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)

<span style="font-size: 18px;">Question 2</span>

In [4]:
# Define different MLP architectures
class MLP1(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 10.

    The input is flattened from dimension 1 onwards, so each sample must hold
    784 values (e.g. a 1x28x28 image).

    Args:
        activation: Module applied after each hidden layer. When ``None``
            (the default) a fresh ``nn.ReLU()`` is created for this instance.
    """

    def __init__(self, activation=None):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)
        # The default is built here rather than in the signature: a default
        # written as ``nn.ReLU()`` is evaluated once at definition time and
        # would be shared by every instance of the class.
        self.activation = nn.ReLU() if activation is None else activation

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, 10)."""
        x = self.flatten(x)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        # No activation on the output layer: the raw scores are returned.
        x = self.fc3(x)
        return x

class MLP2(nn.Module):
    """Fully connected classifier: 784 -> 1024 -> 512 -> 256 -> 10.

    The input is flattened from dimension 1 onwards, so each sample must hold
    784 values (e.g. a 1x28x28 image).

    Args:
        activation: Module applied after each hidden layer. When ``None``
            (the default) a fresh ``nn.ReLU()`` is created for this instance.
    """

    def __init__(self, activation=None):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 10)
        # The default is built here rather than in the signature: a default
        # written as ``nn.ReLU()`` is evaluated once at definition time and
        # would be shared by every instance of the class.
        self.activation = nn.ReLU() if activation is None else activation

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, 10)."""
        x = self.flatten(x)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        # No activation on the output layer: the raw scores are returned.
        x = self.fc4(x)
        return x

class MLP3(nn.Module):
    """Fully connected classifier: 784 -> 256 -> 128 -> 10.

    The input is flattened from dimension 1 onwards, so each sample must hold
    784 values (e.g. a 1x28x28 image).

    Args:
        activation: Module applied after each hidden layer. When ``None``
            (the default) a fresh ``nn.ReLU()`` is created for this instance.
    """

    def __init__(self, activation=None):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        # The default is built here rather than in the signature: a default
        # written as ``nn.ReLU()`` is evaluated once at definition time and
        # would be shared by every instance of the class.
        self.activation = nn.ReLU() if activation is None else activation

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, 10)."""
        x = self.flatten(x)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        # No activation on the output layer: the raw scores are returned.
        x = self.fc3(x)
        return x

class MLP4(nn.Module):
    """Fully connected classifier with one hidden layer: 784 -> 512 -> 10.

    The input is flattened from dimension 1 onwards, so each sample must hold
    784 values (e.g. a 1x28x28 image).

    Args:
        activation: Module applied after the hidden layer. When ``None``
            (the default) a fresh ``nn.ReLU()`` is created for this instance.
    """

    def __init__(self, activation=None):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 10)
        # The default is built here rather than in the signature: a default
        # written as ``nn.ReLU()`` is evaluated once at definition time and
        # would be shared by every instance of the class.
        self.activation = nn.ReLU() if activation is None else activation

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, 10)."""
        x = self.flatten(x)
        x = self.activation(self.fc1(x))
        # No activation on the output layer: the raw scores are returned.
        x = self.fc2(x)
        return x

class MLP5(nn.Module):
    """Fully connected classifier: 784 -> 1024 -> 512 -> 256 -> 128 -> 10.

    The input is flattened from dimension 1 onwards, so each sample must hold
    784 values (e.g. a 1x28x28 image).

    Args:
        activation: Module applied after each hidden layer. When ``None``
            (the default) a fresh ``nn.ReLU()`` is created for this instance.
    """

    def __init__(self, activation=None):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 10)
        # The default is built here rather than in the signature: a default
        # written as ``nn.ReLU()`` is evaluated once at definition time and
        # would be shared by every instance of the class.
        self.activation = nn.ReLU() if activation is None else activation

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, 10)."""
        x = self.flatten(x)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.activation(self.fc4(x))
        # No activation on the output layer: the raw scores are returned.
        x = self.fc5(x)
        return x

In [5]:
# Training function
def train_model(model, epochs=10, learning_rate=0.001, train_loader=None, test_loader=None):
    """Train ``model`` with Adam and cross-entropy loss, evaluating after every epoch.

    Args:
        model: ``nn.Module`` returning class scores; it is moved to CUDA when
            available, otherwise to the CPU.
        epochs: Number of passes over the training batches.
        learning_rate: Learning rate handed to ``optim.Adam``.
        train_loader: Iterable of ``(images, labels)`` batches to train on.
            ``None`` (the default) falls back to the notebook-level
            ``trainloader``.
        test_loader: Iterable of ``(images, labels)`` batches passed to
            ``evaluate_model`` after each epoch. ``None`` (the default) falls
            back to the notebook-level ``testloader``.

    Returns:
        ``(train_losses, train_accuracies, test_accuracies)``: three lists with
        one entry per epoch. Each loss is the mean of that epoch's per-batch
        losses; accuracies are percentages.
    """
    # Keep the original zero-argument behaviour: use the notebook globals
    # unless the caller supplies its own loaders.
    if train_loader is None:
        train_loader = trainloader
    if test_loader is None:
        test_loader = testloader

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    # Built after the device move so it is handed the parameters in place.
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_losses = []
    train_accuracies = []
    test_accuracies = []

    for epoch in range(epochs):
        # evaluate_model() leaves the model in eval mode, so re-enable training
        # mode at the start of every epoch.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Accuracy bookkeeping does not need autograd history.
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        test_accuracy = evaluate_model(model, test_loader, device)

        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'Training Loss: {train_loss:.4f}')
        print(f'Training Accuracy: {train_accuracy:.2f}%')
        print(f'Test Accuracy: {test_accuracy:.2f}%')
        print('--------------------')

    return train_losses, train_accuracies, test_accuracies

In [6]:
# Evaluation function
def evaluate_model(model, dataloader, device):
    """Return the percentage of samples in ``dataloader`` that ``model`` labels correctly.

    The model is switched to eval mode and is left in that mode on return.
    Each batch is moved to ``device`` and the predicted class is the index of
    the largest score along dimension 1.
    """
    model.eval()
    n_seen = 0
    n_hits = 0

    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            scores = model(batch_images)
            # torch.max over dim 1 yields (values, indices); only the indices matter.
            best_class = torch.max(scores.data, 1)[1]
            n_seen += batch_labels.size(0)
            n_hits += (best_class == batch_labels).sum().item()

    return 100 * n_hits / n_seen

In [7]:
# Train different MLP architectures
# All five models are instantiated here, up front, each with its default
# activation; the label is only used for printing and as the results key.
mlp_architectures = [
    ("MLP1 (3 layers)", MLP1()),
    ("MLP2 (4 layers)", MLP2()),
    ("MLP3 (3 layers, smaller)", MLP3()),
    ("MLP4 (2 layers)", MLP4()),
    ("MLP5 (5 layers)", MLP5())
]

# results maps each architecture label to its per-epoch training history.
results = {}
for name, model in mlp_architectures:
    print(f"\nTraining {name}")
    # Called with the default epochs and learning rate of the three-value
    # train_model(model, epochs, learning_rate) definition.
    # NOTE(review): a later cell redefines train_model with required loader
    # arguments; re-running this cell after that one would fail.
    train_losses, train_accuracies, test_accuracies = train_model(model)
    results[name] = {
        'train_losses': train_losses,
        'train_accuracies': train_accuracies,
        'test_accuracies': test_accuracies
    }


Training MLP1 (3 layers)
Epoch 1/10:
Training Loss: 0.4813
Training Accuracy: 82.32%
Test Accuracy: 84.22%
--------------------
Epoch 2/10:
Training Loss: 0.3682
Training Accuracy: 86.44%
Test Accuracy: 86.09%
--------------------
Epoch 3/10:
Training Loss: 0.3275
Training Accuracy: 87.83%
Test Accuracy: 86.56%
--------------------
Epoch 4/10:
Training Loss: 0.3015
Training Accuracy: 88.78%
Test Accuracy: 87.14%
--------------------
Epoch 5/10:
Training Loss: 0.2819
Training Accuracy: 89.39%
Test Accuracy: 87.87%
--------------------
Epoch 6/10:
Training Loss: 0.2650
Training Accuracy: 90.09%
Test Accuracy: 87.14%
--------------------
Epoch 7/10:
Training Loss: 0.2502
Training Accuracy: 90.52%
Test Accuracy: 87.63%
--------------------
Epoch 8/10:
Training Loss: 0.2387
Training Accuracy: 91.03%
Test Accuracy: 87.42%
--------------------
Epoch 9/10:
Training Loss: 0.2279
Training Accuracy: 91.40%
Test Accuracy: 88.50%
--------------------
Epoch 10/10:
Training Loss: 0.2171
Training Acc

In [9]:
# Test different activation functions on the best performing architecture
# NOTE(review): MLP4 is hard-coded as the "best" architecture; confirm this
# against the architecture comparison results before relying on it.
best_architecture = MLP4
activation_functions = [
    ("ReLU", nn.ReLU()),
    ("Tanh", nn.Tanh()),
    ("Sigmoid", nn.Sigmoid())
]

# activation_results maps each activation name to its per-epoch training history.
activation_results = {}
for name, activation in activation_functions:
    print(f"\nTraining best architecture with {name} activation")
    # A new model is built for every activation, so no weights carry over
    # between runs.
    model = best_architecture(activation=activation)
    # Relies on the three-value train_model(model, epochs, learning_rate)
    # definition, called with its defaults.
    train_losses, train_accuracies, test_accuracies = train_model(model)
    activation_results[name] = {
        'train_losses': train_losses,
        'train_accuracies': train_accuracies,
        'test_accuracies': test_accuracies
    }


Training best architecture with ReLU activation
Epoch 1/10:
Training Loss: 0.4781
Training Accuracy: 82.44%
Test Accuracy: 83.24%
--------------------
Epoch 2/10:
Training Loss: 0.3670
Training Accuracy: 86.66%
Test Accuracy: 85.62%
--------------------
Epoch 3/10:
Training Loss: 0.3312
Training Accuracy: 87.77%
Test Accuracy: 85.92%
--------------------
Epoch 4/10:
Training Loss: 0.3056
Training Accuracy: 88.68%
Test Accuracy: 84.24%
--------------------
Epoch 5/10:
Training Loss: 0.2887
Training Accuracy: 89.22%
Test Accuracy: 87.69%
--------------------
Epoch 6/10:
Training Loss: 0.2710
Training Accuracy: 89.81%
Test Accuracy: 86.83%
--------------------
Epoch 7/10:
Training Loss: 0.2569
Training Accuracy: 90.44%
Test Accuracy: 87.66%
--------------------
Epoch 8/10:
Training Loss: 0.2469
Training Accuracy: 90.73%
Test Accuracy: 87.95%
--------------------
Epoch 9/10:
Training Loss: 0.2378
Training Accuracy: 91.14%
Test Accuracy: 88.06%
--------------------
Epoch 10/10:
Training Lo

In [None]:
# Plotting functions for visualization
def plot_results(results, title):
    """Show accuracy curves (left) and training-loss curves (right) for every run.

    Args:
        results: Mapping of run name to a dict holding the sequences
            'train_accuracies', 'test_accuracies' and 'train_losses'.
        title: Heading placed above both panels.
    """
    fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: one train curve and one test curve per run.
    for run_name, history in results.items():
        accuracy_ax.plot(history['train_accuracies'], label=f"{run_name} (Train)")
        accuracy_ax.plot(history['test_accuracies'], label=f"{run_name} (Test)")
    accuracy_ax.set_title('Training and Test Accuracies')
    accuracy_ax.set_xlabel('Epoch')
    accuracy_ax.set_ylabel('Accuracy (%)')
    accuracy_ax.legend()

    # Right panel: one training-loss curve per run.
    for run_name, history in results.items():
        loss_ax.plot(history['train_losses'], label=run_name)
    loss_ax.set_title('Training Losses')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    fig.suptitle(title)
    fig.tight_layout()
    plt.show()

# Plot results
# `results` and `activation_results` are the history dicts filled by the
# architecture and activation training cells; both must have run first.
plot_results(results, "Comparison of Different MLP Architectures")
plot_results(activation_results, "Comparison of Different Activation Functions")

<span style="font-size: 18px;">Question 3</span>

In [10]:
# Base CNN architecture
class BaseCNN(nn.Module):
    """Baseline CNN: 3x3 conv (32 filters) -> 2x2 max-pool -> dense 100 -> dense 10.

    The first dense layer is sized for a pooled feature map of 32 x 13 x 13,
    which is what a 1x28x28 input produces (conv 28 -> 26, pool 26 -> 13).

    Args:
        dropout: When true, ``nn.Dropout(0.5)`` is applied to the flattened
            features before the first dense layer.
    """

    # (channels, height, width) the dense layers expect after conv1 + pool.
    _POOLED_SHAPE = (32, 13, 13)

    def __init__(self, dropout=False):
        super().__init__()
        # First convolutional layer with 32 3x3 filters
        self.conv1 = nn.Conv2d(1, 32, 3)
        # MaxPool layer with a 2x2 window
        self.pool = nn.MaxPool2d(2, 2)
        # Optional dropout on the flattened conv features
        self.dropout = dropout
        if dropout:
            self.dropout_layer = nn.Dropout(0.5)
        # Dense layers; the input size is the flattened pooled feature map
        self.fc1 = nn.Linear(32 * 13 * 13, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return unnormalized class scores, one row of 10 per sample.

        Raises:
            RuntimeError: If the pooled feature map is not 32 x 13 x 13, i.e.
                the input does not have the spatial size the dense layers
                were built for.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        # view(-1, ...) on its own would silently fold extra spatial positions
        # into the batch dimension whenever the element count happens to
        # divide evenly, so the shape is checked explicitly first.
        if tuple(x.shape[-3:]) != self._POOLED_SHAPE:
            raise RuntimeError(
                f"BaseCNN expects pooled features of shape {self._POOLED_SHAPE}, "
                f"got {tuple(x.shape[-3:])}"
            )
        x = x.view(-1, 32 * 13 * 13)
        if self.dropout:
            x = self.dropout_layer(x)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [5]:
# Enhanced CNN with two conv layers
class EnhancedCNN(nn.Module):
    """Two-stage CNN: (3x3 conv 32 -> pool) -> (3x3 conv 64 -> pool) -> dense 100 -> dense 10.

    The first dense layer is sized for a pooled feature map of 64 x 5 x 5,
    which is what a 1x28x28 input produces (28 -> 26 -> 13 -> 11 -> 5).

    Args:
        dropout: When true (the default), ``nn.Dropout(0.5)`` is applied to
            the flattened features before the first dense layer.
    """

    # (channels, height, width) the dense layers expect after both conv stages.
    _POOLED_SHAPE = (64, 5, 5)

    def __init__(self, dropout=True):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        # One 2x2 max-pool module, reused after each convolution
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = dropout
        if dropout:
            self.dropout_layer = nn.Dropout(0.5)
        # Adjusted size for two conv layers
        self.fc1 = nn.Linear(64 * 5 * 5, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return unnormalized class scores, one row of 10 per sample.

        Raises:
            RuntimeError: If the pooled feature map is not 64 x 5 x 5, i.e.
                the input does not have the spatial size the dense layers
                were built for.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # view(-1, ...) on its own would silently fold extra spatial positions
        # into the batch dimension whenever the element count happens to
        # divide evenly, so the shape is checked explicitly first.
        if tuple(x.shape[-3:]) != self._POOLED_SHAPE:
            raise RuntimeError(
                f"EnhancedCNN expects pooled features of shape {self._POOLED_SHAPE}, "
                f"got {tuple(x.shape[-3:])}"
            )
        x = x.view(-1, 64 * 5 * 5)
        if self.dropout:
            x = self.dropout_layer(x)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [11]:
def train_model(model, trainloader, testloader, epochs, lr=0.01, momentum=0.9, log_every=10):
    """Train ``model`` with SGD (momentum) and cross-entropy loss.

    Args:
        model: ``nn.Module`` returning class scores; it is moved to CUDA when
            available, otherwise to the CPU.
        trainloader: Iterable of ``(images, labels)`` batches to train on.
        testloader: Iterable of ``(images, labels)`` batches passed to
            ``evaluate_model`` after every epoch.
        epochs: Number of passes over ``trainloader``.
        lr: Learning rate handed to ``optim.SGD``.
        momentum: Momentum handed to ``optim.SGD``.
        log_every: Print a progress report every ``log_every`` epochs. The
            default of 10 matches the previously hard-coded interval; ``None``
            or ``0`` disables the reports.

    Returns:
        ``(train_accuracies, test_accuracies)``: two lists of percentages with
        one entry per epoch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    train_accuracies = []
    test_accuracies = []

    for epoch in range(epochs):
        # evaluate_model() leaves the model in eval mode, so re-enable training
        # mode (and with it dropout) at the start of every epoch.
        model.train()
        correct = 0
        total = 0

        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accuracy bookkeeping does not need autograd history.
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_accuracy = 100 * correct / total
        test_accuracy = evaluate_model(model, testloader, device)

        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        if log_every and (epoch + 1) % log_every == 0:
            print(f'Epoch {epoch+1}/{epochs}:')
            print(f'Training Accuracy: {train_accuracy:.2f}%')
            print(f'Test Accuracy: {test_accuracy:.2f}%')
            print('--------------------')

    return train_accuracies, test_accuracies

In [12]:
def evaluate_model(model, dataloader, device):
    """Compute classification accuracy, in percent, of ``model`` over ``dataloader``.

    Puts the model in eval mode (it stays there afterwards), runs every batch
    on ``device`` without gradient tracking, and counts how often the
    highest-scoring class equals the label.
    """
    model.eval()
    hits, seen = 0, 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            # Index of the largest score along dimension 1 is the prediction.
            guesses = torch.max(model(inputs).data, 1).indices
            seen += targets.size(0)
            hits += (guesses == targets).sum().item()

    accuracy_percent = 100 * hits / seen
    return accuracy_percent

In [9]:
def plot_accuracies(train_acc, test_acc, title):
    """Plot training and test accuracy per epoch on a single set of axes.

    Args:
        train_acc: Sequence of training accuracies (%), one per epoch; its
            length fixes the epoch axis (1..N).
        test_acc: Sequence of test accuracies (%), drawn against the same axis.
        title: Title of the plot.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    epoch_axis = list(range(1, len(train_acc) + 1))

    curves = (
        (train_acc, 'Training Accuracy'),
        (test_acc, 'Test Accuracy'),
    )
    for series, legend_label in curves:
        ax.plot(epoch_axis, series, label=legend_label)

    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy (%)')
    ax.legend()
    ax.grid(True)
    plt.show()

In [12]:
# Baseline CNN - 10 epochs
# BaseCNN() is built with its default dropout=False. This call needs the
# train_model variant that takes the loaders explicitly and returns
# (train_accuracies, test_accuracies).
print("Training Baseline CNN for 10 epochs:")
model = BaseCNN()
train_acc, test_acc = train_model(model, trainloader, testloader, epochs=10)

Training Baseline CNN for 10 epochs:
Epoch 10/10:
Training Accuracy: 96.50%
Test Accuracy: 91.46%
--------------------


In [13]:
# CNN with Dropout - 50 epochs
# Same BaseCNN architecture with its Dropout(0.5) layer enabled, trained for
# 50 epochs; the per-epoch accuracy lists are kept for plotting.
print("\nTraining CNN with Dropout for 50 epochs:")
model_dropout = BaseCNN(dropout=True)
train_acc_dropout, test_acc_dropout = train_model(model_dropout, trainloader, testloader, epochs=50)


Training CNN with Dropout for 50 epochs:
Epoch 10/50:
Training Accuracy: 92.57%
Test Accuracy: 90.73%
--------------------
Epoch 20/50:
Training Accuracy: 94.36%
Test Accuracy: 90.86%
--------------------
Epoch 30/50:
Training Accuracy: 95.47%
Test Accuracy: 91.47%
--------------------
Epoch 40/50:
Training Accuracy: 96.19%
Test Accuracy: 91.28%
--------------------
Epoch 50/50:
Training Accuracy: 96.70%
Test Accuracy: 91.55%
--------------------


In [1]:
# Plot per-epoch train/test accuracy of the dropout run.
# NOTE(review): the saved output of this cell is a NameError and its execution
# count is 1 — presumably it was run on a fresh kernel before the cell that
# defines plot_accuracies (and before the dropout training cell). Re-run the
# notebook top to bottom so the figure is actually produced.
plot_accuracies(train_acc_dropout, test_acc_dropout, 'CNN with Dropout - Training vs Test Accuracy')

NameError: name 'plot_accuracies' is not defined