In [None]:
!pip install torch
!pip install torchvision
!pip install matplotlib
!pip install numpy
!pip install torchsummary



In [12]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.nn import functional as F

In [13]:
# Fashion-MNIST is pulled from torchvision instead of the file on canvas (same dataset).
# Preprocessing: convert each image to a tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Arguments shared by the train and test splits
dataset_kwargs = dict(root='./data', download=True, transform=transform)

# Train / test splits as provided by torchvision
train_dataset = torchvision.datasets.FashionMNIST(train=True, **dataset_kwargs)
test_dataset = torchvision.datasets.FashionMNIST(train=False, **dataset_kwargs)

# Mini-batch loaders of 64 samples; shuffling is enabled for the training split only
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)


In [14]:
# Sanity check: draw a single mini-batch from the training loader
data_iter = iter(train_loader)
first_batch = next(data_iter)
images, labels = first_batch

# Report the dimensions of the image tensor in that batch
print(f"Image batch shape: {images.shape}")

Image batch shape: torch.Size([64, 1, 28, 28])


In [15]:
#Define the LeNet-5 architecture based upon LeCun et al., 1998
class LeNet5(nn.Module):
    """LeNet-5 style CNN that maps 1x28x28 images to 10 class logits.

    Two 5x5 convolutions (1->6 channels with padding 2, then 6->16 channels),
    each followed by ReLU and 2x2 max pooling, feed three fully connected
    layers of 120, 84 and 10 units. The final layer returns raw logits; no
    softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # 16 channels x 5 x 5 spatial positions remain after the second pooling
        # step when the input is 28x28.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a batch of images.

        Args:
            x: image batch of shape (batch, 1, 28, 28).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not reduce to 5x5
                before the first fully connected layer.
        """
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        # Flatten everything except the batch axis. Unlike view(-1, 400), this
        # can never fold samples into each other when the input size is wrong;
        # fc1 then reports the mismatch instead of silently changing the batch.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Run on a CUDA GPU when PyTorch reports one, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")


# Baseline network on the chosen device, cross-entropy loss, and Adam with lr = 1e-3
model = LeNet5()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [16]:
#Define the LeNet-5 architecture based upon LeCun et al., 1998 with dropout and batch normalization
class LeNet5Regularized(nn.Module):
    """LeNet-5 variant with dropout and optional batch normalization.

    Same layer layout as the plain LeNet-5 (two 5x5 convolutions with 2x2 max
    pooling, then fully connected layers of 120, 84 and 10 units), plus:

    * dropout applied after each of the first two fully connected layers, and
    * when ``use_bn`` is true, ``BatchNorm2d`` after each convolution, before
      the ReLU.

    Args:
        dropout_rate: drop probability passed to ``nn.Dropout``.
        use_bn: whether to create and apply the batch-norm layers. The
            ``bn1``/``bn2`` attributes only exist when this is true.
    """

    def __init__(self, dropout_rate=0.5, use_bn=False):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        # A single Dropout module is reused at both dropout positions.
        self.dropout = nn.Dropout(dropout_rate)
        self.use_bn = use_bn
        if use_bn:
            self.bn1 = nn.BatchNorm2d(6)
            self.bn2 = nn.BatchNorm2d(16)

    # Experiment notes:
    #1. Test using bn before relu and only doing dropout after first fully connected layer
    #2. Test using bn after relu and dropout after second fully connected layer as well
    # As written, forward() applies bn before relu and dropout after both
    # the first and second fully connected layers.
    def forward(self, x):
        """Return class logits of shape (batch, 10) for a batch of images.

        Args:
            x: image batch of shape (batch, 1, 28, 28).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not reduce to 5x5
                before the first fully connected layer.
        """
        x = self.conv1(x)
        if self.use_bn:
            x = self.bn1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2(x)
        if self.use_bn:
            x = self.bn2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        # Flatten everything except the batch axis. Unlike view(-1, 400), this
        # can never fold samples into each other when the input size is wrong;
        # fc1 then reports the mismatch instead of silently changing the batch.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [17]:
# Training and evaluation function for all epochs
def train_and_evaluate(model, train_loader, test_loader, optimizer, criterion, num_epochs, device,
                       restore_best=True):
    """Train ``model`` and measure its accuracy on both loaders after every epoch.

    Each epoch makes one optimisation pass over ``train_loader`` (accuracy is
    accumulated from the outputs computed during that pass) followed by one
    evaluation pass over ``test_loader`` in eval mode without gradients. A
    progress line is printed per epoch.

    Args:
        model: network to train; updated in place.
        train_loader: iterable of ``(images, labels)`` batches used for training.
        test_loader: iterable of ``(images, labels)`` batches used for evaluation.
        optimizer: object providing ``zero_grad()`` and ``step()``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        num_epochs: number of epochs to run.
        device: device every batch is moved to before the forward pass.
        restore_best: when true (default), the weights from the epoch with the
            highest test accuracy are loaded back into ``model`` before
            returning, so a checkpoint written by the caller afterwards really
            is the best model rather than the last-epoch model.

    Returns:
        Tuple ``(train_accuracies, test_accuracies, best_test_accuracy, best_epoch)``:
        the two per-epoch accuracy lists in percent, the highest test accuracy
        seen, and the 1-based epoch at which it first occurred (0 if no epoch
        scored above 0).

    Raises:
        ZeroDivisionError: if either loader yields no samples.
    """
    # NOTE(review): the "best" epoch is selected on test_loader data; consider
    # a separate validation split if an unbiased test estimate is needed.
    train_accuracies = []
    test_accuracies = []
    best_test_accuracy = 0.0
    best_epoch = 0
    best_state = None  # copy of the weights at the best epoch so far

    # Loop over epochs
    for epoch in range(num_epochs):
        model.train()
        train_correct = 0
        train_total = 0

        # Go through all training data
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Predicted class = index of the largest logit
            predicted = outputs.detach().argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

        # Calculate train accuracy for epoch
        train_accuracy = 100 * train_correct / train_total
        train_accuracies.append(train_accuracy)

        # Evaluate
        model.eval()
        test_correct = 0
        test_total = 0

        # Go through all test data
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                predicted = outputs.argmax(dim=1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        # Calculate test accuracy for epoch
        test_accuracy = 100 * test_correct / test_total
        test_accuracies.append(test_accuracy)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%')

        # Check if this is the best accuracy so far
        if test_accuracy > best_test_accuracy:
            best_test_accuracy = test_accuracy
            best_epoch = epoch + 1
            if restore_best:
                # Clone every tensor: state_dict() returns references that
                # later optimizer steps would otherwise overwrite.
                best_state = {
                    key: value.detach().clone()
                    for key, value in model.state_dict().items()
                }

    # Hand the caller the best-epoch weights instead of the last-epoch ones
    if best_state is not None:
        model.load_state_dict(best_state)

    return train_accuracies, test_accuracies, best_test_accuracy, best_epoch

In [18]:
# Hyperparameters shared by every run
num_epochs = 20
learning_rate = 0.001
weight_decay = 1e-4
dropout_rate = 0.5

# Root folder that receives one sub-folder of weights per model variant
save_dir = './saved_models/'
os.makedirs(save_dir, exist_ok=True)

# The four variants being compared, each freshly initialised on the target device
models = {
    'Base Model': LeNet5().to(device),
    'Dropout': LeNet5Regularized(dropout_rate=dropout_rate).to(device),
    'Weight Decay': LeNet5().to(device),
    'Batch Normalization': LeNet5Regularized(dropout_rate=0, use_bn=True).to(device)
}

results = {}

# Train every variant in turn, write its weights to disk and keep its accuracy history
for name, model in models.items():
    print(f"\nTraining {name} model:")

    # Only the 'Weight Decay' variant gets a non-zero L2 penalty in Adam
    decay = weight_decay if name == 'Weight Decay' else 0
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=decay)

    # Per-variant folder, e.g. 'Base Model' -> <save_dir>/base_model/best_model.pth
    model_save_dir = os.path.join(save_dir, name.lower().replace(" ", "_"))
    os.makedirs(model_save_dir, exist_ok=True)
    best_model_path = os.path.join(model_save_dir, 'best_model.pth')

    # Run the full training schedule and unpack what it reports
    history = train_and_evaluate(
        model, train_loader, test_loader, optimizer, criterion, num_epochs, device
    )
    train_accuracies, test_accuracies, best_test_accuracy, best_epoch = history

    # Persist the weights the model holds once train_and_evaluate has returned
    torch.save(model.state_dict(), best_model_path)
    print(f"Best model for {name} saved with test accuracy: {best_test_accuracy:.2f}% at epoch {best_epoch}")

    # Keep the curves and the best-epoch figures for plotting and the summary table
    results[name] = {
        'train': train_accuracies,
        'test': test_accuracies,
        'best_test_accuracy': best_test_accuracy,
        'best_epoch': best_epoch,
    }


# One panel per model on a 2x2 grid (format defined in the assignment),
# each showing train and test accuracy against the epoch number
fig = plt.figure(figsize=(20, 15))
epochs = range(1, num_epochs + 1)
for position, (name, accuracies) in enumerate(results.items(), start=1):
    ax = fig.add_subplot(2, 2, position)
    ax.plot(epochs, accuracies['train'], label='Train')
    ax.plot(epochs, accuracies['test'], label='Test')
    ax.set_title(f'{name}')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Accuracy (%)')
    ax.legend()

# Write all panels to a single image file, then release the figure
fig.tight_layout()
fig.savefig('convergence_graphs-2.png')
plt.close(fig)

# Summary table of last-epoch train/test accuracies and the best epoch per model
summary_table = {
    'Model': list(results),
    'Final Train Accuracy (%)': [f"{entry['train'][-1]:.2f}" for entry in results.values()],
    'Final Test Accuracy (%)': [f"{entry['test'][-1]:.2f}" for entry in results.values()],
    'Best Epoch': [entry['best_epoch'] for entry in results.values()],
}

print("\nSummary Table:")
print(f"{'Model':<20} {'Final Train Accuracy (%)':<25} {'Final Test Accuracy (%)':<25} {'Best Epoch':<15}")
print("-" * 85)
# Walk the four columns in lock-step so that each iteration yields one table row
for model_name, final_train, final_test, best in zip(*summary_table.values()):
    print(f"{model_name:<20} {final_train:<25} {final_test:<25} {best:<15}")



Training Base Model model:
Epoch [1/20], Train Accuracy: 78.30%, Test Accuracy: 83.00%
Epoch [2/20], Train Accuracy: 86.68%, Test Accuracy: 85.83%
Epoch [3/20], Train Accuracy: 88.61%, Test Accuracy: 87.24%
Epoch [4/20], Train Accuracy: 89.48%, Test Accuracy: 88.74%
Epoch [5/20], Train Accuracy: 90.36%, Test Accuracy: 88.86%
Epoch [6/20], Train Accuracy: 90.77%, Test Accuracy: 89.69%
Epoch [7/20], Train Accuracy: 91.38%, Test Accuracy: 89.83%
Epoch [8/20], Train Accuracy: 91.76%, Test Accuracy: 90.22%
Epoch [9/20], Train Accuracy: 92.24%, Test Accuracy: 90.27%
Epoch [10/20], Train Accuracy: 92.66%, Test Accuracy: 90.16%
Epoch [11/20], Train Accuracy: 93.00%, Test Accuracy: 90.23%
Epoch [12/20], Train Accuracy: 93.40%, Test Accuracy: 90.69%
Epoch [13/20], Train Accuracy: 93.72%, Test Accuracy: 90.54%
Epoch [14/20], Train Accuracy: 94.01%, Test Accuracy: 90.06%
Epoch [15/20], Train Accuracy: 94.28%, Test Accuracy: 90.48%
Epoch [16/20], Train Accuracy: 94.60%, Test Accuracy: 90.58%
Epoch

In [None]:
from google.colab import files
!zip -r /content/saved_models.zip /content/saved_models
files.download('saved_models.zip')