In [10]:
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import utils

# Question 7 and 8

## Loading data

In [11]:
# load all the mnist data with the data loader and put in the following tensors: (x_train_data, y_train_data), (x_test_data, y_test_data)

def load_mnist_data():
    """Load the full MNIST train and test splits as four tensors.

    Each split is wrapped in a DataLoader whose batch size equals the size
    of the split, so the first (and only) batch contains every sample.

    Returns:
        Tuple ``(x_train_data, y_train_data, x_test_data, y_test_data)``.
    """
    to_tensor = transforms.Compose([transforms.ToTensor()])

    def _whole_split(is_train):
        # One batch == the entire split, because batch_size == len(split).
        split = datasets.MNIST(root='data', train=is_train, download=True, transform=to_tensor)
        return next(iter(DataLoader(split, batch_size=len(split))))

    x_train_data, y_train_data = _whole_split(True)
    x_test_data, y_test_data = _whole_split(False)

    return x_train_data, y_train_data, x_test_data, y_test_data



In [None]:

from torch.utils.data import DataLoader, random_split

def load_mnist_data_with_augmentations():
    """Build the train / validation / test DataLoaders for MNIST.

    The official training split is divided 80 % / 20 % into a training and a
    validation part. Only the training part goes through the random
    augmentations; validation and test images are merely converted to
    tensors and normalised, so evaluation is not perturbed by random
    distortions.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. ``train_loader``
        yields shuffled batches of 16 augmented samples; ``val_loader`` and
        ``test_loader`` each yield their whole split as one unshuffled batch.
    """
    # Local import keeps this block self-contained without touching the
    # notebook's import cells.
    from torch.utils.data import Subset

    # Random augmentations, applied to training samples only.
    train_transform = transforms.Compose([
        transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1)),  # Rotation, translation, scaling
        transforms.RandomPerspective(distortion_scale=0.1, p=0.5),  # Perspective distortion
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0)),  # Gaussian blur
        transforms.ToTensor(),  # Convert to tensor
        transforms.Normalize((0.5,), (0.5,)),  # Normalize with mean 0.5 / std 0.5
    ])

    # Deterministic preprocessing for validation and test (no augmentation).
    eval_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    # Two views of the same training images: one augmented, one plain.
    augmented_train_full = datasets.MNIST(root='data', train=True, download=True, transform=train_transform)
    plain_train_full = datasets.MNIST(root='data', train=True, download=True, transform=eval_transform)
    test_dataset = datasets.MNIST(root='data', train=False, download=True, transform=eval_transform)

    # Split the training data into training and validation indices.
    train_size = int(0.8 * len(augmented_train_full))
    val_size = len(augmented_train_full) - train_size
    train_dataset, val_split = random_split(augmented_train_full, [train_size, val_size])

    # Re-use the validation indices on the un-augmented view so validation
    # samples are disjoint from the training samples but never distorted.
    val_dataset = Subset(plain_train_full, val_split.indices)

    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    # The batch size must be the size of the validation split itself (it used
    # to be the size of the test split, which truncated the first batch).
    val_loader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

    return train_loader, val_loader, test_loader


# Full training split as plain tensors; train() uses it to report training
# loss/accuracy after every epoch.
x_train_full, y_train_full, _, _ = load_mnist_data()

# Select the device the same way the training and test cells do instead of
# hard-coding "cuda", which raises on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load_mnist_data() only applies ToTensor(), whereas the loaders the model is
# trained on additionally apply Normalize((0.5,), (0.5,)). Apply the same
# scaling here so the training metrics are computed on inputs the model
# was actually trained on.
x_train_full = ((x_train_full - 0.5) / 0.5).to(device)
y_train_full = y_train_full.to(device)



In [13]:
# Build the three DataLoaders (train / validation / test) used by the training and evaluation cells below.
train_loader, val_loader, test_loader = load_mnist_data_with_augmentations()

In [14]:
#Split the training data into 50 000 training instances and 10 000 validation instances

def split_data(x_train_data, y_train_data, train_size=50000):
    """Split inputs and labels into a leading training part and a trailing validation part.

    Args:
        x_train_data: Sliceable sequence/tensor of inputs.
        y_train_data: Sliceable sequence/tensor of labels, aligned with ``x_train_data``.
        train_size: Number of leading samples kept for training. Defaults to
            50000, which leaves 10000 validation samples for a 60000-sample set.

    Returns:
        ``((x_train, y_train), (x_val, y_val))`` where the training part is
        ``[:train_size]`` and the validation part is ``[train_size:]``.
    """
    x_train, x_val = x_train_data[:train_size], x_train_data[train_size:]
    y_train, y_val = y_train_data[:train_size], y_train_data[train_size:]

    return (x_train, y_train), (x_val, y_val)

In [15]:
#(x_train, y_train), (x_val, y_val) = split_data(x_train_full, y_train_full)

## Implementing the network and training

### Utils

In [16]:

def calculate_loss_and_accuracy(model, x_data, y_data, criterion, batch):
    """Evaluate ``model`` on a labelled data set without tracking gradients.

    The data is processed in chunks of ``batch`` samples so that large sets do
    not have to fit through the network in a single forward pass. The model is
    switched to evaluation mode and left in that mode.

    Args:
        model: Module mapping an input batch to per-class scores.
        x_data: Input tensor, indexed by sample along dimension 0.
        y_data: Target class indices aligned with ``x_data``.
        criterion: Loss callable. Its ``reduction`` attribute (``'mean'`` when
            absent) decides how per-chunk losses are combined.
        batch: Chunk size for evaluation. ``None`` or a non-positive value
            evaluates everything in one pass.

    Returns:
        ``(mean_loss, accuracy)``: the average per-sample loss and the
        fraction of correctly classified samples.

    Raises:
        ValueError: If ``x_data`` contains no samples.
    """
    n_samples = len(x_data)
    if n_samples == 0:
        raise ValueError("calculate_loss_and_accuracy needs at least one sample")

    chunk = int(batch) if batch else n_samples
    if chunk <= 0:
        chunk = n_samples

    reduction = getattr(criterion, "reduction", "mean")

    model.eval()
    loss_sum = 0.0
    correct = 0
    with torch.no_grad():
        for start in range(0, n_samples, chunk):
            x_chunk = x_data[start:start + chunk]
            y_chunk = y_data[start:start + chunk]
            output = model(x_chunk)
            chunk_loss = criterion(output, y_chunk)

            # Accumulate the *sum* of per-sample losses. A mean-reduced loss is
            # already averaged over the chunk, so it is weighted by the chunk
            # size here and must not be divided by N a second time.
            if reduction == "sum":
                loss_sum += chunk_loss.item()
            elif reduction == "none":
                loss_sum += chunk_loss.sum().item()
            else:
                loss_sum += chunk_loss.item() * len(x_chunk)

            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(y_chunk.view_as(pred)).sum().item()

    return loss_sum / n_samples, correct / n_samples

### Defining model and training procedure

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

# Define the neural network
class MNISTConvNet(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages followed by one linear layer.

    Every convolution uses a 3x3 kernel with stride 1 and padding 1, and every
    stage ends with a 2x2 max pooling. The linear layer expects 64 * 3 * 3
    features, which matches single-channel 28x28 inputs (28 -> 14 -> 7 -> 3)
    and produces 10 output scores.
    """

    def __init__(self):
        super().__init__()
        # Channel progression 1 -> 16 -> 32 -> 64.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        # Classifier head on the flattened 64 x 3 x 3 feature map.
        self.fc1 = nn.Linear(64 * 3 * 3, 10)

    def forward(self, x):
        """Return the 10 raw class scores for each sample in ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            # convolution -> ReLU -> 2x2 max pooling
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Keep the batch dimension, flatten everything else.
        features = torch.flatten(x, start_dim=1)
        return self.fc1(features)



# Training loop that computes the running loss per epoch and validation loss and accuracy per epoch
def train(model, train_loader, val_loader, optimizer, criterion, epochs=10, batch_size=16,
          x_train_eval=None, y_train_eval=None):
    """Train ``model`` and record per-epoch training and validation metrics.

    Args:
        model: Network to optimise; data is moved to the device of its parameters.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches. All
            of its batches are concatenated once, before training starts.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable ``criterion(output, target)``.
        epochs: Number of passes over ``train_loader``.
        batch_size: Forwarded to ``calculate_loss_and_accuracy`` as its
            ``batch`` argument; it does not affect the batches of ``train_loader``.
        x_train_eval: Inputs used to measure the training metrics after each
            epoch. When omitted, the notebook-level ``x_train_full`` is used.
        y_train_eval: Targets matching ``x_train_eval``. When omitted, the
            notebook-level ``y_train_full`` is used.

    Returns:
        ``(first_epoch_running_loss, train_evaluations, val_evaluations)``:
        the list of per-batch losses of the first epoch, and two
        ``(epochs, 2)`` arrays whose rows are ``[loss, accuracy]``.

    Raises:
        ValueError: If ``val_loader`` yields no batches.
    """
    first_epoch_running_loss = []
    train_evaluations = np.zeros((epochs, 2))
    val_evaluations = np.zeros((epochs, 2))

    # Follow the model instead of guessing: picking the device independently
    # breaks as soon as the model lives on a different device than the guess.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if x_train_eval is None or y_train_eval is None:
        # Backward-compatible fallback to the tensors defined at notebook level.
        x_train_eval, y_train_eval = x_train_full, y_train_full
    x_train_eval, y_train_eval = x_train_eval.to(device), y_train_eval.to(device)

    # Gather the complete validation set once; taking only the first batch
    # silently drops samples whenever the loader yields more than one batch.
    val_batches = list(val_loader)
    if not val_batches:
        raise ValueError("val_loader yielded no batches")
    x_val = torch.cat([inputs for inputs, _ in val_batches]).to(device)
    y_val = torch.cat([targets for _, targets in val_batches]).to(device)

    for epoch in range(epochs):
        # The evaluation at the end of each epoch leaves the model in eval
        # mode, so training mode is re-enabled at the start of every epoch.
        model.train()
        for x_train, y_train in train_loader:
            x_train, y_train = x_train.to(device), y_train.to(device)

            optimizer.zero_grad()
            output = model(x_train)
            loss = criterion(output, y_train)
            loss.backward()
            optimizer.step()

            if epoch == 0:
                # Per-batch loss trace of the first epoch only.
                first_epoch_running_loss.append(loss.item())

        train_loss, train_acc = calculate_loss_and_accuracy(model, x_train_eval, y_train_eval, criterion, batch_size)
        val_loss, val_acc = calculate_loss_and_accuracy(model, x_val, y_val, criterion, batch_size)

        train_evaluations[epoch] = [train_loss, train_acc]
        val_evaluations[epoch] = [val_loss, val_acc]
        print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

    return first_epoch_running_loss, train_evaluations, val_evaluations


# Training runs on the GPU when one is available and on the CPU otherwise; the device is chosen in the training cell below.


# put the data on the device






### Training

In [18]:
# Train the model

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fresh network, moved to the selected device before the optimizer is built.
model = MNISTConvNet()
model.to(device)

# Cross-entropy loss optimised with Adam at a learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): batch_size here is only forwarded to the evaluation helper;
# the training batches come from train_loader. Also confirm that 3000 epochs
# is intended (train() defaults to 10).
first_epoch_running_loss, train_evaluations, val_evaluations = train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=3000,
    batch_size=64,
)


UnboundLocalError: cannot access local variable 'x_train_full' where it is not associated with a value

In [None]:
import torch

def test(model, test_loader, device='cpu'):
    """
    Test the model on the whole test set.

    Args:
        model: The trained PyTorch model.
        test_loader: DataLoader for the test dataset.
        device: The device to run the evaluation on (default is 'cpu').

    Returns:
        accuracy: The accuracy of the model on the test set.
    """
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # Disable gradient computation for testing
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy


In [None]:
# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Accuracy of the trained model over the whole test loader.
test_accuracy = test(model, test_loader, device)

Test Accuracy: 98.85%


## Plotting evaluation measures

TODO here we plot the results stored in *first_epoch_running_loss, train_evaluations, val_evaluations*

In [None]:
# Pass the first-epoch batch losses and the per-epoch train/validation [loss, accuracy] arrays returned by train() to the project's plotting helper.
utils.plot_results(first_epoch_running_loss, train_evaluations, val_evaluations)