In [30]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import numpy as np
import os
from copy import deepcopy
from tqdm.auto import tqdm
from mlp_pytorch import MLP
import cifar10_utils

import torch
import torch.nn as nn
import torch.optim as optim

In [31]:
################################################################################
# MIT License
#
# Copyright (c) 2024 University of Amsterdam
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to conditions.
#
# Author: Deep Learning Course (UvA) | Fall 2024
# Date Created: 2024-10-28
################################################################################
"""
This module implements a multi-layer perceptron (MLP) in PyTorch.
You should fill in code into indicated sections.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch.nn as nn
from collections import OrderedDict


class MLP(nn.Module):
    """
    Multi-layer perceptron built as one ``nn.Sequential`` stack.

    The stack consists of zero or more ``Linear -> [BatchNorm1d] -> ELU``
    groups followed by a final ``Linear`` layer that emits raw logits.
    """

    def __init__(self, n_inputs, n_hidden, n_classes, use_batch_norm=False):
        """
        Builds the network.

        Args:
          n_inputs: number of input features.
          n_hidden: list of ints, one entry per hidden linear layer giving its
                    width. An empty list yields a single linear layer, i.e.
                    multinomial logistic regression.
          n_classes: number of output units (classes).
          use_batch_norm: if True, a BatchNorm1d layer is inserted between
                          every hidden Linear layer and its ELU activation.

        Every Linear weight (including the output layer) is re-initialized with
        ``nn.init.kaiming_normal_``. No softmax is appended; the network
        returns logits.
        """
        super().__init__()

        # Keep the constructor arguments around for later inspection.
        self.n_inputs, self.n_hidden = n_inputs, n_hidden
        self.n_classes, self.use_batch_norm = n_classes, use_batch_norm

        # Named layers, in the order they are applied.
        self.layers = OrderedDict()

        fan_in = n_inputs
        for idx, width in enumerate(n_hidden):
            linear = nn.Linear(fan_in, width)
            nn.init.kaiming_normal_(linear.weight)
            self.layers[f'Hidden_{idx}'] = linear

            if use_batch_norm:
                self.layers[f'BatchNorm_{idx}'] = nn.BatchNorm1d(width)

            self.layers[f'ELU_{idx}'] = nn.ELU()

            # The next layer consumes this layer's output.
            fan_in = width

        # Output layer: logits only, no activation.
        head = nn.Linear(fan_in, n_classes)
        nn.init.kaiming_normal_(head.weight)
        self.layers['Output'] = head

        # Registering the Sequential is what exposes the parameters to PyTorch.
        self.model = nn.Sequential(self.layers)

    def __str__(self):
        """
        Returns the string form of the underlying Sequential stack.
        """
        return str(self.model)

    def forward(self, x):
        """
        Runs the input through every layer of the stack in order.

        Args:
          x: input to the network
        Returns:
          out: outputs of the network (logits)
        """
        return self.model(x)

    @property
    def device(self):
        """
        Device of the first parameter of the model.
        """
        first_param = next(self.parameters())
        return first_param.device



In [61]:
# Small reference network: 3072 inputs, one hidden layer of 128 units, 10 outputs.
mlp_torch = MLP(n_inputs=3072, n_hidden=[128], n_classes=10, use_batch_norm=False)

In [62]:
# Display the module's repr to inspect the layer stack.
mlp_torch

MLP(
  (model): Sequential(
    (Hidden_0): Linear(in_features=3072, out_features=128, bias=True)
    (ELU_0): ELU(alpha=1.0)
    (Output): Linear(in_features=128, out_features=10, bias=True)
  )
)

In [45]:
# Configuration and random seeds for reproducibility
seed = 42
data_dir = '../data'
batch_size = 128
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():  # GPU operations have a separate seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # The attribute is spelled `deterministic`; a misspelled name is silently
    # stored on the module and has no effect on cuDNN.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Pick the best available device: CUDA, then Apple MPS, then CPU.
# `getattr` keeps this working on PyTorch builds without the MPS backend.
_mps_backend = getattr(torch.backends, 'mps', None)
if torch.cuda.is_available():
    device = torch.device('cuda')
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

# Loading the dataset
cifar10 = cifar10_utils.get_cifar10(data_dir)
cifar10_loader = cifar10_utils.get_dataloader(cifar10, batch_size=batch_size,
                                              return_numpy=False)

Files already downloaded and verified
Files already downloaded and verified


In [47]:
def accuracy(predictions, targets):
    """
    Computes the prediction accuracy, i.e. the fraction of samples whose
    highest-scoring class equals the ground-truth class.

    Args:
      predictions: 2D float array or tensor of size [batch_size, n_classes],
                   predictions of the model (logits).
      targets: ground-truth labels, either a 1D int array/tensor of size
               [batch_size] holding class indices, or a 2D one-hot
               array/tensor of size [batch_size, n_classes].
    Returns:
      accuracy: Python float, the average of correct predictions over the batch.
    """
    # Torch tensors may require grad or live on an accelerator; NumPy cannot
    # consume those directly, so detach and copy to host memory first.
    if torch.is_tensor(predictions):
        predictions = predictions.detach().cpu().numpy()
    if torch.is_tensor(targets):
        targets = targets.detach().cpu().numpy()

    predictions = np.asarray(predictions)
    targets = np.asarray(targets)

    # One-hot targets are reduced to class indices.
    if targets.ndim > 1:
        targets = targets.argmax(axis=1)

    predicted_classes = predictions.argmax(axis=1)
    return float(np.mean(predicted_classes == targets))


def evaluate_model(model, data_loader):
    """
    Performs the evaluation of the MLP model on a given dataset.

    The model is switched to evaluation mode for the duration of the call (so
    BatchNorm uses its running statistics) and its previous train/eval mode is
    restored afterwards. No gradients are tracked.

    Args:
      model: An instance of 'MLP', the model to evaluate.
      data_loader: Iterable yielding (inputs, targets) batches, where targets
                   hold class indices.
    Returns:
      avg_accuracy: scalar float, the accuracy over all samples of the dataset,
                    independent of batch sizes. Returns 0.0 if the loader
                    yields no samples.
    """
    # Run on whatever device the model's parameters live on.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    was_training = model.training
    model.eval()

    total_correct = 0
    total_samples = 0
    try:
        with torch.no_grad():
            for batch_data, batch_targets in data_loader:
                # Flatten every sample to a vector; reshape also handles
                # non-contiguous inputs.
                batch_data = batch_data.reshape(batch_data.shape[0], -1)
                if model_device is not None:
                    batch_data = batch_data.to(model_device)
                    batch_targets = batch_targets.to(model_device)

                predictions = model(batch_data)

                # Count correct predictions directly so that batches of
                # different sizes are weighted correctly.
                total_correct += (predictions.argmax(dim=1) == batch_targets).sum().item()
                total_samples += batch_targets.shape[0]
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    if total_samples == 0:
        return 0.0
    return total_correct / total_samples

In [50]:
# Set the random seeds for reproducibility
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():  # GPU operations have a separate seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # The attribute is spelled `deterministic`; a misspelled name has no effect.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Set default device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loading the dataset
cifar10 = cifar10_utils.get_cifar10(data_dir)
cifar10_loader = cifar10_utils.get_dataloader(cifar10, batch_size=batch_size,
                                              return_numpy=False)

# Hyperparameters
n_inputs = 32 * 32 * 3
n_classes = 10
hidden_dims = [128]
use_batch_norm = True
lr = 1e-3
epochs = 1

# Model, loss and optimizer (plain SGD)
model = MLP(n_inputs, hidden_dims, n_classes, use_batch_norm).to(device)
loss_module = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr)

# Bookkeeping for model selection and plotting
val_accuracies = []
# TODO: evaluate `best_model` on the test split once training is finalized.
test_accuracy = 0.0
logging_dict = {'train_loss': [], 'val_accuracy': [], 'train_epoch_loss': []}
best_model = None
best_val_accuracy = -1.0

# Training loop
for epoch in tqdm(range(epochs)):
    model.train()
    epoch_losses = []
    for inputs, targets in cifar10_loader['train']:
        # Flatten data
        inputs = inputs.reshape(inputs.shape[0], -1)
        # Move data to device
        inputs, targets = inputs.to(device), targets.to(device)
        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        loss = loss_module(outputs, targets)
        batch_loss = loss.item()
        logging_dict['train_loss'].append(batch_loss)
        epoch_losses.append(batch_loss)
        # Backward pass
        loss.backward()
        optimizer.step()

    if epoch_losses:
        logging_dict['train_epoch_loss'].append(sum(epoch_losses) / len(epoch_losses))

    # Evaluate model on validation set (eval mode so BatchNorm uses running
    # statistics; no_grad so no autograd graph is built).
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for inputs, targets in cifar10_loader['validation']:
            inputs = inputs.reshape(inputs.shape[0], -1).to(device)
            targets = targets.to(device)
            n_correct += (model(inputs).argmax(dim=1) == targets).sum().item()
            n_seen += targets.shape[0]
    val_accuracy = n_correct / n_seen if n_seen > 0 else 0.0
    val_accuracies.append(val_accuracy)
    logging_dict['val_accuracy'].append(val_accuracy)

    # Keep a snapshot of the best model seen so far on validation.
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_model = deepcopy(model)

Files already downloaded and verified
Files already downloaded and verified


  0%|          | 0/1 [00:00<?, ?it/s]

In [57]:
# Sanity check on a single validation batch: print the input, target and
# prediction shapes. Inspection only, so no gradients are tracked.
with torch.no_grad():
    for inputs, targets in cifar10_loader['validation']:
        # Flatten the inputs
        inputs = inputs.view(inputs.size(0), -1)
        print(inputs.shape, targets.shape)
        # Run the forward pass on the model's device, then bring the result
        # back to the CPU so it can be compared with the CPU targets.
        predictions = model(inputs.to(device)).cpu()
        print(predictions.shape)
        break

torch.Size([128, 3072]) torch.Size([128])
torch.Size([128, 10])


In [71]:
# Accuracy on the inspected batch: fraction of rows whose arg-max logit matches the target.
predicted_classes = predictions.detach().numpy().argmax(axis=1)
(predicted_classes == targets.numpy()).mean()

0.2890625

In [68]:
targets

tensor([7, 9, 1, 5, 7, 9, 2, 1, 8, 6, 9, 8, 6, 7, 5, 8, 5, 8, 9, 0, 0, 6, 6, 1,
        4, 9, 3, 9, 5, 9, 8, 2, 5, 5, 1, 6, 5, 0, 5, 2, 2, 1, 1, 0, 1, 1, 2, 5,
        4, 9, 7, 3, 4, 4, 7, 4, 9, 4, 3, 1, 7, 3, 7, 8, 0, 4, 4, 6, 3, 4, 1, 2,
        9, 4, 1, 2, 4, 1, 4, 0, 2, 3, 0, 0, 2, 3, 0, 9, 3, 3, 4, 6, 2, 3, 5, 0,
        6, 3, 0, 7, 7, 4, 5, 9, 5, 5, 1, 8, 1, 3, 9, 0, 7, 7, 6, 5, 2, 2, 1, 1,
        9, 1, 2, 6, 4, 6, 9, 2])