In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np

We will finally put all the pieces together and train our MLP to classify the FashionMNIST dataset.

# Previously

Remember that we already have our data and model classes ready from previous notebooks. Let us repeat the code here for convenience. If you have completed the previous notebooks, you should be very familiar with this code.

### Data Module

In [None]:
import torchvision
from torchvision import datasets, transforms

class DataModule:
    """Base class for data modules that hand out train/validation dataloaders.

    Subclasses implement `get_dataloader`; the two convenience methods below
    simply forward to it with the matching `train` flag.
    """

    def __init__(self, batch_size=64):
        # Batch size stored for subclasses to use when building dataloaders.
        self.batch_size = batch_size

    def get_dataloader(self, train):
        """Return the dataloader for the split selected by `train`.

        Must be overridden by subclasses; the base class always raises
        NotImplementedError.
        """
        raise NotImplementedError

    def val_dataloader(self):
        """Return the dataloader obtained with `train=False`."""
        return self.get_dataloader(train=False)

    def train_dataloader(self):
        """Return the dataloader obtained with `train=True`."""
        return self.get_dataloader(train=True)

class FashionMNIST(DataModule):
    """Data module wrapping torchvision's FashionMNIST train and test splits.

    Images are converted to tensors and normalized with mean 0.5 and standard
    deviation 0.5. Besides providing dataloaders, the class offers helpers to
    map label indices to class names and to plot a few images with titles.
    """

    # Human-readable class names, indexed by the integer label.
    LABELS = ('t-shirt/top', 'trouser', 'pullover', 'dress', 'coat',
              'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot')

    def __init__(self, root, *args, **kwargs):
        """Download (if needed) and load both splits.

        Args:
            root: directory where torchvision stores/looks for the dataset.
            *args, **kwargs: forwarded to `DataModule.__init__`
                (e.g. `batch_size`).
        """
        super().__init__(*args, **kwargs)

        transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.5,), (0.5,))])

        self.train = datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
        self.val = datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)

    def get_dataloader(self, train):
        """Return a DataLoader over the train split if `train` else the val split.

        Shuffling is enabled only for the training split.
        """
        data = self.train if train else self.val
        return torch.utils.data.DataLoader(data, self.batch_size, shuffle=train)

    def text_classes(self, indices):
        """Map an iterable of label indices to a list of class-name strings."""
        return [self.LABELS[int(i)] for i in indices]

    def visualize(self, X, y, nrows=1, ncols=5):
        """Plot images from batch `X` titled with the class names of `y`.

        Args:
            X: image batch; dimension 1 is squeezed away before plotting.
            y: label indices matching the images in `X`.
            nrows, ncols: shape of the subplot grid; at most `nrows * ncols`
                images are drawn.
        """
        labels = self.text_classes(y)
        self.show_images(X.squeeze(1), nrows, ncols, titles=labels)

    def show_images(self, imgs, num_rows, num_cols, titles, scale=1.5):
        """Draw `imgs` on a `num_rows` x `num_cols` grid of greyscale subplots.

        Args:
            imgs: iterable of image tensors.
            num_rows, num_cols: grid shape.
            titles: sequence of titles, indexed like `imgs`.
            scale: size of each subplot cell, used to compute the figure size.

        Returns:
            The flattened array of matplotlib axes.
        """
        figsize = (num_cols * scale, num_rows * scale)
        # squeeze=False guarantees a 2-D array of axes; without it a 1x1 grid
        # yields a bare Axes object, which has no .flatten().
        _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
        axes = axes.flatten()
        # zip stops at the shorter of axes/imgs, so extra images are ignored.
        for i, (ax, img) in enumerate(zip(axes, imgs)):
            img = img.squeeze().numpy()
            ax.imshow(img, cmap='Greys_r')
            ax.axes.get_xaxis().set_visible(False)
            ax.axes.get_yaxis().set_visible(False)
            ax.set_title(titles[i])
        return axes

### MLP from Scratch

In [None]:
from torch import nn
import torch.nn.functional as F

class MLPScratch(nn.Module):
    """One-hidden-layer perceptron with explicitly declared weights and biases.

    The hidden layer is an affine map followed by ReLU; the output layer is a
    plain affine map whose result is returned as-is.
    """

    def __init__(self, num_inputs, num_outputs, num_hiddens, lr=0.01, sigma=0.01):
        super().__init__()
        # Remember the layer sizes and learning rate on the instance.
        self.num_inputs, self.num_outputs, self.num_hiddens = num_inputs, num_outputs, num_hiddens
        self.lr = lr

        # Hidden layer: Gaussian weights scaled by sigma, zero bias.
        self.W1 = nn.Parameter(sigma * torch.randn(num_inputs, num_hiddens))
        self.b1 = nn.Parameter(torch.zeros(num_hiddens))
        # Output layer: same initialization scheme.
        self.W2 = nn.Parameter(sigma * torch.randn(num_hiddens, num_outputs))
        self.b2 = nn.Parameter(torch.zeros(num_outputs))

    def forward(self, X):
        """Return the output-layer values for input `X`.

        `X` is matrix-multiplied with `W1`, so its last dimension has to
        match `num_inputs`.
        """
        pre_activation = X @ self.W1 + self.b1
        hidden = F.relu(pre_activation)
        return hidden @ self.W2 + self.b2

# Start Here

Now we want to create our `data` and `model` objects from our classes `FashionMNIST` and `MLPScratch`. Before we start, we just need to pay attention to a small detail. Our input data consists of 28×28 images, which cannot be fed directly as input to our MLP. We will ignore the spatial structure in our data and just consider each pixel as a feature. So, make sure to set the number of inputs of our MLP accordingly. For the number of hidden units, use 256.

Additionally, create an `optimizer` with `torch.optim.SGD` and a learning rate of 0.1. As `criterion`, use `CrossEntropyLoss()`.

In [None]:
# Exercise: fill in the right-hand side of each assignment below.
# data: a FashionMNIST data module (its constructor needs a `root` directory)
data =
# model: an MLPScratch with one input per pixel and 256 hidden units
model =
# optimizer: torch.optim.SGD with learning rate 0.1
optimizer =
# criterion: CrossEntropyLoss()
criterion =

Make sure you are able to run the following cell. Note that we need to reshape our input before calling `forward` (make sure you understand why).

In [None]:
# Pull a single batch from the training dataloader and run it through the model.
# The batch is reshaped to rows of model.num_inputs features before the call.
X, y = next(iter(data.train_dataloader()))
output = model(X.reshape((-1, model.num_inputs)))

# Train

Now it is time to create our training loop. You have already done each step separately; here you will put them together. Make sure your loss is decreasing at each epoch.

In [None]:
# Exercise - complete the code with the training loop.
# You only need one line of code after each comment

max_epochs = 5 # number of epochs to train for

# Loop over the epochs (the print at the end expects the loop variable to be named `epoch`)
for

    running_loss = 0

    # Loop over the train dataloader (each batch unpacks into inputs and labels)
    for

        # Forward (reshape the inputs to (-1, model.num_inputs) before calling the model)


        # Compute the loss with criterion and store it in `loss` (it is read below)


        # Remember to zero the gradients


        # Backward


        # Optimizer step

        # update the loss
        running_loss += loss.item() # accumulate this batch's loss as a plain Python number

    # Report the accumulated loss divided by the length of the train dataloader
    print(f"Finished epoch {epoch+1} : current training loss is {running_loss/len(data.train_dataloader())}")


# Prediction

Let us take a closer look at our model's predictions for one batch. We will take a batch from the validation dataset and do a forward pass to get the predicted values.

In [None]:
# Exercise: take one batch of the validation dataset and get the predicted classes


Plot the predictions and true labels for some images of this batch (remember that you can use `data.visualize`).

In [None]:
# Exercise: plot the images with their true and predicted labels

Complete the following code to get the accuracy on the validation dataset.

In [None]:
# Exercise: compute the accuracy over the whole validation dataset.
correct_samples = 0  # running count of correctly classified samples

for X, y in data.val_dataloader():
    # no_grad: gradients are not needed when only evaluating
    with torch.no_grad():
        # Get the predictions (the predicted class for each sample in the batch)
        preds =

        # Update number of correct_samples (how many entries of preds agree with y)
        correct_samples +=

# Accuracy = correct predictions divided by the number of validation samples
acc = correct_samples/len(data.val_dataloader().dataset)
print(f'Accuracy in validation dataset {acc}')

# MLP using PyTorch

As you may imagine, we do not wish to specify the weights and biases for each layer of our network from now on. The implementation of our MLP was only done for didactic purposes. A more concise implementation can be written using [`nn.Linear`](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html). You just need to create two fully connected layers with the correct number of inputs and outputs. In the forward method, you will call those layers directly instead of manually computing the output from the weights and biases.

In [None]:
from torch import nn
import torch.nn.functional as F

# Exercise skeleton: same constructor signature as MLPScratch, but the two
# layers are meant to be built with nn.Linear instead of hand-made parameters.
class MLP(nn.Module):
    # NOTE: `sigma` is accepted but not used anywhere in this skeleton.
    def __init__(self, num_inputs, num_outputs, num_hiddens, lr=0.01, sigma=0.01):
        super().__init__()
        # Keep the layer sizes and learning rate on the instance.
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.num_hiddens = num_hiddens
        self.lr = lr

        # exercise - create the two fully connected layers
        # (fc1: num_inputs -> num_hiddens, fc2: num_hiddens -> num_outputs)
        self.fc1 =
        self.fc2 =

    def forward(self, X):
        # Exercise - implement the forward method
        # (call fc1, apply the ReLU, then call fc2 - mirroring MLPScratch.forward)


You can repeat the training to make sure your class is working.