In [None]:
import torch
import torchvision
from torchvision import datasets, transforms

import matplotlib.pyplot as plt
import numpy as np

import torch.nn as nn
import torch.nn.functional as F

We will put into practice what we learnt in the first part of the Lab with a different dataset and model. This time we will use the CIFAR10 dataset and we will build a CNN.

# Dataset CIFAR10

Start by building the `DataModule` class (the wrapper that creates the train and validation `DataLoader`s), similarly to what you did last time.

In [None]:
class DataModule:
    """Minimal base class for dataset wrappers that hand out dataloaders.

    Subclasses override ``get_dataloader``; ``train_dataloader`` and
    ``val_dataloader`` delegate to it with the matching ``train`` flag.
    """

    def __init__(self, batch_size=64):
        # Number of samples per batch.
        self.batch_size = batch_size

    def get_dataloader(self, train):
        """Build the dataloader for one split; must be overridden by subclasses."""
        raise NotImplementedError

    def train_dataloader(self):
        """Return the result of ``get_dataloader(train=True)``."""
        train_loader = self.get_dataloader(train=True)
        return train_loader

    def val_dataloader(self):
        """Return the result of ``get_dataloader(train=False)``."""
        val_loader = self.get_dataloader(train=False)
        return val_loader

class CIFAR10(DataModule):
    """CIFAR-10 data module: loads both splits and offers plotting helpers.

    Images are converted to tensors and normalised per channel with mean 0.5
    and std 0.5, which maps ``ToTensor``'s [0, 1] range to [-1, 1].

    Args:
        root: Directory where the dataset is stored; it is downloaded there
            if it is not already present.
        *args, **kwargs: Forwarded to ``DataModule`` (e.g. ``batch_size``).
    """

    # Text labels indexed by the CIFAR-10 integer class id.
    LABELS = ('airplane', 'automobile', 'bird', 'cat', 'deer',
              'dog', 'frog', 'horse', 'ship', 'truck')

    def __init__(self, root, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.root = root

        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        # get_dataloader() reads these two attributes, so both splits must be
        # loaded here.
        self.train = datasets.CIFAR10(root=root, train=True, download=True,
                                      transform=transform)
        self.val = datasets.CIFAR10(root=root, train=False, download=True,
                                    transform=transform)

    def get_dataloader(self, train):
        """Return a DataLoader over the train (shuffled) or validation split."""
        data = self.train if train else self.val
        return torch.utils.data.DataLoader(data, self.batch_size,
                                           shuffle=train, num_workers=2)

    def text_classes(self, indices):
        """Map an iterable of integer class ids (ints or 0-d tensors) to names."""
        return [self.LABELS[int(i)] for i in indices]

    # ---------- Visualization --------------------------------------------- #
    def visualize(self, X, y, nrows=1, ncols=5):
        """Plot up to ``nrows * ncols`` images of a batch, titled by class name."""
        labels = self.text_classes(y)
        # No squeeze on the channel axis: CIFAR images have 3 channels, which
        # show_images needs in order to move them last for imshow.
        self.show_images(X, nrows, ncols, titles=labels)

    def show_images(self, imgs, num_rows, num_cols, titles=None, scale=1.5):
        """Draw normalised (C, H, W) image tensors on a grid and return the axes.

        Args:
            imgs: Iterable of image tensors normalised with mean 0.5 / std 0.5.
            num_rows, num_cols: Grid shape; extra images are ignored.
            titles: Optional sequence of per-image titles.
            scale: Size of each grid cell in inches.

        Returns:
            Flat array of the matplotlib axes of the grid.
        """
        figsize = (num_cols * scale, num_rows * scale)
        # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so
        # flatten() is always available.
        _, axes = plt.subplots(num_rows, num_cols, figsize=figsize,
                               squeeze=False)
        axes = axes.flatten()
        for i, (ax, img) in enumerate(zip(axes, imgs)):
            un_img = img / 2 + 0.5  # undo the normalisation: back to [0, 1]
            # detach/cpu so tensors that track gradients or live on a GPU can
            # be converted; imshow wants channels last.
            fixed_img = np.transpose(un_img.detach().cpu().numpy(), (1, 2, 0))
            # Clip float round-off so imshow receives values inside [0, 1].
            ax.imshow(np.clip(fixed_img, 0, 1))
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            if titles is not None and i < len(titles):
                ax.set_title(titles[i])
        return axes

In [None]:
# Instantiate the CIFAR10 data module with './data' as the dataset root and
# batches of 64 samples.
data = CIFAR10(root='./data', batch_size=64)

Look at the samples in this dataset. They are still images, but there is a relevant difference with respect to the previous dataset

In [None]:
# Exercise - look at a batch

Now we have 3 different color channels. Plot some of the images

In [None]:
# Exercise - plot some images with data.visualize

# Create the model

Here we will create a new model, similarly to the MLP. However, we will consider different layers and naturally a different `forward` method. Our network will contain a convolutional base followed by a dense block.

**Convolutional base**
This should contain a sequence of convolutional layers (see the documentation for `torch.nn.Conv2d`) and max-pooling layers (see the documentation for `torch.nn.MaxPool2d`). Look at the example below and observe the shape after each transformation.

In [None]:
# One convolution (3 -> 6 channels, 5x5 kernel) and one 2x2 max-pool with
# stride 2; the shape is printed after every stage.
conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
pool = nn.MaxPool2d(kernel_size=2, stride=2)

# Take a single mini-batch from the training loader.
X, y = next(iter(data.train_dataloader()))
print(X.shape)

# Stage 1: convolution.
output = conv1(X)
print(output.shape)

# Stage 2: ReLU is element-wise, so the shape is unchanged.
after_relu = torch.relu(output)
print(after_relu.shape)

# Stage 3: max-pooling.
after_pool = pool(after_relu)
print(after_pool.shape)

Your network should contain


- Convolutional layer 1: 3 input channels, 6 output channels, square convolution kernel of size 5.
- Convolutional layer 2: 6 input channels, 16 output channels, square convolution kernel of size 5.

Each convolutional layer is followed by the activation function and a Maxpooling layer as in the example above

**Dense block**

The dense block is composed of 3 fully connected layers (remember to flatten X before the first layer)
- FC1 : 120 outputs
- FC2 : 84 outputs
- FC3 :

All the dimensions that are not specified are determined by previous layers or by the dataset.

Let us look at a sequence of a Convolutional Layer and a Maxpooling layer with ReLU activation

In [None]:
class CNN(nn.Module):
    """Small CNN for 3-channel 32x32 images.

    Architecture: conv(3->6, k=5) -> ReLU -> maxpool(2) ->
    conv(6->16, k=5) -> ReLU -> maxpool(2) -> flatten ->
    FC(400->120) -> ReLU -> FC(120->84) -> ReLU -> FC(84->num_classes).

    Args:
        num_classes: Number of output logits (10 for CIFAR-10).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Convolutional block; the same pooling layer is reused after each
        # convolution because it has no parameters.
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Dense block. Spatial size for a 32x32 input:
        # 32 -conv5-> 28 -pool-> 14 -conv5-> 10 -pool-> 5, hence 16 * 5 * 5.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: Image batch of shape (batch, 3, 32, 32).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension before the dense block.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: a cross-entropy loss expects logits.
        x = self.fc3(x)
        return x

Let us check our model

In [None]:
# Build the network with its default arguments.
model = CNN()
# A bare expression on the last line of a notebook cell displays its repr.
model

# Train your model

You can now train your model just as we did for the MLP. Use the Cross entropy loss and the SGD optimizer.

In [None]:
# Exercise - create data, model, criterion and optimizer


In [None]:
# Exercise - training loop


Test your model with the validation dataset

In [None]:
# Exercise - validation dataset


# Improve your model

Try to change the architecture of the network to improve your model