# MNIST Classification with PyTorch

In this notebook we demonstrate how to build a classification pipeline for the MNIST problem using PyTorch.

In [None]:
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn

from torchsummary import summary

from matplotlib import pyplot as plt
# Default size (width, height) applied to every matplotlib figure created in this notebook
plt.rcParams['figure.figsize'] = [15, 10]

### Loading data

As we did in our previous lessons, we will load the images, normalize them and cast the class IDs to one-hot encoding format.

In [None]:
# Load the MNIST train/test splits through the Keras dataset helper
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Dataset params
num_classes = 10          # number of classes used for the one-hot encoding below
size = x_train.shape[1]   # length of the first image axis

# Normalization: divide every pixel value by 255
x_train, x_test = x_train / 255, x_test / 255

# One-hot encoding of the class IDs (same helper applied to both splits)
y_train, y_test = (
    tf.keras.utils.to_categorical(class_ids, num_classes)
    for class_ids in (y_train, y_test)
)

# Number of training samples; the training loop uses it to slice batches
num_samples = len(y_train)

print('Train set:   ', num_samples, 'samples')
print('Test set:    ', len(y_test), 'samples')
print('Sample dims: ', x_train.shape)

num_samples

### Build Classifier

Now we are going to build a classifier model using PyTorch. For this purpose, we will inherit from the `nn.Module` class. Note that we need to tell PyTorch what the forward pass looks like (so that it can automatically compute the backward pass).

In [None]:
# Keras functional-API example, kept for comparison with the PyTorch model
# defined in the next cell: the graph is declared by calling layers on tensors,
# here with two parallel Dense branches whose outputs are summed.

# Hide the GPU from TensorFlow first; otherwise its runtime may reserve GPU
# memory that the PyTorch model needs later. TensorFlow rejects this call once
# its devices are initialised (e.g. when the cell is re-run), which is harmless.
try:
    tf.config.set_visible_devices([], 'GPU')
except RuntimeError:
    pass

# NOTE(review): the original cell left the input shape as a placeholder;
# one flattened size*size image per sample is assumed here - confirm.
inputs = tf.keras.layers.Input(shape=(size * size,))
net = tf.keras.layers.Dense(10, 'relu')(inputs)
branch1 = tf.keras.layers.Dense(2)(net)
branch2 = tf.keras.layers.Dense(2)(net)
out = tf.keras.layers.add([branch1, branch2])

In [None]:
class Classifier(nn.Module):
    """Small two-stage convolutional classifier for 28x28 images.

    Each stage is Conv2d (5x5 kernel, padding 2) -> ReLU -> MaxPool2d(2), so the
    spatial resolution is halved twice (28 -> 14 -> 7) while the channel count
    grows to 16 and then 32. A single linear layer maps the flattened features
    to one raw score per class; no softmax is applied inside the model.

    Args:
        num_classes: Number of scores produced per sample. Defaults to 10.
        in_channels: Number of channels of the input images. Defaults to 1.
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # 32 channels x 7 x 7 positions: this size is only valid for 28x28 inputs
        self.out = nn.Linear(in_features=32*7*7, out_features=num_classes)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Tensor of shape (batch_size, in_channels, 28, 28), channels-first.

        Returns:
            Tensor of shape (batch_size, num_classes) holding the raw scores.
        """
        x = self.conv1(x)  # -> (batch_size, 16, 14, 14)
        x = self.conv2(x)  # -> (batch_size, 32, 7, 7)
        # Flatten everything except the batch axis -> (batch_size, 32 * 7 * 7)
        x = torch.flatten(x, start_dim=1)
        return self.out(x)

### Prepare for Training

At this stage, we need to build the model, define the loss and the optimizer to be used.

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the network and move its parameters to the selected device
model = Classifier().to(device)
# Adam over all model parameters, with the optimizer's default settings
optimizer = torch.optim.Adam(model.parameters())
# Cross-entropy computed directly on the raw scores returned by the model
criterion = nn.CrossEntropyLoss()

In [None]:
# Layer-by-layer overview of the model for one channels-first 1x28x28 input
summary(model, input_size=(1, 28, 28))

Unlike Keras, PyTorch cannot work with NumPy arrays directly. We need to convert the arrays to PyTorch tensors.

**Important:** By default, pytorch uses channels-first ordering!

In [None]:
# Shape of the test images as loaded, before a channel axis is added
x_test.shape

In [None]:
# Place the test data on whichever device the model lives on instead of assuming CUDA
device = next(model.parameters()).device

# torch.as_tensor accepts NumPy arrays as well as tensors, and the reshape leaves
# data that already has a channel axis unchanged, so this cell can be re-run safely.
x_test = torch.as_tensor(x_test, dtype=torch.float32, device=device)
x_test = x_test.reshape(-1, 1, *x_test.shape[-2:])  # (N, H, W) -> (N, 1, H, W), channels-first
y_test = torch.as_tensor(y_test, dtype=torch.float32, device=device)

In [None]:
# Shape after the conversion: a channel axis now follows the sample axis
x_test.shape

### Build the Pipeline

Finally, we have to define the training loops and the evaluation step.

In [None]:
num_epochs = 10
batch_size = 64

# Send every batch to the device the model already lives on
device = next(model.parameters()).device

for epoch in range(num_epochs):
    # Set model to train mode
    model.train()

    # Running totals for the epoch; weighting by batch length keeps the
    # averages exact even when the last batch is shorter than batch_size
    loss_sum, num_correct = 0.0, 0

    # Training loop. Stepping by batch_size up to num_samples also visits the
    # final partial batch, which integer division of the batch count would skip.
    for start in range(0, num_samples, batch_size):
        # Select samples for current batch and add the channel axis (channels-first)
        x = x_train[start:start + batch_size, np.newaxis, ...]
        y = y_train[start:start + batch_size, ...]
        # Convert batch to pytorch tensors
        x = torch.from_numpy(x).float().to(device)
        y = torch.from_numpy(y).float().to(device)

        # Reset the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass (runs batch data through the model)
        outputs = model(x)

        # Compute loss function; y is one-hot, so it is passed as per-class
        # target probabilities rather than class indices
        loss = criterion(outputs, y)

        # Run backward pass, i.e., compute gradients of loss function with respect to model parameters
        loss.backward()

        # Apply optimizer, i.e., update network weights
        optimizer.step()

        # Update monitors: loss.item() is the batch mean, so scale it back to a sum
        loss_sum += loss.item() * len(x)
        num_correct += torch.sum(torch.argmax(outputs, dim=1) == torch.argmax(y, dim=1)).item()

    # Logs for tracking the progress (mean loss, then accuracy, over the epoch)
    mean_loss = loss_sum / num_samples
    mean_acc = num_correct / num_samples
    print('Epoch ' + str(epoch) + ' | \t loss: ' + str(mean_loss), '\t' + str(mean_acc))

    # Validation step
    # eval() only switches layers to inference behaviour; gradient tracking is
    # disabled separately with no_grad() so the pass over the whole test set
    # does not build an autograd graph
    model.eval()
    with torch.no_grad():
        # Get predictions from test data
        outputs = model(x_test)
    # Log performance
    print('Eval  ' + str(torch.sum(torch.argmax(outputs, dim=1) == torch.argmax(y_test, dim=1)).item()/len(y_test)))


In [None]:
# Test accuracy from the most recent evaluation outputs:
# compare predicted class indices against the one-hot labels' indices
test_hits = torch.argmax(outputs, axis=1) == torch.argmax(y_test, axis=1)
test_accuracy = torch.sum(test_hits).item()/len(y_test)
print('Eval  \t\t' + str(test_accuracy))

In [None]:
# Show the first layer of the model's conv1 block
model.conv1[0]