In [16]:
import torch
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

<h2> Here we import the Fashion MNIST data</h2>
<p>We don't really care too much for the type of data since the goal is just to code a Residual Neural Network</p>
<p>Furthermore, the architure is not as large or deep as it could be but this was done on purpose. To train the model quickly we wanted to be able to run it on our GPU. If the architecture was too large, this would not have been possible.</p>

In [3]:
training_data = datasets.FashionMNIST(
    root='data',
    train=True,
    download=False,
    transform=transforms.ToTensor()
)

test_data = datasets.FashionMNIST(
    root='data',
    train=False,
    download=False,
    transform=transforms.ToTensor()
)

# We use a training batch of 64 and a test batches of 1
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data)

<h2>Here we build the residual block that will be used in our model</h2>

In [23]:
class ResidualBlock(nn.Module):
    def __init__(self, in_dimension, out_dimension):
        super(ResNet2, self).__init__()
        
        self.conv1 = nn.Conv2d(
            in_channels = in_dimension, 
            out_channels = out_dimension,
            kernel_size = 3,
            padding='same'
        )
        self.conv2 = nn.Conv2d(
            in_channels = in_dimension, 
            out_channels = out_dimension,
            kernel_size = 3,
            padding='same'
        )
    def forward(self, X):
        
        residual = X
        
        # Perform first conv block
        X = self.conv1(X)
        X = F.relu(X)
        
        # Perform second conv block
        X = self.conv2(X)
        # Add skip connection before activation
        X = F.relu(X+residual)
        
        return X
        
        

<h3>We don't make the model too deep or use larger kernels to keep memory consumption low; this allows us to throw our model onto our GPU and make computations much more quickly.</h3>
<p> The model is gives as follow:</p>
<ol>
    <li>convolutional operation, zero padding</li>
    <li>residual block (same padding)</li>
    <li>max pool (kernel 2, stride 2)</li>
    <li>flatten into vector</li>
    <li>fully connected layer (relu activation)</li>
    <li>fully connected layer and softmax</li>
</ol>
  

In [11]:
model2 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=10, kernel_size=3), # dim = 10, 26, 26 
    ResidualBlock(in_dimension=10, out_dimension=10), # dim = 10, 26, 26 
    nn.MaxPool2d(kernel_size=2, stride=2), # dim = 10, 13, 13 
    nn.Flatten(), # dim = 10 x 13 x 13 
    nn.Linear(10*13*13, 10*13*13),
    nn.ReLU(),
    nn.Linear(10*13*13, 10),
    nn.Softmax()
)

In [12]:
device = torch.device('cuda')

In [13]:
model2.to(device)

Sequential(
  (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (1): ResNet2(
    (conv1): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (conv2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=same)
  )
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Flatten(start_dim=1, end_dim=-1)
  (4): Linear(in_features=1690, out_features=1690, bias=True)
  (5): ReLU()
  (6): Linear(in_features=1690, out_features=10, bias=True)
  (7): Softmax(dim=None)
)

In [14]:
loss_function = nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(model2.parameters(), lr=1e-4)

In [15]:
j = 0
for j in range(10):
    for i, data in enumerate(train_dataloader):
        X,y = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()

        y_pred = model2.forward(X)
        loss = loss_function(y_pred, y)

        loss.backward()
        optimizer.step()
    print(f'Epoch {j}: loss {loss}')

Epoch 0: loss 1.6450124979019165
Epoch 1: loss 1.6301871538162231
Epoch 2: loss 1.652033805847168
Epoch 3: loss 1.6457017660140991
Epoch 4: loss 1.6288050413131714
Epoch 5: loss 1.6033552885055542
Epoch 6: loss 1.575186848640442
Epoch 7: loss 1.5828412771224976
Epoch 8: loss 1.581514835357666
Epoch 9: loss 1.578014850616455


In [22]:
for i, data in enumerate(test_dataloader):
    X, y = data
    X = X.to(device)
    y_pred = model2.forward(X)
    print(f"Prediction: {torch.argmax(y_pred).item()}, Actual: {y.item()}")
    if i == 5:
        break

Prediction: 9, Actual: 9
Prediction: 2, Actual: 2
Prediction: 1, Actual: 1
Prediction: 1, Actual: 1
Prediction: 6, Actual: 6
Prediction: 1, Actual: 1
