In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Set working directory equal to location of current notebook file.
# `__vsc_ipynb_file__` is injected only by the VS Code Jupyter extension, so look
# it up defensively and fall back to the current directory in other front ends
# (JupyterLab, nbconvert, papermill) instead of raising NameError.
notebook_file = globals().get("__vsc_ipynb_file__")
working_directory = (
    os.path.dirname(os.path.abspath(notebook_file)) if notebook_file else os.getcwd()
)
os.chdir(working_directory)
os.getcwd()

'/home/rory/Dev/AIML/practice/deep-learning-with-pytorch'

In [3]:
# Convert MNIST image files into tensors. ToTensor works on one image at a time and
# yields a 3-D (color channel, height, width) tensor, e.g. torch.Size([1, 28, 28]);
# the 4th "number of images" dimension only appears once samples are batched.
transform = transforms.ToTensor()

In [4]:
# Load the MNIST training split from ./input/cnn_data (download=True is passed,
# so the files are fetched if needed) and apply `transform` to every image.
train_data = datasets.MNIST(
    root="./input/cnn_data",
    train=True,
    download=True,
    transform=transform,
)

In [5]:
# Load the MNIST test split from ./input/cnn_data (download=True is passed,
# so the files are fetched if needed) and apply `transform` to every image.
test_data = datasets.MNIST(
    root="./input/cnn_data",
    train=False,
    download=True,
    transform=transform,
)

In [6]:
# Show the training dataset summary (number of datapoints, root, split, transform)
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./input/cnn_data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [7]:
# Show the test dataset summary (number of datapoints, root, split, transform)
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./input/cnn_data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [8]:
# Create a small batch size for images ... let's say 10
# Only the training loader shuffles. The test loader must wrap test_data so that
# evaluation runs on the held-out images rather than on the training set.
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)

In [9]:
# Example convolutional layers (illustration only, not the final model):
#   conv1: 1 input channel  -> 6 feature maps,  3x3 kernel, stride 1, no padding
#   conv2: 6 input channels -> 16 feature maps, 3x3 kernel, stride 1, no padding
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

In [10]:
# Grab 1 MNIST record/image
# Index the dataset directly instead of starting a loop and breaking out of it;
# each item is an (image, label) pair.
X_train, y_train = train_data[0]

In [12]:
# Shape of a single image: (color channel, height, width)
X_train.shape

torch.Size([1, 28, 28])

In [13]:
# Add a leading batch dimension: (1, 28, 28) -> (1, 1, 28, 28),
# i.e. (batch, channel, height, width), the layout conv1 is fed below
x = X_train.view(1,1,28,28)

In [14]:
# Perform our first convolution
# conv1 turns the 1-channel image into 6 feature maps.
# NOTE: x is rebound here, so re-running this cell on its own fails
# (conv1 expects 1 input channel, but x now has 6) -- re-run from the .view() cell.
x = F.relu(conv1(x)) # Rectified linear unit for our activation function

In [15]:
# Inspect the activations after conv1 + ReLU (large output; the shape is shown in the next cell)
x

tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.2096, 0.2096, 0.2096,  ..., 0.2096, 0.2096, 0.2096],
          [0.2096, 0.2096, 0.2096,  ..., 0.2096, 0.2096, 0.2096],
          [0.2096, 0.2096, 0.2096,  ..., 0.2096, 0.2096, 0.2096],
          ...,
          [0.2096, 0.2096, 0.2652,  ..., 0.2096, 0.2096, 0.2096],
          [0.2096, 0.2096, 0.0729,  ..., 0.2096, 0.2096, 0.2096],
          [0.2096, 0.2096, 0.2096,  ..., 0.2096, 0.2096, 0.2096]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0

In [17]:
# 1 single image, 6 feature maps (the filters we asked for), and the image is now 26x26:
# with a 3x3 kernel and no padding, one pixel is lost on each edge (28 - 3 + 1 = 26)
x.shape

torch.Size([1, 6, 26, 26])

In [18]:
# Pass through the pooling layer: a 2x2 max-pool window moved 2 pixels at a time,
# so each spatial dimension is halved
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [19]:
x.shape # 26 / 2 = 13 (2x2 pooling with stride 2 halves each spatial dimension)

torch.Size([1, 6, 13, 13])

In [20]:
# Do our second convolutional layer
# conv2 turns the 6 pooled feature maps into 16; ReLU is applied to the result.
# NOTE: x is rebound again, so this cell is not re-runnable in isolation.
x = F.relu(conv2(x))

In [21]:
x.shape # Again, we didn't set padding, so the 3x3 kernel trims one pixel per edge (13 - 2 = 11)

torch.Size([1, 16, 11, 11])

In [22]:
# Second pooling layer: same 2x2 max-pool window with a stride of 2
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [23]:
x.shape # 11 / 2 = 5.5, but pooling rounds down to 5: the leftover row/column can't fill a full 2x2 window, so it is dropped

torch.Size([1, 16, 5, 5])

In [24]:
# Hand calculation of the final feature-map size: conv (-2), pool (/2), conv (-2), pool (/2).
# True division gives 5.5 here; the actual tensor is 5x5 because the last pooling step rounds down.
((28-2) / 2 - 2) / 2

5.5