In [2]:
# The kernel depth must match the depth of the input (and therefore of the receptive field), because
# we do an element-wise multiplication between the kernel and the receptive field, followed by a summation,
# yielding a single number (one entry of the feature map).
# Sliding/convolving the kernel once across the image produces one feature map;
# repeating this with further kernels produces as many feature maps as we want:
# more kernels = more feature map channels

# Pooling/Downsampling simply reduces the feature map size to what we like
# Max pooling takes the maximum value within each pooling window of the feature map
# Average pooling takes the average of each pooling window, and that average becomes the downsampled value

# Valid padding - no padding (P = 0)
# Whenever you do not pad your image, the output size is smaller than the input size (for K > 1)
# Same padding - non-zero padding (P > 0); done to make sure our output size is the same as the input size
# by padding a border of zeros around the image

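# How to calculate the output size, and the padding needed to keep it equal to the input size:
# O = [(W-K+2P)/S] + 1   (the division is rounded down)
# O: output height/length, W: input height/length
# K: filter size (kernel size), P: padding, S: stride
# Same padding with S = 1 and odd K: P = (K-1)/2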


In [3]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

# Load the MNIST splits from ./data, converting every image to a tensor.
# Only the training split passes download=True.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = dsets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [4]:
# Size of the raw training image tensor.
# NOTE(review): newer torchvision releases appear to expose this as `.data` and
# warn on `train_data` - confirm against the installed version.
print(train_dataset.train_data.shape)



torch.Size([60000, 28, 28])


In [5]:
# Size of the training label tensor.
# NOTE(review): newer torchvision releases appear to expose this as `.targets` and
# warn on `train_labels` - confirm against the installed version.
print(train_dataset.train_labels.shape)



torch.Size([60000])


In [6]:
# Size of the raw test image tensor.
# NOTE(review): newer torchvision releases appear to expose this as `.data` and
# warn on `test_data` - confirm against the installed version.
print(test_dataset.test_data.shape)



torch.Size([10000, 28, 28])


In [7]:
# Size of the test label tensor.
# NOTE(review): newer torchvision releases appear to expose this as `.targets` and
# warn on `test_labels` - confirm against the installed version.
print(test_dataset.test_labels.shape)



torch.Size([10000])


In [8]:
# Training schedule: mini-batch size and total number of training iterations.
batch_size = 100
n_iters = 3000

# Convert the iteration budget into whole passes over the training set:
# iterations per epoch = dataset size / batch size; int() truncates the result.
num_epochs = int(n_iters / (len(train_dataset) / batch_size))

# Wrap both datasets in iterable mini-batch loaders.
# Only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [9]:
# We need to calculate our expected output size given an input size of 28
# (from the train and test datasets), and we are using a stride of 1

# here, we are using kernel size K = 5 for convolution
# same (non-zero) padding: P = (K-1)/2
# P = (5-1)/2 = 2
# stride S = 1
# remember: O = [(W-K+2P)/S] + 1; for convolution
# O = W/K; for pooling (with the pooling stride equal to the filter size)...here we choose a filter size of 2; K = 2

# so for the 1st convolution: O = [(28-5+2(2))/1] + 1 = 28
# and for the 1st max pooling: O = W/K = 28/2 = 14

# 2nd convolution: O = [(14-5+2(2))/1] + 1 = 14
# 2nd max pooling: O = W/K = 14/2 = 7

In [10]:
# Building the model class
# in_channels = 1 because we are using MNIST images, which have a single grayscale channel
# out_channels = 16: the number of kernels we choose; each unique kernel produces
# one feature map, so here we have 16 feature maps

class CNNModel(nn.Module):
    """Two-stage convolutional classifier for 28x28 single-channel images.

    Layer stack (spatial sizes are for a 28x28 input):
        conv 5x5, stride 1, padding 2, 1 -> 16 channels    -> 16 x 28 x 28
        ReLU, 2x2 max pool                                 -> 16 x 14 x 14
        conv 5x5, stride 1, padding 2, 16 -> 32 channels   -> 32 x 14 x 14
        ReLU, 2x2 max pool                                 -> 32 x 7 x 7
        flatten, linear readout                            -> 10 scores
    """

    def __init__(self):
        """Create the convolution, activation, pooling and readout layers."""
        super(CNNModel, self).__init__()

        # Convolution 1: kernel 5 with padding 2 and stride 1 keeps the spatial size.
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        # Non-linearity applied after the convolution.
        self.relu1 = nn.ReLU()

        # Max pool 1: kernel size 2 halves the height and width.
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Convolution 2: consumes the 16 feature maps produced by convolution 1.
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()

        # Max pool 2: halves the height and width again.
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Fully connected readout: 32 channels * 7 * 7 positions in, 10 scores out.
        self.fc1 = nn.Linear(32 * 7 * 7, 10)

    # forward must be defined at class level (not nested inside __init__),
    # otherwise nn.Module cannot find it when the model is called.
    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: batch of images with 1 channel; the readout layer expects the
               pooled feature maps to be 7x7, i.e. 28x28 inputs.

        Returns:
            Tensor with one row of 10 raw (un-normalised) scores per input image.
        """
        # Convolution 1 + non-linearity
        out = self.cnn1(x)
        out = self.relu1(out)

        # Max pool 1
        out = self.maxpool1(out)

        # Convolution 2 + non-linearity
        out = self.cnn2(out)
        out = self.relu2(out)

        # Max pool 2
        out = self.maxpool2(out)

        # Flatten everything except the batch dimension:
        # (batch, 32, 7, 7) -> (batch, 32*7*7); -1 infers the remaining size.
        out = out.view(out.size(0), -1)

        # Linear function (readout)
        out = self.fc1(out)

        return out

In [11]:
# Instantiate Model Class
# CNNModel's constructor takes no arguments; all layer sizes are fixed inside the class.
model = CNNModel()

In [12]:
# Instantiate loss class
# Cross-entropy loss for the 10-class problem. The model's readout layer returns
# raw scores with no softmax; per the PyTorch docs, CrossEntropyLoss expects
# exactly such un-normalised scores.
criterion = nn.CrossEntropyLoss()

In [13]:
# Stochastic gradient descent over all parameters returned by model.parameters(),
# using a fixed learning rate.
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)