<a href="https://colab.research.google.com/github/markpedraza/Starter-Neural-Network/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
%matplotlib inline


In [2]:
# Convert each MNIST image file into a tensor of shape (color channels, height, width).
# Batching later adds a leading dimension, giving 4-D input: (# of images, color channels, height, width).
transform = transforms.ToTensor()

In [3]:
# Train Data
train_data = datasets.MNIST(root='/cnn_data', train=True, download=True, transform=transform)
# root = directory the data set is saved to
# train = the MNIST data set includes both train and test data. True selects the train data
# download = download the MNIST data into the root directory (here: our local google colab)
# transform = applied to every image as it is loaded; here it turns the image into a tensor
# NOTE(review): root is an absolute path at the top of the filesystem; outside colab this may need write permission - confirm


In [4]:
# Test Data: same arguments as the train data, except train=False selects the test split
test_data = datasets.MNIST(root='/cnn_data', train=False, download=True, transform=transform)

In [5]:
# Display a summary of the train data set (size, location, split, transform)
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: /cnn_data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [6]:
# Display a summary of the test data set (size, location, split, transform)
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: /cnn_data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [7]:
# Load the images in small mini-batches. Both loaders share one batch size so it
# can be tuned in a single place.
BATCH_SIZE = 10
# Only the train data is shuffled; the test data keeps its original order.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
# Define the two convolutional layers of our CNN so we can step through them by hand
# - conv1: 1 input channel (grayscale) -> 6 feature maps, using a 3x3 kernel moved 1 pixel at a time
# - padding: none is set here (the Conv2d default is 0), so each 3x3 convolution trims a 1-pixel border off the image.
#            Look at the images from our data set: the numbers dont actually reach the sides of the image,
#            so it doesnt matter much in this dataset, but for others it likely will.
conv1 =  nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
# Remember that the in_channels of layer 2 must match the out_channels of the previous layer (6)
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

In [9]:
# Grab 1 MNIST record: the first (image tensor, label) pair of the train data.
# Indexing the data set directly replaces the enumerate-and-break loop and
# yields the same first record.
X_train, y_train = train_data[0]


In [10]:
X_train.shape # (channels, height, width): 1 grayscale channel, 28 x 28 pixels.

torch.Size([1, 28, 28])

In [11]:
x = X_train.view(1,1,28,28) # (batch, channels, height, width): a batch of 1 image, 1 channel, 28 x 28 pixels

In [12]:
# Perform our first convolution
x = F.relu(conv1(x)) # ReLU (Rectified Linear Unit) is our activation function.

In [13]:
# 1 image, 6 feature maps (one per filter we asked for), and now 26 by 26 pixels.
# Without padding the 3x3 kernel trims a 1-pixel border (the numbers rarely reach the edges, so little is lost).
x.shape # Shape after the first convolution layer has been run.


torch.Size([1, 6, 26, 26])

In [14]:
# Pass through the pooling layer: a 2x2 window moved 2 pixels at a time
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [15]:
# 1 image, 6 feature maps as before, now 13 x 13 pixels: the 2x2 pooling halves each side. 26/2 = 13.
x.shape

torch.Size([1, 6, 13, 13])

In [16]:
# Run our second convolutional layer, again followed by ReLU
x = F.relu(conv2(x))

In [17]:
# 1 image, 16 feature maps (as was defined in the 2nd conv layer), 11 x 11 pixels.
# No padding is set, so the 3x3 kernel trims the 13 x 13 input by 2 pixels per side length (13 - 2 = 11).
x.shape

torch.Size([1, 16, 11, 11])

In [18]:
# Second pooling layer, same 2x2 window and stride of 2 as the first
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [19]:
# 11 / 2 = 5.5 but the result is rounded down to 5: pooling only uses complete 2x2 windows, because you cant invent data to round up.
x.shape

torch.Size([1, 16, 5, 5])

In [20]:
# Model Class
class ConvolutionalNetwork(nn.Module):
  """Small CNN for 1-channel 28x28 images that outputs 10 class log-probabilities.

  Architecture: two (3x3 convolution -> ReLU -> 2x2 max-pool) stages followed by
  three fully connected layers (400 -> 120 -> 84 -> 10).
  """

  # Number of values per image once the last pooled feature map is flattened:
  # 16 feature maps of 5 x 5 pixels (the shape worked out step by step earlier in the notebook).
  _FLAT_FEATURES = 16 * 5 * 5

  def __init__(self):
    """Create the convolutional and fully connected layers."""
    super().__init__()

    # This is the same as the conv variables from earlier in the notebook, but now in our class
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

    # Fully Connected Layers
    # - in_features of fc1: the flattened size of the last pooled feature map
    # - out_features: arbitrary. Narrow down 120 -> 84 -> 10 (one output per digit)
    # Remember, the input features should match the output of the previous layer
    self.fc1 = nn.Linear(in_features=self._FLAT_FEATURES, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self, X):
    """Push a batch through the conv layers, pool layers, and connected layers.

    Args:
      X: image tensor of shape (batch, 1, 28, 28).

    Returns:
      Tensor of shape (batch, 10) holding log-probabilities (log_softmax over dim 1).
    """
    # First pass: convolution, activation, then 2x2 pooling with stride 2
    X = F.relu(self.conv1(X))
    X = F.max_pool2d(X, 2, 2)
    # Second pass
    X = F.relu(self.conv2(X))
    X = F.max_pool2d(X, 2, 2)

    # Flatten each image to one row; -1 lets the batch size vary.
    # reshape (instead of view) also works when the feature map is not
    # contiguous in memory, e.g. for channels_last models.
    X = X.reshape(-1, self._FLAT_FEATURES)

    # Fully Connected Layers
    X = F.relu(self.fc1(X))
    X = F.relu(self.fc2(X))
    X = self.fc3(X)

    return F.log_softmax(X, dim=1)

In [22]:
# Create an instance of our model
# The seed is set right before construction so the random initial weights are the same on every run
SEED = 645
torch.manual_seed(SEED)
model = ConvolutionalNetwork()
# Display the layer summary
model

ConvolutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [23]:
# Loss function and optimizer
# The model's forward() already returns log-probabilities (F.log_softmax), so the
# matching loss is NLLLoss. CrossEntropyLoss would apply log_softmax a second time;
# the loss value is the same, the extra pass is just redundant.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # The smaller the learning rate, the longer it takes to train