In [57]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [58]:
# convert MNIST image files into a tensor of 4dim (num_of_images, height, width, color chan)
transform = transforms.ToTensor()

In [59]:
# train data
train_data = datasets.MNIST(root='./cnn_data', train=True, download=True, transform=transform)

In [60]:
# test_data
test_data = datasets.MNIST(root='./cnn_data', train=False, download=True, transform=transform)

In [61]:
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./cnn_data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [62]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./cnn_data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [63]:
# create a small batch size for images
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)

In [64]:
# define cnn model
# describe convolutional layer (2 layers)
conv1 = nn.Conv2d(1, 6, 3, 1)
conv2 = nn.Conv2d(6, 16, 3, 1)

In [65]:
# grab one 1 MINST image
for i, (X_train, y_train) in enumerate(train_data):
    break
X_train.shape

torch.Size([1, 28, 28])

In [66]:
x = X_train.view(1,1,28,28)

In [67]:
# perform first convolution
x = F.relu(conv1(x))

In [68]:
# 1st dim is 1 image, 6 is the num of filters, 26x26 is the image size and is reduced because of no padding set
x.shape

torch.Size([1, 6, 26, 26])

In [69]:
# pass through the pooling layer
x = F.max_pool2d(x, 2, 2)

In [70]:
x.shape # 26/2 = 13

torch.Size([1, 6, 13, 13])

In [71]:
# second convolutional layer
x = F.relu(conv2(x))

In [72]:
x.shape # no padding so we lost 1 pixel on each side

torch.Size([1, 16, 11, 11])

In [73]:
# second pooling layor
x = F.max_pool2d(x,2,2)

In [74]:
x.shape # 11/2 = 5.5 but have to round down since we cant create data to round up

torch.Size([1, 16, 5, 5])

In [95]:
# building the model
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,6,3,1)
        self.conv2 = nn.Conv2d(6,16,3,1)
        # fully connected layers
        self.fc1 = nn.Linear(5*5*16, 120) # takes in 5*5*16 because thats the dim of our input image after CN and pooling
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10) # ending with 10 because there are 10 defining classes

    def forward(self, X):
        X = F.relu(self.conv1(X))
        X = F.max_pool2d(X,2,2)
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X,2,2)
        # re-view data to flatten
        X  = X.view(-1, 16*5*5) # -1 so we can vary batch size
        # fully connected layers
        X = F.relu(self.fc1(X))
        print("bruh")
        X = F.relu(self.fc2(X))
        X = self.fc3(x)
        return F.log_softmax(X, dim=1)


In [92]:
# create an instance of the model
torch.manual_seed(41)
model = CNN()
model

CNN(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [96]:
# loss function optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [97]:
import time
start_time = time.time()

# create stats vars
epoch = 5
train_losses = []
test_losses = []
train_correct = []
test_correct = []

for i in range(epoch):
    train_corr = 0
    test_corr = 0

    # train
    for b, (X_train, y_train) in enumerate(train_loader):
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)

        predicted = torch.max(y_pred.data, 1)[1] # add up the number of correct predictions.
        batch_corr = (predicted == y_train).sum()
        train_corr += batch_corr

        # update params
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print results
        if b%500 == 0:
            print(f'Epoch: {i} Batch: {b} Loss: {loss.item()}')
    
    train_losses.append(loss)
    train_correct.append(train_corr)

    # test
    with torch.no_grad():
        for b, (X_test, y_test) in enumerate(test_loader):
            y_val = model(X_test)
            predicted = torch.max(y_val.data, 1)[1] # adding correct predictions
            test_corr += (predicted == y_test).sum()
    loss = criterion(y_pred, y_test)
    test_losses.append(loss)
    test_correct.append(test_corr)

stop_time = time.time()
elapsed = stop_time - start_time
print(f'Training took: {elapsed/60} minutes')

RuntimeError: mat1 and mat2 shapes cannot be multiplied (80x5 and 84x10)