<a href="https://colab.research.google.com/github/sketchydough/Pytorch-DL-Practice/blob/main/CNN_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [120]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [121]:
# ToTensor turns each MNIST image into a float tensor laid out as (channels, height, width);
# once batched, the model input is 4-D: (#images, channels, height, width).
transform=transforms.ToTensor()  # image -> tensor


In [122]:
# Training split of MNIST, stored under /cnn_data and converted with `transform`
train_data = datasets.MNIST(
    root='/cnn_data',
    train=True,
    download=True,
    transform=transform,
)

In [123]:
# Held-out test split of MNIST, stored under /cnn_data and converted with `transform`
test_data = datasets.MNIST(
    root='/cnn_data',
    train=False,
    download=True,
    transform=transform,
)

In [124]:
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: /cnn_data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [125]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: /cnn_data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [126]:
# Serve the images in mini-batches of 10.
# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=10, shuffle=False)

In [127]:
# Stand-alone copies of the two convolutional layers, used below to trace
# how the tensor shape changes step by step.

# 1 input channel (grayscale) -> 6 output feature maps, 3x3 kernel, stride 1
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
# 6 input channels (the 6 maps from conv1) -> 16 output feature maps, 3x3 kernel, stride 1
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

In [128]:
# Grab the first MNIST record (image tensor, label) by index instead of
# starting a loop over the whole dataset and breaking out immediately.
X_train, y_train = train_data[0]

In [129]:
X_train

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [130]:
X_train.shape

torch.Size([1, 28, 28])

In [131]:
# Prepend a batch dimension: (1, 28, 28) -> (1, 1, 28, 28)
x = X_train.unsqueeze(0)

In [132]:
# First convolution, followed by the ReLU (rectified linear unit) activation
x = torch.relu(conv1(x))

In [133]:
x.shape # 1 image, 6 feature maps, 26x26: with no padding a 3x3 kernel trims one pixel from every border (28 -> 26)

torch.Size([1, 6, 26, 26])

In [134]:
# Max-pooling with a 2x2 window moved in steps of 2
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [135]:
x.shape # 13x13: a 2x2 pooling window with stride 2 halves each spatial dimension (26 / 2 = 13)


torch.Size([1, 6, 13, 13])

In [136]:
# Second convolution, again followed by ReLU
x = torch.relu(conv2(x))

In [137]:
x.shape # 16 feature maps, 11x11: no padding again, so the 3x3 kernel trims one pixel per border (13 -> 11)

torch.Size([1, 16, 11, 11])

In [138]:
# Second max-pooling step (2x2 window, stride 2)
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [139]:
x.shape # 11 / 2 = 5.5 is rounded down to 5: the leftover row/column cannot fill a full 2x2 window, so it is dropped

torch.Size([1, 16, 5, 5])

In [140]:
#model class

class ConvolutionalNetwork(nn.Module):
  """Two-conv-layer CNN mapping 1x28x28 images to 10 log-probabilities.

  Layout: conv(1->6, 3x3) -> ReLU -> 2x2 max-pool -> conv(6->16, 3x3) -> ReLU
  -> 2x2 max-pool -> flatten -> fc(400->120) -> ReLU -> fc(120->84) -> ReLU
  -> fc(84->10) -> log-softmax.
  """

  def __init__(self):
    super().__init__()
    # Layers are created in the same order as before, so a fixed seed still
    # produces the same initial weights.
    self.conv1 = nn.Conv2d(1, 6, 3, 1)
    self.conv2 = nn.Conv2d(6, 16, 3, 1)
    # Fully connected head: 16 feature maps of 5x5 flattened to 400 inputs
    self.fc1 = nn.Linear(5*5*16, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)

  def forward(self, X):
    """Run the network and return log-probabilities, one row per image.

    Args:
      X: image tensor; the layer sizes assume shape (batch, 1, 28, 28).

    Returns:
      Tensor of shape (batch, 10) holding log-softmax scores over dim 1.

    Raises:
      RuntimeError: if the feature map left after the second pooling step
        does not hold exactly ``fc1.in_features`` values per image.
    """
    # First pass: convolution -> ReLU -> 2x2 max-pool with stride 2
    X = F.max_pool2d(F.relu(self.conv1(X)), 2, 2)
    # Second pass, same pattern
    X = F.max_pool2d(F.relu(self.conv2(X)), 2, 2)

    # Guard the flatten below: view(-1, 400) on a wrongly sized input would
    # otherwise silently spread one image over several rows (or merge
    # several images) instead of failing.
    expected = self.fc1.in_features
    features_per_image = X.shape[-3] * X.shape[-2] * X.shape[-1]
    if features_per_image != expected:
      raise RuntimeError(
        f'expected {expected} features per image after pooling, '
        f'got {features_per_image} (feature map shape {tuple(X.shape)})'
      )

    # -1 lets the batch size vary
    X = X.view(-1, expected)

    # Fully connected layers
    X = F.relu(self.fc1(X))
    X = F.relu(self.fc2(X))
    X = self.fc3(X)
    return F.log_softmax(X, dim=1)

In [141]:
# Create an instance of our model.
# The seed is set immediately before construction, so the randomly
# initialised weights come out the same on every run.
torch.manual_seed(41)
model = ConvolutionalNetwork()
model

ConvolutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [142]:
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
# A smaller learning rate means smaller update steps, so training takes longer
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [144]:
import time # to see how long this takes
start_time = time.time()

# Per-epoch bookkeeping. Every list receives one entry per epoch:
#   *_losses  - loss of the LAST batch seen in that phase (0-d tensor)
#   *_correct - number of correctly classified images in that phase
epochs = 5
train_losses = []
test_losses = []
test_correct = []
train_correct = []

for i in range(epochs):
  train_corr = 0
  test_corr = 0

  # ---- Train ----
  # Explicit mode switch; keeps the loop correct if dropout or batch-norm
  # layers are ever added to the model.
  model.train()
  for b, (X_train, y_train) in enumerate(train_loader, start=1): # batches numbered from 1
    y_pred = model(X_train) # one row of 10 class scores per image
    loss = criterion(y_pred, y_train)

    # The index of the highest score is the predicted digit; count the hits
    predicted = torch.argmax(y_pred.detach(), dim=1)
    train_corr += (predicted == y_train).sum()

    # Update parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print progress
    if b%600==0:
      print(f'epoch: {i} batch: {b} loss: {loss.item()}')

  # Detach before storing: the raw loss still requires grad, which keeps it
  # tied to autograd and breaks later numpy conversion / plotting.
  # It stays a tensor, so .item() keeps working on the stored values.
  train_losses.append(loss.detach())
  train_correct.append(train_corr)

  # ---- Test ----
  model.eval()
  with torch.no_grad(): # evaluation only: no gradients are tracked
    for X_test, y_test in test_loader:
      y_val = model(X_test)
      predicted = torch.argmax(y_val, dim=1)
      test_corr += (predicted == y_test).sum()

    # Loss of the final test batch, computed inside no_grad as well
    loss = criterion(y_val, y_test)

  test_losses.append(loss)
  test_correct.append(test_corr)



current_time = time.time()
total = current_time - start_time
print(f'Training took: {total/60} minutes')

epoch: 0 batch: 600 loss: 0.3602105975151062
epoch: 0 batch: 1200 loss: 0.26941999793052673
epoch: 0 batch: 1800 loss: 0.0018595507135614753
epoch: 0 batch: 2400 loss: 0.09844367951154709
epoch: 0 batch: 3000 loss: 1.2523682117462158
epoch: 0 batch: 3600 loss: 0.014283351600170135
epoch: 0 batch: 4200 loss: 0.02023053541779518
epoch: 0 batch: 4800 loss: 0.42354097962379456
epoch: 0 batch: 5400 loss: 0.22341743111610413
epoch: 0 batch: 6000 loss: 0.05625303462147713
epoch: 1 batch: 600 loss: 0.00150180677883327
epoch: 1 batch: 1200 loss: 0.0004955746699124575
epoch: 1 batch: 1800 loss: 0.2673165202140808
epoch: 1 batch: 2400 loss: 1.3810557126998901
epoch: 1 batch: 3000 loss: 0.0004503824166022241
epoch: 1 batch: 3600 loss: 0.09446550160646439
epoch: 1 batch: 4200 loss: 0.000286845926893875
epoch: 1 batch: 4800 loss: 0.23478055000305176
epoch: 1 batch: 5400 loss: 0.006459338124841452
epoch: 1 batch: 6000 loss: 0.31828704476356506
epoch: 2 batch: 600 loss: 0.23991870880126953
epoch: 2 ba