In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [None]:
# Convert each MNIST image to a tensor when it is read from the dataset.
transform = transforms.ToTensor()
# download=True only fetches the files when they are missing under `root`,
# so the notebook also runs on a machine that does not have the data yet.
train_data = datasets.MNIST(root='../data/cnn_files', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='../data/cnn_files', train=False, download=True, transform=transform)

In [None]:
# Bare expression: the notebook displays the training dataset's repr.
train_data

In [None]:
# Bare expression: the notebook displays the test dataset's repr.
test_data

In [None]:
# Mini-batches of 10 images; only the training loader shuffles its samples.
train_loader = DataLoader(dataset=train_data, shuffle=True, batch_size=10)
test_loader = DataLoader(dataset=test_data, shuffle=False, batch_size=10)

In [None]:
############### Let's see how a Conv layer works

In [None]:
# Stand-alone layers used only to demonstrate how shapes change.
# in_channels  = number of feature maps produced by the previous layer
# out_channels = number of feature maps this layer produces (= number of filters)
conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=6,
    kernel_size=3,
    stride=1,
)

conv2 = nn.Conv2d(
    in_channels=6,
    out_channels=16,
    kernel_size=3,
    stride=1,
)

In [None]:
# Grab the first (image, label) pair of the training set by indexing it
# directly instead of starting a loop and breaking out of it.
X_train, y_train = train_data[0]

In [None]:
# Shape of the single training image held in X_train.
X_train.shape

In [None]:
# The conv layers are fed a 4D batch here, so view the single image as a
# batch of one: (batch=1, channels=1, height=28, width=28).
x = X_train.view(1,1,28,28)

In [None]:
# First convolution + ReLU. Run once, in order, on the (1, 1, 28, 28) input:
# a 3x3 kernel with stride 1 and no padding gives (1, 6, 26, 26).
x = conv1(x).relu()
x.shape

In [None]:
# 2x2 max pooling with stride 2 halves each spatial dimension.
x = F.max_pool2d(x, kernel_size=2, stride=2)
x.shape

In [None]:
# Second convolution + ReLU: 6 input feature maps become 16, and the 3x3
# kernel (stride 1, no padding) trims 2 pixels from each spatial dimension.
x = conv2(x).relu()
x.shape

In [None]:
# Second 2x2 max pooling (stride 2); odd spatial sizes are rounded down.
x = F.max_pool2d(x, kernel_size=2, stride=2)
x.shape

In [None]:
###############

In [None]:
class ConvolutionalNetwork(nn.Module):
    """Small CNN classifier for single-channel 28x28 images (10 classes).

    Two 3x3 convolutions (1->6 and 6->16 channels), each followed by ReLU and
    2x2 max pooling, feed three fully connected layers (400->120->84->10).
    ``forward`` returns log-probabilities, so pair it with ``nn.NLLLoss``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)
        # Spatial size for a 28x28 input:
        # 28 -> 26 (conv1) -> 13 (pool) -> 11 (conv2) -> 5 (pool), 16 channels.
        self.fc1 = nn.Linear(5*5*16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, X):
        """Return per-class log-probabilities for a batch of images.

        Args:
            X: tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding log-softmax scores.

        Raises:
            RuntimeError: if the spatial size of ``X`` does not reduce to the
                16*5*5 features that ``fc1`` expects.
        """
        X = F.relu(self.conv1(X))
        X = F.max_pool2d(X, 2, 2)
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X, 2, 2)
        # Flatten per sample and keep the batch dimension fixed. view(-1, 400)
        # would silently regroup elements across samples for inputs of another
        # size; this way the mismatch surfaces as an error in fc1.
        X = X.view(X.size(0), -1)
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = F.log_softmax(self.fc3(X), dim=1)

        return X

In [None]:
# Seed PyTorch's RNG right before building the model so that the initial
# weights are the same on every run.
torch.manual_seed(42)
model = ConvolutionalNetwork()
# Bare expression: the notebook displays the module's layer summary.
model

In [None]:
# Print the element count of every parameter tensor of the model, one per line.
for weights in model.parameters():
    print(weights.numel())

In [None]:
# The model's forward() already ends in log_softmax, so the matching loss is
# NLLLoss. CrossEntropyLoss expects raw logits and would apply log_softmax a
# second time.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
import time

# Train for a fixed number of epochs and validate on the whole test set after
# each one. Per-epoch results are collected for the plots that follow.
start_time = time.time()

epochs = 5
train_losses_list = []    # mean training loss per epoch (Python floats)
test_losses_list = []     # mean validation loss per epoch (Python floats)
train_corrects_list = []  # correct training predictions per epoch (0-dim tensors)
test_corrects_list = []   # correct validation predictions per epoch (0-dim tensors)

for i in range(epochs):
    train_correct = torch.zeros((), dtype=torch.long)
    test_correct = torch.zeros((), dtype=torch.long)
    train_seen = 0
    train_loss_sum = 0.0

    model.train()
    for batch_num, (X_train, y_train) in enumerate(train_loader, start=1):
        # Call the module itself (not .forward) so registered hooks run.
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)

        # argmax returns integer class indices that carry no gradient.
        predicted = y_pred.argmax(dim=1)
        train_correct += (predicted == y_train).sum()
        train_seen += len(y_train)
        # The criterion returns the batch mean (default reduction); weight it
        # by the batch size so the epoch figure is a true per-sample mean.
        # .item() keeps only the number and lets the autograd graph be freed.
        train_loss_sum += loss.item() * len(y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_num % 600 == 0:
            # Divide by the samples actually seen, not an assumed batch size.
            accuracy_till_here = (train_correct.item()/train_seen)*100
            print(f'epoch: {i}, batch number:{batch_num}, loss:{loss.item()}, accuracy:{accuracy_till_here}')

    train_losses_list.append(train_loss_sum / max(train_seen, 1))
    train_corrects_list.append(train_correct)

    # Validation: no gradients, and the loss is averaged over every test
    # sample instead of being taken from the final batch only.
    model.eval()
    test_seen = 0
    test_loss_sum = 0.0
    with torch.no_grad():
        for X_test, y_test in test_loader:
            y_eval = model(X_test)

            predicted = y_eval.argmax(dim=1)
            test_correct += (predicted == y_test).sum()
            test_seen += len(y_test)
            test_loss_sum += criterion(y_eval, y_test).item() * len(y_test)

    test_losses_list.append(test_loss_sum / max(test_seen, 1))
    test_corrects_list.append(test_correct)

duration = time.time() - start_time
print(f'Training took {duration/60} mins')

In [None]:
# float() accepts plain numbers as well as 0-dim tensors, including tensors
# that still require grad, which matplotlib cannot convert to numpy itself.
plt.plot([float(loss_value) for loss_value in train_losses_list], label='train loss')
plt.plot([float(loss_value) for loss_value in test_losses_list], label='validation loss')
plt.title('loss at epoch')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend();

In [None]:
# Turn per-epoch correct counts into percentages, using the real dataset
# sizes rather than hard-coded 60000 / 10000.
plt.plot([(float(t)/len(train_data))*100 for t in train_corrects_list], label='train accuracy')
plt.plot([(float(t)/len(test_data))*100 for t in test_corrects_list], label='validation accuracy')
plt.title('Accuracy at the end of each epoch')
plt.xlabel('epoch')
plt.ylabel('accuracy (%)')
plt.legend();

In [None]:
# Score the whole test set in one batch; the batch size follows the dataset
# length instead of a hard-coded 10000.
test_all_loader = DataLoader(test_data, batch_size=len(test_data), shuffle=False)
model.eval()  # make sure the model is in inference mode
with torch.no_grad():
    # The loader yields exactly one batch, so take it directly.
    X_test, y_test = next(iter(test_all_loader))
    y_val = model(X_test)
    predicted = y_val.argmax(dim=1)
    corrects = (predicted == y_test).sum()
# Fraction of correctly classified test images.
corrects.item()/len(X_test)

In [None]:
# sklearn's signature is confusion_matrix(y_true, y_pred): rows are the true
# digits and columns the predicted ones. Passing them the other way round
# returns the transposed matrix.
confusion_matrix(y_test, predicted)

In [None]:
# Look at one individual test image (index 2000).
digit_pixels = test_data[2000][0].reshape(28, 28)
plt.imshow(digit_pixels, cmap='gray')

In [None]:
# Classify that single test image: shape it as a batch of one, run it through
# the model without tracking gradients and report the highest-scoring class.
model.eval()
single_input = test_data[2000][0].view(1, 1, 28, 28)
with torch.no_grad():
    the_prediction = model.forward(single_input)
the_prediction.argmax().item()