## <font color='green'> <div align="center">In the name of God </div></font>

### <font color='red'> Author: Sayed Kamaledin Ghiasi-Shirazi</font> <a href="http://profsite.um.ac.ir/~k.ghiasi">(http://profsite.um.ac.ir/~k.ghiasi)</a> 

# CNN on MNIST with PyTorch

### Importing general modules

In [1]:
import numpy as np
import scipy.io as sio
import matplotlib as mpl
import matplotlib.pyplot as plt

### Importing PyTorch modules

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Defining the network by subclassing `nn.Module`

In [12]:
# https://github.com/BVLC/caffe/blob/master/examples/mnist/lenet.prototxt
class SemiLeNet(nn.Module):
    """LeNet-style CNN: two (5x5 conv -> ReLU -> 2x2 max-pool) stages, then two linear layers.

    The layer sizes are tied to single-channel 28x28 inputs: each un-padded 5x5
    convolution shrinks a spatial dimension by 4 and each 2x2 pooling halves it
    (28 -> 24 -> 12 -> 8 -> 4), which is where ``lin1``'s ``40*4*4`` input
    features come from.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, padding=0)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, padding=0)
        self.lin1 = nn.Linear(in_features=40*4*4, out_features=500)
        self.lin2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        """Compute raw (un-normalised) class scores.

        Parameters
        ----------
        x : torch.Tensor
            Batch of images shaped ``(batch, 1, 28, 28)``.

        Returns
        -------
        torch.Tensor
            Scores shaped ``(batch, 10)``; no softmax is applied here.

        Raises
        ------
        RuntimeError
            If the spatial size of ``x`` does not reduce to 4x4 feature maps, so
            that the flattened features do not match ``lin1``.
        """
        x = self.conv1(x)
        x = F.max_pool2d(F.relu(x), kernel_size=2, stride=2)
        x = self.conv2(x)
        x = F.max_pool2d(F.relu(x), kernel_size=2, stride=2)
        # Flatten per sample, keeping the batch dimension fixed. The previous
        # `view(-1, 40*4*4)` let the batch dimension absorb any size mismatch,
        # silently mixing samples for wrongly-sized inputs; now `lin1` rejects them.
        x = x.flatten(start_dim=1)
        x = self.lin1(x)
        x = F.relu(x)
        x = self.lin2(x)

        return x

In [13]:
# Build the network with freshly initialised weights and show its layer summary.
net = SemiLeNet()
print(net)

SemiLeNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 40, kernel_size=(5, 5), stride=(1, 1))
  (lin1): Linear(in_features=640, out_features=500, bias=True)
  (lin2): Linear(in_features=500, out_features=10, bias=True)
)


### Choosing device

In [14]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


## Loading MNIST

In [15]:
# Each .mat file holds one array stored under a key equal to the file name.
# Image arrays are divided by 255 (presumably mapping 8-bit pixels to [0, 1] -- confirm
# against the dataset); label arrays are used as loaded.
MnistTrainX = sio.loadmat('../../datasets/mnist/MnistTrainX')['MnistTrainX'] / 255
MnistTrainY = sio.loadmat('../../datasets/mnist/MnistTrainY')['MnistTrainY']
MnistTestX = sio.loadmat('../../datasets/mnist/MnistTestX')['MnistTestX'] / 255
MnistTestY = sio.loadmat('../../datasets/mnist/MnistTestY')['MnistTestY']

# Keep at most the first N training rows (lower N for a quicker run).
N = 60000
MnistTrainX, MnistTrainY = MnistTrainX[:N, :], MnistTrainY[:N, :]

# Aliases for the image matrices, plus label arrays with singleton axes squeezed out.
XTrain, yTrain = MnistTrainX, MnistTrainY.squeeze()
XTest, yTest = MnistTestX, MnistTestY.squeeze()

# From here on N is the actual number of training rows; dim is the row length.
N, dim = XTrain.shape

## Optimization

In [16]:
# --- optimisation settings ---
num_epochs = 20
learning_rate = 0.01
batch_size = 100

# --- progress reporting: evaluate every this-many training iterations ---
report_after_X_iterations = 600

# --- batch counts (floor division: a trailing partial batch is not counted) ---
num_batches = N // batch_size
NTest = 10_000
num_test_batches = NTest // batch_size

In [17]:
def _accuracy_percent(model, images, labels, batch_size, device):
    """Return the classification accuracy of ``model`` on ``images``, in percent.

    Parameters
    ----------
    model : nn.Module
        Network that maps a ``(batch, 1, 28, 28)`` float tensor to per-class scores.
    images : np.ndarray
        2-D array with one flattened 28x28 image per row.
    labels : np.ndarray
        Class labels, one per row of ``images``; flattened per batch before comparison.
    batch_size : int
        Number of rows sent through the model at once.
    device : torch.device
        Device the batches are moved to before the forward pass.

    Returns
    -------
    float
        ``(#correct / #rows) * 100``. Every row is evaluated, including a
        trailing partial batch, and the denominator is the number of rows
        actually evaluated.

    Raises
    ------
    ValueError
        If ``images`` has no rows.
    """
    num_samples = images.shape[0]
    if num_samples == 0:
        raise ValueError('cannot compute accuracy on an empty dataset')

    # Evaluate in eval mode and restore the caller's mode afterwards. This only
    # matters for mode-dependent layers such as dropout or batch-norm.
    was_training = model.training
    model.eval()
    correct = 0.0
    try:
        with torch.no_grad():
            for start in range(0, num_samples, batch_size):
                stop = start + batch_size
                batch = np.reshape(images[start:stop, :], (-1, 1, 28, 28))
                batch = torch.tensor(batch, dtype=torch.float).to(device)
                predicted = torch.argmax(model(batch), dim=1).cpu().numpy()
                correct += np.sum(predicted == labels[start:stop].reshape(-1))
    finally:
        model.train(was_training)
    return correct / num_samples * 100


net.to(device)

# SGD with momentum; the learning rate is multiplied by 0.3 once the scheduler
# has been stepped 7 times and again after 14 times.
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                           milestones=[7, 14], gamma=0.3)
criterion = nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    # Step the scheduler at the start of every epoch except the first, i.e. once
    # per completed epoch and always after that epoch's optimizer steps.
    if epoch:
        scheduler.step()
        new_lr = optimizer.param_groups[0]['lr']
        print (F'Learning rate after scheduler.step(): {new_lr}')

    # Mini-batches are consecutive row slices, visited in the same order every epoch.
    for itr in range(num_batches):
        X = torch.tensor(MnistTrainX[itr*batch_size:(itr+1)*batch_size, :], dtype=torch.float)
        X = X.view(-1, 1, 28, 28)
        T = MnistTrainY[itr*batch_size:(itr+1)*batch_size]
        T = torch.tensor(T.squeeze(), dtype=torch.long)
        X = X.to(device)
        T = T.to(device)

        output = net(X)
        loss = criterion(output, T)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (itr + 1) % report_after_X_iterations == 0:
            print('\n---- iteration #{0} of {1} at epoch #{2} of {3} ---- :'.format(
                itr+1, num_batches, epoch+1, num_epochs))

            # NOTE: the "Loss" printed on both lines below is the loss of the most
            # recent *training* mini-batch, not a loss over the evaluated dataset.
            score = _accuracy_percent(net, MnistTrainX, MnistTrainY, batch_size, device)
            print('Loss = {0}, Accuracy on training data = {1}%'.format(loss.item(), score))

            # Only the first NTest test rows are scored, as before.
            score = _accuracy_percent(net, MnistTestX[:NTest], MnistTestY[:NTest],
                                      batch_size, device)
            print('Loss = {0}, Accuracy on testing data = {1}%'.format(loss.item(), score))


---- iteration #600 of 600 at epoch #1 of 20 ---- :
Loss = 0.2428543120622635, Accuracy on training data = 96.93833333333333%
Loss = 0.2428543120622635, Accuracy on testing data = 97.32%
Learning rate after scheduler.step(): 0.01

---- iteration #600 of 600 at epoch #2 of 20 ---- :
Loss = 0.23620660603046417, Accuracy on training data = 98.10166666666666%
Loss = 0.23620660603046417, Accuracy on testing data = 98.02%
Learning rate after scheduler.step(): 0.01

---- iteration #600 of 600 at epoch #3 of 20 ---- :
Loss = 0.22546741366386414, Accuracy on training data = 98.40833333333333%
Loss = 0.22546741366386414, Accuracy on testing data = 98.13%
Learning rate after scheduler.step(): 0.01

---- iteration #600 of 600 at epoch #4 of 20 ---- :
Loss = 0.21167248487472534, Accuracy on training data = 98.72%
Loss = 0.21167248487472534, Accuracy on testing data = 98.35000000000001%
Learning rate after scheduler.step(): 0.01

---- iteration #600 of 600 at epoch #5 of 20 ---- :
Loss = 0.20153665