## Convolutional Neural Network - MNIST Classification

### Import Libraries

In [0]:
import numpy as np 
import pandas as pd 
import torch

from torch import nn
import torch.nn.functional as F
from torchvision import datasets,transforms

In [2]:
# Check if GPU is available

train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

CUDA is available!  Training on GPU ...


### Loading Datasets

In [0]:
transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])
trainset=datasets.MNIST('~/.pytorch/MNIST_data/',train=True,transform=transform,download=True)
validset=datasets.MNIST('~/.pytorch/MNIST_data/',train=False,transform=transform,download=True)

train_loader=torch.utils.data.DataLoader(trainset,batch_size=64,shuffle=True,num_workers=0)
valid_loader=torch.utils.data.DataLoader(validset,batch_size=64,shuffle=True,num_workers=0)

### Define CNN Architecture

In [4]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # First Convolutional layer with 10 filters of size 3
        self.conv1 = nn.Conv2d(1, 10, 3, 1)
        
        # Second Convolutional layer with 15 filters of size 3
        self.conv2 = nn.Conv2d(10, 15, 3, 1)
        
        # Fully-Connected Layers
        self.fc1 = nn.Linear(5*5*15, 50)
        self.dropout1 = nn.Dropout(0.25)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # Apply ReLU activation on convolutional layers
        x = F.relu(self.conv1(x))
        # Apply Max pooling
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 5*5*15)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
      
      
# create a complete CNN
model = Net()
print(model)

# move tensors to GPU if CUDA is available
if train_on_gpu:
    model.cuda()

Net(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(10, 15, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=375, out_features=50, bias=True)
  (dropout1): Dropout(p=0.25)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)


**Total No. of Parameters in the Network**

In [5]:
pytorch_total_params = sum(p.numel() for p in model.parameters())
print("Total_params",pytorch_total_params)
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Trainable_params",pytorch_total_params)

Total_params 20775
Trainable_params 20775


### Define Loss Function and Optimizer

In [0]:
import torch.optim as optim
criterion = nn.NLLLoss()

# specify optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

### Train the Model

In [7]:
# Number of epochs to train the model
n_epochs = 10

valid_loss_min = np.Inf # Track change in validation loss

for epoch in range(1, n_epochs+1):

    # Track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    
    # Model Training
    model.train()
    for data, target in train_loader:
        # Move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
            
        # Clear the gradients of all optimized variables
        optimizer.zero_grad()
        
        # Forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        
        # Calculate the batch loss
        loss = criterion(output, target)
        
        # Backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        
        # Perform a single optimization step (parameter update)
        optimizer.step()
        
        # Ipdate training loss
        train_loss += loss.item()*data.size(0)
        
    # Model Evaluation
    model.eval()
    for data, target in valid_loader:
        # Move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # Forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # Calculate the batch loss
        loss = criterion(output, target)
        # Update average validation loss 
        valid_loss += loss.item()*data.size(0)
    
    # Calculate average losses
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)
        
    # Print training/validation statistics 
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))
    
    # Save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), 'model_mnist.pt')
        valid_loss_min = valid_loss

Epoch: 1 	Training Loss: 0.895129 	Validation Loss: 0.256567
Validation loss decreased (inf --> 0.256567).  Saving model ...
Epoch: 2 	Training Loss: 0.287467 	Validation Loss: 0.167601
Validation loss decreased (0.256567 --> 0.167601).  Saving model ...
Epoch: 3 	Training Loss: 0.207972 	Validation Loss: 0.125102
Validation loss decreased (0.167601 --> 0.125102).  Saving model ...
Epoch: 4 	Training Loss: 0.169872 	Validation Loss: 0.101224
Validation loss decreased (0.125102 --> 0.101224).  Saving model ...
Epoch: 5 	Training Loss: 0.149901 	Validation Loss: 0.083112
Validation loss decreased (0.101224 --> 0.083112).  Saving model ...
Epoch: 6 	Training Loss: 0.135077 	Validation Loss: 0.076729
Validation loss decreased (0.083112 --> 0.076729).  Saving model ...
Epoch: 7 	Training Loss: 0.122149 	Validation Loss: 0.069791
Validation loss decreased (0.076729 --> 0.069791).  Saving model ...
Epoch: 8 	Training Loss: 0.113352 	Validation Loss: 0.067008
Validation loss decreased (0.06979

### Model Testing

In [0]:
# specify the image classes
classes = ['0', '1', '2', '3', '4',
           '5', '6', '7', '8', '9']

In [9]:
batch_size=16

# track test loss
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

model.eval()
# iterate over test data
for data, target in valid_loader:
  
    # move tensors to GPU if CUDA is available
    if train_on_gpu:
        data, target = data.cuda(), target.cuda()
        
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    
    # calculate the batch loss
    loss = criterion(output, target)
    
    # update test loss 
    test_loss += loss.item()*data.size(0)
    
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)  
    
    # compare predictions to true label
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not train_on_gpu else np.squeeze(correct_tensor.cpu().numpy())
    
    # calculate test accuracy for each object class
    for i in range(batch_size):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

# average test loss
test_loss = test_loss/len(valid_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.061018

Test Accuracy of     0: 98% (260/265)
Test Accuracy of     1: 98% (306/310)
Test Accuracy of     2: 97% (238/243)
Test Accuracy of     3: 98% (230/233)
Test Accuracy of     4: 98% (261/264)
Test Accuracy of     5: 98% (207/210)
Test Accuracy of     6: 97% (240/247)
Test Accuracy of     7: 96% (223/231)
Test Accuracy of     8: 95% (240/251)
Test Accuracy of     9: 96% (248/258)

Test Accuracy (Overall): 97% (2453/2512)


**We have achieved accuracy of 97% with 20775 trainable parameters**