In [4]:
# Required packages
# CUDA 10.2
# torch==1.7.1
# torchvision==0.8.2
# tqdm==4.36.1
# matplotlib==2.1.0
# numpy==1.18.4

In [5]:
from __future__ import print_function
import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
from time import *
%matplotlib inline

## Convolutional Neural Network (CNN) Model

In [6]:
class DLS_Model(nn.Module):
    """Small CNN classifier: three conv/max-pool stages and three linear layers.

    The first linear layer expects 128 * 3 * 3 flattened features per sample.
    That is what the convolutional stack yields for a 1-channel 28x28 image
    (spatial side: 28 -> 27 -> 13 -> 12 -> 6 -> 6 -> 3). The network returns
    10 raw scores per sample; no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 32 channels, 2x2 kernel, then 2x2 max-pool.
        self.conv1 = nn.Conv2d(1, 32, 2)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 32 -> 64 channels, 2x2 kernel, then 2x2 max-pool.
        self.conv2 = nn.Conv2d(32, 64, 2)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Stage 3: 64 -> 128 channels, 1x1 kernel, then 2x2 max-pool.
        self.conv3 = nn.Conv2d(64, 128, 1)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Classifier head; batch norm follows each of the first two layers.
        self.fc1 = nn.Linear(128 * 3 * 3, 128)
        self.fc1_bn = nn.BatchNorm1d(128)

        self.fc2 = nn.Linear(128, 64)
        self.fc2_bn = nn.BatchNorm1d(64)

        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: image tensor whose spatial size makes the conv stack produce
                exactly ``fc1.in_features`` values per sample (e.g. 28x28
                with one channel).

        Returns:
            Tensor with 10 unnormalized scores per sample.

        Raises:
            ValueError: if the conv stack output does not hold
                ``fc1.in_features`` values per sample.
        """
        x = F.relu(self.pool1(self.conv1(x)))
        x = F.relu(self.pool2(self.conv2(x)))
        x = F.relu(self.pool3(self.conv3(x)))

        # Guard the flatten: view(-1, n) alone would happily fold a larger
        # feature map into extra "samples" and return scores for a batch size
        # that does not match the input.
        expected = self.fc1.in_features
        per_sample = x.size(-3) * x.size(-2) * x.size(-1)
        if per_sample != expected:
            raise ValueError(
                f"Expected {expected} features per sample after the conv "
                f"stack, got {per_sample}; check the input image size."
            )
        x = x.view(-1, expected)

        x = F.relu(self.fc1_bn(self.fc1(x)))
        # Functional dropout must be told the mode explicitly; it is a no-op
        # when the module is in eval mode.
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2_bn(self.fc2(x)))
        x = self.fc3(x)
        return x


## Start Training and Testing

In [7]:
def training(model, trainloader, device, optimizer, num_epoch, criterion,
             save_path='./mnist.pth'):
    """Train ``model`` for ``num_epoch`` epochs, then save its state dict.

    Args:
        model: network to optimize; put into training mode before the
            first epoch.
        trainloader: iterable yielding ``(inputs, labels)`` batches.
        device: device each batch is moved to before the forward pass.
        optimizer: optimizer whose ``step()`` updates the model parameters.
        num_epoch: number of passes over ``trainloader``.
        criterion: callable computing the loss from ``(outputs, labels)``.
        save_path: file the state dict is written to once all epochs are
            done. Defaults to ``'./mnist.pth'``.
    """
    model.train()
    for epoch in range(1, num_epoch + 1):
        # The label is constant for the whole epoch, so hand it to the
        # progress bar once instead of re-setting it on every batch.
        with tqdm(trainloader, unit="batch", desc=f"Epoch {epoch}") as tepoch:
            for inputs, labels in tepoch:
                inputs, labels = inputs.to(device), labels.to(device)

                # Gradients accumulate across backward() calls; clear them
                # before computing this batch's gradients.
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # Show the most recent batch loss next to the progress bar.
                tepoch.set_postfix(loss=loss.item())

    torch.save(model.state_dict(), save_path)

def testing(model,testloader,device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the test images: %d %%' % (
        100.0 * correct / total))

    
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters whose ``requires_grad`` flag is off are left out of the sum.
    """
    trainable_total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
    return trainable_total


def show_some_image(trainloader, num_images=4):
    """Plot the first images of the first batch drawn from a loader.

    Args:
        trainloader: iterable yielding ``(images, targets)`` batches. Only
            the first batch is consumed, and channel 0 of each image is shown.
        num_images: upper bound on how many images to draw side by side.
            Defaults to 4.
    """
    example_data, example_targets = next(iter(trainloader))

    # A short batch must not be indexed past its end.
    count = min(num_images, len(example_data))
    if count <= 0:
        return

    # squeeze=False keeps `axes` two-dimensional even for a single column.
    fig, axes = plt.subplots(1, count, figsize=(8, 10), squeeze=False)
    for i, ax in enumerate(axes[0]):
        ax.imshow(example_data[i][0], cmap='gray', interpolation='none')
        ax.set_title("Ground Truth: {}".format(example_targets[i]))
        ax.set_xticks([])
        ax.set_yticks([])

    # Lay out once, after every title exists, so spacing accounts for them.
    fig.tight_layout()

# Pick the compute device: CUDA when requested and available, otherwise CPU.
use_cuda = True
print('Torch', torch.__version__, 'CUDA', torch.version.cuda)
use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(f'Model is using {device}')


model = DLS_Model()
model = model.to(device)

# Hyperparameters
BATCH_SIZE = 128
num_epoch = 50
learning_rate = 0.001

criterion = nn.CrossEntropyLoss()
# Alternative optimizer:
# optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# MNIST train/test loaders; images are only converted to tensors.
trainloader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=True, download=True,
                   transform=transforms.Compose([transforms.ToTensor()])),
    batch_size=BATCH_SIZE, shuffle=True)
# The test loader is shuffled too, so the preview below shows a different
# set of samples on each run.
testloader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=False, download=True,
                   transform=transforms.Compose([transforms.ToTensor()])),
    batch_size=BATCH_SIZE, shuffle=True)


print(model)
print(f'Model Parameters {count_parameters(model)/1000000}m')

# Reuse a saved checkpoint when one exists; otherwise train from scratch.
saved_model = './mnist.pth'
if os.path.exists(saved_model):
    print('Only Test')
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint written by a CUDA run still loads on a CPU-only machine.
    model.load_state_dict(torch.load(saved_model, map_location=device))
    print("Let's visualize some test samples")
    show_some_image(testloader)
    testing(model, testloader, device)
else:
    training(model, trainloader, device, optimizer, num_epoch, criterion)
    testing(model, testloader, device)
    print("Let's visualize some test samples")
    show_some_image(testloader)
    

Epoch 1:   1%|▏         | 6/469 [00:00<00:09, 51.36batch/s, loss=1.79]

Torch 1.8.0+cu101 CUDA 10.1
Model is using cuda
DLS_Model(
  (conv1): Conv2d(1, 32, kernel_size=(2, 2), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1152, out_features=128, bias=True)
  (fc1_bn): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc2_bn): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)
Model Parameters 0.17361m


Epoch 1: 100%|██████████| 469/469 [00:09<00:00, 49.02batch/s, loss=0.048]
Epoch 2:  93%|█████████▎| 436/469 [00:08<00:00, 49.58batch/s, loss=0.0808]


KeyboardInterrupt: ignored