Target: Resized the image and applied random rotation to check for higher accuracy
Result:
Parameters: 6800
Best Train Accuracy: 96.36
Best Test Accuracy: 97.10
Analysis:
The model still seems to be doing better post,the training and image rotation being introduced. 
We would apply change the LR to step wise in the next repo and it would provide a good result. 

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
# Train Phase transformations
train_transforms = transforms.Compose([
                                       transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                       ])

# Test Phase transformations
test_transforms = transforms.Compose([
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                       ])

In [3]:
train = datasets.MNIST('./data', train=True, download=True, transform=train_transforms)
test = datasets.MNIST('./data', train=False, download=True, transform=test_transforms)

In [4]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# dataloader arguments - something you'll fetch these from cmdprmt
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=2, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# train dataloader
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader
test_loader = torch.utils.data.DataLoader(test, **dataloader_args)

CUDA Available? True


In [5]:
dropout_value = 0.1
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Input Block
        self.convblock1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value)
        ) # output_size = 26

        # CONVOLUTION BLOCK 1
        self.convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Dropout(dropout_value)
        ) # output_size = 24
        
        # TRANSITION BLOCK 1
        self.convblock3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
        ) # output_size = 24
        self.pool1 = nn.MaxPool2d(2, 2) # output_size = 12

        # CONVOLUTION BLOCK 2
        self.convblock4 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),            
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value)
        ) # output_size = 10
      # OUTPUT BLOCK
        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=6)
        ) # output_size = 1

        self.convblock8 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
            # nn.BatchNorm2d(10),
            # nn.ReLU(),
            # nn.Dropout(dropout_value)
        ) 


        self.dropout = nn.Dropout(dropout_value)

    def forward(self, x):
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.pool1(x)
        x = self.convblock4(x)
        x = self.gap(x)        
        x = self.convblock8(x)

        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)

In [6]:
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
              ReLU-2           [-1, 16, 26, 26]               0
       BatchNorm2d-3           [-1, 16, 26, 26]              32
           Dropout-4           [-1, 16, 26, 26]               0
            Conv2d-5           [-1, 32, 24, 24]           4,608
              ReLU-6           [-1, 32, 24, 24]               0
       BatchNorm2d-7           [-1, 32, 24, 24]              64
           Dropout-8           [-1, 32, 24, 24]               0
            Conv2d-9           [-1, 10, 24, 24]             320
        MaxPool2d-10           [-1, 10, 12, 12]               0
           Conv2d-11           [-1, 16, 10, 10]           1,440
             ReLU-12           [-1, 16, 10, 10]               0
      BatchNorm2d-13           [-1, 16, 10, 10]              32
          Dropout-14           [-1

In [7]:
from tqdm import tqdm

train_losses = []
test_losses = []
train_acc = []
test_acc = []

def train(model, device, train_loader, optimizer, epoch):
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # get samples
    data, target = data.to(device), target.to(device)

    # Init
    optimizer.zero_grad()
    # In PyTorch, we need to set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes. 
    # Because of this, when you start your training loop, ideally you should zero out the gradients so that you do the parameter update correctly.

    # Predict
    y_pred = model(data)

    # Calculate loss
    loss = F.nll_loss(y_pred, target)
    train_losses.append(loss)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Update pbar-tqdm
    
    pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    test_acc.append(100. * correct / len(test_loader.dataset))
model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

EPOCHS = 15
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

EPOCH: 0


Loss=0.38192346692085266 Batch_id=468 Accuracy=72.58: 100%|██████████| 469/469 [00:10<00:00, 44.50it/s] 



Test set: Average loss: 0.3857, Accuracy: 8879/10000 (88.79%)

EPOCH: 1


Loss=0.17081141471862793 Batch_id=468 Accuracy=91.48: 100%|██████████| 469/469 [00:10<00:00, 43.80it/s] 



Test set: Average loss: 0.2291, Accuracy: 9294/10000 (92.94%)

EPOCH: 2


Loss=0.20131903886795044 Batch_id=468 Accuracy=93.56: 100%|██████████| 469/469 [00:07<00:00, 63.95it/s] 



Test set: Average loss: 0.1797, Accuracy: 9436/10000 (94.36%)

EPOCH: 3


Loss=0.1604292392730713 Batch_id=468 Accuracy=94.41: 100%|██████████| 469/469 [00:06<00:00, 67.12it/s]  



Test set: Average loss: 0.1522, Accuracy: 9539/10000 (95.39%)

EPOCH: 4


Loss=0.0762137696146965 Batch_id=468 Accuracy=94.94: 100%|██████████| 469/469 [00:07<00:00, 66.03it/s]   



Test set: Average loss: 0.1508, Accuracy: 9543/10000 (95.43%)

EPOCH: 5


Loss=0.28377851843833923 Batch_id=468 Accuracy=95.30: 100%|██████████| 469/469 [00:09<00:00, 50.55it/s] 



Test set: Average loss: 0.1316, Accuracy: 9588/10000 (95.88%)

EPOCH: 6


Loss=0.15945471823215485 Batch_id=468 Accuracy=95.46: 100%|██████████| 469/469 [00:09<00:00, 48.78it/s]  



Test set: Average loss: 0.1146, Accuracy: 9649/10000 (96.49%)

EPOCH: 7


Loss=0.08458700776100159 Batch_id=468 Accuracy=95.70: 100%|██████████| 469/469 [00:09<00:00, 47.98it/s]  



Test set: Average loss: 0.1163, Accuracy: 9645/10000 (96.45%)

EPOCH: 8


Loss=0.16979320347309113 Batch_id=468 Accuracy=95.74: 100%|██████████| 469/469 [00:09<00:00, 50.32it/s]  



Test set: Average loss: 0.1154, Accuracy: 9638/10000 (96.38%)

EPOCH: 9


Loss=0.1637675017118454 Batch_id=468 Accuracy=95.89: 100%|██████████| 469/469 [00:10<00:00, 44.54it/s]   



Test set: Average loss: 0.1041, Accuracy: 9695/10000 (96.95%)

EPOCH: 10


Loss=0.09798132628202438 Batch_id=468 Accuracy=96.11: 100%|██████████| 469/469 [00:07<00:00, 65.21it/s]  



Test set: Average loss: 0.1112, Accuracy: 9671/10000 (96.71%)

EPOCH: 11


Loss=0.06918288767337799 Batch_id=468 Accuracy=96.14: 100%|██████████| 469/469 [00:06<00:00, 71.79it/s]  



Test set: Average loss: 0.0944, Accuracy: 9710/10000 (97.10%)

EPOCH: 12


Loss=0.08876154571771622 Batch_id=468 Accuracy=96.25: 100%|██████████| 469/469 [00:06<00:00, 71.91it/s]  



Test set: Average loss: 0.1016, Accuracy: 9705/10000 (97.05%)

EPOCH: 13


Loss=0.13509151339530945 Batch_id=468 Accuracy=96.32: 100%|██████████| 469/469 [00:06<00:00, 71.57it/s]  



Test set: Average loss: 0.1038, Accuracy: 9677/10000 (96.77%)

EPOCH: 14


Loss=0.2368658035993576 Batch_id=468 Accuracy=96.36: 100%|██████████| 469/469 [00:06<00:00, 70.85it/s]   



Test set: Average loss: 0.0996, Accuracy: 9687/10000 (96.87%)

