Target: Resized the image and applied random rotation to check for higher accuracy
Result:
Parameters: 6800
Best Train Accuracy: 96.63
Best Test Accuracy: 97.57
Analysis:
The model still seems to be doing better post,the training and image rotation being introduced. 
We would apply change the LR to step wise in the next repo and it would provide a good result. 

In [4]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [5]:
# Train Phase transformations
train_transforms = transforms.Compose([
                                       transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                       ])

# Test Phase transformations
test_transforms = transforms.Compose([
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                       ])


In [6]:
train = datasets.MNIST('./data', train=True, download=True, transform=train_transforms)
test = datasets.MNIST('./data', train=False, download=True, transform=test_transforms)

In [7]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# dataloader arguments - something you'll fetch these from cmdprmt
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=2, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# train dataloader
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader
test_loader = torch.utils.data.DataLoader(test, **dataloader_args)

CUDA Available? False


In [8]:
dropout_value = 0.1
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Input Block
        self.convblock1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value)
        ) # output_size = 26

        # CONVOLUTION BLOCK 1
        self.convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Dropout(dropout_value)
        ) # output_size = 24
        
        # TRANSITION BLOCK 1
        self.convblock3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
        ) # output_size = 24
        self.pool1 = nn.MaxPool2d(2, 2) # output_size = 12

        # CONVOLUTION BLOCK 2
        self.convblock4 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),            
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value)
        ) # output_size = 10
      # OUTPUT BLOCK
        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=6)
        ) # output_size = 1

        self.convblock8 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
            # nn.BatchNorm2d(10),
            # nn.ReLU(),
            # nn.Dropout(dropout_value)
        ) 


        self.dropout = nn.Dropout(dropout_value)

    def forward(self, x):
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.pool1(x)
        x = self.convblock4(x)
        x = self.gap(x)        
        x = self.convblock8(x)

        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)

In [9]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

cpu
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
              ReLU-2           [-1, 16, 26, 26]               0
       BatchNorm2d-3           [-1, 16, 26, 26]              32
           Dropout-4           [-1, 16, 26, 26]               0
            Conv2d-5           [-1, 32, 24, 24]           4,608
              ReLU-6           [-1, 32, 24, 24]               0
       BatchNorm2d-7           [-1, 32, 24, 24]              64
           Dropout-8           [-1, 32, 24, 24]               0
            Conv2d-9           [-1, 10, 24, 24]             320
        MaxPool2d-10           [-1, 10, 12, 12]               0
           Conv2d-11           [-1, 16, 10, 10]           1,440
             ReLU-12           [-1, 16, 10, 10]               0
      BatchNorm2d-13           [-1, 16, 10, 10]              32
          Dropout-14           [-1,

In [10]:
from tqdm import tqdm

train_losses = []
test_losses = []
train_acc = []
test_acc = []

def train(model, device, train_loader, optimizer, epoch):
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # get samples
    data, target = data.to(device), target.to(device)

    # Init
    optimizer.zero_grad()
    # In PyTorch, we need to set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes. 
    # Because of this, when you start your training loop, ideally you should zero out the gradients so that you do the parameter update correctly.

    # Predict
    y_pred = model(data)

    # Calculate loss
    loss = F.nll_loss(y_pred, target)
    train_losses.append(loss)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Update pbar-tqdm
    
    pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    test_acc.append(100. * correct / len(test_loader.dataset))

In [11]:
model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

EPOCHS = 15
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

EPOCH: 0


Loss=0.30758172273635864 Batch_id=937 Accuracy=81.21: 100%|██████████| 938/938 [01:12<00:00, 12.99it/s]



Test set: Average loss: 0.2102, Accuracy: 9366/10000 (93.66%)

EPOCH: 1


Loss=0.43526706099510193 Batch_id=937 Accuracy=93.32: 100%|██████████| 938/938 [01:10<00:00, 13.27it/s] 



Test set: Average loss: 0.1596, Accuracy: 9538/10000 (95.38%)

EPOCH: 2


Loss=0.3168605864048004 Batch_id=937 Accuracy=94.67: 100%|██████████| 938/938 [01:10<00:00, 13.27it/s]  



Test set: Average loss: 0.1525, Accuracy: 9554/10000 (95.54%)

EPOCH: 3


Loss=0.2543390393257141 Batch_id=937 Accuracy=95.07: 100%|██████████| 938/938 [01:10<00:00, 13.32it/s]  



Test set: Average loss: 0.1944, Accuracy: 9378/10000 (93.78%)

EPOCH: 4


Loss=0.11248791217803955 Batch_id=937 Accuracy=95.42: 100%|██████████| 938/938 [01:22<00:00, 11.38it/s] 



Test set: Average loss: 0.1508, Accuracy: 9545/10000 (95.45%)

EPOCH: 5


Loss=0.25050804018974304 Batch_id=937 Accuracy=95.75: 100%|██████████| 938/938 [01:21<00:00, 11.49it/s] 



Test set: Average loss: 0.1102, Accuracy: 9673/10000 (96.73%)

EPOCH: 6


Loss=0.21353310346603394 Batch_id=937 Accuracy=95.96: 100%|██████████| 938/938 [01:16<00:00, 12.20it/s] 



Test set: Average loss: 0.1061, Accuracy: 9672/10000 (96.72%)

EPOCH: 7


Loss=0.2083400934934616 Batch_id=937 Accuracy=96.04: 100%|██████████| 938/938 [01:14<00:00, 12.61it/s]  



Test set: Average loss: 0.1089, Accuracy: 9679/10000 (96.79%)

EPOCH: 8


Loss=0.17614677548408508 Batch_id=937 Accuracy=96.16: 100%|██████████| 938/938 [01:13<00:00, 12.80it/s] 



Test set: Average loss: 0.1103, Accuracy: 9650/10000 (96.50%)

EPOCH: 9


Loss=0.16259783506393433 Batch_id=937 Accuracy=96.23: 100%|██████████| 938/938 [01:13<00:00, 12.83it/s] 



Test set: Average loss: 0.1031, Accuracy: 9699/10000 (96.99%)

EPOCH: 10


Loss=0.36132216453552246 Batch_id=937 Accuracy=96.30: 100%|██████████| 938/938 [01:13<00:00, 12.82it/s] 



Test set: Average loss: 0.0970, Accuracy: 9702/10000 (97.02%)

EPOCH: 11


Loss=0.37874099612236023 Batch_id=937 Accuracy=96.42: 100%|██████████| 938/938 [01:14<00:00, 12.61it/s] 



Test set: Average loss: 0.0819, Accuracy: 9757/10000 (97.57%)

EPOCH: 12


Loss=0.02146323211491108 Batch_id=937 Accuracy=96.51: 100%|██████████| 938/938 [01:13<00:00, 12.82it/s]  



Test set: Average loss: 0.1018, Accuracy: 9688/10000 (96.88%)

EPOCH: 13


Loss=0.03559792786836624 Batch_id=937 Accuracy=96.46: 100%|██████████| 938/938 [01:13<00:00, 12.79it/s] 



Test set: Average loss: 0.1298, Accuracy: 9602/10000 (96.02%)

EPOCH: 14


Loss=0.009203317575156689 Batch_id=937 Accuracy=96.63: 100%|██████████| 938/938 [01:11<00:00, 13.06it/s]



Test set: Average loss: 0.0972, Accuracy: 9697/10000 (96.97%)

