<a href="https://colab.research.google.com/github/realpranav93/EVA5/blob/master/S5/EVA5_Session_5_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Target:
Reduce the number of parameters by replacing the last big 7x7 convolution with global average pooling to achieve more efficiency, and remove the slight overfit by adding regularization in the form of batch normalization.

### Results:
1. Parameters: 11,228
2. Best Train Accuracy: 99.37%
3. Best Test Accuracy: 99.19%

### Analysis:
1. More parameters are needed now that the last conv layer has been replaced by GAP.
2. Decreasing the parameters at the top of the architecture and adding a layer after GAP might help.
3. The capacity of the model needs to increase without an increase in parameters - augmentation needs to come to the rescue here.
 

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [None]:
class Net(nn.Module):
    """Small CNN classifier with a global-average-pooling head (11,228 parameters).

    Layout: three 3x3 convolutions, a max-pool + 1x1 transition, two more 3x3
    convolutions, a final 1x1 convolution down to 10 channels, and global
    average pooling. Every convolution is followed by ReLU and then
    BatchNorm2d, in that order.

    The spatial sizes noted in the comments are for a 1x28x28 input. The
    pooling head is adaptive, so larger inputs still produce exactly one
    10-way prediction per sample instead of silently reshaping extra spatial
    positions into the batch dimension.
    """

    def __init__(self):
        super(Net, self).__init__()

        # block1: 28 -> 26 -> 24 -> 22
        self.conv1 = nn.Conv2d(1, 10, 3)
        self.Batchnorm1 = nn.BatchNorm2d(10)
        self.conv2 = nn.Conv2d(10, 18, 3)
        self.Batchnorm2 = nn.BatchNorm2d(18)
        self.conv3 = nn.Conv2d(18, 18, 3)
        self.Batchnorm3 = nn.BatchNorm2d(18)

        # transition block: 2x2 max-pool (22 -> 11), then 1x1 conv 18 -> 10 channels
        self.pool1 = nn.MaxPool2d(2, 2)
        self.pool1trns = nn.Conv2d(18, 10, 1)
        self.Batchnormtrns1 = nn.BatchNorm2d(10)

        # block2: 11 -> 9 -> 7
        self.conv4 = nn.Conv2d(10, 18, 3)
        self.Batchnorm4 = nn.BatchNorm2d(18)
        self.conv5 = nn.Conv2d(18, 26, 3)
        self.Batchnorm5 = nn.BatchNorm2d(26)

        # block3: 1x1 conv down to the 10 output channels (spatial size unchanged)
        self.pool2trns = nn.Conv2d(26, 10, 1)
        self.Batchnormtrns2 = nn.BatchNorm2d(10)

        # Global average pooling. Adaptive pooling always collapses the map to
        # 1x1; on the 7x7 map produced by a 28x28 input this is identical to
        # the former AvgPool2d(kernel_size=7).
        self.conv6_avgp = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10) for input batch ``x``."""
        # block1
        x = self.Batchnorm1(F.relu(self.conv1(x)))
        x = self.Batchnorm2(F.relu(self.conv2(x)))
        x = self.Batchnorm3(F.relu(self.conv3(x)))

        # transition block
        x = self.pool1(x)
        x = self.Batchnormtrns1(F.relu(self.pool1trns(x)))

        # block2
        x = self.Batchnorm4(F.relu(self.conv4(x)))
        x = self.Batchnorm5(F.relu(self.conv5(x)))

        # block3
        # NOTE(review): the final 1x1 conv is also followed by ReLU + BatchNorm
        # before the softmax; kept as-is so the trained behaviour is unchanged.
        x = self.Batchnormtrns2(F.relu(self.pool2trns(x)))

        # Pool to (batch, 10, 1, 1) and flatten per sample; keeping the batch
        # dimension explicit avoids view(-1, 10) mixing samples together.
        x = self.conv6_avgp(x)
        x = x.view(x.size(0), -1)
        return F.log_softmax(x, dim=-1)

In [None]:
#!pip install torchsummary
from torchsummary import summary

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network on the selected device and print its per-layer summary
# for a single-channel 28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 26, 26]             100
       BatchNorm2d-2           [-1, 10, 26, 26]              20
            Conv2d-3           [-1, 18, 24, 24]           1,638
       BatchNorm2d-4           [-1, 18, 24, 24]              36
            Conv2d-5           [-1, 18, 22, 22]           2,934
       BatchNorm2d-6           [-1, 18, 22, 22]              36
         MaxPool2d-7           [-1, 18, 11, 11]               0
            Conv2d-8           [-1, 10, 11, 11]             190
       BatchNorm2d-9           [-1, 10, 11, 11]              20
           Conv2d-10             [-1, 18, 9, 9]           1,638
      BatchNorm2d-11             [-1, 18, 9, 9]              36
           Conv2d-12             [-1, 26, 7, 7]           4,238
      BatchNorm2d-13             [-1, 26, 7, 7]              52
           Conv2d-14             [-1, 1

In [None]:
torch.manual_seed(1)
batch_size = 128

# Preprocessing shared by both splits: convert to a tensor, then normalise
# with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Extra DataLoader options are only passed when CUDA is in use.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Training split (downloaded on first use).
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, **kwargs)

# Test split; shuffling is left on to match the original configuration.
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=True, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch and print the epoch's average loss and accuracy.

    Args:
        model: network returning log-probabilities (the loss is ``F.nll_loss``).
        device: device that every batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches exposing a
            ``.dataset`` whose length is the number of training samples.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the printed report.

    Returns:
        None. The report is printed to stdout.
    """
    model.train()
    train_loss = 0
    train_correct = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # Bookkeeping only: reuse the pre-step outputs and skip autograd.
        with torch.no_grad():
            train_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # index of the max log-probability
            train_correct += pred.eq(target.view_as(pred)).sum().item()

    # Average over the TRAINING set. The previous code divided by the size of
    # the global test_loader's dataset, which mis-scaled the reported loss and
    # made this function depend on a name that is not one of its arguments.
    num_samples = len(train_loader.dataset)
    train_loss /= num_samples
    print('Epoch: {:.0f},LR: {}.\nTrain set: train Average loss: {:.4f}, train_Accuracy: {}/{} ({:.4f}%)\n'.format(
        epoch, optimizer.param_groups[0]['lr'], train_loss, train_correct, num_samples,
        100. * train_correct / num_samples))
        

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred_test = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred_test.eq(target.view_as(pred_test)).sum().item()

    test_loss /= len(test_loader.dataset)

    
    print('Test set: test Average loss: {:.4f}, test Accuracy: {}/{} ({:.4f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))




In [None]:
# Start from a freshly initialised network and plain SGD with momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Epochs 1..15: one pass over the training data, then one evaluation pass.
NUM_EPOCHS = 15
for epoch in range(1, NUM_EPOCHS + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

Epoch: 1,LR: 0.01.
Train set: train Average loss: 2.8560, train_Accuracy: 55055/60000 (91.7583%)

Test set: test Average loss: 0.1326, test Accuracy: 9790/10000 (97.9000%)

Epoch: 2,LR: 0.01.
Train set: train Average loss: 0.7195, train_Accuracy: 58680/60000 (97.8000%)

Test set: test Average loss: 0.0826, test Accuracy: 9839/10000 (98.3900%)

Epoch: 3,LR: 0.01.
Train set: train Average loss: 0.5025, train_Accuracy: 59009/60000 (98.3483%)

Test set: test Average loss: 0.0625, test Accuracy: 9871/10000 (98.7100%)

Epoch: 4,LR: 0.01.
Train set: train Average loss: 0.3993, train_Accuracy: 59168/60000 (98.6133%)

Test set: test Average loss: 0.0549, test Accuracy: 9876/10000 (98.7600%)

Epoch: 5,LR: 0.01.
Train set: train Average loss: 0.3342, train_Accuracy: 59306/60000 (98.8433%)

Test set: test Average loss: 0.0422, test Accuracy: 9912/10000 (99.1200%)

Epoch: 6,LR: 0.01.
Train set: train Average loss: 0.2966, train_Accuracy: 59353/60000 (98.9217%)

Test set: test Average loss: 0.0416, 