<a href="https://colab.research.google.com/github/parth-mango/EVA5-Assignments/blob/main/EVA_A5F2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Target
  * Use batch normalization to improve performance

# Result
  * Parameters - 19,942
  * Performance - 99.12%

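# Analysis
  * The model overfits somewhat: training accuracy keeps climbing (99.92% by epoch 15) while test accuracy plateaus around 99.1%
  * Adding dropout should help reduce the overfitting
  * The number of parameters still has to be reduced below 10k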


In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [None]:
def calc_rf(jin, k, rin):
    """Return the receptive field after stacking one more conv/pool layer.

    Implements the recurrence ``r_out = r_in + (k - 1) * j_in``.

    Args:
        jin: Jump (cumulative stride) of the layer's input feature map.
        k: Kernel size of the layer being added.
        rin: Receptive field of the layer's input.

    Returns:
        The receptive field of the layer's output.
    """
    growth = jin * (k - 1)
    return growth + rin


calc_rf(4, 2, 28)

32

In [None]:
 # Model parameters: 19942

class Net(nn.Module):
    """Small batch-normalised CNN emitting 10 log-probabilities per image.

    Six "3x3 conv -> ReLU -> BatchNorm" stages, with a 2x2 max-pool after
    the fourth and the sixth, feed a final unpadded 3x3 conv that produces
    10 channels; those are average-pooled to one value per channel.
    The shape/receptive-field notes below assume a 28x28x1 input.
    """

    @staticmethod
    def _conv_stage(in_ch, out_ch):
        """Return [3x3 padded conv, ReLU, BatchNorm] as a list of modules."""
        return [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(out_ch),
        ]

    def __init__(self):
        super(Net, self).__init__()

        stage = self._conv_stage
        layers = []
        layers += stage(1, 4)                # 28x28x1  > 28x28x4  : RF 3x3
        layers += stage(4, 8)                # 28x28x4  > 28x28x8  : RF 5x5
        layers += stage(8, 12)               # 28x28x8  > 28x28x12 : RF 7x7
        layers += stage(12, 16)              # 28x28x12 > 28x28x16 : RF 9x9
        layers.append(nn.MaxPool2d(2, 2))    # 28x28x16 > 14x14x16 : RF 10x10
        layers += stage(16, 32)              # 14x14x16 > 14x14x32 : RF 14x14
        layers += stage(32, 32)              # 14x14x32 > 14x14x32 : RF 18x18
        layers.append(nn.MaxPool2d(2, 2))    # 14x14x32 > 7x7x32   : RF 20x20
        layers.append(nn.Conv2d(32, 10, 3))  # 7x7x32   > 5x5x10   : RF 28x28
        # Kernel 5 at jump 4 gives 28 + (5 - 1) * 4 = 44, i.e. the pooled
        # value sees the whole input image.
        layers.append(nn.AvgPool2d(5, 2))    # 5x5x10   > 1x1x10   : RF 44x44

        # A flat Sequential keeps the sub-module indices (conv.0, conv.1, ...)
        # in the order the layers were created above.
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Run the conv stack and return log-softmax scores of shape (-1, 10)."""
        scores = self.conv(x).view(-1, 10)
        return F.log_softmax(scores, dim=1)

In [None]:
# !pip install torchsummary
from torchsummary import summary

# Prefer the GPU when one is visible; `use_cuda` is also consulted later
# when the data-loader options are chosen.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Build the network on the selected device and print a per-layer table of
# output shapes and parameter counts for a single 1x28x28 input.
model = Net()
model = model.to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 28, 28]              40
              ReLU-2            [-1, 4, 28, 28]               0
       BatchNorm2d-3            [-1, 4, 28, 28]               8
            Conv2d-4            [-1, 8, 28, 28]             296
              ReLU-5            [-1, 8, 28, 28]               0
       BatchNorm2d-6            [-1, 8, 28, 28]              16
            Conv2d-7           [-1, 12, 28, 28]             876
              ReLU-8           [-1, 12, 28, 28]               0
       BatchNorm2d-9           [-1, 12, 28, 28]              24
           Conv2d-10           [-1, 16, 28, 28]           1,744
             ReLU-11           [-1, 16, 28, 28]               0
      BatchNorm2d-12           [-1, 16, 28, 28]              32
        MaxPool2d-13           [-1, 16, 14, 14]               0
           Conv2d-14           [-1, 32,

In [None]:


torch.manual_seed(1)
batch_size = 128

# Extra worker / pinned-memory options are only requested when CUDA is used.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Both splits share one preprocessing pipeline: convert to a tensor, then
# normalise with mean 0.1307 / std 0.3081 (presumably the MNIST training-set
# statistics).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Summed loss and accuracy do not depend on sample order, so the test split
# is iterated in a fixed order instead of being reshuffled on every pass.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


In [None]:
from tqdm import tqdm

# Metric histories: train() appends one loss and one running accuracy per
# batch; test() appends one average loss and one accuracy per call.
train_losses, test_losses = [], []
train_acc, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    For every batch the NLL loss is back-propagated and the optimizer is
    stepped. The batch loss (as a Python float) is appended to the
    module-level ``train_losses`` list and the running accuracy in percent
    to ``train_acc``. Progress is shown through a tqdm bar.

    Args:
        model: Network to train; expected to output log-probabilities,
            since its output is passed to ``F.nll_loss``.
        device: Device that each batch is moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer holding ``model``'s parameters.
        epoch: Current epoch number; accepted for the caller's convenience
            but not used inside the function.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)

        # Gradients accumulate by default, so clear them before each batch.
        optimizer.zero_grad()
        y_pred = model(data)
        loss = F.nll_loss(y_pred, target)
        loss.backward()
        optimizer.step()

        # Store a plain float rather than the tensor: keeping the tensor
        # would hold one device tensor per batch alive for the whole run,
        # and it matches the floats recorded in test_losses.
        loss_value = loss.item()
        train_losses.append(loss_value)

        pred = y_pred.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)
        running_acc = 100 * correct / processed
        pbar.set_description(desc=f'Loss={loss_value} Batch_id={batch_idx} Accuracy={running_acc:0.2f}')
        train_acc.append(running_acc)

def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    The model is switched to eval mode and run without gradient tracking.
    The per-sample average NLL loss is appended to the module-level
    ``test_losses`` list and the accuracy in percent to ``test_acc``.

    Args:
        model: Network to evaluate; its output is passed to ``F.nll_loss``.
        device: Device that each batch is moved to before the forward pass.
        test_loader: Iterable of ``(data, target)`` batches that exposes a
            ``dataset`` attribute supporting ``len()``.

    Returns:
        The average loss per sample over the whole dataset.
    """
    model.eval()
    summed_loss = 0
    n_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            scores = model(images)
            # Sum (rather than average) within each batch so the division
            # below yields a per-sample mean regardless of batch sizes.
            summed_loss += F.nll_loss(scores, labels, reduction='sum').item()
            predicted = scores.argmax(dim=1, keepdim=True)
            n_correct += predicted.eq(labels.view_as(predicted)).sum().item()

    n_samples = len(test_loader.dataset)
    test_loss = summed_loss / n_samples
    test_losses.append(test_loss)

    accuracy = 100. * n_correct / n_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, n_correct, n_samples, accuracy))

    test_acc.append(accuracy)
    return test_loss

In [None]:
# Start from a freshly initialised network and plain SGD with momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Epochs 1..15: one training pass followed by one evaluation pass each.
# No scheduler is attached here, so the reported learning rate is simply
# whatever the optimizer's first parameter group currently holds.
for epoch in range(1, 16):
    train(model, device, train_loader, optimizer, epoch)
    test_loss = test(model, device, test_loader)
    current_lr = optimizer.param_groups[0]['lr']
    print('Epoch-{0} lr: {1}'.format(epoch, current_lr))


Loss=0.03528311103582382 Batch_id=468 Accuracy=92.68: 100%|██████████| 469/469 [00:16<00:00, 28.34it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0800, Accuracy: 9766/10000 (97.66%)

Epoch-1 lr: 0.01


Loss=0.024119079113006592 Batch_id=468 Accuracy=98.42: 100%|██████████| 469/469 [00:16<00:00, 28.43it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0643, Accuracy: 9811/10000 (98.11%)

Epoch-2 lr: 0.01


Loss=0.03214762359857559 Batch_id=468 Accuracy=98.84: 100%|██████████| 469/469 [00:16<00:00, 28.32it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0388, Accuracy: 9888/10000 (98.88%)

Epoch-3 lr: 0.01


Loss=0.014667770825326443 Batch_id=468 Accuracy=99.06: 100%|██████████| 469/469 [00:16<00:00, 28.66it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0398, Accuracy: 9882/10000 (98.82%)

Epoch-4 lr: 0.01


Loss=0.016438618302345276 Batch_id=468 Accuracy=99.21: 100%|██████████| 469/469 [00:16<00:00, 28.74it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0305, Accuracy: 9907/10000 (99.07%)

Epoch-5 lr: 0.01


Loss=0.08460652828216553 Batch_id=468 Accuracy=99.33: 100%|██████████| 469/469 [00:16<00:00, 29.17it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0283, Accuracy: 9906/10000 (99.06%)

Epoch-6 lr: 0.01


Loss=0.00478768115863204 Batch_id=468 Accuracy=99.43: 100%|██████████| 469/469 [00:16<00:00, 28.78it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0258, Accuracy: 9918/10000 (99.18%)

Epoch-7 lr: 0.01


Loss=0.0032646760810166597 Batch_id=468 Accuracy=99.55: 100%|██████████| 469/469 [00:16<00:00, 29.05it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0271, Accuracy: 9912/10000 (99.12%)

Epoch-8 lr: 0.01


Loss=0.00458310404792428 Batch_id=468 Accuracy=99.54: 100%|██████████| 469/469 [00:15<00:00, 29.62it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0293, Accuracy: 9897/10000 (98.97%)

Epoch-9 lr: 0.01


Loss=0.039331234991550446 Batch_id=468 Accuracy=99.62: 100%|██████████| 469/469 [00:15<00:00, 29.33it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0239, Accuracy: 9926/10000 (99.26%)

Epoch-10 lr: 0.01


Loss=0.07034050673246384 Batch_id=468 Accuracy=99.72: 100%|██████████| 469/469 [00:15<00:00, 29.85it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0289, Accuracy: 9910/10000 (99.10%)

Epoch-11 lr: 0.01


Loss=0.003006775863468647 Batch_id=468 Accuracy=99.78: 100%|██████████| 469/469 [00:15<00:00, 29.81it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0274, Accuracy: 9912/10000 (99.12%)

Epoch-12 lr: 0.01


Loss=0.0029718782752752304 Batch_id=468 Accuracy=99.79: 100%|██████████| 469/469 [00:15<00:00, 30.19it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0312, Accuracy: 9898/10000 (98.98%)

Epoch-13 lr: 0.01


Loss=0.0018994067795574665 Batch_id=468 Accuracy=99.84: 100%|██████████| 469/469 [00:15<00:00, 29.74it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0266, Accuracy: 9921/10000 (99.21%)

Epoch-14 lr: 0.01


Loss=0.002032920252531767 Batch_id=468 Accuracy=99.92: 100%|██████████| 469/469 [00:15<00:00, 29.81it/s]



Test set: Average loss: 0.0305, Accuracy: 9907/10000 (99.07%)

Epoch-15 lr: 0.01
