<a href="https://colab.research.google.com/github/nkanungo/EVA6/blob/main/DNN_Pytorch_Architecture_Changes_Step2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Step2 of architecture changes:**

**Target**: In this step I will modify the vanilla architecture to bring the number of parameters under 10,000. I will reduce the number of kernels and remove all bias parameters by setting `bias=False`. I will train for 15 epochs. My expectation is that the validation accuracy may drop slightly compared with the vanilla architecture.

**Result**: Train accuracy: 98.99%, validation accuracy: 98.92%, number of parameters: 8,442.

**Analysis**: As expected, the validation accuracy dropped slightly to 98.92 from the 99.09 achieved by the vanilla architecture. This could be because I reduced the number of kernels. But unlike the vanilla architecture, the gap between train accuracy and validation accuracy is very small, which means the model is not overfitting. Also, the parameter count of 8,442 is well within my target of 10K parameters.

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [None]:
class Net(nn.Module):
    """Small bias-free CNN that maps a 1-channel image to 10 class log-probabilities.

    The network is six 3x3 convolutions (all with ``bias=False``) split by two
    2x2 max-pools, followed by global average pooling. The final convolution
    emits 10 channels, one per class, so no fully connected layer is needed.

    The per-layer size / receptive-field comments assume a 28x28 input.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Block 1: two padded convolutions keep the spatial size unchanged.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 14, 3, padding=1, bias=False),
            nn.ReLU()
        ) # Input=28, Output=28, rf=3

        self.conv2 = nn.Sequential(
            nn.Conv2d(14, 14, 3, padding=1, bias=False),
            nn.ReLU()
        ) # Input=28, Output=28, rf=5

        self.pool1= nn.MaxPool2d(2, 2) # Input=28, Output=14, rf=6

        # Block 2: same pattern at half resolution.
        self.conv3 = nn.Sequential(
            nn.Conv2d(14, 14, 3, padding=1, bias=False),
            nn.ReLU()
        ) # Input=14, Output=14, rf=10
        self.conv4 = nn.Sequential(
            nn.Conv2d(14, 14, 3, padding=1, bias=False),
            nn.ReLU()
        ) # Input=14, Output=14, rf=14

        self.pool2= nn.MaxPool2d(2, 2) # Input=14, Output=7, rf=16

        # Block 3: unpadded convolutions shrink the map 7 -> 5 -> 3.
        self.conv5 = nn.Sequential(
            nn.Conv2d(14, 14, 3, padding=0, bias=False),
            nn.ReLU(),
        ) # Input=7, Output=5, rf=24

        # No ReLU after the last convolution: its 10 channels are the class scores.
        self.conv6 = nn.Sequential(
            nn.Conv2d(14, 10, 3, padding=0, bias=False),
        ) # Input=5, Output=3, rf=32

        self.global_avgpool = nn.AdaptiveAvgPool2d(1)  # Input=3, Output=1, rf=40

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch ``x`` of shape (N, 1, H, W)."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        x = self.conv3(x)
        x = self.conv4(x)

        x = self.pool2(x)
        x = self.conv5(x)
        x = self.conv6(x)

        x = self.global_avgpool(x)
        # Collapse the trailing 1x1 spatial dims: (N, 10, 1, 1) -> (N, 10).
        x = x.view(-1, 10)
        # dim is given explicitly; relying on the implicit dimension is
        # deprecated and emits a UserWarning on every forward pass.
        return F.log_softmax(x, dim=1)

In [None]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 14, 28, 28]             126
              ReLU-2           [-1, 14, 28, 28]               0
            Conv2d-3           [-1, 14, 28, 28]           1,764
              ReLU-4           [-1, 14, 28, 28]               0
         MaxPool2d-5           [-1, 14, 14, 14]               0
            Conv2d-6           [-1, 14, 14, 14]           1,764
              ReLU-7           [-1, 14, 14, 14]               0
            Conv2d-8           [-1, 14, 14, 14]           1,764
              ReLU-9           [-1, 14, 14, 14]               0
        MaxPool2d-10             [-1, 14, 7, 7]               0
           Conv2d-11             [-1, 14, 5, 5]           1,764
             ReLU-12             [-1, 14, 5, 5]               0
           Conv2d-13             [-1, 10, 3, 3]           1,260
AdaptiveAvgPool2d-14             [-1, 1



In [None]:


# Seed PyTorch's RNG so weight initialisation and batch shuffling are
# repeatable across Restart & Run All.
SEED = 11
torch.manual_seed(SEED)
batch_size = 128

# Worker / pinned-memory options are only passed when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# One preprocessing pipeline shared by both splits: convert to tensor, then
# normalise with the commonly used MNIST mean/std constants.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation sums loss and correct counts over the whole split, so sample
# order is irrelevant and shuffling is switched off.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


In [None]:
from tqdm import tqdm

# Module-level metric histories: train() appends to the train_* lists and
# test() appends to the test_* lists.
train_losses, test_losses = [], []
train_acc, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
  """Run one training pass over ``train_loader`` and record per-batch metrics.

  For every batch this appends the scalar loss to the module-level
  ``train_losses`` list and the running accuracy (in percent, over the batches
  seen so far in this call) to ``train_acc``.

  Args:
    model: network whose output is fed to ``F.nll_loss`` (i.e. log-probabilities).
    device: device the batches are moved to before the forward pass.
    train_loader: iterable yielding ``(data, target)`` batches.
    optimizer: optimizer used to update ``model``'s parameters.
    epoch: accepted for call-site symmetry; not used inside the function.
  """
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # Move the batch to the target device.
    data, target = data.to(device), target.to(device)

    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()

    # Forward pass and loss.
    y_pred = model(data)
    loss = F.nll_loss(y_pred, target)

    # Store a plain Python float, not the tensor: keeping the tensor would
    # hold on to device memory / autograd references for every batch and
    # would be inconsistent with the floats stored in test_losses.
    loss_value = loss.item()
    train_losses.append(loss_value)

    # Backpropagation and parameter update.
    loss.backward()
    optimizer.step()

    # Running accuracy: index of the max log-probability vs. the label.
    pred = y_pred.argmax(dim=1, keepdim=True)
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss_value} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    test_acc.append(100. * correct / len(test_loader.dataset))

In [None]:
from torch.optim.lr_scheduler import StepLR

# Fresh network for the actual run; Module.to() moves parameters in place.
model = Net()
model.to(device)

# Plain SGD with momentum over all model parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
)

# One training pass followed by one evaluation pass per epoch.
EPOCHS = 15
for epoch in range(0, EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

  0%|          | 0/469 [00:00<?, ?it/s]

EPOCH: 0


Loss=0.36412978172302246 Batch_id=468 Accuracy=58.63: 100%|██████████| 469/469 [00:10<00:00, 43.47it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1614, Accuracy: 9501/10000 (95.01%)

EPOCH: 1


Loss=0.07658711075782776 Batch_id=468 Accuracy=95.41: 100%|██████████| 469/469 [00:09<00:00, 47.67it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1216, Accuracy: 9613/10000 (96.13%)

EPOCH: 2


Loss=0.04459930956363678 Batch_id=468 Accuracy=96.87: 100%|██████████| 469/469 [00:10<00:00, 46.42it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0697, Accuracy: 9770/10000 (97.70%)

EPOCH: 3


Loss=0.0222440417855978 Batch_id=468 Accuracy=97.42: 100%|██████████| 469/469 [00:09<00:00, 46.93it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0801, Accuracy: 9740/10000 (97.40%)

EPOCH: 4


Loss=0.08626339584589005 Batch_id=468 Accuracy=97.88: 100%|██████████| 469/469 [00:09<00:00, 47.30it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0505, Accuracy: 9832/10000 (98.32%)

EPOCH: 5


Loss=0.023888519033789635 Batch_id=468 Accuracy=98.18: 100%|██████████| 469/469 [00:10<00:00, 46.39it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0491, Accuracy: 9823/10000 (98.23%)

EPOCH: 6


Loss=0.08451653271913528 Batch_id=468 Accuracy=98.31: 100%|██████████| 469/469 [00:09<00:00, 47.71it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0404, Accuracy: 9851/10000 (98.51%)

EPOCH: 7


Loss=0.011897970922291279 Batch_id=468 Accuracy=98.52: 100%|██████████| 469/469 [00:10<00:00, 45.08it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0443, Accuracy: 9855/10000 (98.55%)

EPOCH: 8


Loss=0.010737155564129353 Batch_id=468 Accuracy=98.62: 100%|██████████| 469/469 [00:10<00:00, 46.48it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0437, Accuracy: 9849/10000 (98.49%)

EPOCH: 9


Loss=0.056621845811605453 Batch_id=468 Accuracy=98.70: 100%|██████████| 469/469 [00:09<00:00, 47.35it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0362, Accuracy: 9869/10000 (98.69%)

EPOCH: 10


Loss=0.06341630965471268 Batch_id=468 Accuracy=98.81: 100%|██████████| 469/469 [00:09<00:00, 46.92it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0337, Accuracy: 9885/10000 (98.85%)

EPOCH: 11


Loss=0.02271348237991333 Batch_id=468 Accuracy=98.83: 100%|██████████| 469/469 [00:10<00:00, 46.59it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0351, Accuracy: 9880/10000 (98.80%)

EPOCH: 12


Loss=0.07799442857503891 Batch_id=468 Accuracy=98.89: 100%|██████████| 469/469 [00:09<00:00, 46.93it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0329, Accuracy: 9878/10000 (98.78%)

EPOCH: 13


Loss=0.021052902564406395 Batch_id=468 Accuracy=98.91: 100%|██████████| 469/469 [00:09<00:00, 47.32it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0350, Accuracy: 9877/10000 (98.77%)

EPOCH: 14


Loss=0.020980238914489746 Batch_id=468 Accuracy=98.99: 100%|██████████| 469/469 [00:10<00:00, 45.64it/s]



Test set: Average loss: 0.0315, Accuracy: 9892/10000 (98.92%)

