<a href="https://colab.research.google.com/github/tapasML/Quiz9/blob/main/MNIST_Session_4_Arch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from datetime import datetime

# Capture the wall-clock time once and show it as HH:MM:SS.
now = datetime.now()
current_time = format(now, "%H:%M:%S")
print("Current Time =", current_time)

Current Time = 23:06:35


**Install required packages**

select GPU as device

In [2]:
!pip install torchsummary
from torchsummary import summary
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Use the GPU when PyTorch can see one; otherwise run on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")



**Define the Network**

Since the parameter budget is restricted, we cannot abruptly expand and then shrink the number of channels within a layer, as that hurts the learning of the weights. 
Instead, start with a modest width (32 channels) and widen gradually from block to block (32 → 64 → 128).

Padding is used in the first two blocks so that the convolutions keep the spatial size and no border information is lost.

---



Block #1:

[Conv -> ReLU -> BatchNorm] -> [Conv -> ReLU -> BatchNorm] -> MaxPool (note: the `Net` class below does not apply Dropout)


---


Block #2:

[Conv -> ReLU -> BatchNorm] x 3 -> MaxPool (note: the `Net` class below does not apply Dropout)


---


Block #3:

[Conv -> ReLU -> BatchNorm] x 3 -> GAP (4x4 average pooling)


---


Block #4:

[Flatten -> Linear(128, 10) -> Log_SoftMax]


In [3]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn.functional as F

class Net(nn.Module):
    """Convolutional classifier for 3-channel 32x32 images with 10 classes.

    Every convolution is followed by ReLU and then BatchNorm:

    * Block 1: two 3x3 convs (3 -> 32 -> 32, padding 1), then 2x2 max-pool.
    * Block 2: three 3x3 convs (32 -> 64 -> 64 -> 64, padding 1), then 2x2
      max-pool.
    * Block 3: three 3x3 convs (64 -> 128 -> 128 -> 128); the first two are
      unpadded, the last is padded, followed by a 4x4 average pool.
    * Head: flatten, ``Linear(128, 10)``, log-softmax over the class axis.

    For a 32x32 input the spatial size goes 32 -> 16 -> 8 -> 6 -> 4 -> 1, so
    the flattened feature vector has exactly 128 entries per sample.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that seeded
        # weight initialisation is unchanged.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)  # 32x32 -> 16x16

        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv5 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)  # 16x16 -> 8x8

        self.conv6 = nn.Conv2d(64, 128, 3, padding=0)   # 8x8 -> 6x6
        self.conv7 = nn.Conv2d(128, 128, 3, padding=0)  # 6x6 -> 4x4
        self.conv8 = nn.Conv2d(128, 128, 3, padding=1)  # stays 4x4
        self.avg_pool = nn.AvgPool2d(kernel_size=4, stride=4)  # 4x4 -> 1x1

        self.batchNorm_1 = nn.BatchNorm2d(32)
        self.batchNorm_2 = nn.BatchNorm2d(32)
        self.batchNorm_3 = nn.BatchNorm2d(64)
        self.batchNorm_4 = nn.BatchNorm2d(64)
        self.batchNorm_5 = nn.BatchNorm2d(64)

        self.batchNorm_6 = nn.BatchNorm2d(128)
        self.batchNorm_7 = nn.BatchNorm2d(128)
        self.batchNorm_8 = nn.BatchNorm2d(128)
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``.

        ``x`` must be a ``(batch, 3, 32, 32)`` tensor. Other spatial sizes do
        not reduce to 1x1 before the linear layer and raise a shape error
        there instead of silently mixing samples.
        """
        # Block 1
        x = self.batchNorm_1(F.relu(self.conv1(x)))
        x = self.batchNorm_2(F.relu(self.conv2(x)))
        x = self.pool1(x)

        # Block 2 (uses its own pooling layer, pool2)
        x = self.batchNorm_3(F.relu(self.conv3(x)))
        x = self.batchNorm_4(F.relu(self.conv4(x)))
        x = self.batchNorm_5(F.relu(self.conv5(x)))
        x = self.pool2(x)

        # Block 3
        x = self.batchNorm_6(F.relu(self.conv6(x)))
        x = self.batchNorm_7(F.relu(self.conv7(x)))
        x = self.batchNorm_8(F.relu(self.conv8(x)))
        x = self.avg_pool(x)

        # Flatten per sample; keeping the batch axis fixed means a wrong
        # feature size fails at the linear layer rather than changing the
        # batch dimension.
        x = x.flatten(1)
        x = self.fc(x)
        # Explicit dim avoids the deprecated implicit-dimension code path.
        return F.log_softmax(x, dim=1)

**Instantiate Network**

Assign GPU to network model

Print the model



In [4]:
# Build the network, move it to the selected device, and print the
# torchsummary layer table for one 3x32x32 input.
model = Net()
model = model.to(device)
summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
       BatchNorm2d-2           [-1, 32, 32, 32]              64
            Conv2d-3           [-1, 32, 32, 32]           9,248
       BatchNorm2d-4           [-1, 32, 32, 32]              64
         MaxPool2d-5           [-1, 32, 16, 16]               0
            Conv2d-6           [-1, 64, 16, 16]          18,496
       BatchNorm2d-7           [-1, 64, 16, 16]             128
            Conv2d-8           [-1, 64, 16, 16]          36,928
       BatchNorm2d-9           [-1, 64, 16, 16]             128
           Conv2d-10           [-1, 64, 16, 16]          36,928
      BatchNorm2d-11           [-1, 64, 16, 16]             128
        MaxPool2d-12             [-1, 64, 8, 8]               0
           Conv2d-13            [-1, 128, 6, 6]          73,856
      BatchNorm2d-14            [-1, 12



**Load Train and Test data**

Set the batch size.

Since the network uses Batch Normalization, the data is not normalized while loading (only `ToTensor` is applied).



In [5]:
import torch
import torchvision
import torchvision.transforms as transforms

# Seed PyTorch's global RNG so that later random operations (e.g. training
# shuffles) are repeatable from this point on.
torch.manual_seed(1)
batch_size = 128

# A worker process and pinned host memory are only requested when CUDA is used.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Training split: downloaded on first use and reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.CIFAR10('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor()
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation split: read from the same root directory. The aggregate metrics
# do not depend on sample order, so the loader is not shuffled; this also
# avoids drawing from the global RNG on every evaluation pass.
test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.CIFAR10('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                    ])),
    batch_size=batch_size, shuffle=False, **kwargs)


Files already downloaded and verified


**Train and Test Network Flow**

Print Logs

In [6]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Puts ``model`` in training mode and, for each batch, moves the data to
    ``device``, clears the gradients, runs the forward pass, computes the
    negative log-likelihood loss, back-propagates and steps ``optimizer``.
    The loss is ``F.nll_loss``, so ``model`` should output log-probabilities.
    A tqdm progress bar shows the most recent batch loss and batch index.

    Args:
        model: Network to train; updated in place.
        device: Device the batches are moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer holding ``model``'s parameters.
        epoch: Epoch number supplied by the caller; not used inside the
            function.
    """
    model.train()
    # The context manager closes the bar on normal completion and on
    # interruption, so tqdm's private instance registry needs no manual
    # clean-up.
    with tqdm(train_loader) as pbar:
        for batch_idx, (data, target) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    tqdm._instances.clear()

**Execute The Network**

Using SGD with learning rate = 0.01 and momentum.

Since the majority of the learning is done within the first 10 epochs, the learning rate is reduced after epoch 10 (using a scheduler) to reduce overshooting of the weights.



In [None]:
# Train a newly constructed network with SGD + momentum.
model = Net()
model = model.to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)
# MultiStepLR multiplies the learning rate by 0.1 once ten scheduler steps
# have been taken.
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer=optimizer, milestones=[10], gamma=0.1)

# Epochs are numbered 1..24: train, evaluate, then advance the schedule.
for epoch in range(1, 25):
    train(model=model, device=device, train_loader=train_loader,
          optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)
    scheduler.step()

loss=0.9001146554946899 batch_id=390: 100%|██████████| 391/391 [00:12<00:00, 31.94it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.9189, Accuracy: 6733/10000 (67.33%)



loss=0.47179263830184937 batch_id=390: 100%|██████████| 391/391 [00:11<00:00, 33.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.7239, Accuracy: 7485/10000 (74.85%)



loss=0.6154791116714478 batch_id=390: 100%|██████████| 391/391 [00:12<00:00, 31.90it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.6158, Accuracy: 7894/10000 (78.94%)



loss=0.6148496866226196 batch_id=390: 100%|██████████| 391/391 [00:12<00:00, 32.23it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.6073, Accuracy: 7895/10000 (78.95%)



loss=0.3904905617237091 batch_id=390: 100%|██████████| 391/391 [00:12<00:00, 31.70it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.5440, Accuracy: 8186/10000 (81.86%)



loss=0.35304635763168335 batch_id=390: 100%|██████████| 391/391 [00:12<00:00, 32.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.5822, Accuracy: 8071/10000 (80.71%)



loss=0.29712095856666565 batch_id=390: 100%|██████████| 391/391 [00:12<00:00, 32.24it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.5474, Accuracy: 8216/10000 (82.16%)



loss=0.34353044629096985 batch_id=390: 100%|██████████| 391/391 [00:11<00:00, 32.61it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.5473, Accuracy: 8311/10000 (83.11%)



loss=0.34219232201576233 batch_id=390: 100%|██████████| 391/391 [00:12<00:00, 32.36it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.5693, Accuracy: 8338/10000 (83.38%)



loss=0.23331637680530548 batch_id=390: 100%|██████████| 391/391 [00:12<00:00, 31.33it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.5838, Accuracy: 8308/10000 (83.08%)



loss=0.08692455291748047 batch_id=390: 100%|██████████| 391/391 [00:11<00:00, 32.62it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.4924, Accuracy: 8539/10000 (85.39%)



loss=0.042441558092832565 batch_id=390: 100%|██████████| 391/391 [00:12<00:00, 31.16it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


Test set: Average loss: 0.4982, Accuracy: 8527/10000 (85.27%)



loss=0.03397040441632271 batch_id=335:  86%|████████▌ | 335/391 [00:10<00:01, 30.92it/s]