<a href="https://colab.research.google.com/github/prgawade/battlefieldofAI/blob/main/S4/S4-Assignment-Solution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
from __future__ import print_function
#PyTorch is an open source machine learning framework
#PyTorch is a Python package that provides two high-level features:
#Tensor computation (like NumPy) with strong GPU acceleration
#Deep neural networks built on a tape-based autograd system
import torch
# A neural networks library deeply integrated with autograd designed for maximum flexibility
import torch.nn as nn
#Convolution functions such as conv1d , conv2d, conv3d
import torch.nn.functional as F
#torch.optim is a package implementing various optimization algorithms.
# Adam - Implements Adam algorithm
# SGD Implements stochastic gradient descent (optionally with momentum).
import torch.optim as optim
# The torchvision package consists of popular datasets, model architectures, and common image transformations for computer vision.
# Transforms are common image transformations available in the torchvision.transforms module. 
from torchvision import datasets, transforms

In [28]:
import torch.nn as nn
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(         
            nn.Conv2d(
                in_channels=1,              
                out_channels=128,            
                kernel_size=5,              
                stride=1,                   
                padding=2,                  
            ),                              
            nn.ReLU(),                      
            nn.BatchNorm2d(128),
            nn.Dropout(0.1),
            nn.Conv2d(128, 8, 1, 1, 1),
            nn.Conv2d(8, 16, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(0.1),
            nn.MaxPool2d(kernel_size=2)
        )

        self.conv2 = nn.Sequential(         
            nn.Conv2d(16, 16, 5, 1, 2),     
            nn.ReLU(), 
            nn.BatchNorm2d(16),
            nn.Dropout(0.1),
            nn.MaxPool2d(kernel_size=2), 
            nn.Conv2d(16, 16, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(0.1),
            nn.Conv2d(16, 8, 1, 1, 1)              
        )

        self.out = nn.Sequential(
            nn.Linear(8 * 8 * 8, 10),                 
        )
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        ########### Applying global average pooling ########################
        x = F.adaptive_avg_pool2d(x, (8, 8))
        # flatten the output of conv2 to (batch_size, 8 * 8 * 8)
        x = x.view(x.size(0), -1)       
        output = self.out(x)
        return  F.log_softmax(output)   # return x for visualization

In [29]:
!pip install torchsummary
#API to view the visualization of the model, which is helpful while debugging your network.
from torchsummary import summary
# Returns a bool indicating if CUDA is currently available.
# NVIDIA’s CUDA is a general purpose parallel computing platform and programming model that accelerates deep learning and other compute-intensive apps by taking advantage of the parallel processing power of GPUs.
use_cuda = torch.cuda.is_available()
# A torch.device is an object representing the device on which a torch.Tensor is or will be allocated.
device = torch.device("cuda" if use_cuda else "cpu")
# Specify which device to use
model = Net().to(device)
# Model summary
summary(model, input_size=(1, 28, 28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 128, 28, 28]           3,328
              ReLU-2          [-1, 128, 28, 28]               0
       BatchNorm2d-3          [-1, 128, 28, 28]             256
           Dropout-4          [-1, 128, 28, 28]               0
            Conv2d-5            [-1, 8, 30, 30]           1,032
            Conv2d-6           [-1, 16, 30, 30]           1,168
              ReLU-7           [-1, 16, 30, 30]               0
       BatchNorm2d-8           [-1, 16, 30, 30]              32
           Dropout-9           [-1, 16, 30, 30]               0
        MaxPool2d-10           [-1, 16, 15, 15]               0
           Conv2d-11           [-1, 16, 15, 15]           6,416
             ReLU-12           [-1, 16, 15, 15]               0
    

  return  F.log_softmax(output)   # return x for visualization


In [30]:

#Sets the seed for generating random numbers. Returns a torch.Generator object.

torch.manual_seed(1)
#batch_size = 128
batch_size = 128
#PyTorch's DataLoader class, which in addition to our Dataset class, also takes in the following important arguments:

#batch_size, which denotes the number of samples contained in each generated batch.
#shuffle. If set to True, we will get a new order of exploration at each pass (or just keep a linear exploration scheme otherwise). Shuffling the order in which examples are fed to the classifier is helpful so that batches between epochs do not look alike. Doing so will eventually make our model more robust.
#num_workers, which denotes the number of processes that generate batches in parallel. A high enough number of workers assures that CPU computations are efficiently managed, i.e. that the bottleneck is indeed the neural network's forward and backward operations on the GPU (and not data generation).
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
# Download training data 
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, 
                    transform=transforms.Compose([ # Composes several transforms together.
                        transforms.ToTensor(), # Convert a PIL Image or numpy.ndarray to tensor.
                        transforms.Normalize((0.1307,), (0.3081,)) # Normalize(mean, std[, inplace]) Normalize a tensor image with mean and standard deviation.
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


In [31]:
from tqdm import tqdm
# model defined above
# device in this case will cpu
# train loader to download training images and transform into tensor 
# SGD optimizer
# The learning rate controls how quickly the model is adapted to the problem. Smaller learning rates require more training epochs given the smaller changes made to the weights each update, whereas larger learning rates result in rapid changes and require fewer training epochs.
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad() # Sets the gradients of all optimized torch.Tensor s to zero.
        output = model(data)
        # loss function
        loss = F.nll_loss(output, target) # loss function - The negative log likelihood loss.
        loss.backward() 
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [32]:

model = Net().to(device)

#torch.optim.SGD(params, lr=<required parameter>, momentum=0, dampening=0, weight_decay=0, nesterov=False, *, maximize=False, foreach=None, differentiable=False)
#params (iterable) – iterable of parameters to optimize or dicts defining parameter groups

#lr (float) – learning rate

#momentum (float, optional) – momentum factor (default: 0)

#weight_decay (float, optional) – weight decay (L2 penalty) (default: 0)

#dampening (float, optional) – dampening for momentum (default: 0)

#nesterov (bool, optional) – enables Nesterov momentum (default: False)

#maximize (bool, optional) – maximize the params based on the objective, instead of minimizing (default: False)

#foreach (bool, optional) – whether foreach implementation of optimizer is used (default: None)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
epochs = 20
for epoch in range(1, epochs + 1):
    print('epoch value is , ', epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)



epoch value is ,  1


  return  F.log_softmax(output)   # return x for visualization
loss=0.1726485937833786 batch_id=468: 100%|██████████| 469/469 [00:20<00:00, 23.06it/s]



Test set: Average loss: 0.0986, Accuracy: 9691/10000 (97%)

epoch value is ,  2


loss=0.02403418719768524 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.96it/s]



Test set: Average loss: 0.0323, Accuracy: 9902/10000 (99%)

epoch value is ,  3


loss=0.027550719678401947 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.89it/s]



Test set: Average loss: 0.0283, Accuracy: 9906/10000 (99%)

epoch value is ,  4


loss=0.0842796340584755 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.03it/s]



Test set: Average loss: 0.0300, Accuracy: 9896/10000 (99%)

epoch value is ,  5


loss=0.0314621664583683 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 27.06it/s]



Test set: Average loss: 0.0230, Accuracy: 9921/10000 (99%)

epoch value is ,  6


loss=0.02799431048333645 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.02it/s]



Test set: Average loss: 0.0279, Accuracy: 9908/10000 (99%)

epoch value is ,  7


loss=0.0137522853910923 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 26.93it/s]



Test set: Average loss: 0.0231, Accuracy: 9929/10000 (99%)

epoch value is ,  8


loss=0.01810109056532383 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 26.68it/s]



Test set: Average loss: 0.0233, Accuracy: 9930/10000 (99%)

epoch value is ,  9


loss=0.09114695340394974 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 26.34it/s]



Test set: Average loss: 0.0220, Accuracy: 9927/10000 (99%)

epoch value is ,  10


loss=0.036803558468818665 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.68it/s]



Test set: Average loss: 0.0207, Accuracy: 9937/10000 (99%)

epoch value is ,  11


loss=0.03330874443054199 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 27.15it/s]



Test set: Average loss: 0.0182, Accuracy: 9944/10000 (99%)

epoch value is ,  12


loss=0.026055367663502693 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 26.44it/s]



Test set: Average loss: 0.0239, Accuracy: 9928/10000 (99%)

epoch value is ,  13


loss=0.004545198287814856 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 27.58it/s]



Test set: Average loss: 0.0180, Accuracy: 9938/10000 (99%)

epoch value is ,  14


loss=0.004932833835482597 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.66it/s]



Test set: Average loss: 0.0225, Accuracy: 9932/10000 (99%)

epoch value is ,  15


loss=0.0004928396665491164 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.76it/s]



Test set: Average loss: 0.0182, Accuracy: 9944/10000 (99%)

epoch value is ,  16


loss=0.0180299561470747 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.71it/s]



Test set: Average loss: 0.0176, Accuracy: 9948/10000 (99%)

epoch value is ,  17


loss=0.0023433775641024113 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.64it/s]



Test set: Average loss: 0.0194, Accuracy: 9947/10000 (99%)

epoch value is ,  18


loss=0.003397254506126046 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.86it/s]



Test set: Average loss: 0.0175, Accuracy: 9941/10000 (99%)

epoch value is ,  19


loss=0.0004057574551552534 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.80it/s]



Test set: Average loss: 0.0185, Accuracy: 9946/10000 (99%)

epoch value is ,  20


loss=0.00013964359823148698 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.04it/s]



Test set: Average loss: 0.0208, Accuracy: 9934/10000 (99%)

