<a href="https://colab.research.google.com/github/prgawade/battlefieldofAI/blob/main/EVA4_Session_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from __future__ import print_function
#PyTorch is an open source machine learning framework
#PyTorch is a Python package that provides two high-level features:
#Tensor computation (like NumPy) with strong GPU acceleration
#Deep neural networks built on a tape-based autograd system
import torch
# A neural networks library deeply integrated with autograd designed for maximum flexibility
import torch.nn as nn
#Convolution functions such as conv1d , conv2d, conv3d
import torch.nn.functional as F
#torch.optim is a package implementing various optimization algorithms.
# Adam - Implements Adam algorithm
# SGD Implements stochastic gradient descent (optionally with momentum).
import torch.optim as optim
# The torchvision package consists of popular datasets, model architectures, and common image transformations for computer vision.
# Transforms are common image transformations available in the torchvision.transforms module. 
from torchvision import datasets, transforms

In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # torch.nn.Conv2d Applies a 2D convolution over an input signal composed of several input planes.
        #torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
        # input channel is 1 , output channels 32 , number of kernels is 32, size of kernel is 3 X 3
        #28 X 28 X 1 | (3 X 3 X 1) X 32 | 26 X 26 X 32
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1) #input - 28 X 28 X 1 # Output - 28 X 28 X 32 # output  RF - 3 
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1) #input - 28 X 28 X 32 # Output - 28 X 28 X 64 # output  RF - 9
        self.pool1 = nn.MaxPool2d(2, 2) #input - 28 X 28 X 64 # Output - 14 X 14 X 64 # output  RF 
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1) # input 14 X 14 X 64 # Output - 14 X 14 X 128 # output  RF
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1) # input 14 X 14 X 128 # Output - 14 X 14 X 256 # output  RF
        self.pool2 = nn.MaxPool2d(2, 2) # input 14 X 14 X 256 # Output - 7 X 7 X 256 # output  RF
        self.conv5 = nn.Conv2d(256, 512, 3) # input 7 X 7 X 256 # Output - 5 X 5 X 512 # output  RF
        self.conv6 = nn.Conv2d(512, 1024, 3)# input 5 X 5 X 512 # Output - 3 X 3 X 1024 # output  RF
        self.conv7 = nn.Conv2d(1024, 10, 3) # input 3 X 3 X 1024 # Output - 1 X 1 X 10 # output  RF

    def forward(self, x):
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
        x = F.relu(self.conv6(F.relu(self.conv5(x))))
        x = F.relu(self.conv7(x))
        x = x.view(-1, 10)
        return F.log_softmax(x)

In [None]:
!pip install torchsummary
#API to view the visualization of the model, which is helpful while debugging your network.
from torchsummary import summary
# Returns a bool indicating if CUDA is currently available.
# NVIDIA’s CUDA is a general purpose parallel computing platform and programming model that accelerates deep learning and other compute-intensive apps by taking advantage of the parallel processing power of GPUs.
use_cuda = torch.cuda.is_available()
# A torch.device is an object representing the device on which a torch.Tensor is or will be allocated.
device = torch.device("cuda" if use_cuda else "cpu")
# Specify which device to use
model = Net().to(device)
# Model summary
summary(model, input_size=(1, 28, 28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 

  return F.log_softmax(x)


In [None]:

#Sets the seed for generating random numbers. Returns a torch.Generator object.

torch.manual_seed(1)
batch_size = 128
#PyTorch's DataLoader class, which in addition to our Dataset class, also takes in the following important arguments:

#batch_size, which denotes the number of samples contained in each generated batch.
#shuffle. If set to True, we will get a new order of exploration at each pass (or just keep a linear exploration scheme otherwise). Shuffling the order in which examples are fed to the classifier is helpful so that batches between epochs do not look alike. Doing so will eventually make our model more robust.
#num_workers, which denotes the number of processes that generate batches in parallel. A high enough number of workers assures that CPU computations are efficiently managed, i.e. that the bottleneck is indeed the neural network's forward and backward operations on the GPU (and not data generation).
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
# Download training data 
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, 
                    transform=transforms.Compose([ # Composes several transforms together.
                        transforms.ToTensor(), # Convert a PIL Image or numpy.ndarray to tensor.
                        transforms.Normalize((0.1307,), (0.3081,)) # Normalize(mean, std[, inplace]) Normalize a tensor image with mean and standard deviation.
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [7]:
from tqdm import tqdm
# model defined above
# device in this case will cpu
# train loader to download training images and transform into tensor 
# SGD optimizer
# The learning rate controls how quickly the model is adapted to the problem. Smaller learning rates require more training epochs given the smaller changes made to the weights each update, whereas larger learning rates result in rapid changes and require fewer training epochs.
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad() # Sets the gradients of all optimized torch.Tensor s to zero.
        output = model(data)
        # loss function
        loss = F.nll_loss(output, target) # loss function - The negative log likelihood loss.
        loss.backward() 
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [15]:

model = Net().to(device)

#torch.optim.SGD(params, lr=<required parameter>, momentum=0, dampening=0, weight_decay=0, nesterov=False, *, maximize=False, foreach=None, differentiable=False)
#params (iterable) – iterable of parameters to optimize or dicts defining parameter groups

#lr (float) – learning rate

#momentum (float, optional) – momentum factor (default: 0)

#weight_decay (float, optional) – weight decay (L2 penalty) (default: 0)

#dampening (float, optional) – dampening for momentum (default: 0)

#nesterov (bool, optional) – enables Nesterov momentum (default: False)

#maximize (bool, optional) – maximize the params based on the objective, instead of minimizing (default: False)

#foreach (bool, optional) – whether foreach implementation of optimizer is used (default: None)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
epochs = 4
for epoch in range(1, epochs + 1):
    print('epoch value is , ', epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

# Test set: Average loss: 0.3189, Accuracy: 8758/10000 (88%) - lr=0.01, momentum=0.9 , epoch 1
# Test set: Average loss: 0.4987, Accuracy: 7927/10000 (79%) - lr=0.02, momentum=0.9 , epoch 1 
# Test set: Average loss: 1.8482, Accuracy: 3000/10000 (30%) - lr=0.03, momentum=0.9 , epoch 1 
# lr=0.01, momentum=0.9 , epoch 9
# accuracy at epoch 3
#Test set: Average loss: 0.0377, Accuracy: 9874/10000 (99%)


epoch value is ,  1


  return F.log_softmax(x)
loss=0.45001575350761414 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 26.72it/s]



Test set: Average loss: 0.5257, Accuracy: 7852/10000 (79%)

epoch value is ,  2


loss=0.26906776428222656 batch_id=468: 100%|██████████| 469/469 [00:18<00:00, 24.70it/s]



Test set: Average loss: 0.2764, Accuracy: 8885/10000 (89%)

epoch value is ,  3


loss=0.21987885236740112 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 26.87it/s]



Test set: Average loss: 0.2649, Accuracy: 8912/10000 (89%)

epoch value is ,  4


loss=0.1741223931312561 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 26.88it/s]



Test set: Average loss: 0.2600, Accuracy: 8915/10000 (89%)

