<a href="https://colab.research.google.com/github/navrat/EVA_phase1_2022_23/blob/main/EVA4_Session_2_NaveenR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import print_function
import torch # torch is a library to perform numerical computations. Torch provides a wide range of functions for working with arrays, including mathematical operations, linear algebra, and random number generation. It also has many features specifically designed for working with neural networks, such as tools for defining and training models, and algorithms for optimization and regularization 
import torch.nn as nn # torch.nn provides different classes within that make up the individidual components of a neural network like layers, containers. every module in torch subclasses the nn module
import torch.nn.functional as F # Module that contains a host of stateless functions to create neural networks. One of the main advantages of using torch.nn.functional is that it allows users to easily define and apply custom functions to their data, without having to create and train a full neural network model
import torch.optim as optim # torch.optim is a PyTorch package containing various optimization algorithms. Most commonly used methods for optimizers are already supported and custom optimizers can be built into it as well.
from torchvision import datasets, transforms # The TorchVision datasets subpackage is a convenient utility for accessing well-known public image and video datasets. It contains methods like __getitem__ and __len__ since this module and its datasets are subclasses of torch.utils.data.Datasets. Transforms are common image transformations. They can be chained together using Compose. Additionally, there is the torchvision.transforms.functional module. Functional transforms give fine-grained control over the transformations. This is useful if you have to build a more complex transformation pipeline (like for segmentation tasks).


In [8]:
class Net(nn.Module):
    """Convolutional classifier mapping 1x28x28 images to 10 log-probabilities.

    Architecture: two (conv, conv, max-pool) stages with padded 3x3
    convolutions, followed by three unpadded 3x3 convolutions that shrink the
    7x7 feature map to 1x1, and a final 10 -> 10 linear layer.

    The shape and receptive-field (RF) notes next to each layer assume a
    1x28x28 input. RF values are the theoretical receptive field computed
    from the kernel sizes and strides; they exceed 28 in the last layers,
    i.e. every output unit can see the whole image.
    """

    def __init__(self):
        """Create the layers (attribute names are kept stable for state_dict keys)."""
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)      # 1x28x28   -> 32x28x28,  RF 3
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)     # 32x28x28  -> 64x28x28,  RF 5
        self.pool1 = nn.MaxPool2d(2, 2)                  # 64x28x28  -> 64x14x14,  RF 6
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)    # 64x14x14  -> 128x14x14, RF 10
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)   # 128x14x14 -> 256x14x14, RF 14
        self.pool2 = nn.MaxPool2d(2, 2)                  # 256x14x14 -> 256x7x7,   RF 16
        self.conv5 = nn.Conv2d(256, 512, 3)              # 256x7x7   -> 512x5x5,   RF 24
        self.conv6 = nn.Conv2d(512, 1024, 3)             # 512x5x5   -> 1024x3x3,  RF 32
        self.conv7 = nn.Conv2d(1024, 10, 3)              # 1024x3x3  -> 10x1x1,    RF 40
        self.lin = nn.Linear(10, 10)                     # 10 -> 10 class scores

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: image batch; the shape notes in ``__init__`` assume (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) holding log-probabilities over the classes.
        """
        # Stage 1: conv -> relu -> conv -> relu -> pool.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        # Stage 2: conv -> relu -> conv -> relu -> pool.
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        # Unpadded convolutions reduce the spatial size 7 -> 5 -> 3 -> 1.
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv7(x))
        # Flatten (N, 10, 1, 1) to (N, 10) before the linear layer.
        x = self.lin(x.view(-1, 10))
        # Pass dim explicitly: the implicit-dim form is deprecated and warns.
        return F.log_softmax(x, dim=1)

In [9]:
# torchsummary is a third-party package; in a notebook, install it first with:
# !pip install torchsummary
from torchsummary import summary  # summary() is called below, so the import must be active

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
# Print per-layer output shapes and parameter counts for a 1x28x28 input.
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
           Linear-10                   [-1, 10]             110
Total params: 6,379,896
Trainable params: 6,379,896
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estima

  return F.log_softmax(x) # return the logged softmax of the 1-d


In [10]:


# Fix the RNG seed so weight initialisation and batch shuffling are reproducible.
torch.manual_seed(1)
batch_size = 128  # number of images per batch

# Extra DataLoader options (one worker process, pinned memory) only when CUDA is used.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Same preprocessing for both splits: convert to a tensor, then normalise
# with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split (downloaded to ../data if missing) and test split.
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)

# Both loaders draw their batches in shuffled order.
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=True, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [11]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Args:
        model: network to train; switched to training mode.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: accepted for the caller's bookkeeping; not used in the body.
    """
    model.train()
    progress = tqdm(train_loader)
    for step, batch in enumerate(progress):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        # Show the latest batch loss on the progress bar.
        progress.set_description(desc=f'loss={batch_loss.item()} batch_id={step}')


def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Args:
        model: network to evaluate; switched to evaluation mode.
        device: device each batch is moved to before the forward pass.
        test_loader: iterable yielding ``(data, target)`` batches; its
            ``dataset`` attribute supplies the total sample count.
    """
    model.eval()
    total_loss = 0
    num_correct = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)

            # Accumulate the summed (not averaged) loss of this batch.
            total_loss += F.nll_loss(log_probs, labels, reduction='sum').item()

            # Predicted class = index of the largest log-probability.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            hits = predicted.eq(labels.view_as(predicted))
            num_correct += hits.sum().item()

    num_samples = len(test_loader.dataset)
    total_loss /= num_samples  # per-sample average

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        total_loss, num_correct, num_samples,
        100. * num_correct / num_samples))

In [12]:

# Start from a freshly initialised network on the selected device.
model = Net().to(device)

# Stochastic gradient descent with learning rate 0.01 and momentum 0.9.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

num_epochs = 1  # one full pass over the training data
for epoch in range(1, num_epochs + 1):
    # Fit on the training split, then report loss/accuracy on the test split.
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

  return F.log_softmax(x) # return the logged softmax of the 1-d
loss=0.10277990251779556 batch_id=468: 100%|██████████| 469/469 [00:20<00:00, 23.37it/s]



Test set: Average loss: 0.0856, Accuracy: 9725/10000 (97%)



# controlled for batch size

batch - loss - accuracy

128 - 1.9 - 28%

64 - 1.866 - 29% 

32 - 1.8658  - 29% 

16 - 1.454 - 48%

# controlled for learning rate

at learning rate = 0.001

batch - loss - accuracy

128 - 2.17 - 19%

32 - 1.8995 - 28%

16 - 1.27 - 48%