<a href="https://colab.research.google.com/github/bhargav23/Dataset/blob/master/Copy_of_EVA4_Session_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [0]:
class Net(nn.Module):
    """Fully convolutional 10-class classifier for single-channel 28x28 images.

    Layout: two padded 3x3 conv pairs, each followed by a 2x2 max-pool
    (28 -> 14 -> 7), then three unpadded 3x3 convs (7 -> 5 -> 3 -> 1) so the
    last layer yields one value per class.

    ``forward`` returns log-probabilities of shape ``(N, 10)``, which is the
    input format expected by ``F.nll_loss``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Shape / receptive-field (RF) notes assume a 1x28x28 input.
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)     # 28x28x1   -> 28x28x32,  RF 3
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)    # 28x28x32  -> 28x28x64,  RF 5
        self.pool1 = nn.MaxPool2d(2, 2)                 # 28x28x64  -> 14x14x64,  RF 6
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)   # 14x14x64  -> 14x14x128, RF 10
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)  # 14x14x128 -> 14x14x256, RF 14
        self.pool2 = nn.MaxPool2d(2, 2)                 # 14x14x256 -> 7x7x256,   RF 16
        self.conv5 = nn.Conv2d(256, 512, 3)             # 7x7x256   -> 5x5x512,   RF 24
        self.conv6 = nn.Conv2d(512, 1024, 3)            # 5x5x512   -> 3x3x1024,  RF 32
        self.conv7 = nn.Conv2d(1024, 10, 3)             # 3x3x1024  -> 1x1x10,    RF 40

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities.

        Args:
            x: tensor of shape ``(N, 1, 28, 28)``. Other spatial sizes leave
                more than one value per class after ``conv7`` and the
                ``view`` below would then fold them into the batch axis.

        Returns:
            Tensor of shape ``(N, 10)`` holding log-softmax scores.
        """
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
        x = F.relu(self.conv6(F.relu(self.conv5(x))))
        # No ReLU on the last layer: clamping the logits to >= 0 would stop
        # the network from ever assigning a class a strongly negative score.
        x = self.conv7(x)
        x = x.view(-1, 10)
        # dim must be explicit; the implicit choice is deprecated and warns.
        return F.log_softmax(x, dim=1)

In [0]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available() #Returns a bool indicating if CUDA is currently available.
device = torch.device("cuda" if use_cuda else "cpu") 
#A torch.device is an object representing the device on which a torch.Tensor is or will be allocated.
# https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device
model = Net().to(device) #It moves the model weights to GPU.
summary(model, input_size=(1, 28, 28))



RuntimeError: ignored

In [0]:
torch.manual_seed(1)  # seed PyTorch's RNG so repeated runs start from the same state
batch_size = 128

# Worker / pinned-memory options are only requested when a GPU is in use
# (`use_cuda` is set in an earlier cell).
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# One preprocessing pipeline shared by both splits, so training and evaluation
# can never drift apart.
mnist_transform = transforms.Compose([  # Composes several transforms together.
    # ToTensor converts a PIL image / uint8 array with values in 0..255 into a
    # float tensor scaled to 0.0..1.0.
    transforms.ToTensor(),
    # Standardise with a fixed mean and std (presumably the MNIST training-set
    # statistics - confirm if the dataset changes).
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation metrics do not depend on sample order, so the test split is not
# shuffled; download=True keeps this statement usable on its own.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)

'''
Normalizing the data helps the network converge.

If it’s not normalized, the images will not all be on the same scale: some images will induce larger errors, others smaller ones.

Every error is added to the gradient with the same weight and backpropagated.
Weight corrections will be overestimated for some images and underestimated on others.

At worst your optimizer will not find a way to minimize your loss due to oscillating too much, 
probably it will converge but will be slower, at best there won’t be any difference.
'''


'\nNormalizing the data helps the network converge.\n\nIf it’s not normalized each image will not be on the same scale, some images will induce bigger errors, other less errors.\n\nEverything error will be added to the gradient with the same weight and backpropagated. \nWeight corrections will be overestimated for some images and underestimated on others.\n\nAt worst your optimizer will not find a way to minimize your loss due to oscillating too much, \nprobably it will converge but will be slower, at best there won’t be any difference.\n'

In [0]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one pass of gradient updates over ``train_loader``.

    Args:
        model: network whose output is fed to ``F.nll_loss``.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: accepted for the caller's convenience; not read by this function.
    """
    model.train()  # switch the module to training mode
    progress = tqdm(train_loader)
    # enumerate supplies the running batch index shown in the progress bar.
    for batch_idx, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # drop gradients left over from the previous batch
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        progress.set_description(desc=f'loss={loss.item()} batch_id={batch_idx}')

#All optimizers implement a step() method, that updates the parameters. It can be used in two ways:

#optimizer.step()
#This is a simplified version supported by most optimizers. The function can be called once the gradients are computed using e.g. backward().

def test(model, device, test_loader):
    model.eval() #Sets module in evaluation mode
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
'''
model.eval() will notify all your layers that you are in eval mode, that way, 
batchnorm or dropout layers will work in eval mode instead of training mode.

torch.no_grad() affects the autograd engine by deactivating it.
It will reduce memory usage and speed up computations but you won’t be able to backprop (which you don’t want in an eval script).
'''



'\nmodel.eval() will notify all your layers that you are in eval mode, that way, \nbatchnorm or dropout layers will work in eval mode instead of training mode.\n\ntorch.no_grad() impacts the autograd engine and deactivate it. \nIt will reduce memory usage and speed up computations but you won’t be able to backprop (which you don’t want in an eval script).\n'

In [0]:

# Start from a freshly initialised network placed on the selected device.
model = Net().to(device)

# model.parameters() returns an iterator over the module's parameters;
# handing it to the optimizer tells SGD which tensors to update.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 2) yields only 1, so this performs a single train + evaluate cycle.
epochs = range(1, 2)
for epoch in epochs:
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)




ValueError: ignored