<a href="https://colab.research.google.com/github/payalbhatia/EVA/blob/master/Pytorch_CLassification_on_MNIST_Dataset_EVA4_Session_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import print_function
import torch
import torch.nn as nn # create network -- Object Oriented way of doing things.
# The way it works: you first define an nn.Module object, and then invoke its 'forward' method to run it
import torch.nn.functional as F  #nn.functional provides some layers / activations in form 
# of functions that can be called directly on the input, rather than by first defining an object.
import torch.optim as optim # train network
from torchvision import datasets, transforms
#Torchvision package consists of popular datasets, model architectures, and common image transformations for computer vision

**forward function**. This is where you define how your output is computed. This function doesn't need to be called explicitly; it runs when you call the nn.Module instance like a function, with the input as its argument.

## ***Custom***

In [0]:
print(torch.__version__)

1.3.1


In [0]:
# torch.empty only allocates storage; the values are whatever was already in
# memory, so the printed numbers are arbitrary. The shape is passed as a list.
x = torch.empty([2, 3])
print(x, x.shape, sep="\n")

tensor([[2.3348e-36, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])
torch.Size([2, 3])


In [0]:
# Same allocation as before, but with the shape given as separate integers
# instead of a list; the contents are again uninitialized.
y = torch.empty(2, 3)
print(y, y.shape, sep="\n")

tensor([[2.3346e-36, 0.0000e+00, 4.4842e-44],
        [0.0000e+00,        nan, 0.0000e+00]])
torch.Size([2, 3])


In [0]:
# A 2x3 tensor of 64-bit integer zeros (torch.long is an alias of torch.int64).
z = torch.zeros((2, 3), dtype=torch.int64)
print(z)

tensor([[0, 0, 0],
        [0, 0, 0]])


In [0]:
# Build a 1-D tensor straight from a Python list; mixing a float and an int
# yields a floating-point tensor.
b = torch.tensor([5.09, 3])
print(b, b.shape, sep="\n")

tensor([5.0900, 3.0000])
torch.Size([2])


In [0]:
# A 3-D tensor built from nested lists: 4 blocks, each with 2 rows of 3 values.
c = torch.tensor([
    [[5.09, 3, 6], [3.09, 5.98, 8]],
    [[8, 9, 4.6], [9.9, 11.01, 12.4]],
    [[11, 48, 41.6], [93.9, 10.04, 15.7]],
    [[78, 91, 54.6], [59.9, 24.01, 31.4]],
])
print(c, c.shape, sep="\n")

tensor([[[ 5.0900,  3.0000,  6.0000],
         [ 3.0900,  5.9800,  8.0000]],

        [[ 8.0000,  9.0000,  4.6000],
         [ 9.9000, 11.0100, 12.4000]],

        [[11.0000, 48.0000, 41.6000],
         [93.9000, 10.0400, 15.7000]],

        [[78.0000, 91.0000, 54.6000],
         [59.9000, 24.0100, 31.4000]]])
torch.Size([4, 2, 3])


In [0]:
# Keep the first two axes and pick index 1 along the last one: (4, 2, 3) -> (4, 2).
print(c[..., 1], c[..., 1].shape, sep="\n")

tensor([[ 3.0000,  5.9800],
        [ 9.0000, 11.0100],
        [48.0000, 10.0400],
        [91.0000, 24.0100]])
torch.Size([4, 2])


*tensor.view() method is used to reshape the tensor.*

In [0]:
# view() reinterprets the same 20 values under a new shape without copying.
x = torch.randn(4, 5)
y = x.view(20)         # flattened to one dimension
z = x.view(-1, 2, 2)   # -1 lets PyTorch infer the leading size (20 / (2*2) = 5)
print(*(str(t) + " \n " + str(t.size()) for t in (x, y, z)), sep="\n")

tensor([[-0.2729, -0.4808,  0.8522,  0.7713, -1.0865],
        [-0.4579,  0.8608,  0.7706, -0.0243, -0.9568],
        [-0.4691, -0.1337,  0.6014,  0.0589, -0.2400],
        [ 0.1048,  1.4462,  0.8613, -1.8027, -1.0471]]) 
 torch.Size([4, 5])
tensor([-0.2729, -0.4808,  0.8522,  0.7713, -1.0865, -0.4579,  0.8608,  0.7706,
        -0.0243, -0.9568, -0.4691, -0.1337,  0.6014,  0.0589, -0.2400,  0.1048,
         1.4462,  0.8613, -1.8027, -1.0471]) 
 torch.Size([20])
tensor([[[-0.2729, -0.4808],
         [ 0.8522,  0.7713]],

        [[-1.0865, -0.4579],
         [ 0.8608,  0.7706]],

        [[-0.0243, -0.9568],
         [-0.4691, -0.1337]],

        [[ 0.6014,  0.0589],
         [-0.2400,  0.1048]],

        [[ 1.4462,  0.8613],
         [-1.8027, -1.0471]]]) 
 torch.Size([5, 2, 2])


*check the availability of cuda*



In [0]:
torch.cuda.is_available()

True

*Get the ID of the currently selected device*

In [0]:
torch.cuda.current_device()

0

*Get the name of the device with ID 0*

In [0]:
torch.cuda.get_device_name(0)

'Tesla T4'

In [0]:
# Host copy: this tensor is placed on the CPU and occupies CPU RAM.
tensor_cpu = torch.tensor(
    [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
    device=torch.device('cpu'),
)

# Device copy: the same values placed on the GPU, occupying GPU RAM.
# This line needs a CUDA device to be available.
tensor_gpu = torch.tensor(
    [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
    device=torch.device('cuda'),
)

In [0]:
tensor_cpu

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])

In [0]:
tensor_gpu

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]], device='cuda:0')

 *the current GPU **memory usage** by 
tensors in bytes for a given device* 

In [0]:
torch.cuda.memory_allocated()

512

*the current GPU memory managed by the
caching allocator in bytes for a given device (from PyTorch 1.4 onwards `memory_cached` is deprecated in favour of `memory_reserved`)*

In [0]:
torch.cuda.memory_cached()

2097152

*Releases all unoccupied cached memory currently held by
the caching allocator so that it can be used by other
GPU applications. Blocks that still hold live tensors are kept, so the cached figure below does not change.*

In [0]:
torch.cuda.empty_cache()


In [0]:
torch.cuda.memory_cached()

2097152

*set up and run CUDA operations*

In [0]:
torch.cuda.get_device_properties(0).total_memory

15812263936

In [0]:
tensor_cpu = tensor_cpu * 5

In [0]:
tensor_cpu

tensor([[ 5., 10.],
        [15., 20.],
        [25., 30.]])

In [0]:
tensor_gpu = tensor_gpu * 5
tensor_gpu

tensor([[ 5., 10.],
        [15., 20.],
        [25., 30.]], device='cuda:0')

In [0]:
print(torch.cuda.memory_allocated())
print(torch.cuda.memory_cached())

1024
2097152


*free inside cache*

In [0]:
# Free space inside the caching allocator's pool on device 0:
# bytes managed by the allocator minus bytes occupied by live tensors.
# Descriptive names are used so the tensor `c` from the earlier cell is not
# overwritten, and the total-memory query is dropped because it was never used.
cached_bytes = torch.cuda.memory_cached(0)
allocated_bytes = torch.cuda.memory_allocated(0)
free_in_cache_bytes = cached_bytes - allocated_bytes
print(free_in_cache_bytes)

2096128




```
# Move GPU tensor to CPU
tensor_gpu_cpu = tensor_gpu.to(device='cpu')
 
# Move CPU tensor to GPU
tensor_cpu_gpu = tensor_cpu.to(device='cuda')
```



*By default a tensor is stored on the CPU, and any operation on it runs on the CPU. To move it to the GPU, call `.cuda()` on it.*

In [0]:
# Build a float32 tensor on the CPU and copy it to the GPU with .cuda().
# get_device() reports the index of the GPU that holds the tensor.
a = torch.tensor([1., 2.], dtype=torch.float32).cuda()
print(a.get_device(), a.is_cuda, sep="\n")

0
True


In [0]:
# The same float32 tensor left on the CPU: get_device() reports -1 and
# is_cuda is False.
b = torch.tensor([1., 2.], dtype=torch.float32)
print(b.get_device(), b.is_cuda, sep="\n")

-1
False


## **Code**


In [0]:
class Net(nn.Module):
    """Baseline all-convolutional classifier for 1x28x28 MNIST digits.

    Every convolution uses a 3x3 kernel with stride 1. Shape trace for a single
    1x28x28 input (batch dimension omitted), matching the recorded summary:

        conv1  (pad 1)   ->   32 x 28 x 28
        conv2  (pad 1)   ->   64 x 28 x 28
        pool1  (2x2 max) ->   64 x 14 x 14
        conv3  (pad 1)   ->  128 x 14 x 14
        conv4  (pad 1)   ->  256 x 14 x 14
        pool2  (2x2 max) ->  256 x  7 x  7
        conv5  (no pad)  ->  512 x  5 x  5
        conv6  (no pad)  -> 1024 x  3 x  3
        conv7  (no pad)  ->   10 x  1 x  1

    The final 10x1x1 map is flattened into one score per digit class.
    """

    def __init__(self):
        super(Net, self).__init__()  # let nn.Module register the layers below

        # Block 1: two padded convolutions, then 2x2 max pooling.
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Block 2: two padded convolutions, then 2x2 max pooling.
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Head: three unpadded convolutions, each trimming 2 pixels per side
        # pair, so a 7x7 map shrinks to 5x5, 3x3 and finally 1x1.
        self.conv5 = nn.Conv2d(256, 512, 3)
        self.conv6 = nn.Conv2d(512, 1024, 3)
        self.conv7 = nn.Conv2d(1024, 10, 3)

    def forward(self, x):
        """Compute per-class log-probabilities for a batch of images.

        This runs when the module instance is called, e.g. ``model(x)``.

        Args:
            x: float tensor of shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10); each row is a log-softmax over the ten
            digit classes, suitable for ``F.nll_loss``.
        """
        # conv1 -> ReLU -> conv2 -> ReLU -> pool1
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))

        # conv3 -> ReLU -> conv4 -> ReLU -> pool2
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))

        # conv5 -> ReLU -> conv6 -> ReLU
        x = F.relu(self.conv6(F.relu(self.conv5(x))))

        # The ReLU on the class scores is part of this baseline; the notebook's
        # Model2 variant removes it.
        x = F.relu(self.conv7(x))

        # Flatten (N, 10, 1, 1) into (N, 10): one score per class.
        x = x.view(-1, 10)

        # Normalise across the class axis. Passing dim explicitly avoids the
        # deprecated implicit-dimension choice (which also picks dim 1 for a
        # 2-D input, so the result is unchanged).
        return F.log_softmax(x, dim=1)

In [0]:
# Needs the third-party torchsummary package (!pip install torchsummary).
from torchsummary import summary

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Create the network and move its parameters to the chosen device.
model = Net().to(device)

# Print per-layer output shapes and parameter counts for a 1x28x28 input.
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------



In [0]:


# Fix the RNG seed so that results are reproducible.
torch.manual_seed(1)
batch_size = 128

# Extra DataLoader options, passed only when a GPU is in use.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Pre-processing shared by both splits. Compose chains the transforms:
#   ToTensor  - converts the input image to a PyTorch tensor
#   Normalize - image = (image - mean) / std, with mean=0.1307 and std=0.3081
#               (to get the image back: image * 0.3081 + 0.1307)
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# A DataLoader batches the dataset and can load it in the background so the
# next batch is ready when the training loop asks for it.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)


  0%|          | 0/9912422 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 21338891.35it/s]                            


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw


32768it [00:00, 333156.27it/s]
0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 5793245.38it/s]                           
8192it [00:00, 133308.01it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!


In [0]:
# Pull a single batch to inspect the tensor layout:
#   data   - a batch of grayscale MNIST images
#   target - the matching labels, each between 0 and 9
data, target = next(iter(train_loader))
print(data.shape, target.shape)

torch.Size([128, 1, 28, 28]) torch.Size([128])


In [0]:
from tqdm import tqdm #print progress in a script
def train(model, device, train_loader, optimizer, epoch):
    """Run one pass of optimizer updates over ``train_loader``.

    Args:
        model: network to train; it is switched to training mode here.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that updates the model's parameters.
        epoch: accepted from the caller but not read inside this function.
    """
    model.train()
    progress = tqdm(train_loader)  # wraps the loader to show a progress bar
    for batch_idx, batch in enumerate(progress):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # backward() accumulates into .grad, so clear the gradients left over
        # from the previous mini-batch before computing new ones.
        optimizer.zero_grad()

        # Forward pass, then negative log-likelihood of the true labels.
        predictions = model(inputs)
        loss = F.nll_loss(predictions, labels)

        # Populate each parameter's .grad, then let the optimizer use those
        # gradients to update the parameter values.
        loss.backward()
        optimizer.step()

        progress.set_description(desc=f'loss={loss.item()} batch_id={batch_idx}')




def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    #The wrapper "with torch.no_grad()" temporarily set all the requires_grad flag to false
    # i.e.  will make all the operations in the block have no gradients
    with torch.no_grad(): 
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:

# Baseline experiment: a fresh network trained with SGD plus momentum.
# NOTE(review): the Model2 run uses lr=0.01 while this run uses lr=0.011;
# confirm whether the difference is intentional, since it affects the comparison.
model = Net().to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.011, momentum=0.9)

# range(1, 2) yields only epoch 1: train once, then evaluate once.
for epoch in range(1, 2):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

loss=0.6129084229469299 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 26.85it/s]



Test set: Average loss: 0.5325, Accuracy: 8779/10000 (88%)



# **Model2** --Removing relu from last 3 layers

In [0]:
class Net(nn.Module):
    """Model2: the same layers as the baseline, without ReLU on the last three.

    The layer definitions are identical to the baseline network; only
    ``forward`` differs, applying conv5, conv6 and conv7 back to back with no
    activation between or after them.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Block 1: two padded 3x3 convolutions, then 2x2 max pooling.
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        # Block 2: two padded 3x3 convolutions, then 2x2 max pooling.
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        # Head: three unpadded 3x3 convolutions ending in 10 output channels.
        self.conv5 = nn.Conv2d(256, 512, 3)
        self.conv6 = nn.Conv2d(512, 1024, 3)
        self.conv7 = nn.Conv2d(1024, 10, 3)

    def forward(self, x):
        """Compute per-class log-probabilities for a batch of images.

        Args:
            x: float tensor of shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10); each row is a log-softmax over the ten
            digit classes.
        """
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
        # No ReLU after the 5th and 6th layers.
        x = self.conv6(self.conv5(x))
        # No ReLU after the last layer either, so class scores keep their sign.
        x = self.conv7(x)
        # Flatten (N, 10, 1, 1) into (N, 10): one score per class.
        x = x.view(-1, 10)
        # Passing dim explicitly avoids the deprecated implicit-dimension
        # choice; for this 2-D input the result is unchanged.
        return F.log_softmax(x, dim=1)

In [0]:

# Model2 experiment: a fresh instance of the network defined just above,
# trained with SGD plus momentum.
model2 = Net().to(device)
optimizer = optim.SGD(params=model2.parameters(), lr=0.01, momentum=0.9)

# range(1, 2) yields only epoch 1: train once, then evaluate once.
for epoch in range(1, 2):
    train(model2, device, train_loader, optimizer, epoch)
    test(model2, device, test_loader)



loss=2.301518440246582 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A
loss=2.301518440246582 batch_id=0:   0%|          | 1/469 [00:00<00:59,  7.87it/s][A
loss=2.3019204139709473 batch_id=1:   0%|          | 1/469 [00:00<00:59,  7.87it/s][A
loss=2.3030574321746826 batch_id=2:   0%|          | 1/469 [00:00<00:59,  7.87it/s][A
loss=2.3030574321746826 batch_id=2:   1%|          | 3/469 [00:00<00:50,  9.28it/s][A
loss=2.301675796508789 batch_id=3:   1%|          | 3/469 [00:00<00:50,  9.28it/s] [A
loss=2.299231767654419 batch_id=4:   1%|          | 3/469 [00:00<00:50,  9.28it/s][A
loss=2.299231767654419 batch_id=4:   1%|          | 5/469 [00:00<00:42, 10.89it/s][A
loss=2.2982213497161865 batch_id=5:   1%|          | 5/469 [00:00<00:42, 10.89it/s][A
loss=2.2991578578948975 batch_id=6:   1%|          | 5/469 [00:00<00:42, 10.89it/s][A
loss=2.2925329208374023 batch_id=7:   1%|          | 5/469 [00:00<00:42, 10.89it/s][A
loss=2.2925329208374023 batch_id=7:   2%|▏         | 


Test set: Average loss: 0.0589, Accuracy: 9800/10000 (98%)

