In [1]:
import torch
import matplotlib.pyplot as plt
import numpy as np

#### dataset

In [2]:
import torchvision 

import torchvision.transforms as transforms 

In [3]:
# CIFAR-10 training split, read from the local data directory (downloading is
# switched off); every sample is passed through transforms.ToTensor().
trainset = torchvision.datasets.CIFAR10(
    root='../data_place/cifar',
    train=True,
    download=False,
    transform=transforms.ToTensor(),
)


In [4]:
# Human-readable names of the ten classes. In the dataset the labels are
# stored as numbers (class 0, 1, 2, ...); the position of a name in this
# tuple is that number.
class_names = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

#### dataloader

In [5]:
# Wrap the dataset in a DataLoader that hands out mini-batches of 4 samples.
# shuffle=True => the samples are served in random order; without shuffling
# the whole training course can sometimes become repetitive.
trainloader = torch.utils.data.DataLoader(
    trainset,  # dataset
    batch_size=4,
    shuffle=True,
)

In [6]:
# iterator for dataloader: each next() call on it yields one batch
data_iter = iter(trainloader)

In [7]:
# one batch: take the next item from the iterator and unpack it into the
# image tensor and the matching label tensor
images, labels = next(data_iter)

In [8]:
import torch.nn as nn

#### super.__init__()    

Inheritance: a child class has everything its parent has. If the child class defines something with the same name, it overrides the version in the parent class. If we still want the parent's behaviour in that case, we call `super().that()` inside the child's definition.  

Why `super(class_name, self)`? That is the Python 2 way of writing it. It also works in Python 3, where the shorter `super()` does the same thing inside a method.




### LeNet - model

In [9]:
class LeNet(nn.Module):
    """LeNet-style CNN: 3-channel 32x32 images in, 10 raw class scores out.

    The layers live in two sequential blocks (convolutional and fully
    connected) because a flattening step is needed between them. No softmax
    is applied; ``forward`` returns the output of the last linear layer.
    """

    def __init__(self):
        """Build the convolutional block and the fully connected block."""
        super().__init__()

        # Convolutional part. Shape comments assume 32x32 inputs.
        conv_layers = [
            nn.Conv2d(3, 6, 5),         # (N,  3, 32, 32) -> (N,  6, 28, 28)
            nn.Tanh(),
            nn.AvgPool2d(2, stride=2),  # (N,  6, 28, 28) -> (N,  6, 14, 14)
            nn.Conv2d(6, 16, 5),        # (N,  6, 14, 14) -> (N, 16, 10, 10)
            nn.Tanh(),
            nn.AvgPool2d(2, stride=2),  # (N, 16, 10, 10) -> (N, 16,  5,  5)
        ]
        self.cnn_block = nn.Sequential(*conv_layers)

        # Fully connected part, applied to the flattened conv output
        # (16 * 5 * 5 = 400 features per image).
        dense_layers = [
            nn.Linear(400, 120),        # (N, 400) -> (N, 120)
            nn.Tanh(),
            nn.Linear(120, 84),         # (N, 120) -> (N, 84)
            nn.Tanh(),
            nn.Linear(84, 10),          # (N, 84)  -> (N, 10)
        ]
        self.fc_block = nn.Sequential(*dense_layers)

    def forward(self, XX):
        """Run a batch through both blocks and return the (N, 10) scores.

        Args:
            XX: input batch; the layer sizes above expect (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10) holding the raw output of the last
            linear layer for every image.
        """
        features = self.cnn_block(XX)

        # Flatten by reshaping: keep the first index (N) as it is and
        # collapse everything else into a single second index via -1.
        num_images = features.size(0)
        flattened = features.view(num_images, -1)

        return self.fc_block(flattened)

With some clever use of a lambda function, the flattening operation can also be moved into a sequential description, so that the whole network is written as a single sequential.

But this is **not** recommended — stack only trivial things in a sequential.

Softmax is not used in the last layer; the output of the final linear layer is returned directly.

For inference (i.e. given an image, get its class), softmax is not essential.  
Take the final (linear) values of all output neurons; the largest of them gives the corresponding class.

Softmax is a monotonic function: the largest value in the input remains the largest value after softmax.

Softmax is needed to turn the outputs into a probability distribution, which is what the cross-entropy loss works on. The loss function used here is cross entropy.

thus softmax is there in the loss - and hence in the gradient computation (backward pass)

but not really needed for inference. - can do it with the direct(linear) outputs of neurons itself.

'linear' -> the neuron output is the linear combination itself - not activation on top of it. (or in other words the activation fn is y=x)


**softmax in BP**  

included in the cross entropy loss function definition(nn.CrossEntropyLoss())  
  

- Thus not included in inference. but comes in training through the cross entropy loss definition (thus, not included in the n/w definition part.)

#### inference

In [10]:
# create the network; no training has happened yet at this point
lenet_model = LeNet()
# forward pass on the sample batch taken from the dataloader above
out = lenet_model(images)

In [11]:
# show the raw network outputs for the batch (no softmax applied)
print(out)

tensor([[-0.0619,  0.0635,  0.0217,  0.0890,  0.0754, -0.0188,  0.0037, -0.0875,
         -0.0802,  0.0213],
        [-0.0591,  0.0777,  0.0317,  0.0925,  0.0928, -0.0116, -0.0075, -0.0704,
         -0.0902,  0.0196],
        [-0.0589,  0.0703,  0.0235,  0.0897,  0.0741, -0.0393, -0.0123, -0.0940,
         -0.0719,  0.0212],
        [-0.0640,  0.0663,  0.0257,  0.0966,  0.0867, -0.0256,  0.0015, -0.0763,
         -0.0897,  0.0236]], grad_fn=<AddmmBackward0>)


10 values for each of the 4 images — both positive and negative values, since softmax is not applied.   
The largest value gives the inferred class.

In [12]:
# prediction = index of the largest output score of each image.
# The outputs are raw scores, not probabilities, but softmax is monotonic,
# so this index is also the argmax of the probability distribution.

# detach() gives the same values without the autograd history; it is the
# supported replacement for the legacy `.data` attribute.
max_values, pred_class = torch.max(out.detach(), dim=1)
# dim=1 => axis on which the max value is to be found (the class axis).
# torch.max returns the max values, and also their indexes.
print(pred_class)

tensor([3, 4, 3, 3])


# Training LeNet

#### Data

**instantiate a new dataloader for a different batchsize, etc.**

In [13]:
batch_size = 128 # power of 2

# datasets: same local folder and same transform for both splits;
# only the `train` flag differs.
# training data
trainset = torchvision.datasets.CIFAR10(
    root='../data_place/cifar',
    train=True,
    download=False,
    transform=transforms.ToTensor(),
)
# test data (train = false)
testset = torchvision.datasets.CIFAR10(
    root='../data_place/cifar',
    train=False,
    download=False,
    transform=transforms.ToTensor(),
)

# dataloaders:
# training batches are shuffled
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
)
# shuffle = false for the test data - there is no point in shuffling it,
# as it is only used for evaluating.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
)

#### utils

In [14]:
# compute accuracy
def evaluation(dataloader, model=None):
    """Return the classification accuracy, in percent, over a dataloader.

    The forward passes run under ``torch.no_grad()`` so no autograd graph is
    built, and the model is put in eval mode for the duration of the call;
    its previous train/eval mode is restored afterwards.

    Args:
        dataloader: iterable yielding ``(input_batch, label_batch)`` pairs.
        model: network to evaluate. When omitted, the notebook-level
            ``lenet_model`` is used, so ``evaluation(loader)`` keeps working.

    Returns:
        ``100 * correct / total`` over every sample the dataloader yields.

    Raises:
        ZeroDivisionError: if the dataloader yields no samples.
    """
    if model is None:
        model = lenet_model  # fall back to the global model

    total, correct = 0, 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for input_batch, label_batch in dataloader:  # iterating through batches
                output = model(input_batch)  # model output: one row of scores per sample
                _, pred = torch.max(output, 1)  # index of the max along dimension 1

                total += label_batch.size(0)
                # the comparison gives booleans; their sum is the number of hits
                correct += (pred == label_batch).sum().item()
    finally:
        # leave the model in whatever mode the caller had it in
        model.train(was_training)

    return 100 * correct / total

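Isn't `with torch.no_grad()` needed in evaluation? It does not change the predictions, so the accuracy is the same either way, but it is recommended: inside `torch.no_grad()` autograd does not record the forward pass, which saves memory and time.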

#### loss, optimizer

In [15]:
import torch.optim as optim


# Optimizer: every optimiser needs to be given the parameters it should
# update; parameters() hands over all of them. No hyperparameters
# (alpha, beta, ...) are specified, so the default values are used.
opt = optim.Adam(lenet_model.parameters())

# Loss function: cross entropy. Softmax is part of this loss, which is why
# the network itself does not apply it.
loss_fn = nn.CrossEntropyLoss()

### Train

In [None]:
%%time

#training 


loss_step_arr = []   # loss after each step (parameter update)
loss_epoch_arr = []  # mean of the step losses of each epoch
epochs = 16

for epoch in range(epochs):

    # make sure the model is in training mode at the start of every epoch
    lenet_model.train()

    epoch_loss_sum = 0.0
    steps_in_epoch = 0

    for input_batch, label_batch in trainloader:  # batches

        # reset gradient - done at the start of every step, otherwise the
        # gradients of the previous step would be accumulated
        opt.zero_grad()

        # forward pass
        output = lenet_model(input_batch)

        # loss
        loss = loss_fn(output, label_batch)

        # compute gradient
        loss.backward()

        # update parameters
        opt.step()

        # log loss
        step_loss = loss.item()
        loss_step_arr.append(step_loss)
        epoch_loss_sum += step_loss
        steps_in_epoch += 1

    # Record the average over the whole epoch; the loss of the last
    # mini-batch alone is a noisy stand-in for "the loss of the epoch".
    if steps_in_epoch:
        loss_epoch_arr.append(epoch_loss_sum / steps_in_epoch)

    # do evaluation - after each epoch (on both train, test)
    # epoch + 1 so that the counter runs from 1 to `epochs`
    print('Epoch: %d/%d, Test acc: %0.2f, Train acc: %0.2f' % (epoch + 1,
                                                               epochs,
                                                               evaluation(testloader),
                                                               evaluation(trainloader)))
    

# plot loss vs epoch - after training
fig, ax = plt.subplots()
# x values start at 1 so that the first point is epoch 1
ax.plot(range(1, len(loss_epoch_arr) + 1), loss_epoch_arr)
ax.set_xlabel('Epoch')
ax.set_ylabel('Training loss')
ax.set_title('LeNet training loss per epoch')
plt.show()