# Lab 03 : LeNet5 architecture - exercise

In [None]:
# For Google Colaboratory: when the notebook runs inside Colab, mount Google
# Drive and change the working directory to this lab's folder.
import sys, os
if 'google.colab' in sys.modules:
    # Mount Google Drive under /content/gdrive
    from google.colab import drive
    drive.mount('/content/gdrive')
    path_to_file = '/content/gdrive/My Drive/CS4243_codes/codes/labs_lecture08/lab03_lenet5'
    print(path_to_file)
    # Move to the lab directory on Google Drive
    os.chdir(path_to_file)
    # IPython shell escape (not plain Python): print the current working directory
    !pwd

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from random import randint
import utils
import time

### With or without GPU?

It is recommended to run this code on GPU:<br> 
* Time for 1 epoch on CPU : 96 sec (1.62 min)<br> 
* Time for 1 epoch on GPU : 2 sec w/ GeForce GTX 1080 Ti <br>

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs (more slowly) on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

### Download the MNIST dataset 

In [None]:
# check_mnist_dataset_exists comes from the local utils module; its return
# value is used below as a path prefix (presumably it also fetches MNIST when
# missing, as the section title suggests -- confirm in utils).
from utils import check_mnist_dataset_exists
data_path=check_mnist_dataset_exists()

# Load the pre-saved training and test tensors
train_data=torch.load(data_path+'mnist/train_data.pt')
train_label=torch.load(data_path+'mnist/train_label.pt')
test_data=torch.load(data_path+'mnist/test_data.pt')
test_label=torch.load(data_path+'mnist/test_label.pt')

# Print the tensor shapes as a sanity check
print(train_data.size())
print(test_data.size())

### Compute the average pixel intensity over the whole training set and all channels

In [None]:
# Scalar mean over every element of the training tensor (no dim argument given);
# used later to normalize the network inputs.
mean= train_data.mean()

print(mean)

### Compute standard deviation

In [None]:
# Scalar standard deviation over every element of the training tensor;
# used later to normalize the network inputs.
std= train_data.std()

print(std)

### Make a LeNet5 convnet class. 

In [None]:
class LeNet5_convnet(nn.Module):
    """LeNet5-style convolutional network for 1 x 28 x 28 images.

    Two (3x3 convolution -> ReLU -> 2x2 max-pooling) stages are followed by
    two fully connected layers. ``forward`` returns raw class scores of
    shape (batch, 10); no softmax is applied.
    """

    def __init__(self):

        super().__init__()

        # CL1:   28 x 28  -->    50 x 28 x 28
        self.conv1 = nn.Conv2d(1, 50, kernel_size=3, padding=1)

        # MP1: 50 x 28 x 28 -->    50 x 14 x 14
        self.pool1 = nn.MaxPool2d(2, 2)

        # CL2:   50 x 14 x 14  -->    100 x 14 x 14
        self.conv2 = nn.Conv2d(50, 100, kernel_size=3, padding=1)

        # MP2: 100 x 14 x 14 -->    100 x 7 x 7
        self.pool2 = nn.MaxPool2d(2, 2)

        # LL1:   100 x 7 x 7 = 4900 -->  100
        self.linear1 = nn.Linear(4900, 100)

        # LL2:   100  -->  10
        self.linear2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return class scores for a batch ``x`` of shape (batch, 1, 28, 28)."""

        # CL1:   28 x 28  -->    50 x 28 x 28
        x = self.conv1(x)
        x = torch.relu(x)

        # MP1: 50 x 28 x 28 -->    50 x 14 x 14
        x = self.pool1(x)

        # CL2:   50 x 14 x 14  -->    100 x 14 x 14
        x = self.conv2(x)
        x = torch.relu(x)

        # MP2: 100 x 14 x 14 -->    100 x 7 x 7
        x = self.pool2(x)

        # LL1:   100 x 7 x 7 = 4900  -->  100
        # Flatten each feature map stack into one vector per sample
        x = x.view(-1, 4900)
        x = self.linear1(x)
        x = torch.relu(x)

        # LL2:   100  -->  10
        x = self.linear2(x)

        return x

### Build the net. How many parameters in total?

In [None]:
# Instantiate the network, print its layers and its total parameter count
net = LeNet5_convnet()
print(net)
utils.display_num_param(net)

### Send the weights of the network to the GPU (as well as the mean and std)

In [None]:
# Move the network parameters to the selected device
net = net.to(device)

# The normalization constants must live on the same device as the inputs
mean = mean.to(device)

std = std.to(device)

### Choose the criterion, batch size, and initial learning rate. Select the following:
* batch size =128
* initial learning rate =0.25

In [None]:
# Cross-entropy loss on the raw class scores produced by the network
criterion = nn.CrossEntropyLoss()

# Initial learning rate
my_lr = 0.25

# Mini-batch size
bs = 128

### Function to evaluate the network on the test set

In [None]:
def eval_on_test_set():
    """Evaluate ``net`` on the whole test set and print the error rate.

    Reads the notebook globals ``net``, ``test_data``, ``test_label``,
    ``bs``, ``device``, ``mean`` and ``std``. Prints the average of the
    per-mini-batch errors returned by ``utils.get_error``; returns nothing.
    """

    running_error=0
    num_batches=0

    # Take the size from the tensor instead of hard-coding 10000
    num_test = test_data.size(0)

    # Evaluation needs no gradients: skip building the autograd graph
    with torch.no_grad():

        for i in range(0, num_test, bs):

            minibatch_data =  test_data[i:i+bs].unsqueeze(dim=1)
            minibatch_label= test_label[i:i+bs]

            minibatch_data=minibatch_data.to(device)
            minibatch_label=minibatch_label.to(device)

            # Same normalization as the one used during training
            inputs = (minibatch_data - mean)/std

            scores=net( inputs )

            error = utils.get_error( scores , minibatch_label)

            running_error += error.item()

            num_batches+=1

    # NOTE(review): every batch gets equal weight, so a smaller final batch is
    # over-weighted if utils.get_error returns a per-batch rate -- confirm.
    total_error = running_error/num_batches
    print( 'error rate on test set =', total_error*100 ,'percent')

### Do 30 passes through the training set. Divide the learning rate by 2 every 5 epochs.

In [None]:
start=time.time()

num_epochs = 30
num_train = train_data.size(0)

# range(1, num_epochs+1) so that exactly num_epochs passes are made
for epoch in range(1, num_epochs+1):

    # Halve the learning rate every 5 epochs
    if not epoch%5:
        my_lr = my_lr / 2

    # Re-create the optimizer each epoch so it picks up the current learning rate
    optimizer=torch.optim.SGD( net.parameters() , lr=my_lr )

    running_loss=0
    running_error=0
    num_batches=0

    # Visit the training samples in a fresh random order every epoch
    shuffled_indices=torch.randperm(num_train)

    for count in range(0, num_train, bs):

        # FORWARD AND BACKWARD PASS

        optimizer.zero_grad()

        indices=shuffled_indices[count:count+bs]
        minibatch_data =  train_data[indices].unsqueeze(dim=1)
        minibatch_label=  train_label[indices]

        minibatch_data=minibatch_data.to(device)
        minibatch_label=minibatch_label.to(device)

        # Normalize with the training-set statistics. Gradients are only
        # needed for the network parameters, not for the inputs.
        inputs = (minibatch_data - mean)/std

        scores=net( inputs )

        loss =  criterion( scores , minibatch_label)

        loss.backward()

        optimizer.step()


        # COMPUTE STATS

        running_loss += loss.detach().item()

        error = utils.get_error( scores.detach() , minibatch_label)
        running_error += error.item()

        num_batches+=1


    # AVERAGE STATS THEN DISPLAY
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed = (time.time()-start)/60

    print('epoch=',epoch, '\t time=', elapsed,'min', '\t lr=', my_lr  ,'\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
    eval_on_test_set()
    print(' ')
    
    

### Choose an image at random from the test set and see how good/bad the predictions are

In [None]:
# choose a picture at random (index range taken from the tensor itself)
idx=randint(0, test_data.size(0)-1)
im=test_data[idx]

# display the picture
utils.show(im)

# send to device, rescale, and view as a batch of 1
im = im.to(device)
im= (im-mean) / std
im=im.view(1,28,28).unsqueeze(dim=1)

# feed it to the net and display the confidence scores;
# inference only, so no autograd graph is needed
with torch.no_grad():
    scores =  net(im)
    probs= torch.softmax(scores, dim=1)
utils.show_prob_mnist(probs.cpu())