# SIX LAYER CONVNET -- DEMO

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randint
import utils
import time

### With or without GPU?

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU so
# that every later `.to(device)` call still works on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

### Load the CIFAR dataset from disk

In [None]:
# Load the saved CIFAR data and labels, one (data, label) pair per split.
train_data, train_label = (
    torch.load('../../data/cifar/train_data.pt'),
    torch.load('../../data/cifar/train_label.pt'),
)
test_data, test_label = (
    torch.load('../../data/cifar/test_data.pt'),
    torch.load('../../data/cifar/test_label.pt'),
)

# Sanity check: show the size of the training and test data, one per line.
print(train_data.size(), test_data.size(), sep='\n')

### Compute the average pixel intensity over the whole training set and all channels

In [None]:
# One scalar: the average over every element of the training data.
# It is used later to centre the network inputs.
mean = torch.mean(train_data)
print(mean)

### Compute standard deviation

In [None]:
# One scalar: the standard deviation over every element of the training data.
# It is used later to rescale the network inputs.
std = torch.std(train_data)
print(std)

### Make a six layer convnet class. 

In [None]:
class six_layer_convnet(nn.Module):
    """Convnet with four convolutional layers and two linear layers.

    Two blocks of (conv, ReLU, conv, ReLU, 4x4 max-pool) are followed by a
    two-layer classifier. For a 3 x 32 x 32 input the feature maps go
    3 x 32 x 32 -> 50 x 8 x 8 -> 100 x 2 x 2, which is flattened to 400
    features and mapped to 800 hidden units and then to 10 output scores.
    """

    def __init__(self):
        super().__init__()

        # 5x5 kernels with padding 2 leave the spatial size unchanged, so only
        # the pooling layers shrink the feature maps.
        same_size = dict(kernel_size=5, padding=2)

        # block 1:   3 x 32 x 32  -->  50 x 8 x 8
        self.conv1a = nn.Conv2d(3, 50, **same_size)
        self.conv1b = nn.Conv2d(50, 50, **same_size)
        self.pool1 = nn.MaxPool2d(kernel_size=4, stride=4)

        # block 2:   50 x 8 x 8  -->  100 x 2 x 2
        self.conv2a = nn.Conv2d(50, 100, **same_size)
        self.conv2b = nn.Conv2d(100, 100, **same_size)
        self.pool2 = nn.MaxPool2d(kernel_size=4, stride=4)

        # classifier:   100 x 2 x 2  -->  400  -->  800  -->  10
        self.linear1 = nn.Linear(400, 800)
        self.linear2 = nn.Linear(800, 10)

    def forward(self, x):
        """Return the raw (pre-softmax) class scores for a batch of images."""
        # block 1:   3 x 32 x 32  -->  50 x 8 x 8
        x = F.relu(self.conv1a(x))
        x = F.relu(self.conv1b(x))
        x = self.pool1(x)

        # block 2:   50 x 8 x 8  -->  100 x 2 x 2
        x = F.relu(self.conv2a(x))
        x = F.relu(self.conv2b(x))
        x = self.pool2(x)

        # flatten each 100 x 2 x 2 feature map into a 400-vector, then classify
        x = x.view(-1, 400)
        hidden = F.relu(self.linear1(x))
        return self.linear2(hidden)

### Build the net. How many parameters in total? (the three layer net had 2 million parameters)

In [None]:
# Instantiate the model and print its layer structure.
net = six_layer_convnet()
print(net)

# Presumably reports the number of trainable parameters -- see utils.
utils.display_num_param(net)

### Send the weights of the network to the selected device (as well as the mean and std)

In [None]:
# Move the model onto the selected device.
net = net.to(device)

# The normalisation statistics must live on the same device as the
# mini-batches they are combined with.
mean, std = mean.to(device), std.to(device)

### Choose the criterion, learning rate, and batch size.

In [None]:
# Loss applied to the raw class scores produced by the network.
criterion = nn.CrossEntropyLoss()

# Initial learning rate and mini-batch size.
my_lr, bs = 0.25, 128

### Function to evaluate the network on the test set

In [None]:
def eval_on_test_set():
    """Print the classification error rate of ``net`` over the whole test set.

    Reads the module-level ``net``, ``test_data``, ``test_label``, ``mean``,
    ``std``, ``bs`` and ``device``. The test set is processed in mini-batches
    of at most ``bs`` samples with gradient tracking disabled. Returns nothing;
    the result is printed as a percentage.
    """
    num_samples = test_data.size(0)
    weighted_error = 0.0

    with torch.no_grad():

        for first in range(0, num_samples, bs):

            minibatch_data = test_data[first:first + bs].to(device)
            minibatch_label = test_label[first:first + bs].to(device)

            # same normalisation as the one applied to the training inputs
            inputs = (minibatch_data - mean) / std

            scores = net(inputs)

            # NOTE(review): utils.get_error is assumed to return the fraction
            # of misclassified samples in the batch -- confirm in utils.
            error = utils.get_error(scores, minibatch_label)

            # Weight by the actual batch size: the last batch is smaller
            # whenever bs does not divide the test-set size, and a plain
            # average over batches would over-weight it.
            weighted_error += error.item() * minibatch_label.size(0)

    total_error = weighted_error / num_samples
    print( 'error rate on test set =', total_error*100 ,'percent')

### Do 13 passes through the training set. Divide the learning rate by 2 at epochs 5, 8 and 11.

In [None]:
start = time.time()

# Number of training samples, read from the data instead of being hard-coded.
num_train = train_data.size(0)

# range(1, 14) yields epochs 1..13, i.e. 13 passes over the training set.
for epoch in range(1, 14):

    # Halve the learning rate at the start of epochs 5, 8 and 11.
    if epoch in (5, 8, 11):
        my_lr = my_lr / 2

    # Plain SGD keeps no state between steps, so rebuilding the optimizer each
    # epoch is a simple way to apply the current learning rate.
    optimizer = torch.optim.SGD(net.parameters(), lr=my_lr)

    # Sample-weighted sums of the per-batch loss and error.
    running_loss = 0.0
    running_error = 0.0

    # Visit the training samples in a fresh random order every epoch.
    shuffled_indices = torch.randperm(num_train)

    for count in range(0, num_train, bs):

        # FORWARD AND BACKWARD PASS

        optimizer.zero_grad()

        indices = shuffled_indices[count:count + bs]
        minibatch_data = train_data[indices].to(device)
        minibatch_label = train_label[indices].to(device)

        # Normalise with the training-set statistics. The inputs are not
        # marked as requiring gradients: only the network parameters are
        # optimised, so a gradient for the input would be wasted work.
        inputs = (minibatch_data - mean) / std

        scores = net(inputs)

        loss = criterion(scores, minibatch_label)

        loss.backward()

        optimizer.step()

        # COMPUTE STATS

        # Weight each batch by its size: the last batch is smaller whenever bs
        # does not divide the training-set size, and a plain average over
        # batches would over-weight it.
        # NOTE(review): assumes criterion averages over the batch (the default
        # for nn.CrossEntropyLoss) and that utils.get_error returns the
        # fraction of misclassified samples in the batch -- confirm.
        batch_size = minibatch_label.size(0)
        with torch.no_grad():
            running_loss += loss.item() * batch_size
            error = utils.get_error(scores, minibatch_label)
            running_error += error.item() * batch_size

    # AVERAGE STATS THEN DISPLAY
    total_loss = running_loss / num_train
    total_error = running_error / num_train
    elapsed = (time.time() - start) / 60

    print('epoch=',epoch, '\t time=', elapsed,'min', '\t lr=', my_lr  ,'\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
    eval_on_test_set()
    print(' ')
    
    

### Choose an image at random from the test set and see how good/bad the predictions are

In [None]:
# choose a picture at random (index range taken from the test set itself)
idx = randint(0, test_data.size(0) - 1)
im = test_data[idx]

# display the picture
utils.show(im)

# send to device, rescale, and view as a batch of 1
im = im.to(device)
im = (im - mean) / std
im = im.view(1, 3, 32, 32)

# feed it to the net and display the confidence scores;
# this is inference only, so gradient tracking is disabled
with torch.no_grad():
    scores = net(im)
    probs = F.softmax(scores, dim=1)

utils.show_prob_cifar(probs.cpu())