# neural network

### Import

In [1]:
import sys, os

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randint
import time

In [3]:
from src.neural_network import utils
from src.neural_network import mlp

### Setting train & test dataset

In [4]:
# Resolve the dataset root; the .pt files below are read relative to it.
data_path = utils.check_cifar_dataset_exists()

# Load the four CIFAR tensors in one pass (order matches the names on the left).
train_data, train_label, test_data, test_label = (
    torch.load(data_path + relative_path)
    for relative_path in (
        'cifar/train_data.pt',
        'cifar/train_label.pt',
        'cifar/test_data.pt',
        'cifar/test_label.pt',
    )
)

# Sanity check: show the shape of the training images.
print(train_data.size())

torch.Size([50000, 3, 32, 32])


### Setting the net

In [5]:
# 1. compute device: CPU here; use torch.device("cuda") instead to run on a GPU
device = torch.device("cpu")
print(device)

# 2. layer widths, in order: input, hidden layers..., output
#    (3072 = 3*32*32, one flattened image)
all_layers = (3072, 500, 500, 10)

# 3. learning rate
lr = 0.01

# 4. minibatch size
bs = 200

# 5. number of epochs
epoches = 21

# Derived quantities used by the training and evaluation cells.
num_of_train_data = train_data.size(0)
num_of_test_data = test_data.size(0)
in_layer_size = all_layers[0]

cpu


### Net

In [6]:
# Instantiate the MLP, unpacking all_layers into the constructor's positional arguments.
net = mlp.three_layer_net(*all_layers)

# Show the layer structure, then report the parameter count.
print(net)
utils.display_num_param(net)

three_layer_net(
  (layer1): Linear(in_features=3072, out_features=500, bias=False)
  (layer2): Linear(in_features=500, out_features=500, bias=False)
  (layer3): Linear(in_features=500, out_features=10, bias=False)
)
There are 1791000 (1.79 million) parameters in this neural network


### GPU

In [7]:
# Move the network onto the configured device (CPU unless `device` is changed to "cuda").
net = net.to(device)

### Criterion & optimizer

In [8]:
# Cross-entropy over the raw class scores produced by the net.
criterion = nn.CrossEntropyLoss()

# Plain SGD over every parameter of the net, with the configured learning rate.
optimizer = optim.SGD(net.parameters(), lr=lr)

### Helper function for training

In [9]:
def eval_on_test_set():
    """Print the classification error rate of ``net`` over the whole test set.

    Reads the notebook globals ``net``, ``test_data``, ``test_label``,
    ``num_of_test_data``, ``bs``, ``in_layer_size`` and ``device``.

    The test set is walked in consecutive minibatches of at most ``bs``
    samples. Each batch's error is weighted by the number of samples it
    actually contains, so a shorter final batch neither crashes the reshape
    nor skews the average.
    """
    weighted_error = 0.0
    num_samples = 0

    # Evaluation only needs the forward pass; skip building the autograd graph.
    with torch.no_grad():
        for i in range(0, num_of_test_data, bs):

            # extract the minibatch and send it to the device
            minibatch_data = test_data[i:i+bs].to(device)
            minibatch_label = test_label[i:i+bs].to(device)

            # flatten each sample; use the real batch size, which may be < bs
            # for the final batch
            batch_size = minibatch_data.size(0)
            inputs = minibatch_data.view(batch_size, in_layer_size)

            # feed it to the network
            scores = net(inputs)

            # assumes utils.get_error returns the batch's mean error rate as a
            # one-element tensor -- TODO confirm against utils
            error = utils.get_error(scores, minibatch_label)

            # accumulate the error weighted by the number of samples
            weighted_error += error.item() * batch_size
            num_samples += batch_size

    # compute error rate on the full test set
    total_error = weighted_error / num_samples

    print( 'error rate on test set =', total_error*100 ,'percent')

### Training

In [None]:
# Train for `epoches` passes over the shuffled training set with minibatch SGD.
# Every 20th epoch (starting at epoch 0) the running training loss/error is
# printed, followed by the error rate on the test set.
start = time.time()

for epoch in range(epoches):

    # per-epoch accumulators, weighted by the number of samples in each batch
    running_loss = 0.0
    running_error = 0.0
    num_samples = 0

    # visit the training samples in a fresh random order each epoch
    shuffled_indices = torch.randperm(num_of_train_data)

    for count in range(0, num_of_train_data, bs):

        # Set the gradients to zeros
        optimizer.zero_grad()

        # create a minibatch and send it to the device
        indices = shuffled_indices[count:count+bs]
        minibatch_data = train_data[indices].to(device)
        minibatch_label = train_label[indices].to(device)

        # flatten each sample; use the real batch size, which may be < bs
        # for the final batch
        batch_size = minibatch_data.size(0)
        inputs = minibatch_data.view(batch_size, in_layer_size)

        # forward the minibatch through the net
        # (no requires_grad_() on the inputs: the parameters already track
        # gradients, and a gradient w.r.t. the input is never used)
        scores = net(inputs)

        # Compute the average of the losses of the data points in the minibatch
        loss = criterion(scores, minibatch_label)

        # backward pass to compute dL/dU, dL/dV and dL/dW
        loss.backward()

        # do one step of stochastic gradient descent: U=U-lr(dL/dU), V=V-lr(dL/dV), ...
        optimizer.step()

        # START COMPUTING STATS

        # add the loss of this batch to the running loss
        running_loss += loss.item() * batch_size

        # compute the error made on this batch and add it to the running error
        # assumes utils.get_error returns the batch's mean error rate as a
        # one-element tensor -- TODO confirm against utils
        error = utils.get_error(scores.detach(), minibatch_label)
        running_error += error.item() * batch_size

        num_samples += batch_size

    # compute stats for the full training set
    total_loss = running_loss / num_samples
    total_error = running_error / num_samples
    elapsed = time.time() - start

    if epoch%20 == 0:
        print('epoch=',epoch, '\t time=', elapsed, '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
        eval_on_test_set()
        print(' ')


epoch= 0 	 time= 7.019979000091553 	 loss= 2.2400060024261474 	 error= 82.84200015068053 percent
error rate on test set = 76.74999988079071 percent
 


### Evaluation on test data

In [None]:
# choose a picture at random
idx = randint(0, num_of_test_data-1)
im = test_data[idx]

# display the picture
utils.show(im)

# feed it to the net and display the confidence scores;
# inference only, so no autograd graph is needed
with torch.no_grad():
    scores = net(im.view(1, in_layer_size).to(device))
    probs = F.softmax(scores, dim=1)

utils.show_prob_cifar(probs.cpu())