# HWK5 PROBLEM 2

### Your goal is to train a neural net with multiple layers on Fashion-MNIST and to obtain the lowest possible error rate on the test set. Try various hyperparameters (number of layers, hidden sizes, etc.). Good luck!

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randint
import time
import utils

In [2]:
# Load the pre-saved Fashion-MNIST tensors from disk (paths are relative to this notebook).
# NOTE(review): later cells iterate over 60000 training / 10000 test samples and
# reshape every sample to 784 values, so the tensors are presumably 60000x28x28
# and 10000x28x28 — confirm against the data files.
train_data=torch.load('../../data/fashion-mnist/train_data.pt')
train_label=torch.load('../../data/fashion-mnist/train_label.pt')
test_data=torch.load('../../data/fashion-mnist/test_data.pt')
test_label=torch.load('../../data/fashion-mnist/test_label.pt')

In [3]:
class one_layer_net(nn.Module):
    """Bias-free perceptron with one hidden layer: linear -> ReLU -> linear.

    Despite the class name, two linear maps are stacked; the first feeds a
    ReLU and the second produces the raw (un-normalized) class scores.

    Args:
        input_size: number of features of each input vector.
        hid1: width of the hidden layer.
        output_size: number of scores produced per input vector.
    """

    def __init__(self, input_size, hid1, output_size):
        super().__init__()
        # The layers are created in the same order as before so parameter
        # ordering and random initialization are unchanged.
        self.linear_layer = nn.Linear(in_features=input_size, out_features=hid1, bias=False)
        self.linear_layer2 = nn.Linear(in_features=hid1, out_features=output_size, bias=False)

    def forward(self, x):
        """Return the class scores for a batch ``x`` of shape (batch, input_size)."""
        hidden = F.relu(self.linear_layer(x))
        return self.linear_layer2(hidden)

In [4]:
# Build the 784 -> 300 -> 10 network and print its layer summary.
net = one_layer_net(input_size=784, hid1=300, output_size=10)
print(net)

one_layer_net(
  (linear_layer): Linear(in_features=784, out_features=300, bias=False)
  (linear_layer2): Linear(in_features=300, out_features=10, bias=False)
)


In [5]:
# Mini-batch size shared by the training and evaluation loops.
bs = 200

# Cross-entropy loss applied to the raw scores produced by the network.
criterion = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over every parameter of the network.
optimizer = optim.SGD(params=net.parameters(), lr=0.01)

In [6]:
def eval_on_test_set():
    """Evaluate the global ``net`` on the whole test set and print the error rate.

    Reads the notebook globals ``net``, ``test_data``, ``test_label`` and ``bs``.
    The number of test samples is taken from ``test_data`` instead of being
    hard-coded, and a final mini-batch smaller than ``bs`` is handled: each
    batch error is weighted by the batch size so the result is a per-sample
    average. When every batch has exactly ``bs`` samples this equals the plain
    mean of the batch errors.

    Returns:
        float: the test error as a fraction (the printed value is this times 100).
    """

    num_samples = test_data.size(0)
    weighted_error = 0.0

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():

        for i in range(0, num_samples, bs):

            minibatch_data = test_data[i:i+bs]
            minibatch_label = test_label[i:i+bs]

            # The last slice may hold fewer than ``bs`` samples.
            batch_size = minibatch_data.size(0)

            inputs = minibatch_data.view(batch_size, 784)

            scores = net(inputs)

            # assumes utils.get_error returns the mean error over the batch — TODO confirm
            error = utils.get_error(scores, minibatch_label)

            weighted_error += error.item() * batch_size

    total_error = weighted_error / num_samples
    print( 'test error  = ', total_error*100 ,'percent')

    return total_error

In [7]:
# Train the network with mini-batch SGD and report statistics every 10 epochs.
start = time.time()

# Number of training samples, read from the data rather than hard-coded.
num_train = train_data.size(0)

for epoch in range(200):

    running_loss = 0
    running_error = 0
    num_samples_seen = 0

    # Visit the training samples in a fresh random order each epoch.
    shuffled_indices = torch.randperm(num_train)

    for count in range(0, num_train, bs):

        # forward and backward pass

        optimizer.zero_grad()

        indices = shuffled_indices[count:count+bs]
        minibatch_data = train_data[indices]
        minibatch_label = train_label[indices]

        # The last mini-batch may hold fewer than ``bs`` samples.
        batch_size = minibatch_data.size(0)

        # The inputs are not marked as requiring grad: only the network
        # parameters are optimized, so a gradient with respect to the inputs
        # would be computed and then discarded.
        inputs = minibatch_data.view(batch_size, 784)

        scores = net(inputs)

        loss = criterion(scores, minibatch_label)

        loss.backward()

        optimizer.step()

        # compute some stats, weighting each batch by its size so that the
        # epoch figures are per-sample averages

        with torch.no_grad():

            running_loss += loss.item() * batch_size

            # assumes utils.get_error returns the mean error over the batch — TODO confirm
            error = utils.get_error(scores, minibatch_label)
            running_error += error.item() * batch_size

        num_samples_seen += batch_size

    # once the epoch is finished we divide the "running quantities"
    # by the number of samples processed

    total_loss = running_loss / num_samples_seen
    total_error = running_error / num_samples_seen
    elapsed_time = time.time() - start

    # every 10 epochs we display the stats
    # and compute the error rate on the test set

    if epoch % 10 == 0:

        print(' ')

        print('epoch=',epoch, '\t time=', elapsed_time,
              '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')

        eval_on_test_set()
               

 
epoch= 0 	 time= 1.6905860900878906 	 loss= 1.6027797293663024 	 error= 40.9166667064031 percent
test error  =  33.72999966144562 percent
 
epoch= 10 	 time= 17.108583688735962 	 loss= 0.5398589029908181 	 error= 18.34499994913737 percent
test error  =  19.390000104904175 percent
 
epoch= 20 	 time= 32.513890981674194 	 loss= 0.4710832819342613 	 error= 16.121666530768078 percent
test error  =  17.229999899864197 percent
 
epoch= 30 	 time= 49.40365123748779 	 loss= 0.43798155744870504 	 error= 14.986666321754456 percent
test error  =  16.919999480247498 percent
 
epoch= 40 	 time= 68.85407829284668 	 loss= 0.41573214530944824 	 error= 14.25499975681305 percent
test error  =  15.729999780654907 percent
 
epoch= 50 	 time= 85.83088421821594 	 loss= 0.39783754080533984 	 error= 13.633333245913187 percent
test error  =  15.319999694824219 percent
 
epoch= 60 	 time= 103.84478306770325 	 loss= 0.38266659547885257 	 error= 13.14333345492681 percent
test error  =  14.769999623298647 percent

In [None]:
# choose a picture at random (the index range follows the actual test-set size)
idx = randint(0, test_data.size(0) - 1)
im = test_data[idx]

# display the picture
utils.show(im)

# feed it to the net and display the confidence scores;
# this is inference only, so no autograd graph is built
with torch.no_grad():
    scores = net(im.view(1, 784))
    probs = F.softmax(scores, dim=1)

# NOTE(review): the helper name refers to MNIST — confirm it labels the
# Fashion-MNIST classes as intended.
utils.show_prob_mnist(probs)