# HWK7 PROBLEM 6

### Your goal is to train a convnet with multiple layers on SVHN and to obtain the lowest error rate possible on the test set. Try various hyperparameter (number of layers, hidden_sizes, etc...). Good luck!

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randint
import utils
import time

In [2]:
train_data=torch.load('../../data/svhn/train_data.pt')
train_label=torch.load('../../data/svhn/train_label.pt')
test_data=torch.load('../../data/svhn/test_data.pt')
test_label=torch.load('../../data/svhn/test_label.pt')
print(train_data.size())
print(test_data.size())

torch.Size([73257, 3, 32, 32])
torch.Size([26032, 3, 32, 32])


In [3]:
class convnet(nn.Module):
    def __init__(self):
        super().__init__()
        # block 1 - 3x32x32 to __x16x16
        self.conv1a = nn.Conv2d(3, 64, kernel_size = 3, padding = 1)
        self.conv1b = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.pool1 = nn.MaxPool2d(2,2)
        
        # block 2 - __x16x16 to __x8x8
        self.conv2a = nn.Conv2d(64, 128, kernel_size = 3, padding = 1)
        self.conv2b = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.pool2 = nn.MaxPool2d(2,2)
        
        # block 3 - __x8x8 to __x2x2
        self.conv3a = nn.Conv2d(128, 256, kernel_size = 3, padding = 1)
        self.conv3b = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.pool3 = nn.MaxPool2d(2,2)
        
        # block 4 - __x4x4 to __x2x2
        self.conv4a = nn.Conv2d(256, 512, kernel_size = 3, padding = 1)
        self.conv4b = nn.Conv2d(512, 512, kernel_size = 3, padding = 1)
        self.pool4 = nn.MaxPool2d(2,2)
        
        # linear layers - 
        self.linear1 = nn.Linear(2048, 4096)
        self.linear2 = nn.Linear(4096, 10)
        
    def forward(self, x):
        # block 1
        x = self.conv1a(x)
        x = F.relu(x)
        x = self.conv1b(x)
        x = F.relu(x)
        x = self.pool1(x)
        
        # block 2
        x = self.conv2a(x)
        x = F.relu(x)
        x = self.conv2b(x)
        x = F.relu(x)
        x = self.pool2(x)
        
        # block 3
        x = self.conv3a(x)
        x = F.relu(x)
        x = self.conv3b(x)
        x = F.relu(x)
        x = self.pool3(x)
        
        # block 4
        x = self.conv4a(x)
        x = F.relu(x)
        x = self.pool4(x)
        
        # linear layers
        x = x.view(-1,2048)
        x = self.linear1(x)
        x = F.relu(x)
        scores = self.linear2(x)
        
        return scores

In [4]:
device = torch.device("cuda")
net = convnet()
utils.display_num_param(net)
net = net.to(device)
mean= train_data.mean()
std= train_data.std()
mean= mean.to(device)
std= std.to(device)

There are 13119050 (13.12 million) parameters in this neural network


In [5]:
criterion = nn.CrossEntropyLoss()
my_lr = 0.265

bs = 200

In [6]:
def eval_on_test_set():

    running_error=0
    num_batches=0
    
    with torch.no_grad():

        for i in range(0,26032,bs):

            minibatch_data =  test_data[i:i+bs]
            minibatch_label= test_label[i:i+bs]
            
            minibatch_data = minibatch_data.to(device)
            minibatch_label = minibatch_label.to(device)

            inputs = (minibatch_data - mean)/std

            scores=net( inputs ) 

            error = utils.get_error(scores , minibatch_label)

            running_error += error.item()

            num_batches+=1


    total_error = running_error/num_batches
    print( 'test error  = ', total_error*100,'percent')

In [7]:
start = time.time()

for epoch in range(20):
    
    if epoch == 3 or epoch == 5 or epoch == 7:
        my_lr = my_lr * 0.4
        
    optimizer=torch.optim.SGD( net.parameters() , lr=my_lr )
    
    running_loss=0
    running_error=0
    num_batches=0
    
    shuffled_indices=torch.randperm(73257)
 
    for count in range(0,73257,bs):
    
        optimizer.zero_grad()
        
        indices=shuffled_indices[count:count+bs]
        minibatch_data =  train_data[indices]
        minibatch_label= train_label[indices]
        
        minibatch_data = minibatch_data.to(device)
        minibatch_label = minibatch_label.to(device)

        inputs = minibatch_data
        inputs = inputs - mean
        inputs = inputs / std

        inputs.requires_grad_(True)

        scores=net( inputs ) 

        loss =  criterion(scores , minibatch_label) 
        
        loss.backward()

        optimizer.step()
        
        
        # compute some stats
        
        num_batches+=1
        
        with torch.no_grad():
            
            running_loss += loss.item()

            error = utils.get_error(scores , minibatch_label)
            running_error += error.item() 
    
    
    # once the epoch is finished we divide the "running quantities"
    # by the number of batches
    
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed_time = time.time() - start
    
    # every 1 epoch we display the stats 
    # and compute the error rate on the test set  
    
    if epoch % 1 == 0: 
    
        print(' ')
        
        print('epoch=',epoch, '\t time=', elapsed_time,
              '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
        
        eval_on_test_set()

 
epoch= 0 	 time= 19.95359992980957 	 loss= 2.2384801333216946 	 error= 81.13138820235021 percent
test error  =  80.39790137123516 percent
 
epoch= 1 	 time= 42.28229522705078 	 loss= 1.9175393907835439 	 error= 68.25646618406519 percent
test error  =  45.66793969569316 percent
 
epoch= 2 	 time= 64.80646753311157 	 loss= 0.5037982820327666 	 error= 15.480282992043351 percent
test error  =  11.696566561706193 percent
 
epoch= 3 	 time= 87.10442280769348 	 loss= 0.24765929226492017 	 error= 7.230318875663612 percent
test error  =  8.47900964831578 percent
 
epoch= 4 	 time= 109.59703469276428 	 loss= 0.20305261255447482 	 error= 5.762298912703178 percent
test error  =  8.097330259002803 percent
 
epoch= 5 	 time= 132.00041341781616 	 loss= 0.14879961253958437 	 error= 3.987071380953048 percent
test error  =  6.608780149285121 percent
 
epoch= 6 	 time= 154.41909050941467 	 loss= 0.12751339025775157 	 error= 3.299061105426716 percent
test error  =  6.45610858465879 percent
 
epoch= 7 	 