In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randint
import time
import utils

In [2]:
train_data=torch.load('../../data/cifar/train_data.pt')
train_label=torch.load('../../data/cifar/train_label.pt')
test_data=torch.load('../../data/cifar/test_data.pt')
test_label=torch.load('../../data/cifar/test_label.pt')
print(train_data.size())
print(test_data.size())

torch.Size([50000, 3, 32, 32])
torch.Size([10000, 3, 32, 32])


In [3]:
class convnet(nn.Module):
    def __init__(self):
        super().__init__()
        # block 1 - 3x32x32 to __x16x16
        self.conv1a = nn.Conv2d(3, 64, kernel_size = 3, padding = 1)
        self.conv1b = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1c = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1d = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1e = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1f = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1g = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.pool1 = nn.MaxPool2d(2,2)
        
        # block 2 - __x16x16 to __x8x8
        self.conv2a = nn.Conv2d(64, 128, kernel_size = 3, padding = 1)
        self.conv2b = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2c = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2d = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2e = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2f = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2g = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.pool2 = nn.MaxPool2d(2,2)
        
        # block 3 - __x8x8 to __x4x4
        self.conv3a = nn.Conv2d(128, 256, kernel_size = 3, padding = 1)
        self.conv3b = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3c = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3d = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3e = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3f = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3g = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.pool3 = nn.MaxPool2d(2,2)
        
        # block 4 - __x4x4 to __x2x2
        self.conv4a = nn.Conv2d(256, 512, kernel_size = 3, padding = 1)
        self.pool4 = nn.MaxPool2d(2,2)
        
        # linear layers - 
        self.linear1 = nn.Linear(2048, 10)
        
    def forward(self, x):
        # block 1
        a1 = self.conv1a(x)
        a1 = F.relu(a1)
        b1 = self.conv1b(a1)
        b1 = F.relu(b1)
        c1 = self.conv1c(b1) 
        c1 = F.relu(c1)
        d1 = self.conv1d(c1)
        d1 = F.relu(d1)
        e1 = self.conv1e(d1) 
        e1 = F.relu(e1)
        f1 = self.conv1f(e1)
        f1 = F.relu(f1)
        g1 = self.conv1g(f1)
        g1 = F.relu(g1)
        p1 = self.pool1(g1)
        
        # block 2
        a2 = self.conv2a(p1)
        a2 = F.relu(a2)
        b2 = self.conv2b(a2)
        b2 = F.relu(b2)
        c2 = self.conv2c(b2) 
        c2 = F.relu(c2)
        d2 = self.conv2d(c2)
        d2 = F.relu(d2)
        e2 = self.conv2e(d2) 
        e2 = F.relu(e2)
        f2 = self.conv2f(e2)
        f2 = F.relu(f2)
        g2 = self.conv2g(f2)
        g2 = F.relu(g2)
        p2 = self.pool2(g2)
        
        # block 3
        a3 = self.conv3a(p2)
        a3 = F.relu(a3)
        b3 = self.conv3b(a3)
        b3 = F.relu(b3)
        c3 = self.conv3c(b3) 
        c3 = F.relu(c3)
        d3 = self.conv3d(c3)
        d3 = F.relu(d3)
        e3 = self.conv3e(d3) 
        e3 = F.relu(e3)
        f3 = self.conv3f(e3)
        f3 = F.relu(f3)
        g3 = self.conv3g(f3) 
        g3 = F.relu(g3)
        p3 = self.pool3(g3)
        
        # block 4
        a4 = self.conv4a(p3)
        a4 = F.relu(a4)
        p4 = self.pool4(a4)
        
        # linear layers
        p4 = p4.view(-1,2048)
        scores = self.linear1(p4)
        
        return scores

In [4]:
device = torch.device("cuda")
net = convnet()
utils.display_num_param(net)
net = net.to(device)
mean= train_data.mean()
std= train_data.std()
mean= mean.to(device)
std= std.to(device)

There are 6219018 (6.22 million) parameters in this neural network


In [5]:
criterion = nn.CrossEntropyLoss()
my_lr = 0.25

bs = 200

In [6]:
def eval_on_test_set():

    running_error=0
    num_batches=0
    
    with torch.no_grad():

        for i in range(0,10000,bs):

            minibatch_data =  test_data[i:i+bs]
            minibatch_label= test_label[i:i+bs]
            
            minibatch_data = minibatch_data.to(device)
            minibatch_label = minibatch_label.to(device)

            inputs = (minibatch_data - mean)/std

            scores=net( inputs ) 

            error = utils.get_error(scores , minibatch_label)

            running_error += error.item()

            num_batches+=1


    total_error = running_error/num_batches
    print( 'test error  = ', total_error*100,'percent')

In [None]:
start = time.time()

for epoch in range(20):
    
    #if epoch == 3 or epoch == 5 or epoch == 7:
        #my_lr = my_lr * 0.5
        
    optimizer=torch.optim.SGD( net.parameters() , lr=my_lr )
    
    running_loss=0
    running_error=0
    num_batches=0
    
    shuffled_indices=torch.randperm(50000)
 
    for count in range(0,50000,bs):
    
        optimizer.zero_grad()
        
        indices=shuffled_indices[count:count+bs]
        minibatch_data =  train_data[indices]
        minibatch_label= train_label[indices]
        
        minibatch_data = minibatch_data.to(device)
        minibatch_label = minibatch_label.to(device)

        inputs = minibatch_data
        inputs = inputs - mean
        inputs = inputs / std

        inputs.requires_grad_(True)

        scores=net( inputs ) 

        loss =  criterion(scores , minibatch_label) 
        
        loss.backward()

        optimizer.step()
        
        
        # compute some stats
        
        num_batches+=1
        
        with torch.no_grad():
            
            running_loss += loss.item()

            error = utils.get_error(scores , minibatch_label)
            running_error += error.item() 
    
    
    # once the epoch is finished we divide the "running quantities"
    # by the number of batches
    
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed_time = time.time() - start
    
    # every 1 epoch we display the stats 
    # and compute the error rate on the test set  
    
    if epoch % 1 == 0: 
    
        print(' ')
        
        print('epoch=',epoch, '\t time=', elapsed_time,
              '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
        
        eval_on_test_set()

 
epoch= 0 	 time= 43.21305012702942 	 loss= 2.303063786506653 	 error= 90.18799996376038 percent
test error  =  89.99999964237213 percent
 
epoch= 1 	 time= 88.94764518737793 	 loss= 2.3030718269348145 	 error= 90.21799983978272 percent
test error  =  89.99999964237213 percent
 
epoch= 2 	 time= 135.21955466270447 	 loss= 2.3029800119400026 	 error= 90.14199998378754 percent
test error  =  89.99999976158142 percent
 
epoch= 3 	 time= 182.02578234672546 	 loss= 2.303045184135437 	 error= 90.04199974536895 percent
test error  =  89.99999964237213 percent
 
epoch= 4 	 time= 228.85160875320435 	 loss= 2.302917819976807 	 error= 89.96799995899201 percent
test error  =  89.99999964237213 percent
 
epoch= 5 	 time= 275.78335094451904 	 loss= 2.3029757843017578 	 error= 90.00399987697601 percent
test error  =  90.0 percent
 
epoch= 6 	 time= 322.63572096824646 	 loss= 2.3029232196807863 	 error= 89.95000002384185 percent
test error  =  89.99999976158142 percent
 
epoch= 7 	 time= 369.51752066