In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randint
import time
import utils

In [2]:
# Toy check of the flip semantics used for augmentation below:
# reversing dim 1 of a 3x3 grid mirrors every row left-to-right.
x = torch.arange(0, 9).view(3, -1)
print(x)
x = x.flip([1])
print(x)

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
tensor([[2, 1, 0],
        [5, 4, 3],
        [8, 7, 6]])


In [3]:
# Load the four pre-serialized tensors in one pass; the order of the paths
# matches the order of the names on the left-hand side.
train_data, train_label, test_data, test_label = (
    torch.load(path)
    for path in (
        '../../data/cifar/train_data.pt',
        '../../data/cifar/train_label.pt',
        '../../data/cifar/test_data.pt',
        '../../data/cifar/test_label.pt',
    )
)
print(train_data.size())
print(test_data.size())

# Mirrored copy of the training images: reverse dim 3 (the last axis).
# The labels are unchanged by mirroring, so the same tensor object is reused.
train_label_flip = train_label
train_data_flip = train_data.flip([3])
print(train_data_flip.size())

torch.Size([50000, 3, 32, 32])
torch.Size([10000, 3, 32, 32])
torch.Size([50000, 3, 32, 32])


In [4]:
# Pair the original tensors with their mirrored copies along a new leading
# axis of size 2.
# NOTE: this cell rebinds train_data / train_label. Re-running it without first
# re-running the loading cell fails, because the already-doubled tensors no
# longer match the size of train_data_flip / train_label_flip.
train_data = torch.stack([train_data, train_data_flip],0)
train_label = torch.stack([train_label, train_label_flip],0)
print(train_data.size())

# Merge the pair axis into the sample axis. The target sizes are derived from
# the tensors themselves instead of being hard-coded, so the cell keeps working
# if the dataset size or image shape changes.
train_data = train_data.view(-1, *train_data.shape[2:])
train_label = train_label.view(-1)
print(train_data.size())
print(train_label.size())

torch.Size([2, 50000, 3, 32, 32])
torch.Size([100000, 3, 32, 32])
torch.Size([100000])


In [5]:
# Show sample 0 next to sample 50000 (its counterpart in the second half of
# the stacked training set), followed by their labels, one item per line.
print(
    train_data[0],
    train_data[50000],
    train_label[0],
    train_label[50000],
    sep='\n',
)

tensor([[[0.2314, 0.1686, 0.1961,  ..., 0.6196, 0.5961, 0.5804],
         [0.0627, 0.0000, 0.0706,  ..., 0.4824, 0.4667, 0.4784],
         [0.0980, 0.0627, 0.1922,  ..., 0.4627, 0.4706, 0.4275],
         ...,
         [0.8157, 0.7882, 0.7765,  ..., 0.6275, 0.2196, 0.2078],
         [0.7059, 0.6784, 0.7294,  ..., 0.7216, 0.3804, 0.3255],
         [0.6941, 0.6588, 0.7020,  ..., 0.8471, 0.5922, 0.4824]],

        [[0.2431, 0.1804, 0.1882,  ..., 0.5176, 0.4902, 0.4863],
         [0.0784, 0.0000, 0.0314,  ..., 0.3451, 0.3255, 0.3412],
         [0.0941, 0.0275, 0.1059,  ..., 0.3294, 0.3294, 0.2863],
         ...,
         [0.6667, 0.6000, 0.6314,  ..., 0.5216, 0.1216, 0.1333],
         [0.5451, 0.4824, 0.5647,  ..., 0.5804, 0.2431, 0.2078],
         [0.5647, 0.5059, 0.5569,  ..., 0.7216, 0.4627, 0.3608]],

        [[0.2471, 0.1765, 0.1686,  ..., 0.4235, 0.4000, 0.4039],
         [0.0784, 0.0000, 0.0000,  ..., 0.2157, 0.1961, 0.2235],
         [0.0824, 0.0000, 0.0314,  ..., 0.1961, 0.1961, 0.

In [6]:
class convnet(nn.Module):
    """Convolutional classifier built from three residual stages and a head.

    Expects input of shape (N, 3, 32, 32) and returns (N, 10) class scores.
    Each of the first three stages is one channel-changing 3x3 convolution
    followed by three two-convolution units with an identity skip connection,
    then a 2x2 max-pool. The fourth stage is a single convolution plus pool,
    and the flattened 512x2x2 = 2048 features feed one linear layer.

    Layer attribute names and their registration order are part of the
    interface (state-dict keys, initialization order) and are kept as-is.
    """

    def __init__(self):
        super().__init__()
        # block 1 - 3x32x32 to 64x16x16
        self.conv1a = nn.Conv2d(3, 64, kernel_size = 3, padding = 1)
        self.conv1b = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1c = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1d = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1e = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1f = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.conv1g = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
        self.pool1 = nn.MaxPool2d(2,2)
        
        # block 2 - 64x16x16 to 128x8x8
        self.conv2a = nn.Conv2d(64, 128, kernel_size = 3, padding = 1)
        self.conv2b = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2c = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2d = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2e = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2f = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.conv2g = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
        self.pool2 = nn.MaxPool2d(2,2)
        
        # block 3 - 128x8x8 to 256x4x4
        self.conv3a = nn.Conv2d(128, 256, kernel_size = 3, padding = 1)
        self.conv3b = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3c = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3d = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3e = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3f = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.conv3g = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
        self.pool3 = nn.MaxPool2d(2,2)
        
        # block 4 - 256x4x4 to 512x2x2
        self.conv4a = nn.Conv2d(256, 512, kernel_size = 3, padding = 1)
        self.pool4 = nn.MaxPool2d(2,2)
        
        # linear layer - 2048 (= 512x2x2) features to 10 class scores
        self.linear1 = nn.Linear(2048, 10)

    @staticmethod
    def _residual_stage(x, stem, units):
        """Apply `stem` + ReLU, then each (first, second) conv pair with a skip.

        For every pair the update is
        ``skip = relu(second(relu(first(skip)))) + skip``,
        i.e. the ReLU is applied before the identity is added.
        """
        skip = F.relu(stem(x))
        for first, second in units:
            hidden = F.relu(first(skip))
            skip = F.relu(second(hidden)) + skip
        return skip
        
    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: float tensor of shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10) with unnormalized class scores.

        Raises:
            RuntimeError: if the spatial size of `x` does not produce exactly
                2048 features per sample (raised by the linear layer).
        """
        # block 1
        g1 = self._residual_stage(
            x, self.conv1a,
            ((self.conv1b, self.conv1c),
             (self.conv1d, self.conv1e),
             (self.conv1f, self.conv1g)))
        p1 = self.pool1(g1)
        
        # block 2
        g2 = self._residual_stage(
            p1, self.conv2a,
            ((self.conv2b, self.conv2c),
             (self.conv2d, self.conv2e),
             (self.conv2f, self.conv2g)))
        p2 = self.pool2(g2)
        
        # block 3
        g3 = self._residual_stage(
            p2, self.conv3a,
            ((self.conv3b, self.conv3c),
             (self.conv3d, self.conv3e),
             (self.conv3f, self.conv3g)))
        p3 = self.pool3(g3)
        
        # block 4
        a4 = F.relu(self.conv4a(p3))
        p4 = self.pool4(a4)
        
        # linear layer
        # Flatten per sample and keep the batch dimension fixed. The previous
        # view(-1, 2048) silently folded extra spatial features into the batch
        # dimension for inputs that were not 32x32; now the linear layer
        # raises on such a mismatch instead.
        p4 = p4.flatten(1)
        scores = self.linear1(p4)
        
        return scores

In [7]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA instead of failing at the first .to(device) call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network, report its size, then move its parameters to the device.
net = convnet()
utils.display_num_param(net)
net = net.to(device)

# Scalar mean / standard deviation over the whole training tensor; kept on the
# device because they are used to normalize every mini-batch.
mean = train_data.mean().to(device)
std = train_data.std().to(device)

There are 6219018 (6.22 million) parameters in this neural network


In [8]:
# Training configuration shared by the evaluation and training cells.
bs = 200        # mini-batch size
my_lr = 0.25    # initial SGD learning rate (halved at fixed epochs during training)

# Loss applied to the raw class scores returned by the network.
criterion = nn.CrossEntropyLoss()

In [9]:
def eval_on_test_set():
    """Print the error rate of the global `net` on the test set.

    Reads the notebook globals `test_data`, `test_label`, `bs`, `device`,
    `mean`, `std` and `net`. Runs without gradient tracking and returns
    nothing; the result is only printed.

    The test-set size is taken from `test_data` rather than hard-coded, and
    each batch error is weighted by its batch size so that a shorter final
    batch does not skew the average.

    NOTE(review): `net` is not switched to eval mode here. That is harmless
    while the model has no dropout / batch-norm layers; revisit if any are
    added.
    """

    num_test = test_data.size(0)
    weighted_error = 0.0

    with torch.no_grad():

        for i in range(0, num_test, bs):

            minibatch_data = test_data[i:i+bs].to(device)
            minibatch_label = test_label[i:i+bs].to(device)

            # same normalization as used for training
            inputs = (minibatch_data - mean)/std

            scores = net( inputs )

            # assumes utils.get_error returns the mean error rate of the
            # batch as a tensor - TODO confirm against utils
            error = utils.get_error(scores , minibatch_label)

            weighted_error += error.item() * minibatch_label.size(0)

    total_error = weighted_error/num_test
    print( 'test error  = ', total_error*100,'percent')

In [10]:
start = time.time()

# Epochs at whose start the learning rate is halved.
lr_decay_epochs = (4, 8, 10, 12)

# Derive the dataset size from the tensor instead of hard-coding it.
num_train = train_data.size(0)

# Plain SGD is configured without momentum, so it carries no state between
# steps; one optimizer is built once and only its learning rate is updated,
# rather than constructing a new optimizer every epoch.
optimizer = torch.optim.SGD( net.parameters() , lr=my_lr )

for epoch in range(15):

    # NOTE: my_lr is a notebook global and is decayed in place, so re-running
    # this cell resumes from the already-decayed learning rate (and from the
    # already-trained net).
    if epoch in lr_decay_epochs:
        my_lr = my_lr * 0.5
        for group in optimizer.param_groups:
            group['lr'] = my_lr

    running_loss = 0
    running_error = 0
    num_samples = 0

    shuffled_indices = torch.randperm(num_train)

    for count in range(0, num_train, bs):

        optimizer.zero_grad()

        indices = shuffled_indices[count:count+bs]
        minibatch_data = train_data[indices].to(device)
        minibatch_label = train_label[indices].to(device)

        # Normalize with the dataset-wide statistics. The inputs are not
        # flagged with requires_grad: only parameter gradients are needed,
        # and flagging them made autograd compute an unused input gradient.
        inputs = (minibatch_data - mean) / std

        scores = net( inputs )

        loss = criterion(scores , minibatch_label)

        loss.backward()

        optimizer.step()

        # compute some stats, weighted by batch size so that a shorter final
        # batch does not skew the epoch averages

        batch_size = minibatch_label.size(0)
        num_samples += batch_size

        with torch.no_grad():

            running_loss += loss.item() * batch_size

            # assumes utils.get_error returns the mean error rate of the
            # batch as a tensor - TODO confirm against utils
            error = utils.get_error(scores , minibatch_label)
            running_error += error.item() * batch_size

    # once the epoch is finished we divide the "running quantities"
    # by the number of samples seen

    total_loss = running_loss/num_samples
    total_error = running_error/num_samples
    elapsed_time = time.time() - start

    # after every epoch we display the stats
    # and compute the error rate on the test set

    print(' ')

    print('epoch=',epoch, '\t time=', elapsed_time,
          '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')

    eval_on_test_set()

 
epoch= 0 	 time= 91.86796355247498 	 loss= 1.6721508145332336 	 error= 61.28400065898896 percent
test error  =  43.980000734329224 percent
 
epoch= 1 	 time= 187.66155314445496 	 loss= 1.0292969111204147 	 error= 36.37100149393081 percent
test error  =  32.44000160694122 percent
 
epoch= 2 	 time= 283.37278842926025 	 loss= 0.7318337965011597 	 error= 25.433001852035524 percent
test error  =  26.860002279281613 percent
 
epoch= 3 	 time= 379.1510376930237 	 loss= 0.5592886995077133 	 error= 19.353001940250397 percent
test error  =  24.250001668930054 percent
 
epoch= 4 	 time= 475.0072100162506 	 loss= 0.32506784704327585 	 error= 11.280001831054689 percent
test error  =  18.840001463890076 percent
 
epoch= 5 	 time= 570.9699511528015 	 loss= 0.23536198130249977 	 error= 8.150002181529999 percent
test error  =  20.39000165462494 percent
 
epoch= 6 	 time= 666.9752659797668 	 loss= 0.16077808641642333 	 error= 5.479001724720002 percent
test error  =  20.32000243663788 percent
 
epoch=