In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.transforms import transforms
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler

## Data Loading

In [None]:
import torch
import torchvision
from torchvision.transforms import transforms

In [None]:
from torch.optim.lr_scheduler import MultiStepLR

In [None]:

# Per-channel statistics handed to Normalize by both pipelines below.
channel_mean = (0.4914, 0.4822, 0.4465)
channel_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random 32x32 crop (after 4-pixel padding), random
# horizontal flip, tensor conversion, then per-channel normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Test pipeline: no augmentation, only tensor conversion and normalization.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# CIFAR-10 train/test splits rooted at ./data, each with its own transform.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train,
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test,
)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz
Files already downloaded and verified


In [None]:
# Batches of 128 samples; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=128,
    shuffle=False,
)

## Model Architecture

In [None]:
class CNN4(nn.Module):
    """Four-stage convolutional classifier that returns 10 log-probabilities.

    Each stage is conv -> ReLU -> conv -> ReLU -> 2x2 max-pool -> dropout(0.25),
    with channel widths 32, 64, 128 and 256. The pooled features are flattened
    per sample and passed through three fully connected layers; the last one is
    followed by a log-softmax over the class dimension.

    ``linear1`` expects 256*2*2 features per sample, which is what four 2x2
    poolings leave from a 3-channel 32x32 input.

    Note: ``batchn1`` .. ``batchn4`` are registered as sub-modules but are never
    called in ``forward``. They are kept so the printed module listing and the
    ``state_dict`` keys stay unchanged.
    """

    def __init__(self):
        super(CNN4, self).__init__()
        # Stage 1: 3 -> 32 channels.
        self.conv11 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv12 = nn.Conv2d(32, 32, 3, padding=1)
        self.batchn1 = nn.BatchNorm2d(32)  # unused in forward
        # Stage 2: 32 -> 64 channels.
        self.conv21 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv22 = nn.Conv2d(64, 64, 3, padding=1)
        self.batchn2 = nn.BatchNorm2d(64)  # unused in forward
        # Stage 3: 64 -> 128 channels.
        self.conv31 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv32 = nn.Conv2d(128, 128, 3, padding=1)
        self.batchn3 = nn.BatchNorm2d(128)  # unused in forward
        # Stage 4: 128 -> 256 channels.
        self.conv41 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv42 = nn.Conv2d(256, 256, 3, padding=1)
        self.batchn4 = nn.BatchNorm2d(256)  # unused in forward
        # Shared (parameter-free) pooling and dropout modules.
        self.pool = nn.MaxPool2d(2, 2)
        self.drop = nn.Dropout(0.25)
        # Classifier head.
        self.linear1 = nn.Linear(256*2*2, 256)
        self.linear2 = nn.Linear(256, 128)
        self.linear3 = nn.Linear(128, 10)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch of shape (N, 3, H, W). H and W must reduce to 2x2
                after the four poolings (e.g. 32x32); otherwise ``linear1``
                raises a ``RuntimeError``.

        Returns:
            Tensor of shape (N, 10) holding log-probabilities.
        """
        x = F.relu(self.conv11(x))
        x = self.drop(self.pool(F.relu(self.conv12(x))))
        x = F.relu(self.conv21(x))
        x = self.drop(self.pool(F.relu(self.conv22(x))))
        x = F.relu(self.conv31(x))
        x = self.drop(self.pool(F.relu(self.conv32(x))))
        x = F.relu(self.conv41(x))
        x = self.drop(self.pool(F.relu(self.conv42(x))))
        # Flatten per sample. The previous view(-1, 256*2*2) silently moved
        # surplus features into the batch dimension for other input sizes.
        x = x.flatten(1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.softmax(self.linear3(x))
        return x

In [None]:
# Instantiate the network and print its registered sub-modules.
cnn = CNN4()
print(cnn)

CNN4(
  (conv11): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv12): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv21): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv22): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv31): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv32): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv41): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv42): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_st

In [None]:
# Loss function. CrossEntropyLoss applies log-softmax itself; the model defined
# above already ends in LogSoftmax, and log-softmax of log-probabilities leaves
# them unchanged, so the loss value is unaffected by the extra application.
loss_function = nn.CrossEntropyLoss()

# Use the GPU when one is available; otherwise stay on the CPU instead of
# failing inside .cuda().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cnn.to(device)
cnn.train()

CNN4(
  (conv11): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv12): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv21): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv22): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv31): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv32): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv41): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv42): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_st

In [None]:
# Number of passes over the training loader made by the training loop.
num_epochs = 100

In [None]:
# Mini-batch size (same value the DataLoaders are built with).
batch_size = 128

# Per-step histories filled in by the training loop.
train_loss = []
train_accu = []

# Adam with weight decay. SGD alternative kept for reference:
#   optim.SGD(cnn.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5)
optimizer = optim.Adam(
    params=cnn.parameters(),
    lr=0.001,
    weight_decay=1e-5,
)

In [None]:
#scheduler = MultiStepLR(optimizer, milestones=[75, 100], gamma=0.1)

In [None]:
# Training loop: one optimizer step per mini-batch, recording the loss and the
# batch accuracy of every step in train_loss / train_accu.

# Take the device from the model itself so batches always land where the
# parameters live (GPU or CPU).
device = next(cnn.parameters()).device
# Make sure dropout is active even if this cell is re-run after evaluation.
cnn.train()

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        X, y = data
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        outputs = cnn(X)
        loss = loss_function(outputs, y)
        loss.backward()
        optimizer.step()
        # (learning-rate scheduler intentionally not stepped here)

        # Read the scalar loss once and reuse it.
        loss_value = loss.item()
        train_loss.append(loss_value)
        running_loss += loss_value

        # Index of the largest log-probability per sample.
        predictions = outputs.detach().max(1)[1]
        # Divide by the real number of samples in this batch: the last batch of
        # an epoch can be smaller than the nominal batch size, and dividing by
        # the nominal size under-reported its accuracy.
        accuracy = predictions.eq(y).sum().item() / y.size(0) * 100
        train_accu.append(accuracy)
        if i % 10000 == 0:
            print('Epoch: {}\tTrain Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(epoch, i, loss_value, accuracy))

Epoch: 0	Train Step: 0	Loss: 2.300	Accuracy: 10.938
Epoch: 1	Train Step: 0	Loss: 1.673	Accuracy: 36.719
Epoch: 2	Train Step: 0	Loss: 1.488	Accuracy: 46.094
Epoch: 3	Train Step: 0	Loss: 1.151	Accuracy: 60.938
Epoch: 4	Train Step: 0	Loss: 1.014	Accuracy: 57.812
Epoch: 5	Train Step: 0	Loss: 0.840	Accuracy: 70.312
Epoch: 6	Train Step: 0	Loss: 0.851	Accuracy: 62.500
Epoch: 7	Train Step: 0	Loss: 0.818	Accuracy: 71.875
Epoch: 8	Train Step: 0	Loss: 0.897	Accuracy: 66.406
Epoch: 9	Train Step: 0	Loss: 1.030	Accuracy: 63.281
Epoch: 10	Train Step: 0	Loss: 0.915	Accuracy: 64.062
Epoch: 11	Train Step: 0	Loss: 0.568	Accuracy: 80.469
Epoch: 12	Train Step: 0	Loss: 0.820	Accuracy: 74.219
Epoch: 13	Train Step: 0	Loss: 0.693	Accuracy: 75.000
Epoch: 14	Train Step: 0	Loss: 0.815	Accuracy: 69.531
Epoch: 15	Train Step: 0	Loss: 0.814	Accuracy: 74.219
Epoch: 16	Train Step: 0	Loss: 0.640	Accuracy: 78.125
Epoch: 17	Train Step: 0	Loss: 0.689	Accuracy: 77.344
Epoch: 18	Train Step: 0	Loss: 0.692	Accuracy: 75.781
Epo

In [None]:
def save_predictions(file, y):
    """Store the predictions ``y`` in ``file`` using ``np.save``.

    Args:
        file: Destination passed straight through to ``np.save``.
        y: Predictions to write.
    """
    np.save(file, arr=y)

In [54]:
# Evaluate on the test set: collect every predicted class index in `final` and
# count the correct predictions.
cnn.eval()
# Take the device from the model so the batches follow it (GPU or CPU).
device = next(cnn.parameters()).device
correct = 0
final = []
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = cnn(images)
        # Index of the largest log-probability per sample.
        predictions = outputs.max(1)[1]
        final.extend(predictions.cpu().tolist())
        # .item() keeps `correct` a plain Python int. Accumulating a tensor made
        # the percentage below follow tensor integer-division rules, which can
        # truncate it to a whole number on older PyTorch releases.
        correct += predictions.eq(labels).sum().item()

print('Test set accuracy: {:.2f}%'.format(100.0 * correct / len(testloader.dataset)))

Test set accuracy: 87.00%


# Model Architecture Derivation

**First**

A simple architecture was used for the first test of the model:
three convolutional layers, each followed by max-pooling, and two fully connected layers.

class CNN2(nn.Module):
    """Baseline classifier: three conv + max-pool stages and two linear layers.

    Channel widths are 16, 32 and 64. ``linear1`` expects 64*4*4 features per
    sample, which is what three 2x2 poolings leave from a 3-channel 32x32
    input. The output is the raw result of the last linear layer (no softmax).
    """

    def __init__(self):
        super(CNN2, self).__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Classifier head.
        self.linear1 = nn.Linear(64*4*4, 512)
        self.linear2 = nn.Linear(512, 10)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch of shape (N, 3, H, W). H and W must reduce to 4x4
                after the three poolings (e.g. 32x32); otherwise ``linear1``
                raises a ``RuntimeError``.

        Returns:
            Tensor of shape (N, 10) with one unnormalized score per class.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten per sample. The previous view(-1, 64*4*4) silently moved
        # surplus features into the batch dimension for other input sizes.
        x = x.flatten(1)
        x = F.relu(self.linear1(x))
        x = self.linear2(x)
        return x

Got an accuracy of 54%

**Second**

Added more convolutional layers and introduced dropout of 0.25 after each pair of convolutional layers

Using stochastic gradient descent with momentum and weight decay 

class CNN3(nn.Module):
    """Three-stage convolutional classifier that returns 10 log-probabilities.

    Each stage is conv -> ReLU -> conv -> ReLU -> 2x2 max-pool -> dropout(0.25),
    with channel widths 32, 64 and 128, followed by three linear layers and a
    log-softmax over the class dimension. ``linear1`` expects 128*4*4 features
    per sample, which is what three 2x2 poolings leave from a 3-channel 32x32
    input.
    """

    def __init__(self):
        super(CNN3, self).__init__()
        # Convolutional feature extractor.
        self.conv11 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv12 = nn.Conv2d(32, 32, 3, padding=1)
        self.conv21 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv22 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv31 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv32 = nn.Conv2d(128, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.drop = nn.Dropout(0.25)
        # Classifier head.
        self.linear1 = nn.Linear(128*4*4, 512)
        self.linear2 = nn.Linear(512, 256)
        self.linear3 = nn.Linear(256, 10)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch of shape (N, 3, H, W). H and W must reduce to 4x4
                after the three poolings (e.g. 32x32); otherwise ``linear1``
                raises a ``RuntimeError``.

        Returns:
            Tensor of shape (N, 10) holding log-probabilities.
        """
        x = F.relu(self.conv11(x))
        x = self.drop(self.pool(F.relu(self.conv12(x))))
        x = F.relu(self.conv21(x))
        x = self.drop(self.pool(F.relu(self.conv22(x))))
        x = F.relu(self.conv31(x))
        x = self.drop(self.pool(F.relu(self.conv32(x))))
        # Flatten per sample. The previous view(-1, 128*4*4) silently moved
        # surplus features into the batch dimension for other input sizes.
        x = x.flatten(1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        # Name the class dimension explicitly; omitting dim relies on a
        # deprecated implicit choice and emits a warning.
        return F.log_softmax(x, dim=1)

Got an accuracy of 75%

**Third**

Still increasing the convolutional layers and now using a deeper network for the fully connected part



**Fourth**

Experimented with various batch sizes and learning rates.

The accuracy ranged between 75% and 80%

Then tried to introduce batch normalization 

Did not achieve the expected results

**Fifth**

Changed the architecture: instead of stacking plain convolutional layers,

each block is a convolution followed by an activation, then a second convolution and activation, followed by dropout.

This architecture should extract better features from the images, and dropout should help the model generalize better.




Tried to introduce a learning rate schedule for stochastic gradient descent

Training still required a lot of epochs to achieve above 80% accuracy

class CNN4(nn.Module):
    """Four-stage convolutional classifier that returns 10 log-probabilities.

    Each stage is conv -> ReLU -> conv -> ReLU -> 2x2 max-pool -> dropout(0.25),
    with channel widths 32, 64, 128 and 256. The pooled features are flattened
    per sample and passed through three fully connected layers; the last one is
    followed by a log-softmax over the class dimension.

    ``linear1`` expects 256*2*2 features per sample, which is what four 2x2
    poolings leave from a 3-channel 32x32 input.

    Note: ``batchn1`` .. ``batchn4`` are registered as sub-modules but are never
    called in ``forward``. They are kept so the printed module listing and the
    ``state_dict`` keys stay unchanged.
    """

    def __init__(self):
        super(CNN4, self).__init__()
        # Stage 1: 3 -> 32 channels.
        self.conv11 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv12 = nn.Conv2d(32, 32, 3, padding=1)
        self.batchn1 = nn.BatchNorm2d(32)  # unused in forward
        # Stage 2: 32 -> 64 channels.
        self.conv21 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv22 = nn.Conv2d(64, 64, 3, padding=1)
        self.batchn2 = nn.BatchNorm2d(64)  # unused in forward
        # Stage 3: 64 -> 128 channels.
        self.conv31 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv32 = nn.Conv2d(128, 128, 3, padding=1)
        self.batchn3 = nn.BatchNorm2d(128)  # unused in forward
        # Stage 4: 128 -> 256 channels.
        self.conv41 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv42 = nn.Conv2d(256, 256, 3, padding=1)
        self.batchn4 = nn.BatchNorm2d(256)  # unused in forward
        # Shared (parameter-free) pooling and dropout modules.
        self.pool = nn.MaxPool2d(2, 2)
        self.drop = nn.Dropout(0.25)
        # Classifier head.
        self.linear1 = nn.Linear(256*2*2, 256)
        self.linear2 = nn.Linear(256, 128)
        self.linear3 = nn.Linear(128, 10)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch of shape (N, 3, H, W). H and W must reduce to 2x2
                after the four poolings (e.g. 32x32); otherwise ``linear1``
                raises a ``RuntimeError``.

        Returns:
            Tensor of shape (N, 10) holding log-probabilities.
        """
        x = F.relu(self.conv11(x))
        x = self.drop(self.pool(F.relu(self.conv12(x))))
        x = F.relu(self.conv21(x))
        x = self.drop(self.pool(F.relu(self.conv22(x))))
        x = F.relu(self.conv31(x))
        x = self.drop(self.pool(F.relu(self.conv32(x))))
        x = F.relu(self.conv41(x))
        x = self.drop(self.pool(F.relu(self.conv42(x))))
        # Flatten per sample. The previous view(-1, 256*2*2) silently moved
        # surplus features into the batch dimension for other input sizes.
        x = x.flatten(1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.softmax(self.linear3(x))
        return x

**Sixth**

Finally decided to use Adam optimizer with a weight decay

The batch size is decided to be 128

Decided to train for 80 epochs

Got an accuracy of 87%

Batch normalization was not used; only dropout of 0.25 was applied after each convolutional block



**Final Architecture**

class CNN4(nn.Module):
    """Four-stage convolutional classifier that returns 10 log-probabilities.

    Each stage is conv -> ReLU -> conv -> ReLU -> 2x2 max-pool -> dropout(0.25),
    with channel widths 32, 64, 128 and 256. The pooled features are flattened
    per sample and passed through three fully connected layers; the last one is
    followed by a log-softmax over the class dimension.

    ``linear1`` expects 256*2*2 features per sample, which is what four 2x2
    poolings leave from a 3-channel 32x32 input.

    Note: ``batchn1`` .. ``batchn4`` are registered as sub-modules but are never
    called in ``forward``. They are kept so the printed module listing and the
    ``state_dict`` keys stay unchanged.
    """

    def __init__(self):
        super(CNN4, self).__init__()
        # Stage 1: 3 -> 32 channels.
        self.conv11 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv12 = nn.Conv2d(32, 32, 3, padding=1)
        self.batchn1 = nn.BatchNorm2d(32)  # unused in forward
        # Stage 2: 32 -> 64 channels.
        self.conv21 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv22 = nn.Conv2d(64, 64, 3, padding=1)
        self.batchn2 = nn.BatchNorm2d(64)  # unused in forward
        # Stage 3: 64 -> 128 channels.
        self.conv31 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv32 = nn.Conv2d(128, 128, 3, padding=1)
        self.batchn3 = nn.BatchNorm2d(128)  # unused in forward
        # Stage 4: 128 -> 256 channels.
        self.conv41 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv42 = nn.Conv2d(256, 256, 3, padding=1)
        self.batchn4 = nn.BatchNorm2d(256)  # unused in forward
        # Shared (parameter-free) pooling and dropout modules.
        self.pool = nn.MaxPool2d(2, 2)
        self.drop = nn.Dropout(0.25)
        # Classifier head.
        self.linear1 = nn.Linear(256*2*2, 256)
        self.linear2 = nn.Linear(256, 128)
        self.linear3 = nn.Linear(128, 10)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch of shape (N, 3, H, W). H and W must reduce to 2x2
                after the four poolings (e.g. 32x32); otherwise ``linear1``
                raises a ``RuntimeError``.

        Returns:
            Tensor of shape (N, 10) holding log-probabilities.
        """
        x = F.relu(self.conv11(x))
        x = self.drop(self.pool(F.relu(self.conv12(x))))
        x = F.relu(self.conv21(x))
        x = self.drop(self.pool(F.relu(self.conv22(x))))
        x = F.relu(self.conv31(x))
        x = self.drop(self.pool(F.relu(self.conv32(x))))
        x = F.relu(self.conv41(x))
        x = self.drop(self.pool(F.relu(self.conv42(x))))
        # Flatten per sample. The previous view(-1, 256*2*2) silently moved
        # surplus features into the batch dimension for other input sizes.
        x = x.flatten(1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.softmax(self.linear3(x))
        return x

****