#### CNN Classifier

**Additional Dependency**: Visdom for visualization

In [1]:
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

import numpy as np
import matplotlib.pyplot as plt
import time
import visdom

viz = visdom.Visdom()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Using PyTorch version:', torch.__version__, 'Device:', device)

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

Using PyTorch version: 0.4.1 Device: cuda


In [16]:
class Net(nn.Module):
    def __init__(self, bathSize, outputDims):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        self.maxpool3 = nn.MaxPool2d(kernel_size=2)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.relu4 = nn.ReLU()
        self.maxpool4 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(256 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, outputDims)
        
    def forward(self, input):
        layer1Conv = self.conv1(input)
        layer1 = self.maxpool1(self.relu1(self.bn1(layer1Conv)))
        layer2Conv = self.conv2(layer1)
        layer2 = self.maxpool2(self.relu2(self.bn2(layer2Conv)))
        layer3 = self.maxpool3(self.relu3(self.bn3(self.conv3(layer2))))
        layer4 = self.maxpool4(self.relu4(self.bn4(self.conv4(layer3))))
        layer5 = layer4.view(layer4.size(0), -1)
        layer5 = F.dropout(F.relu(self.fc1(layer5)), p=0.3, training=self.training)
        out = self.fc2(layer5)
        
        return F.log_softmax(out, dim=1), self.conv1.weight.data, self.conv2.weight.data, layer1Conv, layer2Conv    

In [3]:
def train(model, trainLoader, optimizer, epoch, logInterval=100):
    model.train()
    for batchIdx, (data, target) in enumerate(trainLoader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output, _, _, _, _ = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batchIdx % logInterval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
                  .format(epoch, batchIdx * len(data), len(trainLoader.dataset), 
                          100. * batchIdx / len(trainLoader), loss.item())
                 )
    return loss.item()

In [4]:
def test(model, testLoader, optimizer, confMat=None):
    model.eval()
    testLoss = 0.0
    correct = 0.0
    data = None
    with torch.no_grad():
        for data, target in testLoader:
            data, target = data.to(device), target.to(device)
            output, convWeights1, convWeights2, layer1, layer2 = model(data)
            testLoss += F.nll_loss(output, target).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
            if confMat is not None:
                for idx in range(target.size()[0]):
                    confMat[target[idx], pred[idx].item()] += 1
            
    testLoss /= len(testLoader.dataset)
    accuracy = 100 * (correct / len(testLoader.dataset))
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
          .format(testLoss, correct, len(testLoader.dataset), accuracy))
    return testLoss, accuracy, convWeights1, convWeights2, layer1, layer2, data

In [5]:
def visLine(yVal, xVal, lab, title, winLine=None):
    viz.line(X=np.array([xVal]), 
             Y=np.array([yVal]), 
             win=winLine, 
             name=lab,
             update='append',
             opts=dict(title=title,
                       showlegend=True)
            )
    
def visHeatMap(mat, title):
    ''' https://matplotlib.org/gallery/images_contours_and_fields/image_annotated_heatmap.html
    '''
    viz.heatmap(
        X=np.flipud(mat),
        opts=dict(
            columnnames=['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'],
            rownames=['truck', 'ship', 'horse', 'frog', 'dog', 'deer', 'cat', 'bird', 'automobile', 'airplane'],
            title=title,
        ))

def visImageGrids(data, win, title):
    b, c, h, w = data.shape
    for idx in range(0,c,3):
        if idx == 0:
            grid = data[:,0:3,:,:]
        else:
            if (c%3 == 0 and idx+3 < c+1) or (idx+3 < c):
                grid = torch.cat((grid, data[:,idx:idx+3,:,:]))
            else:
                noPads = idx+3-c
                pad = torch.cat((data[:,idx:c,:,:], torch.zeros(b, noPads, h, w).to(device)),dim=1)
                grid = torch.cat((grid, pad))
    grid = F.interpolate(grid, size=([32,32]))
    nrows = min(int(grid.shape[0]/2),16)
    if win:
        win = viz.images(grid, nrow=nrows, padding=2, win=win, opts=dict(title=title))
    else:
        win = viz.images(grid, nrow=nrows, padding=2, opts=dict(title=title))
    return win

In [21]:
def evalModel(lr, opt, batchSize, outputDims, winLine):
    '''Helper function to evaluate the model
    '''
    model = Net(batchSize, outputDims).to(device)
        
    confusionMatrix = None
    winImg = None
    winFil1 = None
    winFil2 = None
    winConv1 = None
    winConv2 = None
    
    if opt == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.01)
    elif opt == 'ADAM':
        optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9,0.999), weight_decay=0.01)
        
    for epoch in range(1, epochs+1):
        trainLoss = train(model, trainLoader, optimizer, epoch, logInterval=500)
        
        if epoch == epochs:
            confusionMatrix = torch.zeros((10,10)).to(device)
        testLoss, testAcc, convWeights1, convWeights2, layer1Conv, layer2Conv, data = test(
            model, testLoader, optimizer, confusionMatrix)
        
        visLine(trainLoss, epoch-1, opt, 'Train Loss', winLine[0])
        visLine(testLoss, epoch-1, opt, 'Test Loss', winLine[1])
        visLine(testAcc, epoch-1, opt, 'Test Accuracy', winLine[2])
        
        if epoch == 1:
            winImg = visImageGrids(data[0:32,:,:,:], winImg, 'Test Images')
        winFil1 = visImageGrids(convWeights1, winFil1, 'Trained Layer1 Filters for ' + opt)
        winFil2 = visImageGrids(convWeights2, winFil2, 'Trained Layer2 Filters for ' + opt)
        winConv1 = visImageGrids(layer1Conv[0:32,:,:,:], winConv1, 'Layer1 Convolution output for ' + opt)
        winConv2 = visImageGrids(layer2Conv[0:32,:,:,:], winConv2, 'Layer2 Convolution output for ' + opt)
        
    visHeatMap(confusionMatrix, opt)
        
#         torch.save(model.state_dict(), "checkpoints/model_"+opt+'_'+reg+'_'+lossFn+'_'+nonLin+'.th')

In [31]:
batchSize = 32
testBatchSize = 256

trainLoader = torch.utils.data.DataLoader(datasets.CIFAR10('./data', 
                                                        train=True, 
                                                        download=True, 
                                                        transform=transforms.Compose([
                                                            transforms.Resize((64,64)),
                                                            transforms.RandomHorizontalFlip(),
                                                            transforms.ToTensor(),
                                                            transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
                                                        ])),
                                          batch_size=batchSize, 
                                          shuffle=True, num_workers=0)
testLoader = torch.utils.data.DataLoader(datasets.CIFAR10('./data', 
                                                        train=False, 
                                                        download=True, 
                                                        transform=transforms.Compose([
                                                            transforms.Resize((64,64)),
                                                            transforms.ToTensor(),
                                                            transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
                                                        ])), 
                                          batch_size=testBatchSize, 
                                          shuffle=False, num_workers=0)

Files already downloaded and verified
Files already downloaded and verified


In [32]:
lr = 0.0002
epochs = 35
outputDims = 10

optimizers = ['ADAM', 'SGD']

winLine=[]
winLine.append(viz.line(X=[0],Y=[0], name='ADAM', opts=dict(title='Training Loss', showlegend=True)))
winLine.append(viz.line(X=[0],Y=[0], name='ADAM', opts=dict(title='Test Loss', showlegend=True)))
winLine.append(viz.line(X=[0],Y=[0], name='ADAM', opts=dict(title='Test Accuracy', showlegend=True)))              


In [33]:
%%time

for idx in range(len(optimizers)):
    startTime = time.time()
    print('Optimizer = '+ optimizers[idx])
    evalModel(lr, optimizers[idx], batchSize, outputDims, winLine)
    endTime = time.time()
    print('Time required:{} minutes and {} seconds'.format(int((endTime-startTime)/60), 
                                                           int((endTime-startTime)%60)))
    print('-----------------------------------------')

Optimizer = ADAM

Test set: Average loss: 0.0041, Accuracy: 6302.0/10000 (63%)


Test set: Average loss: 0.0034, Accuracy: 7014.0/10000 (70%)


Test set: Average loss: 0.0032, Accuracy: 7251.0/10000 (73%)


Test set: Average loss: 0.0030, Accuracy: 7403.0/10000 (74%)


Test set: Average loss: 0.0029, Accuracy: 7512.0/10000 (75%)


Test set: Average loss: 0.0027, Accuracy: 7699.0/10000 (77%)


Test set: Average loss: 0.0026, Accuracy: 7728.0/10000 (77%)


Test set: Average loss: 0.0026, Accuracy: 7736.0/10000 (77%)


Test set: Average loss: 0.0024, Accuracy: 7887.0/10000 (79%)


Test set: Average loss: 0.0024, Accuracy: 7921.0/10000 (79%)


Test set: Average loss: 0.0023, Accuracy: 8073.0/10000 (81%)


Test set: Average loss: 0.0024, Accuracy: 7998.0/10000 (80%)


Test set: Average loss: 0.0025, Accuracy: 7854.0/10000 (79%)


Test set: Average loss: 0.0024, Accuracy: 7931.0/10000 (79%)


Test set: Average loss: 0.0022, Accuracy: 8025.0/10000 (80%)


Test set: Average loss: 0.0024, Accur


Test set: Average loss: 0.0021, Accuracy: 8196.0/10000 (82%)


Test set: Average loss: 0.0020, Accuracy: 8257.0/10000 (83%)


Test set: Average loss: 0.0022, Accuracy: 8134.0/10000 (81%)


Test set: Average loss: 0.0022, Accuracy: 8161.0/10000 (82%)

Time required:49 minutes and 2 seconds
-----------------------------------------
Optimizer = SGD

Test set: Average loss: 0.0046, Accuracy: 5940.0/10000 (59%)


Test set: Average loss: 0.0036, Accuracy: 6856.0/10000 (69%)


Test set: Average loss: 0.0033, Accuracy: 7230.0/10000 (72%)


Test set: Average loss: 0.0031, Accuracy: 7342.0/10000 (73%)


Test set: Average loss: 0.0031, Accuracy: 7376.0/10000 (74%)


Test set: Average loss: 0.0030, Accuracy: 7443.0/10000 (74%)


Test set: Average loss: 0.0030, Accuracy: 7372.0/10000 (74%)


Test set: Average loss: 0.0029, Accuracy: 7499.0/10000 (75%)


Test set: Average loss: 0.0027, Accuracy: 7683.0/10000 (77%)


Test set: Average loss: 0.0026, Accuracy: 7723.0/10000 (77%)


Test set: Average lo


Test set: Average loss: 0.0023, Accuracy: 7954.0/10000 (80%)


Test set: Average loss: 0.0022, Accuracy: 8111.0/10000 (81%)


Test set: Average loss: 0.0024, Accuracy: 7971.0/10000 (80%)


Test set: Average loss: 0.0024, Accuracy: 7872.0/10000 (79%)


Test set: Average loss: 0.0022, Accuracy: 8133.0/10000 (81%)


Test set: Average loss: 0.0022, Accuracy: 8156.0/10000 (82%)


Test set: Average loss: 0.0022, Accuracy: 8100.0/10000 (81%)


Test set: Average loss: 0.0022, Accuracy: 8009.0/10000 (80%)

Time required:41 minutes and 59 seconds
-----------------------------------------
Wall time: 1h 31min 1s


**Observations**: Augmenting the dataset by flipping the images and upscaling them to twice their original size improves the accuracy considerably. The overall performance using both SGD and ADAM were similar. However with Adam, the risk of overfitting/underfiiting seemed greater as the weight decay term needed a lot of tuning. The dropout was removed from the convolution layer, However without it, some of the filters do not seem to be learning anything useful and just produve a black box.