In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
from torch.nn.functional import relu, softmax
import torch.optim as optim

In [None]:
class CNN(nn.Module):
    '''
    Baseline ConvNet implemented as a custom Module subclass.

    Architecture: three convolutional layers (11x11, 3x3, 3x3 kernels) with
    ReLU activations, one 2x2 max-pool after the first convolution, global
    average pooling, and a single fully connected layer producing 10 scores.

    forward() returns raw class scores (logits) of shape (batch, 10).
    No softmax is applied here: nn.CrossEntropyLoss, which this notebook
    trains with, expects unnormalised logits and applies log-softmax itself.
    Applying softmax twice flattens the gradients and stalls training.
    Because softmax is monotonic, argmax over the logits yields the same
    predicted class as argmax over the probabilities.
    '''
    def __init__(self):
        super(CNN, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=64,
                               kernel_size=11)

        self.maxpool = nn.MaxPool2d(kernel_size=2,
                                    stride=2)

        self.conv2 = nn.Conv2d(in_channels=64,
                               out_channels=128,
                               kernel_size=3)

        self.conv3 = nn.Conv2d(in_channels=128,
                               out_channels=128,
                               kernel_size=3)

        # Global average pooling: collapses each feature map to a single value
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)

        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        '''
        Compute class logits for a batch of 3-channel images.

        Args:
            x: image batch with 3 channels in dimension 1.

        Returns:
            Tensor of shape (batch, 10) holding unnormalised class scores.
        '''
        # 1st set of layers -- Conv, ReLU, MaxPool
        x = self.maxpool(relu(self.conv1(x)))

        # 2nd set of layers -- Conv, ReLU
        x = relu(self.conv2(x))

        # 3rd set of layers -- Conv, ReLU
        x = relu(self.conv3(x))

        # 4th set of layers -- AvgPool, then drop the 1x1 spatial dimensions
        x = self.avgpool(x)
        x = x.flatten(1)

        # 5th set of layers -- FC Linear (logits; softmax is left to the loss)
        return self.fc(x)

# Transfer ConvNet onto the GPU when one is available; otherwise fall back to
# the CPU so the notebook still runs on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net_original = CNN().to(device)

In [None]:
class CNN_bn(nn.Module):
    '''
    ConvNet with batch normalization implemented as a custom Module subclass.

    Same layout as the baseline network (three convolutions, one max-pool,
    global average pooling, one fully connected layer), with a BatchNorm2d
    layer after every convolutional stage.

    forward() returns raw class scores (logits) of shape (batch, 10).
    No softmax is applied here: nn.CrossEntropyLoss, which this notebook
    trains with, expects unnormalised logits and applies log-softmax itself.
    Because softmax is monotonic, argmax over the logits yields the same
    predicted class as argmax over the probabilities.
    '''
    def __init__(self):
        super(CNN_bn, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=64,
                               kernel_size=11)

        self.conv1_bn = nn.BatchNorm2d(num_features=64)

        self.maxpool = nn.MaxPool2d(kernel_size=2,
                                    stride=2)

        self.conv2 = nn.Conv2d(in_channels=64,
                               out_channels=128,
                               kernel_size=3)

        self.conv2_bn = nn.BatchNorm2d(num_features=128)

        self.conv3 = nn.Conv2d(in_channels=128,
                               out_channels=128,
                               kernel_size=3)

        self.conv3_bn = nn.BatchNorm2d(num_features=128)

        # Global average pooling: collapses each feature map to a single value
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)

        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        '''
        Compute class logits for a batch of 3-channel images.

        Args:
            x: image batch with 3 channels in dimension 1.

        Returns:
            Tensor of shape (batch, 10) holding unnormalised class scores.
        '''
        # 1st set of layers -- Conv, ReLU, MaxPool, Batch Normalization
        # (in this stage the normalization runs after the pooling)
        x = self.maxpool(relu(self.conv1(x)))
        x = self.conv1_bn(x)

        # 2nd set of layers -- Conv, ReLU, Batch Normalization
        x = self.conv2_bn(relu(self.conv2(x)))

        # 3rd set of layers -- Conv, ReLU, Batch Normalization
        x = self.conv3_bn(relu(self.conv3(x)))

        # 4th set of layers -- AvgPool, then drop the 1x1 spatial dimensions
        x = self.avgpool(x)
        x = x.flatten(1)

        # 5th set of layers -- FC Linear (logits; softmax is left to the loss)
        return self.fc(x)

# Transfer ConvNet onto the GPU when one is available; otherwise fall back to
# the CPU so the notebook still runs on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net_bn = CNN_bn().to(device)

In [None]:
class CNN_deep(nn.Module):
    '''
    Deeper ConvNet implemented as a custom Module subclass.

    Architecture: six 3x3 convolutional stages (each Conv -> ReLU ->
    BatchNorm, with a 2x2 max-pool after the first), global average pooling,
    and a three-layer fully connected head (512 -> 256 -> 128 -> 10).

    forward() returns raw class scores (logits) of shape (batch, 10).
    The final linear layer is followed by neither ReLU nor softmax:
      * a ReLU there would clamp every class score to be non-negative and
        zero the gradient for any class whose score went negative;
      * nn.CrossEntropyLoss, which this notebook trains with, expects
        unnormalised logits and applies log-softmax itself.
    '''
    def __init__(self):
        super(CNN_deep, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=64,
                               kernel_size=3)

        self.conv1_bn = nn.BatchNorm2d(num_features=64)

        self.maxpool = nn.MaxPool2d(kernel_size=2,
                                    stride=2)

        self.conv2 = nn.Conv2d(in_channels=64,
                               out_channels=128,
                               kernel_size=3)

        self.conv2_bn = nn.BatchNorm2d(num_features=128)

        self.conv3 = nn.Conv2d(in_channels=128,
                               out_channels=128,
                               kernel_size=3)

        self.conv3_bn = nn.BatchNorm2d(num_features=128)

        self.conv4 = nn.Conv2d(in_channels=128,
                               out_channels=256,
                               kernel_size=3)

        self.conv4_bn = nn.BatchNorm2d(num_features=256)

        self.conv5 = nn.Conv2d(in_channels=256,
                               out_channels=256,
                               kernel_size=3)

        self.conv5_bn = nn.BatchNorm2d(num_features=256)

        self.conv6 = nn.Conv2d(in_channels=256,
                               out_channels=512,
                               kernel_size=3)

        self.conv6_bn = nn.BatchNorm2d(num_features=512)

        # Global average pooling: collapses each feature map to a single value
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)

        self.fc1 = nn.Linear(512, 256)

        self.fc2 = nn.Linear(256, 128)

        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        '''
        Compute class logits for a batch of 3-channel images.

        Args:
            x: image batch with 3 channels in dimension 1.

        Returns:
            Tensor of shape (batch, 10) holding unnormalised class scores.
        '''
        # 1st convolutional layer -- Conv, ReLU, Batch Normalization, MaxPool
        x = self.conv1_bn(relu(self.conv1(x)))
        x = self.maxpool(x)

        # 2nd convolutional layer -- Conv, ReLU, Batch Normalization
        x = self.conv2_bn(relu(self.conv2(x)))

        # 3rd convolutional layer -- Conv, ReLU, Batch Normalization
        x = self.conv3_bn(relu(self.conv3(x)))

        # 4th convolutional layer -- Conv, ReLU, Batch Normalization
        x = self.conv4_bn(relu(self.conv4(x)))

        # 5th convolutional layer -- Conv, ReLU, Batch Normalization
        x = self.conv5_bn(relu(self.conv5(x)))

        # 6th convolutional layer -- Conv, ReLU, Batch Normalization
        x = self.conv6_bn(relu(self.conv6(x)))

        # AvgPool layer, then drop the 1x1 spatial dimensions
        x = self.avgpool(x)
        x = x.flatten(1)

        # 1st fully connected layer -- FC Linear, ReLU
        x = relu(self.fc1(x))

        # 2nd fully connected layer -- FC Linear, ReLU
        x = relu(self.fc2(x))

        # 3rd fully connected layer -- FC Linear only (logits; no ReLU, and
        # softmax is left to the loss function)
        return self.fc3(x)

# Transfer ConvNet onto the GPU when one is available; otherwise fall back to
# the CPU so the notebook still runs on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net_deep = CNN_deep().to(device)

In [None]:
# Define hyper-parameters
# Data loading / schedule: samples per mini-batch and passes over the training set
batch_size, num_epochs = 30, 150
# SGD optimizer settings: learning rate and momentum coefficient
lr, momentum = 0.009, 0.9

In [None]:
# Preprocessing: convert each image to a tensor, then normalize every RGB
# channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 training split (downloaded into ./data on first use), served in
# shuffled mini-batches.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# CIFAR-10 test split, served in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Human-readable class names, indexed by label id
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
def trainModel(net, trainloader, lr, momentum, num_epochs):
    '''
    Train a classifier with SGD and cross-entropy loss.

    Args:
        net: the nn.Module to train; it must return raw class logits.
        trainloader: iterable yielding (inputs, labels) batches.
        lr: learning rate passed to optim.SGD.
        momentum: momentum passed to optim.SGD.
        num_epochs: number of full passes over trainloader.

    Returns:
        (net, outputs, labels, train_loss) where outputs and labels come from
        the last processed batch (None if no batch was processed) and
        train_loss is a list with the mean batch loss of every epoch.
    '''
    # Define loss function
    criterion = nn.CrossEntropyLoss()

    # Define optimization algorithm
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

    # Move batches to wherever the model lives instead of relying on a
    # module-level `device` variable being defined and in sync with the net.
    device = next(net.parameters()).device

    # Make sure layers such as BatchNorm are in training mode, even if the
    # model was switched to eval() earlier in the session.
    net.train()

    # Defined up front so the return statement is valid even when no batch
    # is ever processed (num_epochs == 0 or an empty loader).
    outputs, labels = None, None

    train_loss = []
    for epoch in range(num_epochs):  # Loop over the dataset

        running_loss = []
        for inputs, labels in trainloader:
            # Each batch is a pair of [inputs, labels]
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Pass input data through model (forward pass)
            outputs = net(inputs)

            # Compute loss (cross-entropy loss)
            loss = criterion(outputs, labels)

            # Compute gradients (backward pass)
            loss.backward()

            # Update parameters (using computed gradients)
            optimizer.step()
            running_loss.append(loss.item())

        # Store training loss (NaN marks an epoch that saw no batches)
        epoch_loss = np.mean(running_loss) if running_loss else float('nan')
        train_loss.append(epoch_loss)
        print('epoch {} | loss: {}'.format(epoch, epoch_loss))

    print('Finished Training')

    return net, outputs, labels, train_loss

In [None]:
# Train ConvNet w/o batch normalization
# (rebinds the shared names net / outputs / labels / train_loss)
net, outputs, labels, train_loss = trainModel(
    net=net_original,
    trainloader=trainloader,
    lr=lr,
    momentum=momentum,
    num_epochs=num_epochs,
)

epoch 0 | loss: 2.238338841030393
epoch 1 | loss: 2.124000420095539
epoch 2 | loss: 2.0721957473320094
epoch 3 | loss: 2.0408985536114215
epoch 4 | loss: 2.0179495166430734
epoch 5 | loss: 1.9942553534647913
epoch 6 | loss: 1.9756645047171215
epoch 7 | loss: 1.9511720357573383
epoch 8 | loss: 1.933359710342096
epoch 9 | loss: 1.9154285520011247
epoch 10 | loss: 1.9051857468748636
epoch 11 | loss: 1.8918812791959354
epoch 12 | loss: 1.8813163520478888
epoch 13 | loss: 1.872353206656261
epoch 14 | loss: 1.861965880039286
epoch 15 | loss: 1.8551467126475598
epoch 16 | loss: 1.8465558678072658
epoch 17 | loss: 1.8403626345701396
epoch 18 | loss: 1.8302381572854969
epoch 19 | loss: 1.824571268817373
epoch 20 | loss: 1.8173769747488644
epoch 21 | loss: 1.8101246356964111
epoch 22 | loss: 1.8038389717095187
epoch 23 | loss: 1.7984956407804438
epoch 24 | loss: 1.7908304373375585
epoch 25 | loss: 1.786528922014059
epoch 26 | loss: 1.7830328309900687
epoch 27 | loss: 1.778037604058511
epoch 28 |

In [None]:
# Train ConvNet w/ batch normalization
# (rebinds the shared names net / outputs / labels / train_loss)
net, outputs, labels, train_loss = trainModel(
    net=net_bn,
    trainloader=trainloader,
    lr=lr,
    momentum=momentum,
    num_epochs=num_epochs,
)

In [None]:
# Train ConvNet w/ deep architecture
# (rebinds the shared names net / outputs / labels / train_loss)
net, outputs, labels, train_loss = trainModel(
    net=net_deep,
    trainloader=trainloader,
    lr=lr,
    momentum=momentum,
    num_epochs=num_epochs,
)

In [None]:
# Visualize filters: copy the first convolutional layer's weights out of the
# autograd graph and onto the host as a NumPy array
conv1_weight = net.conv1.weight
filters = conv1_weight.detach().cpu().numpy()

In [None]:
# Draw the first 64 filters on a 4 x 16 grid, one small RGB image per filter
fig = plt.figure(figsize=(16, 4))
for idx in range(64):
    # Move the channel axis last so the kernel has image layout
    kernel = filters[idx].transpose((1, 2, 0))
    # Min-max rescale the kernel to [0, 1] for display
    shifted = kernel - kernel.min()
    kernel = shifted / shifted.max()
    panel = fig.add_subplot(4, 16, idx + 1)
    panel.axis('off')
    panel.imshow(kernel)

In [None]:
def displayLoss(train_loss):
    '''
    Plot the training loss curve.

    Args:
        train_loss: sequence of loss values, one per epoch; the x-axis is
            the position of each value in the sequence.
    '''
    epochs = range(len(train_loss))

    plt.plot(epochs, train_loss, color='red', label='Train')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='upper right')
    plt.show()

# Plot the loss history returned by the most recent trainModel call
displayLoss(train_loss=train_loss)

In [None]:
# Test network on testing data

correct = 0
total = 0

# Evaluate in eval mode so BatchNorm layers normalize with their learned
# running statistics instead of per-batch test statistics (and do not update
# those statistics from test data). The previous mode is restored afterwards
# so later training in the same session is unaffected.
was_training = net.training
net.eval()
try:
    with torch.no_grad():
        for test_images, test_labels in testloader:
            test_images = test_images.to(device)
            test_labels = test_labels.to(device)
            test_outputs = net(test_images)

            # Compute predicted labels (index of the highest class score)
            _, predicted = torch.max(test_outputs, 1)

            # Update running totals of correct predictions and total predictions
            total += test_labels.size(0)
            correct += (predicted == test_labels).sum().item()
finally:
    net.train(was_training)

# Guard against an empty test loader instead of dividing by zero
test_acc = correct / total if total else float('nan')
print('Test Accuracy: {}'.format(test_acc))