In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
%matplotlib inline

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))]
)

batch_size = 128

cifar_trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(cifar_trainset, batch_size=batch_size, shuffle=True, num_workers=2)

cifar_testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(cifar_testset, batch_size=batch_size, shuffle=True, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [4]:
import torch.nn as nn
import torch.nn.functional as F

Problem 1 Part 1

In [5]:
class VGGBase(nn.Module):
    def __init__(self, num_classes=10):
        super(VGGBase, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU())
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer5 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU())
        self.layer6 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU())
        self.layer7 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer8 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [6]:
#Defining hyperparameters again
num_classes = 10
num_epochs = 10
batch_size = 128
learning_rate = 0.01

model = VGGBase(num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

steps = len(trainloader)

In [7]:
loss_train_hist = []
loss_val_hist = []
acc_val_hist = []

In [8]:
for epoch in range(num_epochs):
  tl = 0
  vl = 0
  va = 0
  for i, (images, labels) in enumerate(trainloader):
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    tl = tl + loss.item()

  tl = tl/len(trainloader)
  loss_train_hist.append(tl)
  print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, steps, loss.item()))
  
  #Validation
  with torch.no_grad():
    accuracy = 0
    total = 0
    for images, labels in testloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)

      loss = criterion(outputs, labels)
      vl += loss.item()

      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      accuracy += (predicted == labels).sum().item()
      del images, labels, outputs

    print('Accuracy of the network on the {} validation images: {} %' .format(5000, 100*accuracy / total))
    vl = vl/len(testloader)
    loss_val_hist.append(vl)
    va = accuracy/total
    acc_val_hist.append(va)


Epoch [1/10], Step [391/391], Loss: 1.5173
Accuracy of the network on the 5000 validation images: 43.03 %
Epoch [2/10], Step [391/391], Loss: 1.2889
Accuracy of the network on the 5000 validation images: 56.01 %
Epoch [3/10], Step [391/391], Loss: 0.9392
Accuracy of the network on the 5000 validation images: 64.16 %
Epoch [4/10], Step [391/391], Loss: 0.9910
Accuracy of the network on the 5000 validation images: 66.16 %
Epoch [5/10], Step [391/391], Loss: 0.7537
Accuracy of the network on the 5000 validation images: 69.01 %
Epoch [6/10], Step [391/391], Loss: 0.8636
Accuracy of the network on the 5000 validation images: 70.75 %
Epoch [7/10], Step [391/391], Loss: 0.4740
Accuracy of the network on the 5000 validation images: 73.44 %
Epoch [8/10], Step [391/391], Loss: 0.7346
Accuracy of the network on the 5000 validation images: 74.86 %
Epoch [9/10], Step [391/391], Loss: 0.5961
Accuracy of the network on the 5000 validation images: 74.87 %
Epoch [10/10], Step [391/391], Loss: 0.5203
Ac

In [13]:
print("Training Loss: ", loss_train_hist)
print("Validation Loss: ", loss_val_hist)
print("Validation Accuracy: ", acc_val_hist)

Training Loss:  [1.8099597893712465, 1.2996049556890716, 1.0738227234776978, 0.9169404714003854, 0.8066265424499122, 0.7146925236410497, 0.6390998580724078, 0.5734735846214587, 0.5160492311810594, 0.4631518668225964]
Validation Loss:  [1.475914283643795, 1.1995902649963959, 1.005139410495758, 0.9557124263123621, 0.8846339251421675, 0.8322402027588857, 0.7678028578999676, 0.7359243495554864, 0.7303075405615794, 0.7439083683339855]
Validation Accuracy:  [0.4303, 0.5601, 0.6416, 0.6616, 0.6901, 0.7075, 0.7344, 0.7486, 0.7487, 0.7517]


In [14]:
pip install ptflops

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Problem 1 Part 2

In [15]:
class VGG16(nn.Module):
    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU())
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer5 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer6 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer7 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer8 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer9 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer10 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer11 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer12 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer13 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(512, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = self.layer9(out)
        out = self.layer10(out)
        out = self.layer11(out)
        out = self.layer12(out)
        out = self.layer13(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [16]:
#Defining hyperparameters again
num_classes = 10
num_epochs = 10
batch_size = 128
learning_rate = 0.01

model = VGG16(num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

steps = len(trainloader)

In [17]:
loss_train_hist16 = []
loss_val_hist16 = []
acc_val_hist16 = []

In [18]:
for epoch in range(num_epochs):
  tl = 0
  vl = 0
  va = 0
  for i, (images, labels) in enumerate(trainloader):
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    tl = tl + loss.item()

  tl = tl/len(trainloader)
  loss_train_hist16.append(tl)
  print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, steps, loss.item()))
  
  #Validation
  with torch.no_grad():
    accuracy = 0
    total = 0
    for images, labels in testloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)

      loss = criterion(outputs, labels)
      vl += loss.item()

      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      accuracy += (predicted == labels).sum().item()
      del images, labels, outputs

    print('Accuracy of the network on the {} validation images: {} %' .format(5000, 100*accuracy / total))
    vl = vl/len(testloader)
    loss_val_hist16.append(vl)
    va = accuracy/total
    acc_val_hist16.append(va)


Epoch [1/10], Step [391/391], Loss: 1.1424
Accuracy of the network on the 5000 validation images: 54.55 %
Epoch [2/10], Step [391/391], Loss: 0.8214
Accuracy of the network on the 5000 validation images: 66.4 %
Epoch [3/10], Step [391/391], Loss: 0.6931
Accuracy of the network on the 5000 validation images: 69.34 %
Epoch [4/10], Step [391/391], Loss: 0.6366
Accuracy of the network on the 5000 validation images: 72.45 %
Epoch [5/10], Step [391/391], Loss: 0.6355
Accuracy of the network on the 5000 validation images: 75.76 %
Epoch [6/10], Step [391/391], Loss: 0.5439
Accuracy of the network on the 5000 validation images: 75.54 %
Epoch [7/10], Step [391/391], Loss: 0.3708
Accuracy of the network on the 5000 validation images: 76.59 %
Epoch [8/10], Step [391/391], Loss: 0.4176
Accuracy of the network on the 5000 validation images: 72.28 %
Epoch [9/10], Step [391/391], Loss: 0.2320
Accuracy of the network on the 5000 validation images: 76.33 %
Epoch [10/10], Step [391/391], Loss: 0.0912
Acc

In [20]:
print("Training Loss: ", loss_train_hist16)
print("Validation Loss: ", loss_val_hist16)
print("Validation Accuracy: ", acc_val_hist16)

Training Loss:  [1.6073968171158715, 1.054281665845905, 0.803523164880855, 0.6265657282698794, 0.48996625021290596, 0.37401105974183974, 0.28904226862484844, 0.22014851367001034, 0.175840234093349, 0.13891570475857581]
Validation Loss:  [1.2398229731789119, 0.9484851639482039, 0.9088301258751109, 0.8253160964084577, 0.7384855788719805, 0.7837648995314972, 0.8048381035840964, 1.0485395072381707, 0.8852410407005986, 0.8627025998091395]
Validation Accuracy:  [0.5455, 0.664, 0.6934, 0.7245, 0.7576, 0.7554, 0.7659, 0.7228, 0.7633, 0.7792]


Problem 2 Part 1

In [21]:
class Inception(nn.Module):
    def __init__(self, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        # Branch 1
        self.b1_1 = nn.LazyConv2d(c1, kernel_size=1)
        # Branch 2
        self.b2_1 = nn.LazyConv2d(c2[0], kernel_size=1)
        self.b2_2 = nn.LazyConv2d(c2[1], kernel_size=3, padding=1)
        # Branch 3
        self.b3_1 = nn.LazyConv2d(c3[0], kernel_size=1)
        self.b3_2 = nn.LazyConv2d(c3[1], kernel_size=5, padding=2)
        # Branch 4
        self.b4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.b4_2 = nn.LazyConv2d(c4, kernel_size=1)
        
    def forward(self, x):
        b1 = F.relu(self.b1_1(x))
        b2 = F.relu(self.b2_2(F.relu(self.b2_1(x))))
        b3 = F.relu(self.b3_2(F.relu(self.b3_1(x))))
        b4 = F.relu(self.b4_2(self.b4_1(x)))
        return torch.cat((b1, b2, b3, b4), dim=1)

In [22]:
class GoogleNet(nn.Module):
  def b1(self):
    return nn.Sequential(
              nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
              nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  def b2(self):
    return nn.Sequential(
        nn.LazyConv2d(64, kernel_size=1), nn.ReLU(),
        nn.LazyConv2d(192, kernel_size=3, padding=1), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  def b3(self):
    return nn.Sequential(Inception(64, (96, 128), (16, 32), 32),
                         Inception(128, (128, 192), (32, 96), 64),
                         nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  def b4(self):
    return nn.Sequential(Inception(192, (96, 208), (16, 48), 64),
                         Inception(160, (112, 224), (24, 64), 64),
                         Inception(128, (128, 256), (24, 64), 64),
                         Inception(112, (144, 288), (32, 64), 64),
                         Inception(256, (160, 320), (32, 128), 128),
                         nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  def b5(self):
    return nn.Sequential(Inception(256, (160, 320), (32, 128), 128),
                         Inception(384, (192, 384), (48, 128), 128),
                         nn.AdaptiveAvgPool2d((1,1)), nn.Flatten())
  def __init__(self, lr=0.1, num_classes=10):
    super(GoogleNet, self).__init__()
    self.model = nn.Sequential(self.b1(), self.b2(), self.b3(), self.b4(),
                             self.b5(), nn.LazyLinear(num_classes))
  def forward(self, x):
        return self.model(x)

In [23]:
#Defining hyperparameters again
num_classes = 10
num_epochs = 10
batch_size = 128
learning_rate = 0.01

# model = GoogleNet().to(device)
model = GoogleNet().model.to(device)

criterion = nn.CrossEntropyLoss()
criterion.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

steps = len(trainloader)



In [24]:
loss_train_histGN = []
loss_val_histGN = []
acc_val_histGN = []

In [25]:
for epoch in range(num_epochs):
  tl = 0
  vl = 0
  va = 0
  for i, (images, labels) in enumerate(trainloader):
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    tl = tl + loss.item()

  tl = tl/len(trainloader)
  loss_train_histGN.append(tl)
  print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, steps, loss.item()))
  
  #Validation
  with torch.no_grad():
    accuracy = 0
    total = 0
    for images, labels in testloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)

      loss = criterion(outputs, labels)
      vl += loss.item()

      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      accuracy += (predicted == labels).sum().item()
      del images, labels, outputs

    print('Accuracy of the network on the {} validation images: {} %' .format(5000, 100*accuracy / total))
    vl = vl/len(testloader)
    loss_val_histGN.append(vl)
    va = accuracy/total
    acc_val_histGN.append(va)

Epoch [1/10], Step [391/391], Loss: 2.3020
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [2/10], Step [391/391], Loss: 2.3042
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [3/10], Step [391/391], Loss: 2.3032
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [4/10], Step [391/391], Loss: 2.3026
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [5/10], Step [391/391], Loss: 2.3029
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [6/10], Step [391/391], Loss: 2.3027
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [7/10], Step [391/391], Loss: 2.3026
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [8/10], Step [391/391], Loss: 2.3025
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [9/10], Step [391/391], Loss: 2.3027
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [10/10], Step [391/391], Loss: 2.3028
Accuracy of

Problem 2 Part 2

In [26]:
class GoogleNet_BatchNorm(nn.Module):
  def b1(self):
    return nn.Sequential(
              nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
              nn.BatchNorm2d(64),
              nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  def b2(self):
    return nn.Sequential(
        nn.LazyConv2d(64, kernel_size=1), nn.BatchNorm2d(64), nn.ReLU(),
        nn.LazyConv2d(192, kernel_size=3, padding=1), nn.BatchNorm2d(192), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  def b3(self):
    return nn.Sequential(Inception(64, (96, 128), (16, 32), 32),
                         Inception(128, (128, 192), (32, 96), 64),
                         nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  def b4(self):
    return nn.Sequential(Inception(192, (96, 208), (16, 48), 64),
                         Inception(160, (112, 224), (24, 64), 64),
                         Inception(128, (128, 256), (24, 64), 64),
                         Inception(112, (144, 288), (32, 64), 64),
                         Inception(256, (160, 320), (32, 128), 128),
                         nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  def b5(self):
    return nn.Sequential(Inception(256, (160, 320), (32, 128), 128),
                         Inception(384, (192, 384), (48, 128), 128),
                         nn.AdaptiveAvgPool2d((1,1)), nn.Flatten())
  def __init__(self, lr=0.1, num_classes=10):
    super(GoogleNet_BatchNorm, self).__init__()
    self.model = nn.Sequential(self.b1(), self.b2(), self.b3(), self.b4(),
                             self.b5(), nn.LazyLinear(num_classes))
  def forward(self, x):
        return self.model(x)

In [28]:
#Defining hyperparameters again
num_classes = 10
num_epochs = 10
batch_size = 128
learning_rate = 0.01

model = GoogleNet_BatchNorm().model.to(device)

criterion = nn.CrossEntropyLoss()
criterion.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

steps = len(trainloader)



In [29]:
loss_train_histGNBN = []
loss_val_histGNBN = []
acc_val_histGNBN = []

In [30]:
for epoch in range(num_epochs):
  tl = 0
  vl = 0
  va = 0
  for i, (images, labels) in enumerate(trainloader):
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    tl = tl + loss.item()

  tl = tl/len(trainloader)
  loss_train_histGNBN.append(tl)
  print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, steps, loss.item()))
  
  #Validation
  with torch.no_grad():
    accuracy = 0
    total = 0
    for images, labels in testloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)

      loss = criterion(outputs, labels)
      vl += loss.item()

      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      accuracy += (predicted == labels).sum().item()
      del images, labels, outputs

    print('Accuracy of the network on the {} validation images: {} %' .format(5000, 100*accuracy / total))
    vl = vl/len(testloader)
    loss_val_histGNBN.append(vl)
    va = accuracy/total
    acc_val_histGNBN.append(va)

Epoch [1/10], Step [391/391], Loss: 2.3019
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [2/10], Step [391/391], Loss: 2.3022
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [3/10], Step [391/391], Loss: 2.3021
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [4/10], Step [391/391], Loss: 2.3026
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [5/10], Step [391/391], Loss: 2.3026
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [6/10], Step [391/391], Loss: 2.3026
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [7/10], Step [391/391], Loss: 2.3027
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [8/10], Step [391/391], Loss: 2.3027
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [9/10], Step [391/391], Loss: 2.3026
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [10/10], Step [391/391], Loss: 2.3028
Accuracy of

In [31]:
print("Training Loss: ", loss_train_histGNBN)
print("Validation Loss: ", loss_val_histGNBN)
print("Validation Accuracy: ", acc_val_histGNBN)

Training Loss:  [2.302711688649014, 2.302656273707709, 2.302641714320463, 2.3026312062197634, 2.3026287567889905, 2.3026301568121554, 2.302628259829548, 2.30263233733604, 2.3026280531188106, 2.3026290994775875]
Validation Loss:  [2.3026705180542377, 2.3026282002654255, 2.3025933126860028, 2.302576928199092, 2.3025879950463017, 2.302588094638873, 2.302588888361484, 2.3025862506673307, 2.302584968035734, 2.3025817086425007]
Validation Accuracy:  [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]


Problem 2 Part 1 - Attempt 2 (Still doesn't work)

In [None]:
class GoogleNetNew(nn.Module):
  def __init__(self, in_channels=3, num_classes=10):
    super(GoogleNetNew, self).__init__()

    self.conv1 = conv_block(in_channels=in_channels, out_channels=64, kernel_size=7, stride=2, padding=3)

    self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.conv2 = conv_block(64, 192, kernel_size=3, stride=1, padding=1)
    self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.inception3a = InceptionNew(192, 64, 96, 128, 16, 32, 32)
    self.inception3b = InceptionNew(256, 128, 128, 192, 32, 96, 64)
    self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.inception4a = InceptionNew(480, 192, 96, 208, 16, 48, 64)
    self.inception4b = InceptionNew(512, 160, 112, 224, 24, 64, 64)
    self.inception4c = InceptionNew(512, 128, 128, 256, 24, 64, 64)
    self.inception4d = InceptionNew(512, 112, 144, 288, 32, 64, 64)
    self.inception4e = InceptionNew(528, 256, 160, 320, 32, 128, 128)

    self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.inception5a = InceptionNew(832, 256, 160, 320, 32, 128, 128)
    self.inception5b = InceptionNew(832, 384, 192, 384, 48, 128, 128)
    self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
    self.dropout = nn.Dropout(p=0.4)
    self.fc1 = nn.Linear(1024, 1000)

  def forward(self, x):
    x = self.conv1(x)
    x = self.maxpool1(x)
    x = self.conv2(x)
    x = self.maxpool2(x)

    x = self.inception3a(x)
    x = self.inception3b(x)
    x = self.maxpool3(x)

    x = self.inception4a(x)
    x = self.inception4b(x)
    x = self.inception4c(x)
    x = self.inception4d(x)
    x = self.inception4e(x)
    x = self.maxpool4(x)

    x = self.inception5a(x)
    x = self.inception5b(x)
    x = self.avgpool(x)
    x = x.reshape(x.shape[0], -1)
    x = self.dropout(x)
    x = self.fc1(x)
    return x


class InceptionNew(nn.Module):
    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool):
        super(InceptionNew, self).__init__()
        # Branch 1
        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=1)
        # Branch 2
        self.branch2 = nn.Sequential(
            conv_block(in_channels, red_3x3, kernel_size=1),
            conv_block(red_3x3, out_3x3, kernel_size=3, padding=1)
        )
        # Branch 3
        self.branch3 = nn.Sequential(
            conv_block(in_channels, red_5x5, kernel_size=1),
            conv_block(red_5x5, out_5x5, kernel_size=5, padding=2)
        )
        # Branch 4
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            conv_block(in_channels, out_1x1pool, kernel_size=1)
        )
    def forward(self, x):
        return torch.cat([self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)], 1)

class conv_block(nn.Module):
  def __init__(self, in_channels, out_channels, **kwargs):
    super(conv_block, self).__init__()
    self.relu = nn.ReLU()
    self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
    self.batchnorm = nn.BatchNorm2d(out_channels)

  def forward(self, x):
    return self.relu(self.batchnorm(self.conv(x)))

In [None]:
#Defining hyperparameters again
num_classes = 10
num_epochs = 10
batch_size = 128
learning_rate = 0.01

model = GoogleNetNew().to(device)

criterion = nn.CrossEntropyLoss()
criterion.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

steps = len(trainloader)

In [None]:
loss_train_histGNBN = []
loss_val_histGNBN = []
acc_val_histGNBN = []

In [None]:
for epoch in range(num_epochs):
  tl = 0
  vl = 0
  va = 0
  for i, (images, labels) in enumerate(trainloader):
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    tl = tl + loss.item()

  tl = tl/len(trainloader)
  loss_train_hist16.append(tl)
  print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, steps, loss.item()))
  
  #Validation
  with torch.no_grad():
    accuracy = 0
    total = 0
    for images, labels in testloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)

      loss = criterion(outputs, labels)
      vl += loss.item()

      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      accuracy += (predicted == labels).sum().item()
      del images, labels, outputs

    print('Accuracy of the network on the {} validation images: {} %' .format(5000, 100*accuracy / total))
    vl = vl/len(testloader)
    loss_val_hist16.append(vl)
    va = accuracy/total
    acc_val_hist16.append(va)

Problem 3 Part 1

In [32]:
class Residual(nn.Module):
    """The Residual block of ResNet models."""
    def __init__(self, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1,
                                   stride=strides)
        self.conv2 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.LazyConv2d(num_channels, kernel_size=1,
                                       stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.LazyBatchNorm2d()
        self.bn2 = nn.LazyBatchNorm2d()

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)

class ResNet(nn.Module):
    def b1(self):
        return nn.Sequential(
            nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
            nn.LazyBatchNorm2d(), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    def block(self, num_residuals, num_channels, first_block=False):
      blk = []
      for i in range(num_residuals):
          if i == 0 and not first_block:
              blk.append(Residual(num_channels, use_1x1conv=True, strides=2))
          else:
              blk.append(Residual(num_channels))
      return nn.Sequential(*blk)
    def __init__(self, arch, lr=0.1, num_classes=10):
      super(ResNet, self).__init__()
      self.net = nn.Sequential(self.b1())
      for i, b in enumerate(arch):
          self.net.add_module(f'b{i+2}', self.block(*b, first_block=(i==0)))
      self.net.add_module('last', nn.Sequential(
          nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(),
          nn.LazyLinear(num_classes)))
    def forward(self, x):
      return self.net(x)

In [33]:
class ResNet18(ResNet):
    def __init__(self, num_classes=10):
        super().__init__(((2, 64), (2, 128), (2, 256), (2, 512)), num_classes)

In [34]:
#Defining hyperparameters again
num_classes = 10
num_epochs = 10
batch_size = 128
learning_rate = 0.01

model = ResNet18().net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

steps = len(trainloader)

In [35]:
loss_train_histR18 = []
loss_val_histR18 = []
acc_val_histR18 = []

In [36]:
for epoch in range(num_epochs):
  tl = 0
  vl = 0
  va = 0
  for i, (images, labels) in enumerate(trainloader):
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    tl = tl + loss.item()

  tl = tl/len(trainloader)
  loss_train_histR18.append(tl)
  print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, steps, loss.item()))
  
  #Validation
  with torch.no_grad():
    accuracy = 0
    total = 0
    for images, labels in testloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)

      loss = criterion(outputs, labels)
      vl += loss.item()

      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      accuracy += (predicted == labels).sum().item()
      del images, labels, outputs

    print('Accuracy of the network on the {} validation images: {} %' .format(5000, 100*accuracy / total))
    vl = vl/len(testloader)
    loss_val_histR18.append(vl)
    va = accuracy/total
    acc_val_histR18.append(va)


Epoch [1/10], Step [391/391], Loss: 1.1079
Accuracy of the network on the 5000 validation images: 54.9 %
Epoch [2/10], Step [391/391], Loss: 1.2685
Accuracy of the network on the 5000 validation images: 59.83 %
Epoch [3/10], Step [391/391], Loss: 0.8017
Accuracy of the network on the 5000 validation images: 65.13 %
Epoch [4/10], Step [391/391], Loss: 0.7614
Accuracy of the network on the 5000 validation images: 67.06 %
Epoch [5/10], Step [391/391], Loss: 0.8245
Accuracy of the network on the 5000 validation images: 67.32 %
Epoch [6/10], Step [391/391], Loss: 0.6814
Accuracy of the network on the 5000 validation images: 65.93 %
Epoch [7/10], Step [391/391], Loss: 0.4910
Accuracy of the network on the 5000 validation images: 66.3 %
Epoch [8/10], Step [391/391], Loss: 0.6207
Accuracy of the network on the 5000 validation images: 66.94 %
Epoch [9/10], Step [391/391], Loss: 0.4444
Accuracy of the network on the 5000 validation images: 67.05 %
Epoch [10/10], Step [391/391], Loss: 0.2172
Accu

In [38]:
print("Training Loss: ", loss_train_histR18)
print("Validation Loss: ", loss_val_histR18)
print("Validation Accuracy: ", acc_val_histR18)

Training Loss:  [1.4365338596236674, 1.0555495563370492, 0.8613864973073115, 0.7174634493677817, 0.6004056185865037, 0.4920523040892218, 0.4047490801576458, 0.3291359624594374, 0.26939818000092225, 0.21808702782596773]
Validation Loss:  [1.2684783995906008, 1.1421971517273142, 0.9839184857621978, 0.9783547716804698, 0.9849975124190126, 1.0720300614079343, 1.1216949862015397, 1.1789754131172276, 1.1776849740668187, 1.212127279631699]
Validation Accuracy:  [0.549, 0.5983, 0.6513, 0.6706, 0.6732, 0.6593, 0.663, 0.6694, 0.6705, 0.6847]


Problem 3 Part 2

In [37]:
class ResNet26(ResNet):
    def __init__(self, num_classes=10):
        super().__init__(((6, 64), (6, 128), (2, 256), (2, 512)), num_classes)

In [40]:
#Defining hyperparameters again
num_classes = 10
num_epochs = 10
batch_size = 128
learning_rate = 0.01

model = ResNet26().net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

steps = len(trainloader)



In [42]:
loss_train_histR26 = []
loss_val_histR26 = []
acc_val_histR26 = []

In [43]:
for epoch in range(num_epochs):
  tl = 0
  vl = 0
  va = 0
  for i, (images, labels) in enumerate(trainloader):
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    tl = tl + loss.item()

  tl = tl/len(trainloader)
  loss_train_histR26.append(tl)
  print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, steps, loss.item()))
  
  #Validation
  with torch.no_grad():
    accuracy = 0
    total = 0
    for images, labels in testloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)

      loss = criterion(outputs, labels)
      vl += loss.item()

      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      accuracy += (predicted == labels).sum().item()
      del images, labels, outputs

    print('Accuracy of the network on the {} validation images: {} %' .format(5000, 100*accuracy / total))
    vl = vl/len(testloader)
    loss_val_histR26.append(vl)
    va = accuracy/total
    acc_val_histR26.append(va)


Epoch [1/10], Step [391/391], Loss: 0.9985
Accuracy of the network on the 5000 validation images: 54.56 %
Epoch [2/10], Step [391/391], Loss: 1.1778
Accuracy of the network on the 5000 validation images: 61.18 %
Epoch [3/10], Step [391/391], Loss: 0.8650
Accuracy of the network on the 5000 validation images: 62.57 %
Epoch [4/10], Step [391/391], Loss: 0.7603
Accuracy of the network on the 5000 validation images: 66.54 %
Epoch [5/10], Step [391/391], Loss: 0.5502
Accuracy of the network on the 5000 validation images: 67.98 %
Epoch [6/10], Step [391/391], Loss: 0.4132
Accuracy of the network on the 5000 validation images: 66.96 %
Epoch [7/10], Step [391/391], Loss: 0.6480
Accuracy of the network on the 5000 validation images: 67.17 %
Epoch [8/10], Step [391/391], Loss: 0.3760
Accuracy of the network on the 5000 validation images: 68.14 %
Epoch [9/10], Step [391/391], Loss: 0.3465
Accuracy of the network on the 5000 validation images: 68.4 %
Epoch [10/10], Step [391/391], Loss: 0.3671
Acc

In [48]:
print("Training Loss: ", loss_train_histR26)
print("Validation Loss: ", loss_val_histR26)
print("Validation Accuracy: ", acc_val_histR26)

Training Loss:  [1.4791089508234692, 1.0870997795973287, 0.8975861095406515, 0.7602958761517654, 0.6509163328601272, 0.5467354233002724, 0.4678100241190942, 0.3884990331919297, 0.3200858277280617, 0.2683706575876002]
Validation Loss:  [1.2665251765070082, 1.0872121332566949, 1.084966207606883, 0.9801287771780279, 0.9527796371073662, 1.0477795887597, 1.0796845879735826, 1.0914713951605786, 1.0928650226774095, 1.1798695959622347]
Validation Accuracy:  [0.5456, 0.6118, 0.6257, 0.6654, 0.6798, 0.6696, 0.6717, 0.6814, 0.684, 0.6805]


In [44]:
class ResNet32(ResNet):
    def __init__(self, num_classes=10):
        super().__init__(((8, 64), (6, 128), (4, 256), (4, 512)), num_classes)

In [45]:
#Defining hyperparameters again
num_classes = 10
num_epochs = 10
batch_size = 128
learning_rate = 0.01

model = ResNet32().net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

steps = len(trainloader)



In [46]:
loss_train_histR32 = []
loss_val_histR32 = []
acc_val_histR32 = []

In [47]:
for epoch in range(num_epochs):
  tl = 0
  vl = 0
  va = 0
  for i, (images, labels) in enumerate(trainloader):
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    tl = tl + loss.item()

  tl = tl/len(trainloader)
  loss_train_histR32.append(tl)
  print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, steps, loss.item()))
  
  #Validation
  with torch.no_grad():
    accuracy = 0
    total = 0
    for images, labels in testloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)

      loss = criterion(outputs, labels)
      vl += loss.item()

      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      accuracy += (predicted == labels).sum().item()
      del images, labels, outputs

    print('Accuracy of the network on the {} validation images: {} %' .format(5000, 100*accuracy / total))
    vl = vl/len(testloader)
    loss_val_histR32.append(vl)
    va = accuracy/total
    acc_val_histR32.append(va)


Epoch [1/10], Step [391/391], Loss: 1.2615
Accuracy of the network on the 5000 validation images: 51.25 %
Epoch [2/10], Step [391/391], Loss: 1.0333
Accuracy of the network on the 5000 validation images: 56.48 %
Epoch [3/10], Step [391/391], Loss: 1.1554
Accuracy of the network on the 5000 validation images: 60.32 %
Epoch [4/10], Step [391/391], Loss: 0.7903
Accuracy of the network on the 5000 validation images: 64.55 %
Epoch [5/10], Step [391/391], Loss: 0.8654
Accuracy of the network on the 5000 validation images: 67.11 %
Epoch [6/10], Step [391/391], Loss: 0.7178
Accuracy of the network on the 5000 validation images: 67.07 %
Epoch [7/10], Step [391/391], Loss: 0.6093
Accuracy of the network on the 5000 validation images: 67.4 %
Epoch [8/10], Step [391/391], Loss: 0.5934
Accuracy of the network on the 5000 validation images: 66.46 %
Epoch [9/10], Step [391/391], Loss: 0.4350
Accuracy of the network on the 5000 validation images: 69.13 %
Epoch [10/10], Step [391/391], Loss: 0.4881
Acc

In [49]:
print("Training Loss: ", loss_train_histR32)
print("Validation Loss: ", loss_val_histR32)
print("Validation Accuracy: ", acc_val_histR32)

Training Loss:  [1.569885929222302, 1.1647049842588126, 0.9661072058141079, 0.8190585077571138, 0.7094748987413734, 0.6108789835744501, 0.520918270358649, 0.4359185559213009, 0.3718055216476436, 0.3178015544134028]
Validation Loss:  [1.3395561462716212, 1.2234503815445719, 1.118164963360074, 1.0259436761276632, 0.9712043208411977, 0.9755441418176964, 1.0267728635027438, 1.1149966807305058, 1.063697961312306, 1.2335963513277755]
Validation Accuracy:  [0.5125, 0.5648, 0.6032, 0.6455, 0.6711, 0.6707, 0.674, 0.6646, 0.6913, 0.6615]
