In [2]:
import imageio
import torch
from PIL import Image
from torchvision import transforms
import collections
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from torchvision import datasets
data_path = '../data-unversioned/p1ch7/'
torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

<torch._C.Generator at 0x7fa0be89b8f0>

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')

device

device(type='cuda')

In [4]:
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,

transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4915,0.4823,0.4468),
                          (0.2470,0.2435,0.2616))
]))

cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=True,

transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4915,0.4823,0.4468),
                          (0.2470,0.2435,0.2616))
]))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data-unversioned/p1ch7/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ../data-unversioned/p1ch7/cifar-10-python.tar.gz to ../data-unversioned/p1ch7/
Files already downloaded and verified


In [5]:
import datetime
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
  for epoch in range(1, n_epochs + 1):
    loss_train = 0.0
    for imgs, labels in train_loader:
      imgs = imgs.to(device=device)
      labels = labels.to(device=device)
      outputs = model(imgs)
      loss = loss_fn(outputs, labels)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      loss_train += loss.item()
    print('{} Epoch {}, Training loss {}'.format(
    datetime.datetime.now(), epoch,
    loss_train / len(train_loader)))

In [6]:
def validate(model, train_loader, val_loader):
  for name, loader in [("train", train_loader), ("val", val_loader)]:
    correct = 0
    total = 0
    with torch.no_grad():
      for imgs, labels in loader:
          imgs, labels = imgs.to(device), labels.to(device)
          batch_size = imgs.shape[0]
          outputs = model(imgs)
          _, predicted = torch.max(outputs, dim=1)
          total += labels.shape[0]
          correct += int((predicted == labels).sum())
    print("Accuracy {}: {:.2f}".format(name , correct / total))

In [None]:
##############################################################################
############################## Problem 1 Part 1 ##############################
##############################################################################

In [None]:
class Net(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
    self.act1 = nn.Tanh()
    self.pool1 = nn.MaxPool2d(2)
    self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
    self.act2 = nn.Tanh()
    self.pool2 = nn.MaxPool2d(2)
    self.fc1 = nn.Linear(8 * 8 * 8, 32)
    self.act3 = nn.Tanh()
    self.fc2 = nn.Linear(32, 10)

  def forward(self, x):
    out = self.pool1(self.act1(self.conv1(x)))
    out = self.pool2(self.act2(self.conv2(out)))
    out = out.view(-1, 8 * 8 * 8)
    out = self.act3(self.fc1(out))
    out = self.fc2(out)
    return out

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=True)
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()
training_loop(
  n_epochs = 300,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader,
)

2022-03-29 14:05:30.011782 Epoch 1, Training loss 2.02721984261442
2022-03-29 14:05:40.087972 Epoch 2, Training loss 1.7497853926380578
2022-03-29 14:05:50.107808 Epoch 3, Training loss 1.5772642540504864
2022-03-29 14:05:59.863208 Epoch 4, Training loss 1.4792075047407613
2022-03-29 14:06:09.793509 Epoch 5, Training loss 1.4073018411846112
2022-03-29 14:06:19.359287 Epoch 6, Training loss 1.3408892368111769
2022-03-29 14:06:28.925203 Epoch 7, Training loss 1.2817198612805827
2022-03-29 14:06:38.259927 Epoch 8, Training loss 1.2311831129633861
2022-03-29 14:06:48.023221 Epoch 9, Training loss 1.1906220897689195
2022-03-29 14:06:57.645081 Epoch 10, Training loss 1.157154878691944
2022-03-29 14:07:07.445828 Epoch 11, Training loss 1.128880636313992
2022-03-29 14:07:17.228615 Epoch 12, Training loss 1.1048525478833777
2022-03-29 14:07:26.969562 Epoch 13, Training loss 1.0835032950124472
2022-03-29 14:07:36.476041 Epoch 14, Training loss 1.0672177808059147
2022-03-29 14:07:46.004019 Epoch 

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
validate(model, train_loader, val_loader)

Accuracy train: 0.81
Accuracy val: 0.62


In [None]:
##############################################################################
############################## Problem 1 Part 2 ##############################
##############################################################################

In [None]:
class Net2(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
    self.act1 = nn.Tanh()
    self.pool1 = nn.MaxPool2d(2)
    self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
    self.act2 = nn.Tanh()
    self.pool2 = nn.MaxPool2d(2)
    self.conv3 = nn.Conv2d(8, 4, kernel_size=3, padding=1)
    self.act3 = nn.Tanh()
    self.pool3 = nn.MaxPool2d(2)
    self.fc1 = nn.Linear(4 * 4 * 4, 32)
    self.act3 = nn.Tanh()
    self.fc2 = nn.Linear(32, 10)

  def forward(self, x):
    out = self.pool1(self.act1(self.conv1(x)))
    out = self.pool2(self.act2(self.conv2(out)))
    out = self.pool3(self.act3(self.conv3(out)))
    out = out.view(-1, 4 * 4 * 4)
    out = self.act3(self.fc1(out))
    out = self.fc2(out)
    return out

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=True)
model2 = Net2().to(device=device)
optimizer = optim.SGD(model2.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()
training_loop(
  n_epochs = 300,
  optimizer = optimizer,
  model = model2,
  loss_fn = loss_fn,
  train_loader = train_loader,
)

2022-03-29 15:04:07.458859 Epoch 1, Training loss 2.1719703678889655
2022-03-29 15:04:17.448987 Epoch 2, Training loss 1.9653784640304877
2022-03-29 15:04:27.206111 Epoch 3, Training loss 1.8669427848228104
2022-03-29 15:04:37.033651 Epoch 4, Training loss 1.7318873138683837
2022-03-29 15:04:47.020100 Epoch 5, Training loss 1.6289830893811668
2022-03-29 15:04:56.900992 Epoch 6, Training loss 1.5667944687711612
2022-03-29 15:05:06.923919 Epoch 7, Training loss 1.5080518550275233
2022-03-29 15:05:16.907148 Epoch 8, Training loss 1.4609321005204146
2022-03-29 15:05:26.959439 Epoch 9, Training loss 1.420656356512738
2022-03-29 15:05:36.942796 Epoch 10, Training loss 1.390820705143692
2022-03-29 15:05:46.687221 Epoch 11, Training loss 1.366698873012572
2022-03-29 15:05:56.513970 Epoch 12, Training loss 1.3431210291507605
2022-03-29 15:06:06.577681 Epoch 13, Training loss 1.323814850100471
2022-03-29 15:06:16.688604 Epoch 14, Training loss 1.3040657347577918
2022-03-29 15:06:26.639626 Epoch 

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
validate(model2, train_loader, val_loader)

Accuracy train: 0.68
Accuracy val: 0.64


In [None]:
###############################################################################
################################## Problem 2 ##################################
###############################################################################

In [None]:
###############################################################################
################################## ResNet-10 ##################################
###############################################################################

In [7]:
all_acc_dict = collections.OrderedDict()

In [8]:
class ResBlock(nn.Module):
    def __init__(self, n_chans):
        super(ResBlock, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3,
                              padding=1, bias=False)  # <1>
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
        torch.nn.init.kaiming_normal_(self.conv.weight,
                                      nonlinearity='relu')  # <2>
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

In [9]:
class NetResDeep(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=True)
model3 = NetResDeep(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model3.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model3,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-29 21:38:44.193684 Epoch 1, Training loss 1.658337791252624
2022-03-29 21:39:02.534176 Epoch 2, Training loss 1.3571581625572555
2022-03-29 21:39:20.699279 Epoch 3, Training loss 1.2195725649823923
2022-03-29 21:39:39.476839 Epoch 4, Training loss 1.1272219763997267
2022-03-29 21:39:58.298072 Epoch 5, Training loss 1.061702290459362
2022-03-29 21:40:17.279771 Epoch 6, Training loss 1.0111957640599107
2022-03-29 21:40:36.270593 Epoch 7, Training loss 0.9623677023231526
2022-03-29 21:40:54.765563 Epoch 8, Training loss 0.923695315256753
2022-03-29 21:41:15.184420 Epoch 9, Training loss 0.890067790780226
2022-03-29 21:41:37.911143 Epoch 10, Training loss 0.862966438922126
2022-03-29 21:42:00.051882 Epoch 11, Training loss 0.8361911259953628
2022-03-29 21:42:18.858741 Epoch 12, Training loss 0.8104548426464085
2022-03-29 21:42:37.747499 Epoch 13, Training loss 0.7847263403332142
2022-03-29 21:42:56.658348 Epoch 14, Training loss 0.7658165612870165
2022-03-29 21:43:17.108037 Epoch 1

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
all_acc_dict["ResNet-10"] = validate(model3, train_loader, val_loader)

Accuracy train: 1.00
Accuracy val: 0.66


In [None]:
##############################################################################
################################ Weight Decay ################################
##############################################################################

In [16]:
def training_loop_l2reg(n_epochs, optimizer, model, loss_fn,
                        train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            l2_lambda = 0.001
            l2_norm = sum(p.pow(2.0).sum()
                          for p in model.parameters())  # <1>
            loss = loss + l2_lambda * l2_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=True)
model4 = NetResDeep(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model4.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop_l2reg(
    n_epochs = 300,
    optimizer = optimizer,
    model = model4,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-29 18:39:03.292240 Epoch 1, Training loss 1.744786892095795
2022-03-29 18:41:48.850814 Epoch 10, Training loss 0.978222859180187
2022-03-29 18:44:57.207363 Epoch 20, Training loss 0.7765586514530889
2022-03-29 18:48:08.945988 Epoch 30, Training loss 0.6537537234823417
2022-03-29 18:51:22.857520 Epoch 40, Training loss 0.5563301177662047
2022-03-29 18:54:38.924277 Epoch 50, Training loss 0.48478534661443035
2022-03-29 18:57:55.105954 Epoch 60, Training loss 0.4269886523904398
2022-03-29 19:01:11.653334 Epoch 70, Training loss 0.38006199002647034
2022-03-29 19:04:27.492654 Epoch 80, Training loss 0.344297031860065
2022-03-29 19:07:44.435463 Epoch 90, Training loss 0.3161183579460434
2022-03-29 19:10:59.820952 Epoch 100, Training loss 0.2908612041331618
2022-03-29 19:14:16.209084 Epoch 110, Training loss 0.2808811947932024
2022-03-29 19:17:33.605346 Epoch 120, Training loss 0.27060127115386834
2022-03-29 19:20:49.760146 Epoch 130, Training loss 0.2374589684254983
2022-03-29 19:24:

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
all_acc_dict["ResNet-10 Weight Decay"] = validate(model4, train_loader, val_loader)

Accuracy train: 0.94
Accuracy val: 0.65


In [None]:
##############################################################################
################################## Drop out ##################################
##############################################################################

In [10]:
class NetResDeep(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_dropout = nn.Dropout2d(p=0.3)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.conv2_dropout = nn.Dropout2d(p=0.3)
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.conv1_dropout(out)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = self.conv2_dropout(out)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [12]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=True)
model5 = NetResDeep(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model5.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model5,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-30 18:40:44.608581 Epoch 1, Training loss 1.9623859780828665
2022-03-30 18:40:58.607080 Epoch 2, Training loss 1.6857953951181963
2022-03-30 18:41:12.470486 Epoch 3, Training loss 1.5873234758291708
2022-03-30 18:41:26.322761 Epoch 4, Training loss 1.5200826545505572
2022-03-30 18:41:40.095934 Epoch 5, Training loss 1.4730018250777592
2022-03-30 18:41:54.026551 Epoch 6, Training loss 1.426801205900929
2022-03-30 18:42:08.018490 Epoch 7, Training loss 1.3862354458140596
2022-03-30 18:42:21.776041 Epoch 8, Training loss 1.3533177811014072
2022-03-30 18:42:35.579975 Epoch 9, Training loss 1.327164780377122
2022-03-30 18:42:49.513873 Epoch 10, Training loss 1.302169436002936
2022-03-30 18:43:03.286723 Epoch 11, Training loss 1.270544777669565
2022-03-30 18:43:17.041397 Epoch 12, Training loss 1.25134338053596
2022-03-30 18:43:31.036857 Epoch 13, Training loss 1.236553414093564
2022-03-30 18:43:44.864580 Epoch 14, Training loss 1.2181026420324965
2022-03-30 18:43:58.628264 Epoch 15,

In [13]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
all_acc_dict["ResNet-10 Weight Decay"] = validate(model5, train_loader, val_loader)

Accuracy train: 0.76
Accuracy val: 0.67
