In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import collections
import datetime
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

#setting device type
device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))
print(f"Training on device {device}.")

Training on device cuda.


In [2]:
#Setting up the data needed
class_names = ['airplane','automobile','bird','cat','deer',
'dog','frog','horse','ship','truck']
#getting data
data_path="C:/Users/rosam/OneDrive/Desktop/cifar-10-batches-py"
tensor_cifar10 = datasets.CIFAR10(data_path, train=True, download=True, 
                                  transform=transforms.Compose([
                                                                transforms.ToTensor(),
                                                                transforms.Normalize((0.4915, 0.4823, 0.4468), (0.2470, 0.2435, 0.2616))]))
imgs = torch.stack([img_t for img_t, _ in tensor_cifar10], dim=3)
#imgs.view(3, -1).mean(dim=1) => tensor([0.4914, 0.4822, 0.4465])
#imgs.view(3, -1).std(dim=1) => tensor([0.2470, 0.2435, 0.2616])

tensor_cifar10Val = datasets.CIFAR10(data_path, train=False, download=False, 
                                     transform=transforms.Compose([
                                                                   transforms.ToTensor(),
                                                                   transforms.Normalize((0.4942, 0.4851, 0.4504), (0.2467, 0.2429, 0.2616))]))
imgsV = torch.stack([img_t for img_t, _ in tensor_cifar10Val], dim=3)
#imgsV.view(3, -1).mean(dim=1) => tensor([0.4942, 0.4851, 0.4504])
#imgsV.view(3, -1).std(dim=1) => tensor([0.2467, 0.2429, 0.2616])


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to C:/Users/rosam/OneDrive/Desktop/cifar-10-batches-py/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting C:/Users/rosam/OneDrive/Desktop/cifar-10-batches-py/cifar-10-python.tar.gz to C:/Users/rosam/OneDrive/Desktop/cifar-10-batches-py


In [3]:
#Defining training loop
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    training_start_time = time.time()
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))
    print('Training finished, took {:.2f}s'.format(time.time() - training_start_time))

#Defining function to validate accuracy
def validate(model, train_loader, val_loader):
    accdict = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <1>
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        print("Accuracy {}: {:.2f}".format(name , correct / total))
        accdict[name] = correct / total
    return accdict

In [4]:
#Setting up Dataloader
#used for training, shuffle and oraganize data in minibatches
train_loader = torch.utils.data.DataLoader(tensor_cifar10, batch_size=64,shuffle=True)
#used for accuracy measurement
val_loader = torch.utils.data.DataLoader(tensor_cifar10Val, batch_size=64, shuffle=False)
acc_train_loader = torch.utils.data.DataLoader(tensor_cifar10, batch_size=64, shuffle=False)

In [None]:
#Problem 1
#Defineing model for Problem 1a
#num items in dataset=50000
#input 32 × 32 × 3: 3,072
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1) #3x16x3x3
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1) #16x8x3x3
        self.fc1 = nn.Linear(8 * 8 * 8, 32) #8x8x8x32
        self.fc2 = nn.Linear(32, 10) #32x2
        
    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = out.view(-1, 8 * 8 * 8)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

model = Net().to(device=device)
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(18354, [432, 16, 1152, 8, 16384, 32, 320, 10])

In [None]:
#training model for 1a
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-28 20:00:34.111138 Epoch 1, Training loss 2.037649637597906
2022-03-28 20:02:59.528904 Epoch 10, Training loss 1.219499758289903
2022-03-28 20:05:41.408326 Epoch 20, Training loss 1.0314267439305629
2022-03-28 20:08:23.926600 Epoch 30, Training loss 0.9454076143024522
2022-03-28 20:11:05.828766 Epoch 40, Training loss 0.8827333868769429
2022-03-28 20:13:47.833637 Epoch 50, Training loss 0.8358079930553046
2022-03-28 20:16:29.723876 Epoch 60, Training loss 0.800618650625124
2022-03-28 20:19:11.541624 Epoch 70, Training loss 0.7716244308235091
2022-03-28 20:21:53.670740 Epoch 80, Training loss 0.7481452699207589
2022-03-28 20:24:35.477998 Epoch 90, Training loss 0.7264577435410541
2022-03-28 20:27:17.204272 Epoch 100, Training loss 0.709950475512868
2022-03-28 20:29:59.024475 Epoch 110, Training loss 0.6931796381845499
2022-03-28 20:32:40.918900 Epoch 120, Training loss 0.6792315020585609
2022-03-28 20:35:22.580519 Epoch 130, Training loss 0.6654227005551233
2022-03-28 20:38:04.2

In [None]:
#Checking accuracy
validate(model,  acc_train_loader, val_loader)

Accuracy train: 0.82
Accuracy val: 0.61


{'train': 0.81714, 'val': 0.6067}

In [None]:
#Defineing model for Problem 1b
#adding one more additional convolution layer followed by an activation function and pooling function
class Net2(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1) #3x16x3x3
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1) #16x8x3x3
        self.conv3= nn.Conv2d(8, 4, kernel_size=3, padding=1) #8x4x3x3
        self.fc1 = nn.Linear(4 * 4 * 4, 32) #4x4x4x32
        self.fc2 = nn.Linear(32, 10) #32x2
        
    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = F.max_pool2d(torch.tanh(self.conv3(out)), 2)
        out = out.view(-1, 4 * 4 * 4)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

model = Net2().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-28 23:44:59.857119 Epoch 1, Training loss 2.2378833370135567
2022-03-28 23:49:51.624658 Epoch 10, Training loss 1.4104722114780066
2022-03-28 23:55:18.941314 Epoch 20, Training loss 1.23311075751129
2022-03-29 00:00:46.131594 Epoch 30, Training loss 1.137078295735752
2022-03-29 00:06:09.797377 Epoch 40, Training loss 1.076214544913348
2022-03-29 00:11:33.748397 Epoch 50, Training loss 1.0381936994202607
2022-03-29 00:16:59.196022 Epoch 60, Training loss 1.012745030700703
2022-03-29 00:22:23.622945 Epoch 70, Training loss 0.994526516522288
2022-03-29 00:27:47.650065 Epoch 80, Training loss 0.9775165048096796
2022-03-29 00:33:12.822729 Epoch 90, Training loss 0.9680480797729833
2022-03-29 00:38:36.914658 Epoch 100, Training loss 0.955812995619786
2022-03-29 00:44:02.114940 Epoch 110, Training loss 0.9474385941729826
2022-03-29 00:49:27.557628 Epoch 120, Training loss 0.9435525353607315
2022-03-29 00:54:52.458864 Epoch 130, Training loss 0.9341267847344089
2022-03-29 01:00:18.0106

In [None]:
validate(model,  acc_train_loader, val_loader)

Accuracy train: 0.69
Accuracy val: 0.65


{'train': 0.68948, 'val': 0.6514}

In [None]:
#size of model for 1b
model = Net2().to(device=device)
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(4310, [432, 16, 1152, 8, 288, 4, 2048, 32, 320, 10])

a. Build a ResNet based Convolutional Neural Network, like what we built in lectures (with skip connections), to classify the images across all 10 classes in CIFAR 10. For this problem, let’s use 10 blocks for ResNet and call it ResNet-10. Use the similar dimensions and channels as we need in lectures. Train your network for 300 epochs. Report your training time, training loss, and evaluation accuracy after 300 epochs. Analyze your results in your report and compare them against problem 1.b on training time, achieved accuracy, and model size. Make sure to submit your code by providing the GitHub URL of your course repository for this course.

b. Develop three additional trainings and evaluations for your ResNet-10 to assess the impacts of regularization to your ResNet-10.
*   Weight Decay with lambda of 0.001
*   Dropout with p=0.3
*   Batch Normalization

Report and compare your training time, training loss, and evaluation accuracy after 300 epochs across these three different trainings. Analyze your results in your report and compare them against problem 1.a on training time, achieved accuracy.

In [5]:
#Problem 2
#Defining model for 2a
#ResNet block
class ResBlock(nn.Module):
    def __init__(self, n_chans):
        super(ResBlock, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3, padding=1, bias=False)
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
        torch.nn.init.kaiming_normal_(self.conv.weight, nonlinearity='relu')
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

#ResNet model
class ResNet10(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(*(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out


model = ResNet10(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(76074, [864, 32, 9216, 32, 32, 65536, 32, 320, 10])

In [None]:
#Training model
training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

In [None]:
#ResNet model for adding dropout
class ResNet10D(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_dropout = nn.Dropout2d(p=0.3)
        self.resblocks = nn.Sequential(*(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.conv2_dropout = nn.Dropout2d(p=0.3)
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.conv1_dropout(out)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = self.conv2_dropout(out)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

        
model = ResNet10D(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

#Training model
training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-30 10:55:09.385066 Epoch 1, Training loss 2.2135498814875514
2022-03-30 11:01:14.435248 Epoch 10, Training loss 1.4941980697004997
2022-03-30 11:07:34.511921 Epoch 20, Training loss 1.3449654826117903
2022-03-30 11:13:41.352108 Epoch 30, Training loss 1.2511763698457148
2022-03-30 11:19:47.541712 Epoch 40, Training loss 1.187286528632464
2022-03-30 11:25:54.821187 Epoch 50, Training loss 1.134730988832386
2022-03-30 11:32:02.625592 Epoch 60, Training loss 1.0996057735684583
2022-03-30 11:38:11.773637 Epoch 70, Training loss 1.0700059938613715
2022-03-30 11:44:18.092775 Epoch 80, Training loss 1.0463382771710301
2022-03-30 11:50:23.605515 Epoch 90, Training loss 1.0219793007959186
2022-03-30 11:56:25.491374 Epoch 100, Training loss 1.0015018986314155
2022-03-30 12:02:26.740992 Epoch 110, Training loss 0.987823931609883
2022-03-30 12:08:28.021304 Epoch 120, Training loss 0.974506317349651
2022-03-30 12:14:27.057263 Epoch 130, Training loss 0.9602330915458367
2022-03-30 12:20:30.0

In [None]:
validate(model,  acc_train_loader, val_loader)

Accuracy train: 0.70
Accuracy val: 0.64


{'train': 0.70408, 'val': 0.6355}

In [None]:
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(76074, [864, 32, 9216, 32, 32, 65536, 32, 320, 10])

In [6]:
#ResNet model for adding batch normalization
class ResNet10BN(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chans1)
        self.resblocks = nn.Sequential(*(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = self.conv1_batchnorm(self.conv1(x))
        out = F.max_pool2d(torch.relu(out), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

model = ResNet10BN(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(76138, [864, 32, 32, 32, 9216, 32, 32, 65536, 32, 320, 10])

In [None]:
#Training model
training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-30 17:28:19.998771 Epoch 1, Training loss 1.8615856202667023
2022-03-30 17:34:42.429652 Epoch 10, Training loss 1.0351223790889506
2022-03-30 17:41:42.935345 Epoch 20, Training loss 0.8582049342601196
2022-03-30 17:48:41.714296 Epoch 30, Training loss 0.750310875799345
2022-03-30 17:55:45.754845 Epoch 40, Training loss 0.6663046671301508
2022-03-30 18:02:50.534087 Epoch 50, Training loss 0.5943967440853948
2022-03-30 18:09:49.959882 Epoch 60, Training loss 0.5321763424998354
2022-03-30 18:16:49.257867 Epoch 70, Training loss 0.47591287890434875
2022-03-30 18:23:48.882848 Epoch 80, Training loss 0.4195062935619098
2022-03-30 18:30:47.592010 Epoch 90, Training loss 0.37314048491399304
2022-03-30 18:37:46.155392 Epoch 100, Training loss 0.327264652218279
2022-03-30 18:44:45.462162 Epoch 110, Training loss 0.2852976848097408
2022-03-30 18:51:46.651340 Epoch 120, Training loss 0.24660135284447304
2022-03-30 18:58:52.855944 Epoch 130, Training loss 0.21510835833218703
2022-03-30 19:0

In [None]:
#ResNet training for adding weight decay
#Use ResNet10 model
def training_loop_WD(n_epochs, optimizer, model, loss_fn,
                        train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            l2_lambda = 0.001
            l2_norm = sum(p.pow(2.0).sum()
                          for p in model.parameters())  # <1>
            loss = loss + l2_lambda * l2_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))
            
model =  ResNet10(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
#Size of model
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(76074, [864, 32, 9216, 32, 32, 65536, 32, 320, 10])

In [None]:
training_loop_WD(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)