In [None]:
# import necessary dependencies
import argparse
import os, sys
import time
import datetime
from tqdm import tqdm_notebook as tqdm
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.nn as nn
import torch.optim as optim

import matplotlib.pyplot as plt

import random

In [None]:
def set_all_seeds(RANDOM_SEED):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (``torch.manual_seed`` plus ``torch.cuda.manual_seed_all``), and sets the
    cuDNN flags ``deterministic=True`` / ``benchmark=False``.

    Args:
        RANDOM_SEED: integer seed handed unchanged to every seeding call.
    """
    # The cuDNN flags are independent of the seeding calls, so set them first.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Same seeding order as before: python -> numpy -> torch -> torch CUDA.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(RANDOM_SEED)


set_all_seeds(42)

In [None]:
class SimpleCIFAR10Classifier(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    ``fc1`` expects 16*6*6 flattened features, which is what a 32x32 input
    yields: 32 -> 28 (5x5 conv) -> 14 (pool) -> 12 (3x3 conv) -> 6 (pool).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that seeded
        # parameter initialisation is unchanged.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Map an image batch to raw class logits (one row of 10 per sample)."""
        # Convolutional feature extractor: conv -> ReLU -> 2x2 max-pool, twice.
        feats = F.max_pool2d(F.relu(self.conv1(x)), 2)
        feats = F.max_pool2d(F.relu(self.conv2(feats)), 2)

        # Flatten everything except the batch dimension.
        flat = feats.view(feats.size(0), -1)

        # Fully connected head; the final layer returns logits (no softmax).
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
# useful libraries
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# ToTensor converts images to float tensors; Normalize then applies
# (x - 0.5) / 0.5 to each of the three channels.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# adjust batch size to your need
batch_size = 128

DATA_ROOT = './data'
NUM_WORKERS = 2
# Dedicated seed for the val/test split so that re-running this cell (or
# running it after other code has consumed random numbers) yields the same split.
SPLIT_SEED = 42

trainset = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True,
                                        download=True, transform=transform)
train_loader = DataLoader(trainset, batch_size=batch_size,
                          shuffle=True, num_workers=NUM_WORKERS)

# The official test split is divided 50/50 into validation and test subsets.
# The full set gets its own name so `testset` always means the held-out half.
testset_full = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False,
                                            download=True, transform=transform)
val_size = int(0.5 * len(testset_full))
test_size = len(testset_full) - val_size
valset, testset = torch.utils.data.random_split(
    testset_full, [val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED))

# Evaluation loaders are not shuffled: order does not affect the metrics.
val_loader = DataLoader(valset, batch_size=batch_size,
                        shuffle=False, num_workers=NUM_WORKERS)

test_loader = DataLoader(testset, batch_size=batch_size,
                         shuffle=False, num_workers=NUM_WORKERS)

print(len(trainset), len(valset), len(testset))

# (a)

In [None]:
# Baseline training (no regularization): SGD with momentum, validating after
# every epoch and checkpointing the weights with the best validation accuracy.

# Fall back to CPU so the cell still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = SimpleCIFAR10Classifier().to(device)
INITIAL_LR = 0.01
MOMENTUM = 0.9
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=INITIAL_LR, momentum=MOMENTUM)

EPOCHS = 30
CHECKPOINT_FOLDER = "./saved_model"
best_val_acc = 0

# Training Loop
# Per-epoch average losses, kept for plotting the loss curves later.
train_losses = []
val_losses = []
for i in range(0, EPOCHS):

    net.train()

    print("Epoch %d:" %i)

    total_examples = 0
    correct_examples = 0

    # Record training loss (sum of per-batch mean losses)
    train_loss = 0

    # Looping through training loader
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # Send input and target to device
        inputs, targets = inputs.to(device), targets.to(device)

        # compute the model output logits and training loss
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        # back propagation & optimizer parameter update
        optimizer.zero_grad()  # gradients accumulate by default, so clear them first
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar so the autograd graph is not kept alive
        train_loss += loss.item()

        # calculate predictions (index of the largest logit per sample)
        predictions = outputs.argmax(dim=1)
        correct_examples += (predictions == targets).sum().item()
        total_examples += inputs.shape[0]

    # calculate average training loss and accuracy
    avg_loss = train_loss / len(train_loader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))
    train_losses.append(avg_loss)


    # Evaluate the validation set performance
    net.eval()

    total_examples = 0
    correct_examples = 0

    # Record validation loss
    val_loss = 0

    # disable gradient during validation, which can save GPU memory
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            # Send input and target to device
            inputs, targets = inputs.to(device), targets.to(device)

            # compute the model output logits and validation loss
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()

            # count the number of correctly predicted samples in the current batch
            predictions = outputs.argmax(dim=1)
            correct_examples += (predictions == targets).sum().item()
            total_examples += inputs.shape[0]

    # calculate average validation loss and accuracy
    avg_loss = val_loss / len(val_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))
    val_losses.append(avg_loss)

    # save the model checkpoint whenever validation accuracy improves
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc

        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {'state_dict': net.state_dict(),
                'epoch': i,
                # read the learning rate from the optimizer itself
                'lr': optimizer.param_groups[0]['lr']}
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'best_model.bin'))

    print('')

print(f"Best validation accuracy: {best_val_acc:.4f}")

# (b)

In [None]:
##################
# YOUR CODE HERE #
##################

# load trained model weight

In [None]:
##################
# YOUR CODE HERE #
##################

# write another loop to evaluate trained model performance on the test split

In [1]:
##################
# YOUR CODE HERE #
##################

# visualize model loss curves (both training and validation)

# (c)

### L1 Regularization

In [None]:
# Training with L1 regularization: the sum of absolute parameter values,
# scaled by REG, is added to the cross-entropy loss before backpropagation.

# Fall back to CPU so the cell still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

INITIAL_LR = 0.01
net = SimpleCIFAR10Classifier().to(device)
MOMENTUM = 0.9
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=INITIAL_LR, momentum=MOMENTUM)

EPOCHS = 30
CHECKPOINT_FOLDER = "./saved_model"

# set your own L1 regularization weight
# (1e-4 is only a starting point; tune it against validation accuracy)
REG = 1e-4

best_val_acc = 0

# Per-epoch average losses for this run, kept for plotting the loss curves.
train_losses = []
val_losses = []

for i in range(EPOCHS):
    # ---- training ----
    net.train()
    print("Epoch %d:" % i)

    total_examples = 0
    correct_examples = 0
    train_loss = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = net(inputs)
        data_loss = criterion(outputs, targets)

        # L1 penalty over every trainable parameter (weights and biases).
        l1_penalty = sum(param.abs().sum() for param in net.parameters())
        loss = data_loss + REG * l1_penalty

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log the cross-entropy term only, so the training curve stays
        # comparable with the (unpenalized) validation loss.
        train_loss += data_loss.item()

        predictions = outputs.argmax(dim=1)
        correct_examples += (predictions == targets).sum().item()
        total_examples += inputs.shape[0]

    avg_loss = train_loss / len(train_loader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" % (avg_loss, avg_acc))
    train_losses.append(avg_loss)

    # ---- validation ----
    net.eval()

    total_examples = 0
    correct_examples = 0
    val_loss = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = net(inputs)
            val_loss += criterion(outputs, targets).item()

            predictions = outputs.argmax(dim=1)
            correct_examples += (predictions == targets).sum().item()
            total_examples += inputs.shape[0]

    avg_loss = val_loss / len(val_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))
    val_losses.append(avg_loss)

    # ---- checkpoint the best model so far ----
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc

        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {'state_dict': net.state_dict(),
                 'epoch': i,
                 'lr': optimizer.param_groups[0]['lr']}
        # Separate file name so this run does not overwrite other checkpoints.
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'best_model_l1.bin'))

    print('')

print(f"Best validation accuracy: {best_val_acc:.4f}")

In [None]:
##################
# YOUR CODE HERE #
##################

# load trained model weight

In [None]:
##################
# YOUR CODE HERE #
##################

# write another loop to evaluate trained model performance on the test split

In [None]:
##################
# YOUR CODE HERE #
##################

# visualize model loss curves (both training and validation)

### Visualize the model weights

In [None]:
import matplotlib.pyplot as plt

# Plot one histogram per conv / fully connected layer of the current `net`.
for name, module in net.named_modules():
    if 'conv' in name or 'fc' in name:
        # extract weight from layers: detach from autograd, move to host
        # memory and flatten to 1-D so plt.hist bins every weight together
        weight = module.weight.detach().cpu().numpy().ravel()

        # Visualize the weights
        _ = plt.hist(weight, bins=20)
        plt.title("Weight histogram of layer "+name)
        plt.xlabel("Weight value")
        plt.ylabel("Count")
        plt.show()

# (d)

### L2 Regularization

In [None]:
# Training with L2 regularization, applied through the optimizer's
# `weight_decay` argument rather than an explicit term in the loss.

# Fall back to CPU so the cell still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

INITIAL_LR = 0.01
net = SimpleCIFAR10Classifier().to(device)
MOMENTUM = 0.9
# set your own L2 regularization weight
# (1e-3 is only a starting point; tune it against validation accuracy)
REG = 1e-3
criterion = nn.CrossEntropyLoss()
EPOCHS = 30
CHECKPOINT_FOLDER = "./saved_model"

optimizer = optim.SGD(net.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)

best_val_acc = 0

# Per-epoch average losses for this run, kept for plotting the loss curves.
train_losses = []
val_losses = []

for i in range(EPOCHS):
    # ---- training ----
    net.train()
    print("Epoch %d:" % i)

    total_examples = 0
    correct_examples = 0
    train_loss = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = net(inputs)
        # The penalty is applied inside optimizer.step() via weight_decay,
        # so the loss here is the plain cross-entropy.
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        predictions = outputs.argmax(dim=1)
        correct_examples += (predictions == targets).sum().item()
        total_examples += inputs.shape[0]

    avg_loss = train_loss / len(train_loader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" % (avg_loss, avg_acc))
    train_losses.append(avg_loss)

    # ---- validation ----
    net.eval()

    total_examples = 0
    correct_examples = 0
    val_loss = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = net(inputs)
            val_loss += criterion(outputs, targets).item()

            predictions = outputs.argmax(dim=1)
            correct_examples += (predictions == targets).sum().item()
            total_examples += inputs.shape[0]

    avg_loss = val_loss / len(val_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))
    val_losses.append(avg_loss)

    # ---- checkpoint the best model so far ----
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc

        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {'state_dict': net.state_dict(),
                 'epoch': i,
                 'lr': optimizer.param_groups[0]['lr']}
        # Separate file name so this run does not overwrite other checkpoints.
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'best_model_l2.bin'))

    print('')

print(f"Best validation accuracy: {best_val_acc:.4f}")

In [None]:
##################
# YOUR CODE HERE #
##################

# load trained model weight

In [None]:
##################
# YOUR CODE HERE #
##################

# write another loop to evaluate trained model performance on the test split

In [None]:
##################
# YOUR CODE HERE #
##################

# visualize model loss curves (both training and validation)

### Visualize the model weights

In [None]:
import matplotlib.pyplot as plt

# Plot one histogram per conv / fully connected layer of the current `net`.
for name, module in net.named_modules():
    if 'conv' in name or 'fc' in name:
        # extract weight from layers: detach from autograd, move to host
        # memory and flatten to 1-D so plt.hist bins every weight together
        weight = module.weight.detach().cpu().numpy().ravel()

        # Visualize the weights
        _ = plt.hist(weight, bins=20)
        plt.title("Weight histogram of layer "+name)
        plt.xlabel("Weight value")
        plt.ylabel("Count")
        plt.show()

# (e)

In [None]:
# comment on the differences between L1 and L2 regularization. 