# Computer Vision Homework 3: Big vs Small Models

## Brief

Due date: Nov 16, 2022

Required files: `homework-3.ipynb`, `report.pdf`

To download the Jupyter notebook from Colab, refer to the Colab tutorial we provided.


## Code for Problem 1 and Problem 2

### Import Packages

In [None]:
import glob
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from PIL import Image
from torch.utils.data import DataLoader, Dataset, RandomSampler
from torchvision import transforms, models, datasets
from tqdm import tqdm

%matplotlib inline

### Check GPU Environment

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using {device} device')

In [None]:
! nvidia-smi -L

### Set the Seed to Reproduce the Result

In [None]:
def set_all_seed(seed):
    """Seed the NumPy, Python `random`, and PyTorch generators with `seed`."""
    # Seeding order: NumPy first, then `random`, then PyTorch.
    for seed_fn in (np.random.seed, random.seed, torch.manual_seed):
        seed_fn(seed)


set_all_seed(123)

### Create Dataset and Dataloader

In [None]:
from torch.utils.data import SubsetRandomSampler

batch_size = 256

# Training-time augmentation: reflect-pad by 4 px, random horizontal flip,
# then a random 32x32 crop, before converting to a tensor.
train_transform = transforms.Compose([
    transforms.Pad(4, padding_mode='reflect'),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor(),
])
# Evaluation uses the images as-is (tensor conversion only).
test_transform = transforms.Compose([
    transforms.ToTensor(),
])

# `valid_dataset` is the CIFAR10 split selected by train=False.
train_dataset = datasets.CIFAR10(root='data', train=True, download=True, transform=train_transform)
valid_dataset = datasets.CIFAR10(root='data', train=False, download=True, transform=test_transform)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

# Fix the reduced training sets ONCE. A RandomSampler with `num_samples` draws
# a fresh random subset of the whole dataset on every epoch, so over several
# epochs the model would see far more than 1/16 (or 1/2) of the images and the
# dataset-size comparison would be meaningless. A single permutation is drawn
# here and prefixes of it are used, so the 1/16 subset is contained in the
# half subset. SubsetRandomSampler still reshuffles the order every epoch,
# but only within the fixed indices.
_subset_order = torch.randperm(len(train_dataset)).tolist()
sixteenth_train_sampler = SubsetRandomSampler(_subset_order[:len(train_dataset) // 16])
half_train_sampler = SubsetRandomSampler(_subset_order[:len(train_dataset) // 2])

sixteenth_train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=sixteenth_train_sampler, pin_memory=True)
half_train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=half_train_sampler, pin_memory=True)

### Load Models

In [None]:
# HINT: Switch between 'resnet18' and 'resnet50', and set weights="IMAGENET1K_V1"
# instead of None, when the experiment calls for it.
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', weights=None)

# Background: the stock ResNet head predicts the 1000 ImageNet classes.
# Replace it with a 10-class head. The input width is read from the existing
# head rather than hard-coded, because it differs between architectures
# (512 for resnet18, 2048 for resnet50); a wrong width fails in the forward pass.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=10, bias=True)
model = model.to(device)


### Training and Testing Models

In [None]:
# TODO: Fill in the code cell according to the pytorch tutorial we gave.
def train(dataloader, model, loss_fn, optimizer):
    """Train `model` for one epoch over `dataloader`.

    Args:
        dataloader: Iterable yielding `(X, y)` batches.
        model: Network to optimise; switched to training mode here.
        loss_fn: Callable `loss_fn(pred, y)` returning a scalar loss tensor.
        optimizer: Optimizer that updates the parameters of `model`.

    Returns:
        `(avg_epoch_loss, avg_acc)`: mean loss per batch, and the fraction of
        correctly classified samples among the samples actually iterated.
        Both are 0.0 if the dataloader yields nothing.
    """
    epoch_loss = 0
    correct = 0
    num_batches = 0
    num_samples = 0

    model.train()

    for X, y in tqdm(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        pred = pred.argmax(dim=1, keepdim=True)
        correct += pred.eq(y.view_as(pred)).sum().item()

        # Count what was actually seen: with a sampler, the dataloader covers
        # only part of `dataloader.dataset`, so len(dataset) is the wrong
        # denominator for accuracy.
        num_batches += 1
        num_samples += y.size(0)

    if num_batches == 0:
        return 0.0, 0.0

    avg_epoch_loss = epoch_loss / num_batches
    avg_acc = correct / num_samples
    return avg_epoch_loss, avg_acc

In [None]:
def test(dataloader, model, loss_fn):
    """Evaluate `model` on `dataloader` without updating any weights.

    Args:
        dataloader: Iterable yielding `(X, y)` batches.
        model: Network to evaluate; switched to evaluation mode here.
        loss_fn: Callable `loss_fn(pred, y)` returning a scalar loss tensor.

    Returns:
        `(avg_epoch_loss, avg_acc)`: mean loss per batch, and the fraction of
        correctly classified samples among the samples actually iterated.
        Both are 0.0 if the dataloader yields nothing.
    """
    epoch_loss = 0
    correct = 0
    num_batches = 0
    num_samples = 0

    model.eval()

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for X, y in tqdm(dataloader):
            X, y = X.to(device), y.to(device)

            pred = model(X)

            epoch_loss += loss_fn(pred, y).item()
            pred = pred.argmax(dim=1, keepdim=True)
            correct += pred.eq(y.view_as(pred)).sum().item()

            # Count the samples actually seen so the accuracy stays correct
            # when the dataloader covers only part of its dataset.
            num_batches += 1
            num_samples += y.size(0)

    if num_batches == 0:
        return 0.0, 0.0

    avg_epoch_loss = epoch_loss / num_batches
    avg_acc = correct / num_samples

    return avg_epoch_loss, avg_acc

In [None]:
# Train ResNet18 and ResNet50 from scratch on each training-set size, plot the
# per-epoch curves for every run, and finally compare the last-epoch test
# accuracy of the two architectures against the training-set size.
loss_fn = nn.CrossEntropyLoss()

epochs = 2

# Last-epoch test accuracy per training-set size, ordered [1/16, half, all].
big_model_acc = np.zeros(3)
small_model_acc = np.zeros(3)

# Display name -> (torch.hub entry point, array receiving the results).
model_specs = {
    'ResNet18': ('resnet18', small_model_acc),
    'ResNet50': ('resnet50', big_model_acc),
}
# Data-size label -> (training dataloader, slot in the result arrays).
data_specs = {
    'all': (train_dataloader, 2),
    'half': (half_train_dataloader, 1),
    '1/16': (sixteenth_train_dataloader, 0),
}

# savefig does not create missing directories.
os.makedirs('fig', exist_ok=True)

x = np.arange(epochs)
for m, (hub_name, acc_by_size) in model_specs.items():
    for datasize, (dataloader, slot) in data_specs.items():
        # Every (architecture, data size) run starts from a freshly
        # initialised model; otherwise later runs would continue from weights
        # already trained on another data size.
        model = torch.hub.load('pytorch/vision:v0.10.0', hub_name, weights=None)
        # Head width differs per architecture, so read it from the model.
        model.fc = nn.Linear(in_features=model.fc.in_features, out_features=10, bias=True)
        model = model.to(device)

        # The optimizer must be built from THIS model's parameters; one
        # created earlier would keep pointing at a different model and the
        # new model would never be updated.
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

        train_acc_list = []
        train_loss_list = []
        test_acc_list = []
        test_loss_list = []

        for epoch in range(epochs):
            # Train on the dataloader selected for this data size.
            train_loss, train_acc = train(dataloader, model, loss_fn, optimizer)
            test_loss, test_acc = test(valid_dataloader, model, loss_fn)
            print(f"Epoch {epoch + 1:2d}: Loss = {train_loss:.4f} Acc = {train_acc:.2f} Test_Loss = {test_loss:.4f} Test_Acc = {test_acc:.2f}")
            train_acc_list.append(train_acc)
            train_loss_list.append(train_loss)
            test_acc_list.append(test_acc)
            test_loss_list.append(test_loss)

        # Record the test accuracy of the final epoch for the summary plot.
        acc_by_size[slot] = test_acc

        # '1/16' contains a path separator, which would make savefig target a
        # non-existent subdirectory; use a filesystem-safe tag for file names.
        file_tag = f'Train {m} on {datasize} data'.replace('/', '-')

        plt.plot(x, train_acc_list, color='red', label='train acc')
        plt.plot(x, test_acc_list, '--', color='blue', label='test acc')
        plt.legend(loc='upper left')
        plt.xlabel('epoch')
        plt.ylabel('accuracy')
        plt.title(f'Accuracy (Train {m} on {datasize} data)')
        plt.savefig(f'fig/Accuracy ({file_tag}).png')
        plt.show()

        plt.plot(x, train_loss_list, color='red', label='train loss')
        plt.plot(x, test_loss_list, '--', color='blue', label='test loss')
        plt.legend(loc='upper left')
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title(f'Loss (Train {m} on {datasize} data)')
        plt.savefig(f'fig/Loss ({file_tag}).png')
        plt.show()

# Summary: x positions match the [1/16, half, all] ordering of the result arrays.
plt.plot([1/16, 0.5, 1], big_model_acc, color='red', label='Big model')
plt.plot([1/16, 0.5, 1], small_model_acc, color='green', label='Small model')
plt.legend(loc='lower right')
plt.xlabel('Dataset size')
plt.ylabel('Accuracy')
plt.title('Dataset size vs accuracy')
plt.savefig('result.png')
plt.show()



## Code for Problem 3

In [None]:
# TODO: Try to achieve the best performance given all training data using whatever model and training strategy.

## Problems

1. (30%) Finish the rest of the code for Problem 1 and Problem 2 according to the hints. (2 code cells in total.)
2. Train small model (resnet18) and big model (resnet50) from scratch on `sixteenth_train_dataloader`, `half_train_dataloader`, and `train_dataloader` respectively.
3. (30%) Achieve the best performance given all training data using whatever model and training strategy.  
  (You cannot use a model that was pretrained on CIFAR10.)



## Discussion


- (30%) The relationship between the accuracy, model size, and the training dataset size.  
    (Total 6 models. Small model trains on the sixteenth, half, and all data. Big model trains on the sixteenth, half, and all data.)
- (10%) If we instead train the ResNets starting from ImageNet-initialized weights (`weights="IMAGENET1K_V1"`), how does this relationship change?

## Credits

1. [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html)