# Computer Vision Homework 3: Big vs Small Models

## Brief

Due date: Nov 16, 2022

Required files: `homework-3.ipynb`, `report.pdf`

To download the Jupyter notebook from Colab, refer to the Colab tutorial we provided.


## Code for Problem 1 and Problem 2

### Import Packages

In [None]:
import glob
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from PIL import Image
from torch.utils.data import DataLoader, Dataset, RandomSampler
from torchvision import transforms, models, datasets
from tqdm import tqdm

%matplotlib inline

### Check GPU Environment

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
backend = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(backend)
print(f'Using {device} device')

In [None]:
# IPython shell escape: list the GPUs that nvidia-smi reports for this runtime.
! nvidia-smi -L

### Set the Seed to Reproduce the Result

In [None]:
def set_all_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds NumPy, Python's ``random`` module and PyTorch, and additionally
    requests deterministic cuDNN kernels so GPU runs are repeatable as well.

    Args:
        seed: Integer seed shared by all generators.
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    # Seeding alone does not make GPU runs repeatable: cuDNN may otherwise
    # pick different (possibly non-deterministic) kernels from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_all_seed(123)

### Create Dataset and Dataloader

In [None]:
from torch.utils.data import Subset

batch_size = 256

# Training augmentation: reflect-pad by 4 px, random horizontal flip, then a
# random 32x32 crop. Evaluation images are only converted to tensors.
train_transform = transforms.Compose([
    transforms.Pad(4, padding_mode='reflect'),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor(),
])
test_transform = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = datasets.CIFAR10(root='data', train=True, download=True, transform=train_transform)
valid_dataset = datasets.CIFAR10(root='data', train=False, download=True, transform=test_transform)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

# Draw each reduced training set ONCE. A RandomSampler(num_samples=...) placed
# directly on the full dataset picks a fresh random subset on every epoch, so
# over many epochs the model would see almost all of the training images and
# the "sixteenth"/"half" runs would no longer measure the effect of dataset size.
subset_rng = torch.Generator().manual_seed(123)
shuffled_indices = torch.randperm(len(train_dataset), generator=subset_rng).tolist()
sixteenth_train_dataset = Subset(train_dataset, shuffled_indices[:len(train_dataset) // 16])
half_train_dataset = Subset(train_dataset, shuffled_indices[:len(train_dataset) // 2])

# The samplers now only reshuffle *within* the fixed subsets each epoch.
sixteenth_train_sampler = RandomSampler(sixteenth_train_dataset)
half_train_sampler = RandomSampler(half_train_dataset)

# Because the loaders wrap the subsets, len(dataloader.dataset) is the true
# number of training samples. pin_memory matches the full-size loaders above.
sixteenth_train_dataloader = DataLoader(
    sixteenth_train_dataset, batch_size=batch_size,
    sampler=sixteenth_train_sampler, pin_memory=True)
half_train_dataloader = DataLoader(
    half_train_dataset, batch_size=batch_size,
    sampler=half_train_sampler, pin_memory=True)

### Load Models

In [None]:
# HINT: Remember to change the model to 'resnet50' and the weights to weights="IMAGENET1K_V1" when needed.
# The network is built from the installed torchvision (imported above as
# `models`) rather than torch.hub's 'pytorch/vision:v0.10.0' checkout: that tag
# predates the `weights=` argument, so the hub call relies on the installed
# package being picked up instead of the checked-out source.
# Enable exactly one of the following lines per experiment.
# model = models.resnet18(weights=None)
# model = models.resnet50(weights=None)
# model = models.resnet18(weights="IMAGENET1K_V1")
model = models.resnet50(weights="IMAGENET1K_V1")

# Background: The original ResNet is designed for the ImageNet dataset and predicts 1000 classes.
# Replace the final fully connected layer with a 10-way head for CIFAR10.
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)

### Training and Testing Models

In [None]:
# Cross-entropy objective for the 10-way classifier, optimized with Adam at a
# fixed learning rate.
learning_rate = 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch and report the mean loss and accuracy.

    Args:
        dataloader: Iterable of ``(inputs, labels)`` batches; must support ``len()``.
        model: Network to optimize; it is switched to training mode.
        loss_fn: Callable mapping ``(predictions, labels)`` to a scalar loss tensor.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        Tuple ``(avg_epoch_loss, avg_acc)``: the mean of the per-batch losses
        and the fraction of the samples seen this epoch that were classified
        correctly.
    """
    num_batches = len(dataloader)
    epoch_loss = 0
    correct = 0
    # Count the samples actually yielded. len(dataloader.dataset) is the size of
    # the whole dataset even when a sampler feeds only part of it per epoch,
    # which would under-report the accuracy.
    seen = 0

    model.train()

    for X, y in tqdm(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        pred = pred.argmax(dim=1, keepdim=True)
        correct += pred.eq(y.view_as(pred)).sum().item()
        seen += y.size(0)

    avg_epoch_loss = epoch_loss / num_batches
    avg_acc = correct / seen

    return avg_epoch_loss, avg_acc


In [None]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on every batch of ``dataloader`` without updating it.

    Args:
        dataloader: Iterable of ``(inputs, labels)`` batches with a ``dataset`` attribute.
        model: Network to evaluate; it is switched to evaluation mode.
        loss_fn: Callable mapping ``(predictions, labels)`` to a scalar loss tensor.

    Returns:
        Tuple of the mean per-batch loss and the number of correct predictions
        divided by ``len(dataloader.dataset)``.
    """
    batch_count = len(dataloader)
    sample_count = len(dataloader.dataset)
    total_loss = 0
    num_correct = 0

    model.eval()

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, targets in tqdm(dataloader):
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            total_loss += loss_fn(logits, targets).item()

            predicted = logits.argmax(dim=1, keepdim=True)
            hits = predicted.eq(targets.view_as(predicted))
            num_correct += hits.sum().item()

    return total_loss / batch_count, num_correct / sample_count


In [None]:
# Train for a fixed number of epochs, recording per-epoch metrics for plotting
# and checkpointing the weights whenever the evaluation accuracy improves.
epochs = 50
best_acc = 0
train_loss_list, train_acc_list = [], []
test_loss_list, test_acc_list = [], []

for epoch in range(epochs):
    # Enable exactly one training loader per experiment.
    train_loss, train_acc = train(train_dataloader, model, loss_fn, optimizer)
    # train_loss, train_acc = train(
    #     sixteenth_train_dataloader, model, loss_fn, optimizer)
    # train_loss, train_acc = train(
    #     half_train_dataloader, model, loss_fn, optimizer)

    test_loss, test_acc = test(valid_dataloader, model, loss_fn)

    improved = test_acc > best_acc
    if improved:
        best_acc = test_acc
        # Enable the checkpoint name that matches the model and loader in use.
        # torch.save(model.state_dict(), 'resnet18_all_IMAGENET.pth')
        # torch.save(model.state_dict(), 'resnet18_sixteenth_IMAGENET.pth')
        # torch.save(model.state_dict(), 'resnet18_half_IMAGENET.pth')
        torch.save(model.state_dict(), 'resnet50_all_IMAGENET.pth')
        # torch.save(model.state_dict(), 'resnet50_sixteenth_IMAGENET.pth')
        # torch.save(model.state_dict(), 'resnet50_half_IMAGENET.pth')

    epoch_metrics = (
        (train_loss_list, train_loss),
        (train_acc_list, train_acc),
        (test_loss_list, test_loss),
        (test_acc_list, test_acc),
    )
    for history, value in epoch_metrics:
        history.append(value)

    print(f"Epoch {epoch + 1:2d}: Loss = {train_loss:.4f} Acc = {train_acc:.2f} Test_Loss = {test_loss:.4f} Test_Acc = {test_acc:.2f}")

print("Done!")
print(f"Best Accuracy: {best_acc:.2f}")

In [None]:
# Left panel: loss curves. Right panel: accuracy curves.
panels = (
    (1, ((train_loss_list, 'train_loss'), (test_loss_list, 'test_loss'))),
    (2, ((train_acc_list, 'train_acc'), (test_acc_list, 'test_acc'))),
)
for position, curves in panels:
    plt.subplot(1, 2, position)
    for values, curve_label in curves:
        plt.plot(values, label=curve_label)
    plt.legend()

plt.show()

### Plot the relationship between Dataset Size and Accuracy

In [None]:
# Best test accuracy per training-set size, appended in the order
# sixteenth -> half -> all by the evaluation cells that follow.
resnet50_test_acc, resnet18_test_acc = [], []

In [None]:
# Untrained ResNet-50 skeleton with a 10-way head; saved weights are loaded into it later.
# Built from the installed torchvision (`models`) rather than the torch.hub
# 'v0.10.0' checkout, whose tag predates the `weights=` argument.
model = models.resnet50(weights=None)
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)


In [None]:
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.
model.load_state_dict(torch.load('resnet50_sixteenth_IMAGENET.pth', map_location=device))
model.eval()
test_loss, test_acc = test(valid_dataloader, model, loss_fn)
print(f"Test Loss = {test_loss:.4f} Test Acc = {test_acc:.2f}")
resnet50_test_acc.append(test_acc)


In [None]:
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.
model.load_state_dict(torch.load('resnet50_half_IMAGENET.pth', map_location=device))
model.eval()
test_loss, test_acc = test(valid_dataloader, model, loss_fn)
print(f"Test Loss = {test_loss:.4f} Test Acc = {test_acc:.2f}")
resnet50_test_acc.append(test_acc)


In [None]:
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.
model.load_state_dict(torch.load('resnet50_all_IMAGENET.pth', map_location=device))
model.eval()
test_loss, test_acc = test(valid_dataloader, model, loss_fn)
print(f"Test Loss = {test_loss:.4f} Test Acc = {test_acc:.2f}")
resnet50_test_acc.append(test_acc)


In [None]:
# Untrained ResNet-18 skeleton with a 10-way head; saved weights are loaded into it later.
# Built from the installed torchvision (`models`) rather than the torch.hub
# 'v0.10.0' checkout, whose tag predates the `weights=` argument.
model = models.resnet18(weights=None)
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)

In [None]:
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.
model.load_state_dict(torch.load('resnet18_sixteenth_IMAGENET.pth', map_location=device))
model.eval()
test_loss, test_acc = test(valid_dataloader, model, loss_fn)
print(f"Test Loss = {test_loss:.4f} Test Acc = {test_acc:.2f}")
resnet18_test_acc.append(test_acc)


In [None]:
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.
model.load_state_dict(torch.load('resnet18_half_IMAGENET.pth', map_location=device))
model.eval()
test_loss, test_acc = test(valid_dataloader, model, loss_fn)
print(f"Test Loss = {test_loss:.4f} Test Acc = {test_acc:.2f}")
resnet18_test_acc.append(test_acc)


In [None]:
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.
model.load_state_dict(torch.load('resnet18_all_IMAGENET.pth', map_location=device))
model.eval()
test_loss, test_acc = test(valid_dataloader, model, loss_fn)
print(f"Test Loss = {test_loss:.4f} Test Acc = {test_acc:.2f}")
resnet18_test_acc.append(test_acc)


In [None]:
# Test accuracy (y axis) against training-set size (x axis), one line per architecture.
Dataset_size = ['Sixteenth', 'Half', 'All']
resnet50 = resnet50_test_acc
resnet18 = resnet18_test_acc

labelled_curves = (
    (resnet50, 'resnet50_IMAGENET'),
    (resnet18, 'resnet18_IMAGENET'),
)
for accuracies, curve_label in labelled_curves:
    plt.plot(Dataset_size, accuracies, label=curve_label)

plt.xlabel('Dataset Size')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

## Code for Problem 3

In [None]:
# Problem 3: ImageNet-pretrained ConvNeXt-Base, fine-tuned on all training data.
import torchvision.models

pretrained_weights = models.ConvNeXt_Base_Weights.IMAGENET1K_V1
model = models.convnext_base(weights=pretrained_weights)

# Replace the last classifier layer with a 10-way head.
head_in_features = model.classifier[2].in_features
model.classifier[2] = nn.Linear(head_in_features, 10)
model = model.to(device)

In [None]:
# Training augmentation: reflect-pad by 4 px, random horizontal flip, random
# 32x32 crop, then conversion to a tensor.
train_steps = [
    transforms.Pad(padding=4, padding_mode='reflect'),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32),
    transforms.ToTensor(),
]
train_transform = transforms.Compose(train_steps)

# Evaluation images are only converted to tensors.
test_transform = transforms.Compose([transforms.ToTensor()])


In [None]:
batch_size = 256

# CIFAR10 train split for optimization, test split for evaluation.
cifar_root = 'data'
train_dataset = datasets.CIFAR10(
    root=cifar_root, train=True, download=True, transform=train_transform)
valid_dataset = datasets.CIFAR10(
    root=cifar_root, train=False, download=True, transform=test_transform)

# Both loaders share batch size and pinned memory; only the training loader shuffles.
loader_options = dict(batch_size=batch_size, pin_memory=True)
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_options)
valid_dataloader = DataLoader(valid_dataset, shuffle=False, **loader_options)

In [None]:
# Cross-entropy objective, AdamW with weight decay, and a cosine learning-rate
# schedule with warm restarts (first cycle T_0=10 scheduler steps, each later
# cycle T_mult=2 times longer, learning-rate floor eta_min=1e-6).
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer=optimizer,
    T_0=10,
    T_mult=2,
    eta_min=1e-6,
)

In [None]:
# Epoch budget, best accuracy so far, and empty per-epoch metric histories.
epochs, best_acc = 100, 0
train_loss_list, train_acc_list, test_loss_list, test_acc_list = [], [], [], []

In [None]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch and report the mean loss and accuracy.

    Args:
        dataloader: Iterable of ``(inputs, labels)`` batches; must support ``len()``.
        model: Network to optimize; it is switched to training mode.
        loss_fn: Callable mapping ``(predictions, labels)`` to a scalar loss tensor.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        Tuple ``(avg_epoch_loss, avg_acc)``: the mean of the per-batch losses
        and the fraction of the samples seen this epoch that were classified
        correctly.
    """
    num_batches = len(dataloader)
    epoch_loss = 0
    correct = 0
    # Accuracy is normalised by the samples actually yielded this epoch;
    # len(dataloader.dataset) over-counts whenever a sampler feeds only part
    # of the dataset.
    seen = 0

    model.train()

    for X, y in tqdm(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        pred = pred.argmax(dim=1, keepdim=True)
        correct += pred.eq(y.view_as(pred)).sum().item()
        seen += y.size(0)

    avg_epoch_loss = epoch_loss / num_batches
    avg_acc = correct / seen

    return avg_epoch_loss, avg_acc


In [None]:
def test(dataloader, model, loss_fn):
    """Compute the loss and accuracy of ``model`` over ``dataloader``.

    The model is put in evaluation mode and gradients are disabled.

    Args:
        dataloader: Iterable of ``(inputs, labels)`` batches with a ``dataset`` attribute.
        model: Network to evaluate.
        loss_fn: Callable mapping ``(predictions, labels)`` to a scalar loss tensor.

    Returns:
        Tuple of the mean per-batch loss and the number of correct predictions
        divided by ``len(dataloader.dataset)``.
    """
    n_batches = len(dataloader)
    n_samples = len(dataloader.dataset)
    batch_losses = []
    n_correct = 0

    model.eval()

    with torch.no_grad():
        for images, labels in tqdm(dataloader):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            batch_losses.append(loss_fn(outputs, labels).item())

            top1 = outputs.argmax(dim=1, keepdim=True)
            n_correct += top1.eq(labels.view_as(top1)).sum().item()

    mean_loss = sum(batch_losses) / n_batches
    accuracy = n_correct / n_samples
    return mean_loss, accuracy


In [None]:
# One pass per epoch: train, evaluate, advance the learning-rate schedule, then
# checkpoint on improvement and record the metrics.
for epoch in range(epochs):
    train_loss, train_acc = train(train_dataloader, model, loss_fn, optimizer)
    test_loss, test_acc = test(valid_dataloader, model, loss_fn)
    scheduler.step()

    is_new_best = test_acc > best_acc
    if is_new_best:
        best_acc = test_acc
        torch.save(model.state_dict(), 'convnext_base.pth')

    epoch_metrics = (
        (train_loss_list, train_loss),
        (train_acc_list, train_acc),
        (test_loss_list, test_loss),
        (test_acc_list, test_acc),
    )
    for history, value in epoch_metrics:
        history.append(value)

    print(f"Epoch {epoch + 1:2d}: Loss = {train_loss:.4f} Acc = {train_acc:.2f} Test_Loss = {test_loss:.4f} Test_Acc = {test_acc:.2f}")

print("Done!")
print(f"Best Accuracy: {best_acc:.2f}")

In [None]:
# Loss curves on the left, accuracy curves on the right.
loss_curves = {'train_loss': train_loss_list, 'test_loss': test_loss_list}
acc_curves = {'train_acc': train_acc_list, 'test_acc': test_acc_list}

for panel_index, panel_curves in enumerate((loss_curves, acc_curves), start=1):
    plt.subplot(1, 2, panel_index)
    for curve_label, values in panel_curves.items():
        plt.plot(values, label=curve_label)
    plt.legend()

plt.show()

## Problems

1. (30%) Finish the rest of the code for Problem 1 and Problem 2 according to the hints. (2 code cells in total.)
2. Train a small model (resnet18) and a big model (resnet50) from scratch on `sixteenth_train_dataloader`, `half_train_dataloader`, and `train_dataloader`, respectively.
3. (30%) Achieve the best performance you can on all of the training data, using any model and training strategy.  
  (You may not use a model that was pretrained on CIFAR10.)



## Discussion


- (30%) Discuss the relationship between accuracy, model size, and training dataset size.  
    (Six models in total: the small model trained on a sixteenth, half, and all of the data, and the big model trained on a sixteenth, half, and all of the data.)
- (10%) If we instead train the ResNets starting from ImageNet-initialized weights (`weights="IMAGENET1K_V1"`), how does this relationship change?

## Credits

1. [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html)