In [1]:
# Enable IPython's autoreload so edits to imported .py modules are picked up
# live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Show the PyTorch version and the selected device, one per line.
print(torch.__version__, device, sep="\n")


2.4.1
cuda


In [3]:
from ph1 import *
from helper import *

In [4]:
import cv2
# Print the installed OpenCV version. cv2 is not referenced by any other code
# visible in this part of the notebook.
print(cv2.__version__)


4.5.5


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from eecs598.utils import reset_seed
from collections import OrderedDict

# for plotting
import matplotlib.pyplot as plt
# Notebook-wide matplotlib defaults, applied in a single update.
plt.rcParams.update({
  'figure.figsize': (10.0, 8.0),     # default size of plots
  'image.interpolation': 'nearest',
  'image.cmap': 'gray',
})

%matplotlib inline

In [6]:
# dtypes applied when batches are moved to the device further down:
# inputs are cast to to_float, labels to to_long.
to_float, to_long = torch.float, torch.long
# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [7]:
# Create the three data loaders. loader_train is iterated by train_part345 and
# loader_val is the loader it evaluates on during training.
# NOTE(review): load_CIFAR is not defined in this notebook; presumably it comes
# from one of the star imports above, and `path` is the dataset directory --
# confirm against its definition.
loader_train, loader_val, loader_test = load_CIFAR(path='./datasets/')

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [8]:
def adjust_learning_rate(optimizer, lrd, epoch, schedule):
  """
  Scale the learning rate of every parameter group by lrd at scheduled epochs.

  Inputs:
  - optimizer: An Optimizer object; the 'lr' entry of each of its param_groups
    is the value that gets rescaled
  - lrd: learning rate decay; the factor each 'lr' is multiplied by
  - epoch: the current epoch number
  - schedule: the collection of epoch numbers at which the decay is applied

  Returns: Nothing. If epoch is in schedule, each group's 'lr' is updated in
  place and the old and new values are printed; otherwise nothing happens.
  """
  if epoch not in schedule:
    return

  for group in optimizer.param_groups:
    current_lr = group['lr']
    print('lr decay from {} to {}'.format(current_lr, current_lr * lrd))
    group['lr'] *= lrd


def check_accuracy_part34(loader, model):
  """
  Measure classification accuracy of a model over every batch of a loader.

  Uses the notebook-level globals device, to_float and to_long to place and
  cast each batch before the forward pass.

  Inputs:
  - loader: An iterable of (x, y) batches. loader.dataset.train is read only to
    decide which banner ('validation set' or 'test set') is printed.
  - model: A PyTorch Module; model(x) must return scores whose argmax along
    dimension 1 is the predicted label for each sample.

  Returns: The fraction of correctly classified samples as a Python float.
  An empty loader yields 0.0 instead of raising ZeroDivisionError.

  Side effects: Prints the banner and a summary line, and leaves the model in
  evaluation mode (it is not switched back to training mode here).
  """
  if loader.dataset.train:
    print('Checking accuracy on validation set')
  else:
    print('Checking accuracy on test set')

  num_correct = 0
  num_samples = 0
  model.eval()  # set model to evaluation mode
  with torch.no_grad():
    for x, y in loader:
      x = x.to(device=device, dtype=to_float)  # move to device, e.g. GPU
      y = y.to(device=device, dtype=to_long)
      scores = model(x)
      _, preds = scores.max(1)
      # Accumulate as a tensor; it is converted to a Python int once, below.
      num_correct += (preds == y).sum()
      num_samples += preds.size(0)

  num_correct = int(num_correct)
  # Guard the division: a loader that yields no samples has no accuracy to report.
  acc = num_correct / num_samples if num_samples > 0 else 0.0
  print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
  return acc


def train_part345(model, optimizer, epochs=1, learning_rate_decay=.1, schedule=[], verbose=True):
  """
  Train a model on CIFAR-10 using the PyTorch Module API.

  Batches come from the notebook-level loader_train, and validation accuracy is
  measured on loader_val via check_accuracy_part34.

  Inputs:
  - model: A PyTorch Module giving the model to train.
  - optimizer: An Optimizer object we will use to train the model
  - epochs: (Optional) A Python integer giving the number of epochs to train for
  - learning_rate_decay: (Optional) Factor the learning rate is multiplied by
    at the start of every epoch listed in schedule
  - schedule: (Optional) Epoch numbers at which learning_rate_decay is applied.
    The default list is only read, never mutated.
  - verbose: (Optional) If True, evaluate every print_every (100) iterations
    and once more after the final iteration; if False, evaluate once at the
    end of each epoch.

  Returns: A tuple (acc_history, iter_history) of 1-D tensors with exactly one
  entry per evaluation performed: the validation accuracy and the global
  iteration index at which it was measured. Progress is printed along the way.
  """
  model = model.to(device=device)  # move the model parameters to CPU/GPU
  iters_per_epoch = len(loader_train)
  print_every = 100

  # One entry is appended per evaluation. Indexing a preallocated buffer by
  # tt // print_every made the final-iteration entry overwrite the last
  # periodic one (and could leave a trailing zero slot) in verbose mode.
  acc_log = []
  iter_log = []

  for e in range(epochs):

    adjust_learning_rate(optimizer, learning_rate_decay, e, schedule)

    for t, (x, y) in enumerate(loader_train):
      # check_accuracy_part34 leaves the model in eval mode, so training mode
      # has to be restored before every step.
      model.train()
      x = x.to(device=device, dtype=to_float)  # move to device, e.g. GPU
      y = y.to(device=device, dtype=to_long)

      scores = model(x)
      loss = F.cross_entropy(scores, y)

      # Zero out all of the gradients for the variables which the optimizer
      # will update.
      optimizer.zero_grad()

      # This is the backwards pass: compute the gradient of the loss with
      # respect to each parameter of the model.
      loss.backward()

      # Actually update the parameters of the model using the gradients
      # computed by the backwards pass.
      optimizer.step()

      tt = t + e * iters_per_epoch  # global iteration index across epochs
      end_of_epoch = (t == iters_per_epoch - 1)
      if verbose:
        evaluate_now = tt % print_every == 0 or (end_of_epoch and e == epochs - 1)
      else:
        evaluate_now = end_of_epoch

      if evaluate_now:
        print('Epoch %d, Iteration %d, loss = %.4f' % (e, tt, loss.item()))
        acc = check_accuracy_part34(loader_val, model)
        acc_log.append(acc)
        iter_log.append(tt)
        print()

  acc_history = torch.tensor(acc_log, dtype=to_float)
  iter_history = torch.tensor(iter_log, dtype=to_long)
  return acc_history, iter_history

In [None]:
# Example network specifications, keyed by name. Each value holds the keyword
# arguments that get_resnet forwards to ResNet:
#   block:      the block class used to build every stage
#   stage_args: one (in_channels, out_channels, num_blocks, downsample) tuple
#               per stage
# The acc=... annotations are carried over from the original comments.
networks = {
  'resnet20': dict(  # acc=82.9%
    block=ResidualBlock,
    stage_args=[
      (16, 16, 3, False),
      (16, 32, 3, True),
      (32, 64, 3, True),
    ],
  ),
  'plain32': dict(  # acc=72%
    block=PlainBlock,
    stage_args=[
      (8, 8, 5, False),
      (8, 16, 5, True),
      (16, 32, 5, True),
    ],
  ),
  'resnet32': dict(  # acc=84.4%; 5 residual blocks per stage
    block=ResidualBlock,
    stage_args=[
      (16, 16, 5, False),
      (16, 32, 5, True),
      (32, 64, 5, True),
    ],
  ),
  'resnet44': dict(  # acc=86%; 7 residual blocks per stage
    block=ResidualBlock,
    stage_args=[
      (16, 16, 7, False),
      (16, 32, 7, True),
      (32, 64, 7, True),
    ],
  ),
  'resnet56': dict(  # acc=85.2%; 9 residual blocks per stage
    block=ResidualBlock,
    stage_args=[
      (16, 16, 9, False),
      (16, 32, 9, True),
      (32, 64, 9, True),
    ],
  ),
  'resnet110': dict(  # acc=85.7%; 18 residual blocks per stage
    block=ResidualBlock,
    stage_args=[
      (16, 16, 18, False),
      (16, 32, 18, True),
      (32, 64, 18, True),
    ],
  ),
  'resnet110-b': dict(  # acc=78.5%; 18 bottleneck blocks per stage
    block=ResidualBottleneckBlock,
    stage_args=[
      (16, 16, 18, False),
      (16, 32, 18, True),
      (32, 64, 18, True),
    ],
  ),
  'wideresnet': dict(  # acc=86.2%; doubled channel widths
    block=ResidualBlock,
    stage_args=[
      (16, 32, 3, False),
      (32, 64, 3, True),
      (64, 128, 3, True),
    ],
  ),
  'resnet47': dict(  # acc=81.8%
    block=ResidualBottleneckBlock,
    stage_args=[
      (32, 32, 5, False),
      (32, 64, 5, True),
      (64, 128, 5, True),
    ],
  ),
  'resnet9': dict(  # acc=85.10%
    block=ResidualBlock,
    stage_args=[
      (3, 64, 1, False),
      (64, 128, 2, True),
      (128, 128, 1, False),
      (128, 256, 2, True),
      (256, 256, 1, False),
    ],
  ),
  'resnet9k': dict(  # acc=86.8%
    block=ResidualBlock,
    stage_args=[
      (64, 64, 1, False),
      (64, 128, 1, True),
      (128, 256, 2, True),
      (256, 512, 2, True),
    ],
  ),
}

def get_resnet(name):
  """
  Build the ResNet described by the entry of the networks dict called name.

  Inputs:
  - name: A key of the notebook-level networks dict

  Returns: ResNet(**networks[name]), i.e. a ResNet constructed from that
  entry's 'block' and 'stage_args' values.

  Raises: KeyError if name is not a key of networks; the message lists the
  names that are available.
  """
  # YOUR_TURN: Implement ResNet.__init__ and ResNet.forward
  if name not in networks:
    raise KeyError('unknown network %r; available: %s'
                   % (name, ', '.join(sorted(networks))))
  return ResNet(**networks[name])

In [10]:
# Train every listed architecture with the same optimizer settings and epoch
# budget, collecting the per-epoch validation history of each one by name.
names = [
  'resnet20', 'plain32', 'resnet32', 'resnet44', 'resnet56', 'resnet110',
  'resnet110-b', 'wideresnet', 'resnet47', 'resnet9', 'resnet9k',
]
acc_history_dict, iter_history_dict = {}, {}

for name in names:
  reset_seed(0)  # same seed before each model is built and trained
  print(name, '\n')
  model = get_resnet(name)

  optimizer = optim.SGD(
    model.parameters(),
    lr=1e-2,
    momentum=.9,
    weight_decay=1e-4,
  )

  # verbose=False: one validation pass at the end of each epoch; the learning
  # rate is decayed at the start of epochs 6 and 8.
  acc_history, iter_history = train_part345(
    model, optimizer, epochs=10, schedule=[6, 8], verbose=False)
  acc_history_dict[name] = acc_history
  iter_history_dict[name] = iter_history

resnet20 

Epoch 0, Iteration 765, loss = 1.3618
Checking accuracy on validation set
Got 403 / 1000 correct (40.30)

Epoch 1, Iteration 1531, loss = 0.8849
Checking accuracy on validation set
Got 613 / 1000 correct (61.30)

Epoch 2, Iteration 2297, loss = 0.5947
Checking accuracy on validation set
Got 680 / 1000 correct (68.00)

Epoch 3, Iteration 3063, loss = 0.4528
Checking accuracy on validation set
Got 733 / 1000 correct (73.30)

Epoch 4, Iteration 3829, loss = 0.5120
Checking accuracy on validation set
Got 767 / 1000 correct (76.70)

Epoch 5, Iteration 4595, loss = 0.3333
Checking accuracy on validation set
Got 787 / 1000 correct (78.70)

lr decay from 0.01 to 0.001
Epoch 6, Iteration 5361, loss = 0.2157
Checking accuracy on validation set
Got 824 / 1000 correct (82.40)

Epoch 7, Iteration 6127, loss = 0.1801
Checking accuracy on validation set
Got 828 / 1000 correct (82.80)

lr decay from 0.001 to 0.0001
Epoch 8, Iteration 6893, loss = 0.3295
Checking accuracy on validation set
G