# ResNet for CIFAR-10 
**based on Section 4.2 in "Deep Residual Learning for Image Recognition" from Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T
import torch.nn.functional as F

import matplotlib.pyplot as plt 
%matplotlib inline

In [2]:
# use GPU if available
USE_GPU = True
dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else: 
    device = torch.device('cpu')
print('using device:', device)

using device: cpu


In [3]:
# Check basic functionality of architecture
from ResNet import ResNet, ResidualBlocks, ConvBlock

def test_ResNet():
    n = 1
    x = torch.zeros((64, 3, 32, 32), dtype=dtype) # minibatch size 64
    model = ResNet(n)
    print(model)
    scores = model(x)
    print(scores.size())
    
test_ResNet()

ResNet(
  (initial_layer): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layerBlock1): ResidualBlocks(
    (blocks): Sequential(
      (0): ConvBlock(
        (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (layerBlock2): ResidualBlocks(
    (blocks): Sequential(
      (0): ConvBlock(
        (max): MaxPool2d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
        (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2):

In [4]:
# import CIFAR data

NUM_TRAIN = 45000
batch_size = 128

# The torchvision.transforms package provides tools for preprocessing data
# and for performing data augmentation; here we set up a transform to process
# the data by subtracting the means RGB value and dividing by the 
# standard deviation of each RGB value; we've hardcoded the mean and std.
transform = T.Compose([
                T.ToTensor(),
                T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

# We set up a Dataset object for each split (train / val / test); Datasets load
# training examples one at a time, so we wrap each Dataset in a DataLoader which
# iterates through the Dataset and forms minibatches. We divide the CIFAR-10
# training set into train and val sets by passing a Sampler object to the
# DataLoader telling how it should sample from the underlying Dataset.

cifar10_train = dset.CIFAR10('./cs231n/datasets', train=True, download=True, transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=batch_size, sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10('./cs231n/datasets', train=True, download=True, transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=batch_size, sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))

cifar10_test = dset.CIFAR10('./cs231n/datasets', train=False, download=True, transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=batch_size)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [5]:
def check_accuracy(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else: 
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval() # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype) # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (y == preds).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
        
        
def check_accuracy_batch(scores, labels, verbose=False):
    _, preds = scores.max(1)
    num_correct = (labels == preds).sum()
    num_samples = preds.size(0)
    acc = float(num_correct) / num_samples
    if verbose:
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [6]:
# training and validation function for one epoch

def train_ResNet(model, optimizer, dataloader):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.
    
    Inputs:
    - model: (torch.nn.Module) A PyTorch module giving the model to train.
    - optimizer: (torch.optim) An optimizer object we will use to train the model
    - dataloader: (DataLoader) A DataLoader object supplying the training data
    
    Returns: Nothing, but prints model accuracies during training.
    """
    # set model to training mode
    model.train()
    
    # loss history 
    loss_history = []
    train_accuracy_history = []
    
    for it, (train_batch, labels_batch) in enumerate(dataloader):
        verbose = False
        if it % 10 == 0:
            verbose = True
        
        # compute scores and loss
        scores = model(train_batch)
        loss = F.cross_entropy(scores, labels_batch)
        
        accuracy = check_accuracy_batch(scores, labels_batch, verbose=verbose)
        loss_history.append(loss)
        train_accuracy_history.append(accuracy)
    
        # clear previous gradients
        optimizer.zero_grad()
        
        # build gradient computational graph
        loss.backward()
        
        # actual backprop und updating params
        optimizer.step()
        
    return loss_history, train_accuracy_history
    
    
    
    
def evaluate_ResNet(model, dataloader):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.
    
    Inputs:
    - model: (torch.nn.Module) A PyTorch module giving the model to train.
    - optimizer: (torch.optim) An optimizer object we will use to train the model
    - dataloader: (DataLoader) A DataLoader object supplying the training data
    
    Returns: Nothing, but prints model accuracies during training.
    """
    # set model to test mode
    model.eval()
    
    # loss history 
    loss_history = []
    test_accuracy_history = []
    
    for it, (test_batch, labels_batch) in enumerate(dataloader):
  
        # compute scores and loss
        scores = model(test_batch)
        loss = F.cross_entropy(scores, labels_batch)
        
        accuracy = check_accuracy_batch(scores, labels_batch)
        loss_history.append(loss)
        test_accuracy_history.append(accuracy)
    
    
    return loss_history, test_accuracy_history

    #if dataloader.train == True:
    #    plt.ylabel('Validation accuracy')
    #else:
    #    plt.ylabel('Test accuracy')
    
    
    
    
    """model = model.to(device=device) # move model parameters to CPU/GPU
    for t, (x, y) in enumerate(loader_train):
        model.train()
        x = x.to(device=device, dtype= dtype)
        y = y.to(device=device, dtype=torch.long)
        scores = model(x)
        loss = F.cross_entropy(scores, y)
            
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
            
        if t % 100 == 0:
            print('Iteration %d, loss = %4f' % (t, loss.item()))
            check_accuracy(loader_val, model)
            print()
        if t == 32000 or t == 48000:
            lr /= 10
        if t == 64000:
            return
                
def check_accuracy(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else: 
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval() # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype) # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (y == preds).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
        """

In [None]:
# Check implementation
n = 3
lr = 0.1
model = ResNet(n)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=0.0001)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, 0.95)
overall_train_loss, overall_val_loss, overall_train_acc, overall_val_acc = [], [], [], []

for epoch in range(3):
    print('Epoch ', epoch)
    scheduler.step()
    train_loss, train_acc = train_ResNet(model, optimizer, loader_train)
    val_loss, val_acc = evaluate_ResNet(model, loader_val)
    overall_train_loss += train_loss
    overall_train_acc += train_acc
    overall_val_loss += val_loss
    overall_val_acc += val_acc


Epoch  0




Got 13 / 128 correct (10.16)
Got 27 / 128 correct (21.09)
Got 29 / 128 correct (22.66)
Got 24 / 128 correct (18.75)
Got 26 / 128 correct (20.31)
Got 38 / 128 correct (29.69)
Got 42 / 128 correct (32.81)
Got 40 / 128 correct (31.25)
Got 40 / 128 correct (31.25)
Got 50 / 128 correct (39.06)
Got 40 / 128 correct (31.25)
Got 42 / 128 correct (32.81)
Got 40 / 128 correct (31.25)
Got 56 / 128 correct (43.75)
Got 44 / 128 correct (34.38)
Got 55 / 128 correct (42.97)
Got 50 / 128 correct (39.06)
Got 52 / 128 correct (40.62)
Got 57 / 128 correct (44.53)
Got 43 / 128 correct (33.59)
Got 45 / 128 correct (35.16)
Got 52 / 128 correct (40.62)
Got 77 / 128 correct (60.16)
Got 59 / 128 correct (46.09)
Got 63 / 128 correct (49.22)
Got 55 / 128 correct (42.97)
Got 69 / 128 correct (53.91)
Got 61 / 128 correct (47.66)
Got 56 / 128 correct (43.75)
Got 69 / 128 correct (53.91)
Got 61 / 128 correct (47.66)
Got 61 / 128 correct (47.66)
Got 60 / 128 correct (46.88)
Got 68 / 128 correct (53.12)
Got 68 / 128 c

In [1]:
# plot loss and accuracy curves
def plot_loss_acc(train_loss, train_acc, val_loss, val_acc):   
    print('Loss')
    plt.subplot(2, 1, 1)
    plt.plot(train_loss)
    plt.plot(val_loss)
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.legend(['Training', 'Validation'])

    plt.subplot(2, 1, 2)
    plt.plot(train_acc)
    plt.plot(val_acc)
    plt.xlabel('Iterations')
    plt.ylabel('Accuracy')
    plt.legend(['Training', 'Validation'])
    
plot_loss_acc(overall_train_loss, overall_train_acc, overall_val_loss, overall_val_acc)
print(overall_train_loss[0])
print(overall_val_loss[0])

NameError: name 'overall_train_loss' is not defined

[[tensor(4.0431, grad_fn=<NllLossBackward>),
  tensor(3.2244, grad_fn=<NllLossBackward>),
  tensor(3.3097, grad_fn=<NllLossBackward>),
  tensor(3.1141, grad_fn=<NllLossBackward>),
  tensor(3.4346, grad_fn=<NllLossBackward>),
  tensor(2.6042, grad_fn=<NllLossBackward>),
  tensor(2.4679, grad_fn=<NllLossBackward>),
  tensor(2.6407, grad_fn=<NllLossBackward>),
  tensor(2.2931, grad_fn=<NllLossBackward>),
  tensor(2.7352, grad_fn=<NllLossBackward>),
  tensor(2.3425, grad_fn=<NllLossBackward>),
  tensor(2.3787, grad_fn=<NllLossBackward>),
  tensor(2.0823, grad_fn=<NllLossBackward>),
  tensor(2.7473, grad_fn=<NllLossBackward>),
  tensor(2.4665, grad_fn=<NllLossBackward>),
  tensor(2.5150, grad_fn=<NllLossBackward>),
  tensor(2.1866, grad_fn=<NllLossBackward>),
  tensor(2.1648, grad_fn=<NllLossBackward>),
  tensor(2.0765, grad_fn=<NllLossBackward>),
  tensor(2.1105, grad_fn=<NllLossBackward>),
  tensor(2.1374, grad_fn=<NllLossBackward>),
  tensor(2.0693, grad_fn=<NllLossBackward>),
  tensor(1