# Benchmarking cifar-10 and cifar-100

CIFAR-10 is a well-studied dataset containing images that belong to 10 classes. Its companion dataset, CIFAR-100, contains images belonging to 100 classes.

**Objective**

Benchmark the performance of the following pre-trained models on the CIFAR-10 dataset:

1. AlexNet
2. Vgg
3. ResNet
4. SqueezeNet
5. DenseNet and
6. Inception v3


*Note*: The benchmarks might differ if you use a different version of PyTorch or torchvision. Also, some stochasticity is introduced during the selection of the validation set.

## Imports

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision

import torchvision.models as models
from torchvision.utils import make_grid
from torchvision import models
from torchvision import datasets
from torchvision import transforms

import numpy as np
import matplotlib.pyplot as plt
import time
import copy

import logging

# Record the library versions used for this run; benchmark numbers can differ
# between PyTorch / torchvision releases.
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

PyTorch Version:  0.4.1
Torchvision Version:  0.2.1


In [2]:
# Route every record (DEBUG and above) to benchmark.log; filemode 'w' starts
# the file afresh each time this cell configures logging.
logging.basicConfig(
    level=logging.DEBUG,
    filename='benchmark.log',
    filemode='w',
    datefmt='%H:%M:%S',
    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
)
logging.info("Start of benchmark run")

The torchvision library offers many state-of-the-art neural network models that come pre-trained. Let's take a look at them. For the purpose of benchmarking, we will limit ourselves to a few of them.

In [3]:
# Collect the names of the lowercase, non-dunder, callable attributes of
# torchvision.models, sorted alphabetically, and show them.
available_model_names = sorted(
    attr_name
    for attr_name, attr in vars(models).items()
    if attr_name.islower() and not attr_name.startswith("__") and callable(attr)
)
print(list(available_model_names))

['alexnet', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'inception_v3', 'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn']


In [4]:
# Use the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
    print('Machine supports cuda')
else:
    device = 'cpu'
    print('Machine does not have cuda, falling back on cpu')

Machine supports cuda


In [5]:
# Models to benchmark in this run. Names understood by initialize_model:
#   resnet, alexnet, vgg, squeezenet, densenet, inception
# Uncomment the next line (and drop the one after it) to benchmark all of them.
#model_names = ['resnet', 'alexnet', 'vgg', 'squeezenet', 'densenet', 'inception']
model_names = ['squeezenet'] 

# Number of classes in the dataset (CIFAR-10 has 10)
num_classes = 10

# Number of epochs each model is trained for
num_epochs = 350

# Flag for feature extracting. When False, we finetune the whole model;
#   when True we only update the parameters of the replaced output layer.
feature_extract = True

In [6]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of ``model`` when feature extracting.

    If ``feature_extracting`` is truthy, ``requires_grad`` is set to False on
    every parameter yielded by ``model.parameters()``. Otherwise the model is
    left untouched. Returns None.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [7]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision model whose output layer emits ``num_classes`` scores.

    Parameters
    ----------
    model_name : str
        One of "resnet" (resnet18), "alexnet", "vgg" (vgg11_bn),
        "squeezenet" (squeezenet1_0), "densenet" (densenet121) or
        "inception" (inception_v3).
    num_classes : int
        Number of outputs of the replacement output layer.
    feature_extract : bool
        Forwarded to ``set_parameter_requires_grad`` before the output layer
        is replaced; when True the existing parameters are frozen and only
        the freshly created layer(s) are left trainable.
    use_pretrained : bool, optional
        Forwarded as ``pretrained=`` to the torchvision constructor.

    Returns
    -------
    (model_ft, input_size) : tuple
        The modified model and the square input resolution it expects
        (299 for inception, 224 for every other model).

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names. (Previously this
        called ``exit()``, which terminates the interpreter / notebook kernel.)
    """
    supported = ("resnet", "alexnet", "vgg", "squeezenet", "densenet", "inception")
    if model_name not in supported:
        raise ValueError(
            "Invalid model name {!r}; expected one of {}".format(
                model_name, ", ".join(supported)))

    # Every model below except inception takes 224x224 inputs.
    input_size = 224

    if model_name == "resnet":
        # Resnet18: the classifier is the single `fc` layer.
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)

    elif model_name == "alexnet":
        # Alexnet: the last entry (index 6) of `classifier` is the output layer.
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)

    elif model_name == "vgg":
        # VGG11_bn: same classifier layout as alexnet.
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)

    elif model_name == "squeezenet":
        # Squeezenet 1.0: the output layer is a 1x1 convolution at
        # classifier[1]; reuse its input channel count (512) for the new one.
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        in_channels = model_ft.classifier[1].in_channels
        model_ft.classifier[1] = nn.Conv2d(in_channels, num_classes,
                                           kernel_size=(1, 1), stride=(1, 1))
        model_ft.num_classes = num_classes

    elif model_name == "densenet":
        # Densenet121: `classifier` is a single linear layer.
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)

    else:
        # Inception v3: expects (299, 299) images and has an auxiliary output,
        # so both the auxiliary and the primary heads are replaced.
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299

    return model_ft, input_size

# Print the model we just instantiated
#print(model_ft)

## Load the data

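Let's load in the data. Inputs to the pre-trained models also have to be normalized. We will be creating a training set and a validation set: by default, when `val_split = 0`, the test set is used for validation; otherwise the training set is split according to that ratio. (Note: the loader defined below takes no `val_split` argument and always uses the CIFAR-10 test set for validation.)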

Minimal data transforms are being done, to ensure all models perform well. 

In [8]:
def load_data_pre_process(input_size):
    """Create the CIFAR-10 data loaders used for training and validation.

    Parameters
    ----------
    input_size : int
        Side length the images are cropped/resized to.

    Returns
    -------
    dict
        ``{'train': DataLoader, 'val': DataLoader}``. The 'train' loader reads
        the CIFAR-10 training split (batch size 128, shuffled); the 'val'
        loader reads the CIFAR-10 test split (batch size 100, not shuffled).
        The data is downloaded to ``./data`` if it is not already there.
    """
    # Per-channel normalization applied to both splits.
    channel_norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225])

    # Training: random crop + horizontal flip as light augmentation.
    train_tf = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        channel_norm,
    ])
    # Validation: deterministic resize + center crop.
    val_tf = transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        channel_norm,
    ])

    train_set = datasets.CIFAR10(root='./data', train=True, download=True,
                                 transform=train_tf)
    val_set = datasets.CIFAR10(root='./data', train=False, download=True,
                               transform=val_tf)

    make_loader = torch.utils.data.DataLoader
    return {
        'train': make_loader(train_set, batch_size=128, shuffle=True, num_workers=4),
        'val': make_loader(val_set, batch_size=100, shuffle=False, num_workers=4),
    }

## Create the optimizer

In [9]:
# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are 
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.

def create_optimizer(model_ft):
    """Create the SGD optimizer over the trainable parameters of ``model_ft``.

    Only parameters with ``requires_grad`` set are handed to the optimizer.
    When feature extracting that is just the replaced output layer; when
    finetuning, nothing has been frozen, so it is every parameter. The name of
    each selected parameter is printed.

    The previous version only built the parameter list when the global
    ``feature_extract`` flag was True and raised ``NameError`` otherwise;
    filtering on ``requires_grad`` covers both modes.

    Parameters
    ----------
    model_ft : torch.nn.Module

    Returns
    -------
    torch.optim.SGD
        Configured with lr=0.1, momentum=0.9 and weight_decay=5e-4.
    """
    print("Params to learn:")
    params_to_update = []
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("\t",name)
    optimizer_ft = optim.SGD(params_to_update, lr=0.1, momentum=0.9, weight_decay=5e-4)
    return optimizer_ft

## Training the model

In [None]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model``, validating after every epoch, and keep the best weights.

    Parameters
    ----------
    model : torch.nn.Module
        Model to train; expected to already be on ``device``.
    dataloaders : dict
        Must provide 'train' and 'val' data loaders.
    criterion : callable
        Loss function called as ``criterion(outputs, labels)``.
    optimizer : torch.optim.Optimizer
        Its learning rate is decayed through ``exp_lr_scheduler``.
    num_epochs : int, optional
    is_inception : bool, optional
        When True, the training forward pass is expected to return
        ``(outputs, aux_outputs)`` and the auxiliary loss is added with
        weight 0.4.

    Returns
    -------
    (model, perf_hist) : tuple
        ``model`` has the weights of the epoch with the highest validation
        accuracy loaded (on cuda it is the ``DataParallel`` wrapper).
        ``perf_hist`` maps 'train_acc', 'train_loss', 'val_acc' and 'val_loss'
        to per-epoch lists of Python floats, and 'time' to the elapsed
        wall-clock seconds.
    """
    since = time.time()

    perf_hist = {'train_acc' : [], 'train_loss' : [], 'val_acc': [], 'val_loss': []}

    if device == 'cuda':
        model = torch.nn.DataParallel(model)
        cudnn.benchmark = True

    # Take the initial snapshot only after the optional DataParallel wrap:
    # wrapping prefixes the state_dict keys, and the snapshot must match the
    # object that load_state_dict is called on at the end.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        logging.debug('Epoch {}/{}'.format(epoch, num_epochs - 1))
        logging.debug('-' * 10)
        # epoch+1 so that the very first epoch does not trigger a decay
        optimizer = exp_lr_scheduler(optimizer, epoch+1)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            dataset_size = len(dataloaders[phase].dataset)
            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Special case for inception because in training it has an
                    #   auxiliary output. In train mode the loss is the final
                    #   loss plus 0.4 times the auxiliary loss; in validation
                    #   only the final output is considered.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics, accumulated as plain Python numbers so that the
                # history holds floats rather than (possibly CUDA) tensors,
                # which the plotting code cannot handle
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            epoch_loss = running_loss / dataset_size
            epoch_acc = running_corrects / dataset_size

            logging.debug('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            if phase == 'train':
                perf_hist['train_acc'].append(epoch_acc)
                perf_hist['train_loss'].append(epoch_loss)
            else:
                perf_hist['val_acc'].append(epoch_acc)
                perf_hist['val_loss'].append(epoch_loss)

            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    perf_hist['time'] = time_elapsed
    return model, perf_hist

## Run the benchmark

Now let's run the benchmark tests and see how well each model performs on the Cifar-10 dataset

In [None]:
def benchmark_models(model_names):
    """ Benchmark the different models

        Each model is initialized with the notebook-level ``num_classes`` and
        ``feature_extract`` settings, moved to ``device`` and trained for
        ``num_epochs`` epochs.

        Parameters
        ----------
        model_names : List of String
            Names of models that are considered for benchmarking

        Returns
        -------
        model_hist : List of Dict
            One dict per model, in the order given, containing train_acc,
            train_loss, val_acc and val_loss (per-epoch lists), time (elapsed
            training time) and model (the model's name)
    """
    model_hist = []
    for model_name in model_names:
        # Initialize the model for this run
        model_ft, input_size = initialize_model(model_name, num_classes, feature_extract)
        # Send the model to GPU
        model_ft = model_ft.to(device)

        # create_optimizer selects the parameters to train itself
        optimizer_ft = create_optimizer(model_ft)
        # Setup the loss fxn
        criterion = nn.CrossEntropyLoss()
        dataloaders_dict = load_data_pre_process(input_size)
        # Train and evaluate
        print(f'Starting training of {model_name} model')
        logging.info(f'Starting training of {model_name} model')
        model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft,
                                     num_epochs=num_epochs,
                                     is_inception=(model_name == "inception"))
        hist['model'] = model_name
        model_hist.append(hist)
    return model_hist

In [None]:
def exp_lr_scheduler(optimizer, epoch, lr_decay=0.1, lr_decay_epoch=125):
    """Multiply every learning rate by ``lr_decay`` once per ``lr_decay_epoch`` epochs.

    The decay is applied (and logged at DEBUG level) only when ``epoch`` is an
    exact multiple of ``lr_decay_epoch``; on all other epochs the optimizer is
    returned unchanged. The optimizer is modified in place and returned.
    """
    on_decay_boundary = (epoch % lr_decay_epoch) == 0
    if on_decay_boundary:
        for group in optimizer.param_groups:
            group['lr'] *= lr_decay
        logging.debug(f'Decayed Learning rate by factor of {lr_decay}')
    return optimizer

In [None]:
# Train every model listed in model_names and collect one history dict per
# model; the visualization cells below read this list.
model_hist = benchmark_models(model_names)

  init.kaiming_uniform(m.weight.data)
  init.normal(m.weight.data, mean=0.0, std=0.01)


Params to learn:
	 classifier.1.weight
	 classifier.1.bias
Files already downloaded and verified
Files already downloaded and verified
Starting training of squeezenet model







































































































































































































# Visualization

Now let's see how each model performed and how they compare against each other.

In [None]:
import matplotlib.pyplot as plt

In [None]:
def plot_graph(model, attr, plt_title):
    """ Plot one metric of every benchmarked model on a shared figure

        Parameters
        ----------
        model : List of Dict
            Histories to plot, as returned by benchmark_models. For backward
            compatibility, an argument that is not a list or tuple is ignored
            and the notebook-level ``model_hist`` is plotted instead (the
            previous version always did this, whatever was passed).
        attr : String
            Name of the attribute that is to be plotted
        plt_title : String
            Title of the plot

        Returns
        -------
        None

    """
    # Do not reuse the name `model` for the loop variable: that shadowed the
    # parameter and hid the fact that it was never read.
    histories = model if isinstance(model, (list, tuple)) else model_hist
    for hist in histories:
        plt.plot(hist[attr], label=hist['model'])
    plt.legend()
    plt.title(label=plt_title)

In [None]:
# Pass the collected histories; `model` is not defined at notebook level here.
plot_graph(model_hist, 'train_acc', 'Train Accuracy vs Epoch')

In [None]:
# Pass the collected histories; `model` is not defined at notebook level here.
plot_graph(model_hist, 'train_loss', 'Train loss vs Epoch')

In [None]:
# Pass the collected histories; `model` is not defined at notebook level here.
plot_graph(model_hist, 'val_acc', 'Validation Accuracy vs Epoch')

In [None]:
# Pass the collected histories; `model` is not defined at notebook level here.
plot_graph(model_hist, 'val_loss', 'Validation Loss vs Epoch')

## Final accuracies of the models

In [None]:
# Report, for each benchmarked model, the validation accuracy recorded in the
# last epoch (the final entry of its 'val_acc' history, not the best epoch).
for model in model_hist:
    print(f"{model['model']} accuracy : {model['val_acc'][-1]}")

Finally let's take a look at how much time it took to train these models.

In [None]:
# Elapsed training time of each model, in seconds (difference of time.time()
# readings stored under 'time' by train_model), in benchmark order.
print([x['time'] for x in model_hist])