In [None]:
!pip install torchsummary
!pip install efficientnet_pytorch

Computer Vision - Multiclass Classification

Dataset: Dog Breed Identification, from: https://www.kaggle.com/c/dog-breed-identification/overview

Method: Comparing Models
- ResNet50
- EfficientNet
- VGG16
- InceptionV3

Metric: Cross Entropy Loss, Accuracy

Note: Runtime: ~12 to 15 minutes for all models to train 1 epoch (use kaggle cuda gpu)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid
from torch.optim import lr_scheduler
from torchsummary import summary
from efficientnet_pytorch import EfficientNet

import cv2
from mpl_toolkits.axes_grid1 import ImageGrid
from PIL import Image
from PIL import ImageFile
from IPython.display import display

ImageFile.LOAD_TRUNCATED_IMAGES = True

import glob
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [None]:
def seed_everything(seed=1234):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner can select different kernels from run to run.
    torch.backends.cudnn.benchmark = False


seed_everything(42)

Read the dataset

The dataset provided has a training set and a test set of images of dogs but we only use the training set because the test set has hidden labels. Each image has a filename that is its unique id. The dataset comprises 120 breeds of dogs. The goal is to create a classifier capable of determining a dog's breed from a photo.

In [None]:
# Root folder of the competition data.
PATH = '../input/dog-breed-identification/'

# Label table; its `id` and `breed` columns are used by the cells below.
labels = pd.read_csv(PATH + 'labels.csv')

# Breed names: every sample-submission column after the first one.
labelnames = pd.read_csv(PATH + 'sample_submission.csv').columns[1:]

print(
    "Train folder has ",
    len(os.listdir(PATH + 'train')),
    'images which matches with label\'s',
    len(labels),
    'images',
)

Basic Visualization of Dog Breed distribution

In [None]:
# Folder with the training images. The trailing slash matters because the
# image path below is built by plain string concatenation.
img_file = PATH + 'train/'

df = labels.assign(img_path=lambda x: img_file + x['id'] + '.jpg')

# Horizontal bar chart of the number of images per breed, rarest breed first.
ax = df['breed'].value_counts(ascending=True).plot(
    kind='barh',
    fontsize=40,
    title="Class Distribution",
    figsize=(50, 100),
)
ax.set(xlabel="Images per class", ylabel="Classes")
ax.xaxis.label.set_size(40)
ax.yaxis.label.set_size(40)
ax.title.set_size(60)
plt.show()

Create a mapping between breed to code and code to breed to easily connect the data in two directions

One-hot encoding is applied to the breed type.

Split the dataset into:

Train: 7666 images (75%)

Validation: 1534 images (15%)

Test: 1022 images (10%)

In [None]:
# Two-way mapping between breed names and integer class codes.
codes = range(len(labelnames))
breed_to_code = dict(zip(labelnames, codes))
code_to_breed = dict(zip(codes, labelnames))
labels['target'] = [breed_to_code[x] for x in labels.breed]

# One row per image id and one column per breed. Keyword arguments are used
# because pandas 2.x no longer accepts positional arguments for pivot().
# Only the column of the image's own breed is filled (with its class code);
# every other cell becomes 0.
labels_pivot = (
    labels.pivot(index='id', columns='breed', values='target')
    .reset_index()
    .fillna(0)
)

# 75% train; the remaining 25% is split 60/40 into validation and test.
train = labels_pivot.sample(frac=0.75)
temp = labels_pivot[~labels_pivot['id'].isin(train['id'])]
valid = temp.sample(frac=0.6)
test = temp[~temp['id'].isin(valid['id'])]
print("Train shape: ", train.shape)
print("Validation shape: ", valid.shape)
print("Test shape:", test.shape)
print("Data shape overview (Train):")
print(train)

In [None]:
# Preview the first rows of the label table, including the numeric `target`
# column added in the previous cell.
labels.head(5)

Validation and test images are resized so that the shorter side is 256 pixels and then center-cropped to 224x224. Training images are randomly cropped and resized to 256x256, augmented, and then center-cropped to 224x224. For all three sets, the images are converted to tensors and normalized with ImageNet's mean and standard deviation.

For training images, data augmentation is used, which includes a random rotation of up to 30 degrees and a random horizontal flip.

In [None]:
# Image transformations
def _tensor_and_normalize():
    """Return fresh tensor-conversion and normalisation steps shared by all splits."""
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]),
    ]


def _eval_pipeline():
    """Return the deterministic pipeline used for the validation and test splits."""
    return transforms.Compose(
        [transforms.Resize(size=256), transforms.CenterCrop(size=224)]
        + _tensor_and_normalize()
    )


img_transform = {
    # Training split: random augmentation followed by a 224x224 centre crop.
    # NOTE(review): ColorJitter() is called without arguments - confirm that
    # the defaults apply the jitter that was intended.
    'train': transforms.Compose(
        [
            transforms.RandomResizedCrop(size=256),
            transforms.RandomRotation(degrees=30),
            transforms.ColorJitter(),
            transforms.RandomHorizontalFlip(),
            transforms.CenterCrop(size=224),
        ]
        + _tensor_and_normalize()
    ),
    'valid': _eval_pipeline(),
    'test': _eval_pipeline(),
}

Create three separate data loaders for the training, validation, and test splits of the dog images. All three splits read their images from the competition's `train/` folder; they differ only in the rows of the label table they use and in the transform applied.

In [None]:
class DogBreedDataset(torch.utils.data.Dataset):
    """Dataset pairing dog images on disk with integer breed labels.

    Args:
        img_dir: Directory containing the ``<id>.jpg`` files (with or without
            a trailing slash).
        label: DataFrame whose first column is the image id and whose
            remaining columns hold one numeric value per breed; the position
            of the largest value in a row is used as the class index.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, img_dir, label, transform):
        self.img_dir = img_dir
        self.transform = transform
        self.label = label

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.label)

    def __getitem__(self, index):
        """Return ``[image, class_index]`` for the row at ``index``."""
        if self.label is None:
            return None
        img_name = '{}.jpg'.format(self.label.iloc[index, 0])
        fullname = os.path.join(self.img_dir, img_name)
        # Force three channels so grayscale or RGBA files still match the
        # 3-channel normalisation, and close the file handle once decoded.
        with Image.open(fullname) as handle:
            image = handle.convert('RGB')
        scores = self.label.iloc[index, 1:].astype('float').to_numpy()
        label = np.argmax(scores)
        if self.transform:
            image = self.transform(image)
        return [image, label]
        

In [None]:
batch_size = 12
num_workers = 4

# All three splits read from the same image folder; they differ in the rows
# of the label table and in the transform pipeline.
train_img = DogBreedDataset(PATH + 'train/', train, transform=img_transform['train'])
valid_img = DogBreedDataset(PATH + 'train/', valid, transform=img_transform['valid'])
test_img = DogBreedDataset(PATH + 'train/', test, transform=img_transform['test'])

# Only the training loader shuffles its samples.
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size,
        num_workers=num_workers,
        shuffle=(split == 'train'),
    )
    for split, dataset in (
        ('train', train_img),
        ('valid', valid_img),
        ('test', test_img),
    )
}


In [None]:
# True when a CUDA device is available; later cells use this flag to decide
# whether models and batches are moved to the GPU.
use_cuda = torch.cuda.is_available()

Show some sample images to make sure the transformation and augmentation are valid.

In [None]:
def imshow(axis, inp):
    """Undo the normalisation of an image tensor and draw it on ``axis``.

    Args:
        axis: Object exposing ``imshow`` (e.g. a matplotlib Axes).
        inp: Image tensor; assumed to be a CPU tensor laid out as
            (channels, height, width) with three channels - TODO confirm
            for callers outside this notebook.
    """
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # Move channels last, which is the layout imshow expects for colour images.
    image = inp.numpy().transpose((1, 2, 0))
    # Denormalised values can fall slightly outside [0, 1]; clip them so the
    # display range is well defined.
    image = np.clip(std * image + mean, 0.0, 1.0)
    axis.imshow(image)

In [None]:
# Take the first batch of the test loader and report its tensor sizes.
img, label = next(iter(dataloaders['test']))
print(img.size(), label.size())

# Draw every image of the batch into a 3x4 grid (12 cells).
fig = plt.figure(1, figsize=(16, 12))
grid = ImageGrid(fig, 111, nrows_ncols=(3, 4), axes_pad=0.05)
for i, picture in enumerate(img):
    imshow(grid[i], picture)

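The from-scratch CNN baseline used three convolutional layers with ReLU activations, each followed by a max-pooling layer, and two fully connected layers. Dropout with probability 0.25 is applied between the fully connected layers to reduce overfitting. (This baseline is not defined in the cells shown here; the sections below fine-tune pretrained models only.)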

Define train and test method in general

In [None]:
def train(n_epochs, loaders, model, optimizer, criterion, scheduler, use_cuda, save_path):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    Args:
        n_epochs: Number of passes over the training loader.
        loaders: Mapping with at least the ``'train'`` and ``'valid'`` loaders.
        model: Module whose forward pass returns class logits.
        optimizer: Optimizer stepped once per training batch.
        criterion: Loss called as ``criterion(output, target)``; assumed to
            return the mean loss of the batch, as ``nn.CrossEntropyLoss()``
            does by default.
        scheduler: Learning-rate scheduler stepped once per epoch.
        use_cuda: If true, every batch is moved to the GPU.
        save_path: File that receives ``model.state_dict()`` whenever the
            validation loss improves.

    Returns:
        The model in its state after the final epoch. Load ``save_path`` to
        get the weights with the best validation loss.
    """
    valid_loss_min = float('inf')

    train_size = len(loaders['train'].dataset)
    valid_size = len(loaders['valid'].dataset)

    for epoch in range(1, n_epochs + 1):
        train_loss = 0.0
        train_corrects = 0
        samples_seen = 0

        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by the batch size so that dividing by
            # the sample count gives a true per-sample average.
            batch_len = data.size(0)
            train_loss += loss.item() * batch_len
            train_corrects += (output.argmax(dim=1) == target).sum().item()
            samples_seen += batch_len

            if batch_idx % 100 == 0:
                print('Epoch: %d \tBatch: %d \tTraining Loss: %.6f' % (epoch, batch_idx + 1, train_loss / samples_seen))

        train_loss = train_loss / train_size
        train_corrects = train_corrects / train_size
        print('Epoch: {} \tTraining Loss: {:.4f} \tTraining Acc: {:.4f}'.format(
            epoch,
            train_loss,
            train_corrects
            ))

        scheduler.step()

        valid_loss = 0.0
        valid_corrects = 0

        model.eval()
        # No gradients are needed for validation; skipping them saves memory.
        with torch.no_grad():
            for data, target in loaders['valid']:
                if use_cuda:
                    data, target = data.cuda(), target.cuda()

                output = model(data)
                loss = criterion(output, target)
                valid_loss += loss.item() * data.size(0)
                valid_corrects += (output.argmax(dim=1) == target).sum().item()

        valid_loss = valid_loss / valid_size
        valid_corrects = valid_corrects / valid_size

        # Print validation statistics
        print('Epoch: {} \tValidation Loss: {:.4f} \tValidation Acc: {:.4f}'.format(
            epoch,
            valid_loss,
            valid_corrects
            ))

        # Save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            torch.save(model.state_dict(), save_path)
            print('Validation loss decreased ({:.4f} --> {:.4f}).  Saving model...'.format(valid_loss_min, valid_loss))
            valid_loss_min = valid_loss

    return model

In [None]:
def test(loaders, model, optimizer, criterion, use_cuda):
    test_loss = 0.0
    test_corrects = 0.0
    
    dataset_sizes = {'train': len(loaders['train'].dataset), 
                     'valid': len(loaders['valid'].dataset),
                     'test': len(loaders['test'].dataset)}
    
    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # Update the average validation loss
        output = model(data)
        _, preds = torch.max(output.data, 1)
        loss = criterion(output, target)
        test_loss += loss.data
        test_corrects += torch.sum(preds == target.data)
    
    test_loss = test_loss / dataset_sizes['test']
    test_corrects = test_corrects / dataset_sizes['test']
    
    print('Test Loss: {:.4f} \tTest Acc: {:.4f}'.format(
        test_loss,
        test_corrects
        ))
    
    return test_loss, test_corrects

# ResNet50

ResNet50 is a variant of ResNet model which has 48 Convolution layers along with 1 MaxPool and 1 Average Pool layer. It has 3.8 x 10^9 Floating points operations. It is a widely used ResNet model and we have explored ResNet50 architecture in depth. You can load a pretrained version of the network trained on more than a million images from the ImageNet database.

In [None]:
model_resnet = models.resnet50(pretrained=True)

# Freeze every pretrained weight; only the new classifier head added below
# stays trainable.
model_resnet.requires_grad_(False)

# Swap the final fully connected layer for a Linear layer with 120 outputs.
in_features = model_resnet.fc.in_features
model_resnet.fc = nn.Linear(in_features, 120)

model_resnet = model_resnet.cuda() if use_cuda else model_resnet

Setup criterion, optimizer, and learning rate scheduler

In [None]:
criterion = nn.CrossEntropyLoss()

# Hand only the trainable parameters to the optimizer.
model_resnet_grad_paramaters = (
    weight for weight in model_resnet.parameters() if weight.requires_grad
)
optimizer = torch.optim.SGD(
    model_resnet_grad_paramaters,
    lr=0.001,
    momentum=0.9,
)

# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

Train the ResNet50 model

In [None]:
n_epoch = 10

# Train the frozen-backbone ResNet50; the best checkpoint goes to model_resnet.pt.
model_resnet = train(
    n_epochs=n_epoch,
    loaders=dataloaders,
    model=model_resnet,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=exp_lr_scheduler,
    use_cuda=use_cuda,
    save_path='model_resnet.pt',
)

Test the ResNet50 model

In [None]:
# Restore the checkpoint written during training (lowest validation loss).
model_resnet.load_state_dict(torch.load('model_resnet.pt'))

# Score the restored model on the test split.
resnet_score = test(
    loaders=dataloaders,
    model=model_resnet,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
)

In [None]:
# torchsummary builds its dummy input on "cuda" unless told otherwise; follow
# the notebook's CPU fallback so the cell also runs without a GPU.
summary(model_resnet, input_size=(3, 224, 224), device='cuda' if use_cuda else 'cpu')

Train the ResNet50 without freezing the trained layer to see the comparison

In [None]:
# Fresh pretrained ResNet50 with every layer left trainable.
model_resnet = models.resnet50(pretrained=True)

# Swap the final fully connected layer for a Linear layer with 120 outputs.
in_features = model_resnet.fc.in_features
model_resnet.fc = nn.Linear(in_features, 120)

model_resnet = model_resnet.cuda() if use_cuda else model_resnet

criterion = nn.CrossEntropyLoss()

# Nothing is frozen here, so this selects every parameter of the network.
model_resnet_grad_paramaters = (
    weight for weight in model_resnet.parameters() if weight.requires_grad
)
optimizer = torch.optim.SGD(
    model_resnet_grad_paramaters,
    lr=0.001,
    momentum=0.9,
)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
n_epoch = 10

# Fine-tune the whole ResNet50. This writes to model_resnet.pt again, the same
# file used by the frozen run.
model_resnet = train(
    n_epochs=n_epoch,
    loaders=dataloaders,
    model=model_resnet,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=exp_lr_scheduler,
    use_cuda=use_cuda,
    save_path='model_resnet.pt',
)

In [None]:
# Restore the checkpoint written during training (lowest validation loss).
model_resnet.load_state_dict(torch.load('model_resnet.pt'))

# Score the fully fine-tuned ResNet50 on the test split.
resnet_score_unfreeze = test(
    loaders=dataloaders,
    model=model_resnet,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
)

# EfficientNet

EfficientNet is a convolutional neural network architecture and scaling method that uniformly scales all dimensions of depth/width/resolution using a compound coefficient. Unlike conventional practice that arbitrary scales these factors, the EfficientNet scaling method uniformly scales network width, depth, and resolution with a set of fixed scaling coefficients.

In [None]:
# Load ImageNet-pretrained weights. from_name() only builds the architecture
# with random weights, which leaves nothing worth freezing.
model_fnet = EfficientNet.from_pretrained('efficientnet-b1')

# Freeze the pretrained layers; only the new classifier head stays trainable.
for param in model_fnet.parameters():
    param.requires_grad = False

# Replace the last fully connected layer with a Linear layer with 120 outputs
in_features = model_fnet._fc.in_features
model_fnet._fc = nn.Linear(in_features, 120)

# Setup criterion, optimizer, and learning rate scheduler
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model_fnet.parameters())
)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

if use_cuda:
    model_fnet = model_fnet.cuda()

Train the EfficientNet model

In [None]:
n_epoch = 10

# Train the EfficientNet itself. Passing model_resnet here would train and
# checkpoint the ResNet under the EfficientNet name.
model_fnet = train(n_epoch, dataloaders, model_fnet, optimizer, criterion, exp_lr_scheduler, use_cuda, 'model_fnet.pt')

Test the EfficientNet model

In [None]:
# Restore the checkpoint written during training (lowest validation loss).
model_fnet.load_state_dict(torch.load('model_fnet.pt'))

# Score the restored model on the test split.
fnet_score = test(
    loaders=dataloaders,
    model=model_fnet,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
)

In [None]:
# torchsummary builds its dummy input on "cuda" unless told otherwise; follow
# the notebook's CPU fallback so the cell also runs without a GPU.
summary(model_fnet, input_size=(3, 224, 224), device='cuda' if use_cuda else 'cpu')

Train the EfficientNet without freezing the trained layer to see the comparison

In [None]:
# Start from ImageNet-pretrained weights so that this run fine-tunes a trained
# network; from_name() would train from random initialisation instead.
model_fnet = EfficientNet.from_pretrained('efficientnet-b1')

# Replace the last fully connected layer with a Linear layer with 120 outputs
in_features = model_fnet._fc.in_features
model_fnet._fc = nn.Linear(in_features, 120)

# Setup criterion, optimizer, and learning rate scheduler.
# No layer is frozen, so every parameter is optimised.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_fnet.parameters())
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

if use_cuda:
    model_fnet = model_fnet.cuda()

In [None]:
n_epoch = 10

# Fine-tune the EfficientNet itself. Passing model_resnet here would train and
# checkpoint the ResNet under the EfficientNet name.
model_fnet = train(n_epoch, dataloaders, model_fnet, optimizer, criterion, exp_lr_scheduler, use_cuda, 'model_fnet.pt')

In [None]:
# Restore the checkpoint written during training (lowest validation loss).
model_fnet.load_state_dict(torch.load('model_fnet.pt'))

# Score the restored model on the test split.
fnet_score_unfreeze = test(
    loaders=dataloaders,
    model=model_fnet,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
)

# VGG16

VGG16 is a convolution neural net architecture that is considered to be one of the excellent vision model architecture till date. Most unique thing about VGG16 is that instead of having a large number of hyper-parameter they focused on having convolution layers of 3x3 filter with a stride 1 and always used same padding and maxpool layer of 2x2 filter of stride 2. It follows this arrangement of convolution and max pool layers consistently throughout the whole architecture. In the end it has 2 FC(fully connected layers) followed by a softmax for output. The 16 in VGG16 refers to it has 16 layers that have weights. This network is a pretty large network and it has about 138 million (approx) parameters.

In [None]:
model_vgg = models.vgg16_bn(pretrained=True)

# Freeze the convolutional "features" layers. The attribute is
# `requires_grad`; a misspelt name would only create an unused attribute and
# leave the layers trainable.
for param in model_vgg.features.parameters():
    param.requires_grad = False

in_features = model_vgg.classifier[6].in_features
features = list(model_vgg.classifier.children())[:-1] # Remove last layer
features.extend([nn.Linear(in_features, 120)]) # Add our layer with 120 outputs
model_vgg.classifier = nn.Sequential(*features)

# Setup criterion, optimizer, and learning rate scheduler.
# Only parameters that still require gradients are optimised.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    filter(lambda p: p.requires_grad, model_vgg.parameters()),
    lr=0.001,
    momentum=0.9,
)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

if use_cuda:
    model_vgg = model_vgg.cuda()

Train the VGG16 model

In [None]:
n_epoch = 10

# Train the VGG16 model; the best checkpoint goes to model_vgg.pt.
model_vgg = train(
    n_epochs=n_epoch,
    loaders=dataloaders,
    model=model_vgg,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=exp_lr_scheduler,
    use_cuda=use_cuda,
    save_path='model_vgg.pt',
)

Test the VGG16 model

In [None]:
# Restore the checkpoint written during training (lowest validation loss).
model_vgg.load_state_dict(torch.load('model_vgg.pt'))

# Score the restored model on the test split.
vgg_score = test(
    loaders=dataloaders,
    model=model_vgg,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
)

In [None]:
# torchsummary builds its dummy input on "cuda" unless told otherwise; follow
# the notebook's CPU fallback so the cell also runs without a GPU.
summary(model_vgg, input_size=(3, 224, 224), device='cuda' if use_cuda else 'cpu')

Train the VGG16 without freezing the trained layer to see the comparison

In [None]:
# Fresh pretrained VGG16 (batch-norm variant) with every layer left trainable.
model_vgg = models.vgg16_bn(pretrained=True)

# Keep all classifier layers except the last and append a 120-way output layer.
in_features = model_vgg.classifier[6].in_features
features = list(model_vgg.classifier.children())[:-1]
features.append(nn.Linear(in_features, 120))
model_vgg.classifier = nn.Sequential(*features)

# Loss, optimizer over all parameters, and step-wise learning-rate decay.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model_vgg.parameters(),
    lr=0.001,
    momentum=0.9,
)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

model_vgg = model_vgg.cuda() if use_cuda else model_vgg

In [None]:
n_epoch = 10

# Fine-tune the whole VGG16. This writes to model_vgg.pt again, the same file
# used by the frozen run.
model_vgg = train(
    n_epochs=n_epoch,
    loaders=dataloaders,
    model=model_vgg,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=exp_lr_scheduler,
    use_cuda=use_cuda,
    save_path='model_vgg.pt',
)

In [None]:
# Restore the checkpoint written during training (lowest validation loss).
model_vgg.load_state_dict(torch.load('model_vgg.pt'))

# Score the fully fine-tuned VGG16 on the test split.
vgg_score_unfreeze = test(
    loaders=dataloaders,
    model=model_vgg,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
)

# InceptionV3

Inception-v3 is a convolutional neural network architecture from the Inception family that makes several improvements, including label smoothing, factorized 7 x 7 convolutions, and an auxiliary classifier that propagates label information lower down the network (along with batch normalization for the layers in the side head).

For inceptionV3, we need to modify the train, test, and image transformation

For training and testing, the loss is computed from `output.logits` during training, because in training mode InceptionV3 returns more than just the main logits.

For the image transformation, the images are resized to 299x299, the input size InceptionV3 expects, so the cells below re-read the data and rebuild the data loaders.

In [None]:
def inception_train(n_epochs, loaders, model, optimizer, criterion, scheduler, use_cuda, save_path):
    """Train an Inception-style model and checkpoint the best validation loss.

    In training mode the model output is expected to expose a ``logits``
    attribute, and only those main logits enter the loss. In evaluation mode
    the output is used directly as the logits tensor.

    Args:
        n_epochs: Number of passes over the training loader.
        loaders: Mapping with at least the ``'train'`` and ``'valid'`` loaders.
        model: Module to train.
        optimizer: Optimizer stepped once per training batch.
        criterion: Loss called as ``criterion(logits, target)``; assumed to
            return the mean loss of the batch, as ``nn.CrossEntropyLoss()``
            does by default.
        scheduler: Learning-rate scheduler stepped once per epoch.
        use_cuda: If true, every batch is moved to the GPU.
        save_path: File that receives ``model.state_dict()`` whenever the
            validation loss improves.

    Returns:
        The model in its state after the final epoch. Load ``save_path`` to
        get the weights with the best validation loss.
    """
    valid_loss_min = float('inf')

    train_size = len(loaders['train'].dataset)
    valid_size = len(loaders['valid'].dataset)

    for epoch in range(1, n_epochs + 1):
        train_loss = 0.0
        train_corrects = 0
        samples_seen = 0

        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            optimizer.zero_grad()
            output = model(data)
            logits = output.logits
            loss = criterion(logits, target)
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by the batch size so that dividing by
            # the sample count gives a true per-sample average.
            batch_len = data.size(0)
            train_loss += loss.item() * batch_len
            train_corrects += (logits.argmax(dim=1) == target).sum().item()
            samples_seen += batch_len

            if batch_idx % 100 == 0:
                print('Epoch: %d \tBatch: %d \tTraining Loss: %.6f' % (epoch, batch_idx + 1, train_loss / samples_seen))

        train_loss = train_loss / train_size
        train_corrects = train_corrects / train_size
        print('Epoch: {} \tTraining Loss: {:.4f} \tTraining Acc: {:.4f}'.format(
            epoch,
            train_loss,
            train_corrects
            ))

        scheduler.step()

        valid_loss = 0.0
        valid_corrects = 0

        model.eval()
        # No gradients are needed for validation; skipping them saves memory.
        with torch.no_grad():
            for data, target in loaders['valid']:
                if use_cuda:
                    data, target = data.cuda(), target.cuda()

                output = model(data)
                loss = criterion(output, target)
                valid_loss += loss.item() * data.size(0)
                valid_corrects += (output.argmax(dim=1) == target).sum().item()

        valid_loss = valid_loss / valid_size
        valid_corrects = valid_corrects / valid_size

        # Print validation statistics
        print('Epoch: {} \tValidation Loss: {:.4f} \tValidation Acc: {:.4f}'.format(
            epoch,
            valid_loss,
            valid_corrects
            ))

        # Save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            torch.save(model.state_dict(), save_path)
            print('Validation loss decreased ({:.4f} --> {:.4f}).  Saving model...'.format(valid_loss_min, valid_loss))
            valid_loss_min = valid_loss

    return model

In [None]:
def inception_test(loaders, model, optimizer, criterion, use_cuda):
    """Evaluate ``model`` on the test loader.

    The model is put into evaluation mode and its output is used directly as
    the logits tensor.

    Args:
        loaders: Mapping that contains the ``'test'`` loader.
        model: Module to evaluate.
        optimizer: Not used; kept so existing calls keep working.
        criterion: Loss called as ``criterion(output, target)``; assumed to
            return the mean loss of the batch.
        use_cuda: If true, every batch is moved to the GPU.

    Returns:
        ``(test_loss, test_accuracy)`` as Python floats: the per-sample
        average loss and the fraction of correctly classified samples.
    """
    test_loss = 0.0
    test_corrects = 0
    test_size = len(loaders['test'].dataset)

    model.eval()
    # Evaluation only: no gradients are required.
    with torch.no_grad():
        for data, target in loaders['test']:
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            # Weight the batch-mean loss by the batch size so the final
            # division yields a true per-sample average.
            test_loss += loss.item() * data.size(0)
            test_corrects += (output.argmax(dim=1) == target).sum().item()

    test_loss = test_loss / test_size
    test_corrects = test_corrects / test_size

    print('Test Loss: {:.4f} \tTest Acc: {:.4f}'.format(
        test_loss,
        test_corrects
        ))

    return test_loss, test_corrects

In [None]:
# Re-read the label table and breed names for the InceptionV3 section.
PATH = '../input/dog-breed-identification/'

labels = pd.read_csv(PATH + 'labels.csv')

# Breed names: every sample-submission column after the first one.
labelnames = pd.read_csv(PATH + 'sample_submission.csv').columns[1:]

In [None]:
# Two-way mapping between breed names and integer class codes.
codes = range(len(labelnames))
breed_to_code = dict(zip(labelnames, codes))
code_to_breed = dict(zip(codes, labelnames))
labels['target'] = [breed_to_code[x] for x in labels.breed]

# One row per image id and one column per breed. Keyword arguments are used
# because pandas 2.x no longer accepts positional arguments for pivot().
labels_pivot = (
    labels.pivot(index='id', columns='breed', values='target')
    .reset_index()
    .fillna(0)
)

# 75% train; the remaining 25% is split 60/40 into validation and test.
train = labels_pivot.sample(frac=0.75)
temp = labels_pivot[~labels_pivot['id'].isin(train['id'])]
valid = temp.sample(frac=0.6)
test = temp[~temp['id'].isin(valid['id'])]

In [None]:
# Image transformations
def _inception_tensor_and_normalize():
    """Return fresh tensor-conversion and normalisation steps shared by all splits."""
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]),
    ]


def _inception_eval_pipeline():
    """Return the deterministic 299x299 pipeline for the validation and test splits."""
    return transforms.Compose(
        [transforms.Resize(size=299), transforms.CenterCrop(size=299)]
        + _inception_tensor_and_normalize()
    )


img_transform = {
    # Training split: random augmentation at the 299x299 size used for
    # InceptionV3 in this notebook.
    'train': transforms.Compose(
        [
            transforms.RandomResizedCrop(size=299),
            transforms.RandomRotation(degrees=30),
            transforms.ColorJitter(),
            transforms.RandomHorizontalFlip(),
            transforms.CenterCrop(size=299),
        ]
        + _inception_tensor_and_normalize()
    ),
    'valid': _inception_eval_pipeline(),
    'test': _inception_eval_pipeline(),
}

In [None]:
class DogBreedDataset(torch.utils.data.Dataset):
    """Dataset pairing dog images on disk with integer breed labels.

    Args:
        img_dir: Directory containing the ``<id>.jpg`` files (with or without
            a trailing slash).
        label: DataFrame whose first column is the image id and whose
            remaining columns hold one numeric value per breed; the position
            of the largest value in a row is used as the class index.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, img_dir, label, transform):
        self.img_dir = img_dir
        self.transform = transform
        self.label = label

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.label)

    def __getitem__(self, index):
        """Return ``[image, class_index]`` for the row at ``index``."""
        if self.label is None:
            return None
        img_name = '{}.jpg'.format(self.label.iloc[index, 0])
        fullname = os.path.join(self.img_dir, img_name)
        # Force three channels so grayscale or RGBA files still match the
        # 3-channel normalisation, and close the file handle once decoded.
        with Image.open(fullname) as handle:
            image = handle.convert('RGB')
        scores = self.label.iloc[index, 1:].astype('float').to_numpy()
        label = np.argmax(scores)
        if self.transform:
            image = self.transform(image)
        return [image, label]

In [None]:
batch_size = 12
num_workers = 4

# Rebuild the datasets so they pick up the current split and transforms.
train_img = DogBreedDataset(PATH + 'train/', train, transform=img_transform['train'])
valid_img = DogBreedDataset(PATH + 'train/', valid, transform=img_transform['valid'])
test_img = DogBreedDataset(PATH + 'train/', test, transform=img_transform['test'])

# Only the training loader shuffles its samples.
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size,
        num_workers=num_workers,
        shuffle=(split == 'train'),
    )
    for split, dataset in (
        ('train', train_img),
        ('valid', valid_img),
        ('test', test_img),
    )
}


In [None]:
model_inception = models.inception_v3(pretrained=True)

# Freeze every pretrained weight; the two heads created below are new layers
# and therefore stay trainable.
model_inception.requires_grad_(False)

# Give both the auxiliary head and the primary head 120 outputs.
aux_in_features = model_inception.AuxLogits.fc.in_features
model_inception.AuxLogits.fc = nn.Linear(aux_in_features, 120)
in_features = model_inception.fc.in_features
model_inception.fc = nn.Linear(in_features, 120)

# Loss, optimizer, and step-wise learning-rate decay.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model_inception.parameters(),
    lr=0.001,
    momentum=0.9,
)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

model_inception = model_inception.cuda() if use_cuda else model_inception

Train the InceptionV3 model

In [None]:
n_epoch = 10

# Train the InceptionV3 model; the best checkpoint goes to model_inception.pt.
model_inception = inception_train(
    n_epochs=n_epoch,
    loaders=dataloaders,
    model=model_inception,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=exp_lr_scheduler,
    use_cuda=use_cuda,
    save_path='model_inception.pt',
)

Test the InceptionV3 model

In [None]:
# Restore the checkpoint written during training (lowest validation loss).
model_inception.load_state_dict(torch.load('model_inception.pt'))

# Score the restored model on the test split.
inception_score = inception_test(
    loaders=dataloaders,
    model=model_inception,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
)

In [None]:
# torchsummary builds its dummy input on "cuda" unless told otherwise; follow
# the notebook's CPU fallback so the cell also runs without a GPU.
summary(model_inception, input_size=(3, 299, 299), device='cuda' if use_cuda else 'cpu')

Train the InceptionV3 without freezing the trained layer to see the comparison

In [None]:
# Fresh pretrained InceptionV3 with every layer left trainable.
model_inception = models.inception_v3(pretrained=True)

# Give both the auxiliary head and the primary head 120 outputs.
aux_in_features = model_inception.AuxLogits.fc.in_features
model_inception.AuxLogits.fc = nn.Linear(aux_in_features, 120)
in_features = model_inception.fc.in_features
model_inception.fc = nn.Linear(in_features, 120)

# Loss, optimizer over all parameters, and step-wise learning-rate decay.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model_inception.parameters(),
    lr=0.001,
    momentum=0.9,
)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

model_inception = model_inception.cuda() if use_cuda else model_inception

In [None]:
n_epoch = 10

# Fine-tune the whole InceptionV3. This writes to model_inception.pt again,
# the same file used by the frozen run.
model_inception = inception_train(
    n_epochs=n_epoch,
    loaders=dataloaders,
    model=model_inception,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=exp_lr_scheduler,
    use_cuda=use_cuda,
    save_path='model_inception.pt',
)

In [None]:
# Restore the checkpoint written during training (lowest validation loss).
model_inception.load_state_dict(torch.load('model_inception.pt'))

# Score the fully fine-tuned InceptionV3 on the test split.
inception_score_unfreeze = inception_test(
    loaders=dataloaders,
    model=model_inception,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
)

# Summary

In [None]:
# Frozen-backbone results, in reporting order. Each score is the
# (loss, accuracy) pair returned by the corresponding test call.
frozen_scores = (
    ('ResNet50', resnet_score),
    ('EfficientNet', fnet_score),
    ('VGG16', vgg_score),
    ('Inception', inception_score),
)

# Print all accuracies first, then all losses.
for metric, position in (('Acc', 1), ('Loss', 0)):
    for model_name, score in frozen_scores:
        print('{} {}: {:.4f}'.format(model_name, metric, score[position]))

In [None]:
# Test accuracy of every model variant. The table is built in one step because
# DataFrame.append() no longer exists in pandas 2.x.
accuracy_by_model = [
    ("ResNet50", resnet_score),
    ("ResNet50 Unfreeze", resnet_score_unfreeze),
    ("EfficientNet", fnet_score),
    ("EfficientNet Unfreeze", fnet_score_unfreeze),
    ("VGG16", vgg_score),
    ("VGG16 Unfreeze", vgg_score_unfreeze),
    ("InceptionV3", inception_score),
    ("InceptionV3 Unfreeze", inception_score_unfreeze),
]

# Index 1 of each score is the accuracy; float() accepts both plain numbers
# and single-element tensors.
model_df = pd.DataFrame(
    [[name, float(score[1])] for name, score in accuracy_by_model],
    columns=['Model', 'Accuracy'],
)

model_df

In [None]:
# Index a copy by model name instead of mutating model_df in place, so the
# cell can be re-run without failing on the missing 'Model' column.
ax = model_df.set_index('Model')['Accuracy'].plot(
    kind='barh',
    figsize=(12, 8),
    title='Test accuracy by model',
)
ax.set(xlabel='Accuracy', ylabel='Model')
plt.show()

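InceptionV3 is reported as the top model, predicting the 120 breeds with 85.4% accuracy.

It is followed by EfficientNet with 85.7% accuracy, ResNet50 with 82.7% accuracy, and VGG16 with 80.0% accuracy. (Note: the EfficientNet figure quoted here is higher than the InceptionV3 figure, so either the ranking or one of the numbers needs to be re-checked against the results table above.)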

The unfrozen versions are slightly weaker than the versions with frozen pretrained layers, possibly because of overfitting: in the first 3 epochs the unfrozen models are better, but after more epochs they are about 1 to 2 percentage points weaker.