# Convnets for Classification with PyTorch

This notebook contains some CNN implementations and experiments using PyTorch.

Classification is the most common task we use CNNs for. We will experiment on three datasets: two of them are simple datasets with a relatively small number of classes, and then we will scale up to a much larger dataset.




In [8]:
import os
import random
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import PIL.Image
import seaborn as sns
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize

# Use the GPU when PyTorch can see one (e.g. on Kaggle with an accelerator enabled) and fall
# back to the CPU otherwise, so the notebook runs end-to-end without manual edits.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

Some helper functions to be used throughout the notebook. 

In [18]:
def plot(config):
    """Draw the metrics stored in ``config['history']`` on the current matplotlib axes.

    Train and validation loss are plotted against the epoch index on the left
    y-axis; validation accuracy is plotted in red on a twin right y-axis.
    """
    curves = pd.DataFrame(config['history'])
    epochs = curves.index

    # Axis labels are set before plotting so seaborn keeps them instead of the column names.
    plt.ylabel('Loss')
    plt.xlabel('Epoch number')
    for column, legend_name in (('train_loss', 'Train Loss'), ('valid_loss', 'Valid Loss')):
        sns.lineplot(x=epochs, y=curves[column], label=legend_name)
    plt.legend(loc="center right")

    accuracy_axis = plt.twinx()
    accuracy_axis.set_ylabel('Accuracy')
    sns.lineplot(x=epochs, y=curves['valid_acc'], ax=accuracy_axis, color='red', label='Valid Acc.')

def train_model(model, config):
    """Train ``model`` in place according to ``config`` and record per-epoch metrics.

    Keys read from ``config``:
        epochs        -- number of epochs to run in this call
        optim         -- optimizer already bound to the parameters to update
        criterion     -- loss callable, ``criterion(output, target)``
        train_loader  -- iterable of ``(x, y)`` training batches
        valid_loader  -- iterable of ``(x, y)`` validation batches (must expose ``.dataset``)
        device        -- device every batch is moved to
        history       -- dict holding the lists ``train_loss``, ``valid_loss`` and
                         ``valid_acc``; one value is appended to each per epoch
        early_stop    -- optional dict with ``patience``, ``trials`` and ``name``

    Early stopping: whenever the latest validation loss is the best recorded in
    ``history``, the model and optimizer state are saved to ``f'{name}.pt'`` and the
    patience counter is reset. After ``patience`` epochs without improvement the best
    checkpoint is restored, the counter is reset and one trial is consumed; training
    stops once no trials remain.

    ``config['lr']`` is not read here: the learning rate is whatever the optimizer in
    ``config['optim']`` was built with.
    """
    epochs = config['epochs']
    optim = config['optim']
    criterion = config['criterion']
    train_loader = config['train_loader']
    valid_loader = config['valid_loader']
    device = config['device']
    history = config['history']

    early_stop = config.get('early_stop')
    if early_stop is not None:
        patience = current_patience = early_stop['patience']
        trials = early_stop['trials']
        checkpoint_path = f"{early_stop['name']}.pt"

    def train():
        """Run one optimisation pass over the training loader."""
        model.train()
        epoch_loss = 0
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            # clear gradients left over from the previous step before accumulating new ones
            model.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optim.step()
            epoch_loss += loss.item()

        history['train_loss'].append(epoch_loss / len(train_loader))

    def evaluation():
        """Compute the mean batch loss and the accuracy on the validation loader."""
        model.eval()
        correct = 0
        eval_loss = 0
        with torch.no_grad():
            for x, y in valid_loader:
                x = x.to(device)
                y = y.to(device)

                out = model(x)
                eval_loss += criterion(out, y).item()
                # softmax is monotonic, so the arg-max of the raw logits is the prediction
                correct += (out.argmax(dim=1) == y).sum().item()

        history['valid_loss'].append(eval_loss / len(valid_loader))
        history['valid_acc'].append(correct / len(valid_loader.dataset))

    def earlystop(epoch):
        """Update the early-stopping state; return True when training should end."""
        nonlocal current_patience
        nonlocal trials

        if history['valid_loss'][-1] <= min(history['valid_loss']):
            # new best: checkpoint it and start counting patience from scratch
            torch.save(
                {
                    'model': model.state_dict(),
                    'optim': optim.state_dict()
                },
                checkpoint_path
            )
            current_patience = patience
            return False

        current_patience -= 1
        if current_patience > 0:
            return False

        # patience exhausted: roll back to the best checkpoint
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model'])
        optim.load_state_dict(checkpoint['optim'])
        # Without this reset the counter would go negative and the roll-back / stop
        # logic could never trigger again when more than one trial is configured.
        current_patience = patience
        trials -= 1
        if trials <= 0:
            print('Early Stopped at epoch', epoch + 1)
            return True
        return False

    # training / evaluation loop
    for e in range(epochs):
        train()
        evaluation()

        if (e + 1) % 5 == 0:
            print('Epoch', e+1)
            print('Epoch Train Loss:', history['train_loss'][-1])
            print('Epoch Valid Loss', history['valid_loss'][-1])
            print('Epoch Valid Acc', history['valid_acc'][-1], '\n')

        if early_stop is not None and earlystop(e):
            break

    # keep the optimizer in the config so a later call can continue training
    config["optim"] = optim

def test_eval(model, config):
    correct = 0
    eval_loss = 0
    model.eval()
    criterion = config["criterion"]
    loader = config['test_loader']
    for batch in loader:
        with torch.no_grad():
            x, y = batch
            x = x.to(device)
            y = y.to(device)

            out = model(x)
            eval_loss += criterion(out, y).item()
            softmax_out = torch.softmax(out, axis=1)
            correct += (softmax_out.argmax(1) == y).sum().item()
            
    print('Test Acc.:', correct / len(loader.dataset), 'Test Loss', eval_loss / len(loader))

# Fashion MNIST

First, let's start with a simple dataset, the Fashion MNIST (f-mnist); this dataset is small so we can read it entirely into RAM.

In [10]:
# Hold out 20% of the official training CSV as a validation set (an 80/20 split).
# The seed is fixed so the split -- and every metric reported below -- is reproducible
# across kernel restarts.
train_valid = pd.read_csv('../input/fashionmnist/fashion-mnist_train.csv')
test = pd.read_csv('../input/fashionmnist/fashion-mnist_test.csv')
train, valid = train_test_split(train_valid, test_size=0.2, random_state=42)
train.head()

Now we separate the images and the truth labels

In [11]:
def _features_and_labels(frame):
    """Return (pixel matrix, label vector) for a Fashion-MNIST frame.

    Every column after the first is taken as a pixel; the label comes from the
    'label' column.
    """
    return frame.iloc[:, 1:].values, frame['label'].values


train_x, train_y = _features_and_labels(train)
valid_x, valid_y = _features_and_labels(valid)
test_x, test_y = _features_and_labels(test)

# sklearn's normalize (default settings) rescales each row, i.e. each image, to unit L2 norm
train_x, valid_x, test_x = (normalize(pixels) for pixels in (train_x, valid_x, test_x))

print('Train set', train_x.shape, train_y.shape)
print('Valid set', valid_x.shape, valid_y.shape)
print('Test set', test_x.shape, test_y.shape)

Create some custom datasets with the torch "Dataset" API.

Note: admittedly there are better ways to load the data, but I thought this would be a good simple example to showcase the pytorch "Dataset" API

In [12]:
# Lookup table from split name to its (features, labels) arrays; the FashionMNIST
# dataset class below selects one entry by name.
train_valid_test = {
    'train': (train_x, train_y),
    'valid': (valid_x, valid_y),
    'test': (test_x, test_y)
}

class FashionMNIST(Data.Dataset):
    """In-memory Fashion-MNIST split backed by the module-level ``train_valid_test`` dict.

    ``which`` names the split to serve and must be a key of ``train_valid_test``.
    ``transform``, when given, is applied to each image returned by ``__getitem__``.
    """

    def __init__(self, which, transform=None):
        assert which in tuple(train_valid_test)
        features, targets = train_valid_test[which]
        # images are stored as float tensors, labels keep their numpy dtype
        self.x = torch.from_numpy(features).float()
        self.y = torch.from_numpy(targets)
        self.transform = transform

    def __len__(self):
        """Number of samples in the split."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at ``idx``."""
        image = self.x[idx]
        target = self.y[idx]
        if self.transform:
            image = self.transform(image)

        return image, target
    

# Only the training data is shuffled; validation and test batches are served in a fixed
# order because shuffling does not change the metrics and only makes runs harder to compare.
train_loader = Data.DataLoader(FashionMNIST('train'), batch_size=64, shuffle=True)
valid_loader = Data.DataLoader(FashionMNIST('valid'), batch_size=64, shuffle=False)
test_loader = Data.DataLoader(FashionMNIST('test'), batch_size=64, shuffle=False)

In [13]:
# Human-readable class names, indexed by the integer label stored in the CSV.
labels = np.array([
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot'
])

fig, ax = plt.subplots(3, 3, figsize=(9, 9))

# Show nine random training images in a 3x3 grid.
for cell in ax.flat:
    # randrange excludes the upper bound; randint(0, len(train_x)) could return
    # len(train_x) itself and raise an IndexError.
    idx = random.randrange(len(train_x))
    cell.set_title(labels[train_y[idx]])
    cell.xaxis.set_visible(False)
    cell.yaxis.set_visible(False)
    cell.imshow(train_x[idx].reshape(28, 28))

The helper functions defined near the top of the notebook (`train_model`, `test_eval` and `plot`) are used below to train, test and visualise our models.

## An MLP benchmark

We first train an MLP on the f-mnist dataset as a benchmark for our CNN model.

In [14]:
# Two hidden layers (784 -> 300 -> 150 -> 10) with dropout after each hidden projection.
simple_mlp = nn.Sequential(
    nn.Linear(784, 300),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(300, 150),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(150, 10)
).to(device)

# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
mlp_lr = 0.001

mlp_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 20,
    'optim': torch.optim.Adam(simple_mlp.parameters(), lr=mlp_lr),
    'lr': mlp_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'test_loader': test_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    }
}

Train our simple MLP

In [15]:
train_model(simple_mlp, mlp_config)

Visualise our MLP training progress

In [16]:
plt.title("Simple MLP")
plot(mlp_config)

Evaluate our model on test set

In [19]:
test_eval(simple_mlp, mlp_config)

## A simple CNN

Before we build our CNN, we need to transform our data so that it can be used as input to a conv2d layer. Currently, we have our data represented as a 784 dimensional vector, and we need to reshape it into (1, 28, 28), where each entry stands for channel, height, and width, respectively.

We can do this easily by adding a transformation to our custom dataset, but for simplicity we will instead reshape the input when the model receives it. This can be achieved with a simple custom layer, as follows:

In [20]:
class chw(nn.Module):
    """Reshape layer: (batch_size, n_features) -> (batch_size, channel, height, width).

    ``dims`` is the ``(channel, height, width)`` triple of the target layout; the
    batch dimension is inferred.
    """

    def __init__(self, dims):
        super().__init__()
        channels, height, width = dims
        self.c = channels
        self.h = height
        self.w = width

    def forward(self, x):
        target_shape = (-1, self.c, self.h, self.w)
        return x.view(*target_shape)

In [21]:
# Two conv/pool stages: 28x28 -> 14x14 -> 7x7 with 12 feature maps, i.e. 12 * 7 * 7 = 588
# features entering the classifier head.
simple_cnn = nn.Sequential(
    chw((1, 28, 28)), # custom layer for data transformation
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=(3,3), stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 12, 3, 1, 1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Flatten(),
    nn.Linear(588, 128),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(128, 10)
).to(device)

# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
simple_cnn_lr = 0.001

simple_cnn_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 20,
    'optim': torch.optim.Adam(simple_cnn.parameters(), lr=simple_cnn_lr),
    'lr': simple_cnn_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'test_loader': test_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    }
}

Let's train our simple CNN!

In [22]:
train_model(simple_cnn, simple_cnn_config)

Again, let's visualise our model progress

In [23]:
plt.title('Simple CNN')
plot(simple_cnn_config)

It looks like there may still be some room for improvements, let's train for a few more epochs

In [24]:
simple_cnn_config['epochs'] = 10 # train for 10 epochs this time
train_model(simple_cnn, simple_cnn_config)

Visualising the simple cnn again

In [25]:
plt.title('Simple CNN')
plot(simple_cnn_config)

In [26]:
test_eval(simple_cnn, simple_cnn_config)

## ResNet

Now, let's try the same dataset with one of the SOTA CNN architectures, namely ResNet. PyTorch ships built-in implementations of many popular architectures; however, the built-in model expects images with 3 input channels, which is 2 more than our f-mnist images have. There are various techniques we can employ to remedy this, but I think it is more meaningful to build our own from scratch.

In [27]:
class ResidualLayer(nn.Module):
    """Basic two-convolution residual unit: ``relu(F(x) + shortcut(x))``.

    ``F`` is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut is the identity when
    the input and output shapes match, and a 1x1 convolution (``conv3``) otherwise.

    Args:
        in_c:   number of input channels.
        out_c:  number of output channels.
        stride: stride of the first 3x3 convolution (and of the 1x1 projection).
        first:  force a 1x1 projection shortcut. A projection is also created
                automatically when ``stride != 1`` or ``in_c != out_c``, because the
                identity could not be added to the output in that case.

    Previously the non-projection path returned ``F(x)`` without adding ``x``, so
    those layers had no residual connection at all, and the ReLU was applied before
    the addition instead of after it.
    """

    def __init__(self, in_c, out_c, stride=1, first=False):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=3,
            stride=stride,
            padding=1)

        self.conv2 = nn.Conv2d(out_c, out_c, 3, 1, 1)

        needs_projection = first or stride != 1 or in_c != out_c
        self.conv3 = nn.Conv2d(in_c, out_c, 1, stride) if needs_projection else None
        self.relu = nn.ReLU()

        self.bn1 = nn.BatchNorm2d(out_c)
        self.bn2 = nn.BatchNorm2d(out_c)

    def forward(self, x):
        y = self.conv1(x)
        y = self.bn1(y)
        y = self.relu(y)
        y = self.conv2(y)
        y = self.bn2(y)

        # `is None` rather than truthiness: an nn.Module should not be tested as a bool
        shortcut = x if self.conv3 is None else self.conv3(x)
        return self.relu(y + shortcut)
    
class ResidualBlock(nn.Module):
    """A stack of ``num_layers`` ResidualLayer modules applied one after another.

    The first layer maps ``in_c`` to ``out_c`` channels with stride 2 and
    ``first=True``; the remaining ``num_layers - 1`` layers keep ``out_c`` channels
    and use ResidualLayer's default stride.
    """

    def __init__(self, in_c, out_c, num_layers):
        super().__init__()
        stack = [ResidualLayer(in_c, out_c, 2, True)]
        stack.extend(ResidualLayer(out_c, out_c) for _ in range(num_layers - 1))
        self.layers = nn.ModuleList(stack)

    def forward(self, x):
        out = x
        for stage in self.layers:
            out = stage(out)

        return out
    
# (A stray copy of the stem block used to be constructed and discarded here; it has been
# removed because it had no effect other than consuming random numbers at initialisation.)

my_resnet = nn.Sequential(
    chw((1, 28, 28)),
    # upsample the 28x28 images to the 224x224 resolution the architecture was designed for
    torchvision.transforms.Resize((224, 224)),
    # the base block is a little bit special compared to the rest of the network:
    # it takes in the single input channel and is the only block that uses max pooling
    nn.Sequential(
        nn.Conv2d(1, 64, 7, 2, 3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(3, 2, 1)
    ),
    # the resnet blocks, each made of 2 layers
    # Note: the first layer of each resnet block uses a stride of 2
    # to reduce the size of the feature maps for subsequent layers
    ResidualBlock(64, 64, 2),
    ResidualBlock(64, 128, 2),
    ResidualBlock(128, 256, 2),
    ResidualBlock(256, 512, 2),

    # global average pooling averages every feature map down to a single value, which
    # replaces a large fully connected layer and cuts the number of parameters dramatically
    nn.AdaptiveAvgPool2d((1,1)),
    nn.Flatten(),
    nn.Linear(512, 10)
).to(device)

# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
resnet_lr = 0.001

resnet_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 20,
    'optim': torch.optim.Adam(my_resnet.parameters(), lr=resnet_lr),
    'lr': resnet_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'test_loader': test_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    }
}

In [28]:
train_model(my_resnet, resnet_config)

Visualize

In [29]:
plot(resnet_config)

It appears that the model started to overfit after 7 epochs; however, the accuracy is not severely affected (if at all). We can remedy this by implementing an early stopping mechanism as follows:

In [30]:
# Same architecture as before, re-created so training starts from fresh weights.
my_resnet = nn.Sequential(
    chw((1, 28, 28)),
    # upsample the 28x28 images to the 224x224 resolution the architecture was designed for
    torchvision.transforms.Resize((224, 224)),
    # the base block is a little bit special compared to the rest of the network:
    # it takes in the single input channel and is the only block that uses max pooling
    nn.Sequential(
        nn.Conv2d(1, 64, 7, 2, 3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(3, 2, 1)
    ),
    # the resnet blocks, each made of 2 layers
    # Note: the first layer of each resnet block uses a stride of 2
    # to reduce the size of the feature maps for subsequent layers
    ResidualBlock(64, 64, 2),
    ResidualBlock(64, 128, 2),
    ResidualBlock(128, 256, 2),
    ResidualBlock(256, 512, 2),

    # global average pooling averages every feature map down to a single value, which
    # replaces a large fully connected layer and cuts the number of parameters dramatically
    nn.AdaptiveAvgPool2d((1,1)),
    nn.Flatten(),
    nn.Linear(512, 10)
).to(device)

# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
resnet_lr = 0.001

resnet_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 20,
    'optim': torch.optim.Adam(my_resnet.parameters(), lr=resnet_lr),
    'lr': resnet_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'test_loader': test_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    },
    # add early stopping criteria to the model config;
    # the best checkpoint is written to 'resnet.pt' in the working directory
    'early_stop': {
        'name': 'resnet',
        'patience': 3,
        'trials': 1
    }
}

train_model(my_resnet, resnet_config)
plot(resnet_config)

Let's look at the each model's performance on the test set:

In [31]:
print('Resnet')
test_eval(my_resnet, resnet_config)
print('\nSimple CNN')
test_eval(simple_cnn, simple_cnn_config)
print('\nSimple MLP')
test_eval(simple_mlp, mlp_config)

Unsurprisingly, ResNet performed the best with 93% accuracy, while the other two are at least 2% worse.

# Fish dataset

Now, we will shift from the f-mnist dataset to a slightly more complex dataset (instances have 3 channels instead of 1!), namely the Fish dataset. Let's begin with some data exploration and then some preprocessing to get the data ready for model consumption. The main goal of this section is to compare the performances between a pretrained cnn base and the same trained from scratch.

The dataset is split into two main directories: "NA_Fish_Dataset" holds the original images, and "Fish_Dataset" is the result of applying data augmentation to them. The augmented version additionally contains a "GT" (ground truth) folder for each class, intended for image segmentation tasks.

For our project, we will make use of the "NA" (stands for "not augmented"... I guess?) directory and perform data augmentation ourselves with the torchvision api.

We will make a custom dataset like before, but this time it is a bit more tricky, as some of the images are of different sizes and formats (PNG and JPEG).

In [32]:
# Extract the file names and class labels from the folder structure:
# every sub-directory of FISH_ROOT is one class, and its files are that class's images.
FISH_ROOT = '../input/a-large-scale-fish-dataset/NA_Fish_Dataset'

fishes = {"id": [], "class": [], 'label': []}

# Assigns the next free integer id the first time a class name is looked up.
labels = defaultdict(lambda: len(labels))

# A single walk is enough. The previous nested walk joined the OUTER directory with files
# found by the inner walk, which yields wrong paths and duplicates for nested folders.
for class_dir, subdirs, filenames in os.walk(FISH_ROOT, topdown=True):
    # sort in place so the traversal order, and therefore the label ids, are deterministic
    subdirs.sort()
    if class_dir == FISH_ROOT:
        continue

    fish_class = os.path.basename(class_dir)
    fish_label = labels[fish_class]
    for filename in sorted(filenames):
        fishes['id'].append(f'{class_dir}/{filename}')
        fishes['class'].append(fish_class)
        fishes['label'].append(fish_label)


# this dataframe is used to refer to the files when we make a custom dataset to load the data
fishes_df = pd.DataFrame(fishes)
fishes_df.head()

In [33]:
# a look at the class distributions
print('Sample size:', fishes_df.shape[0])
print(fishes_df['class'].value_counts())

As shown from the cell above, there are a whopping 430 fishes (not distinct fishes as some are just the same fish but different angles :) ) in the dataset! This is not a lot but perfect for us to see the power of a pretrained base as they would, ideally, give good predictive performance even with scarce training data. Motivated by this, we will perform a rather extreme split of 50/10/40 (train/val./test) on the dataset.

In [34]:
# 50/10/40 train/valid/test split, stratified by label at both stages:
# first carve off 40% for testing, then take 1/6 of the remaining 60% (10% overall) for validation.
TEST_SHARE = 4/10
VALID_SHARE_OF_REMAINDER = 1/6

train_val, test = train_test_split(
    fishes_df,
    test_size=TEST_SHARE,
    stratify=fishes['label'],
    random_state=23,
)
train, valid = train_test_split(
    train_val,
    test_size=VALID_SHARE_OF_REMAINDER,
    stratify=train_val['label'],
    random_state=40,
)

In [35]:
print('\nTrain:',train.shape[0])
print(train["class"].value_counts())
print('\nValid:',valid.shape[0])
print(valid["class"].value_counts())
print('\nTest:',test.shape[0])
print(test["class"].value_counts())

Note: We used stratified sampling for each split to ensure that every class is properly represented in each split. Now let's create our custom datasets!

In [36]:
# Lookup table from split name to the DataFrame holding that split's rows;
# the FishesDataset class below selects one entry by name.
fishes_datasets = {
    'train': train,
    'valid': valid,
    'test': test
}

class FishesDataset(Data.Dataset):
    """Fish image split that loads images lazily from disk.

    ``which`` selects a DataFrame from the module-level ``fishes_datasets`` dict; the
    frame must have an 'id' column (image path) and a 'label' column (integer class).
    ``transform``, when given, is applied to the resized image tensor.

    Samples are looked up positionally in this split's own frame. The previous
    implementation indexed the full ``fishes_df`` by label, so every split served the
    first ``len(split)`` rows of the whole dataset and train/valid/test overlapped.
    """

    def __init__(self, which, transform=None):
        self.data = fishes_datasets[which]
        self.transform = transform

    def __len__(self):
        """Number of images in this split."""
        return self.data.shape[0]

    def _record(self, idx):
        """Return ``(image path, integer label)`` for the idx-th row of this split."""
        row = self.data.iloc[idx]
        return row['id'], int(row['label'])

    def __getitem__(self, idx):
        """Return ``(image tensor resized to 224x224, label)`` for position ``idx``."""
        path, y = self._record(idx)
        # the context manager closes the file handle once the pixels are decoded
        with PIL.Image.open(path) as image:
            # convert to RGB so every sample has exactly 3 channels regardless of file format
            x = transforms.functional.to_tensor(image.convert('RGB'))
        x = transforms.Resize((224, 224))(x)
        if self.transform:
            x = self.transform(x)

        return x, y
        
# Only the training data is shuffled; validation and test batches are served in a fixed
# order because shuffling does not change the metrics and only makes runs harder to compare.
train_loader = Data.DataLoader(FishesDataset('train'), batch_size=64, shuffle=True)
valid_loader = Data.DataLoader(FishesDataset('valid'), batch_size=64, shuffle=False)
test_loader = Data.DataLoader(FishesDataset('test'), batch_size=64, shuffle=False)

Let's have a look at some of the fishes!

In [39]:
fig, ax = plt.subplots(3, 3, figsize=(6, 6))

# Fetch a single batch; list(train_loader)[0] would decode every image in the split first.
x, y = next(iter(train_loader))

# class names in label order (labels maps class name -> integer id in insertion order)
class_names = list(labels.keys())

for cell in ax.flat:
    # sample within the actual batch size instead of assuming 64 images
    idx = random.randrange(len(y))
    cell.set_title(class_names[y[idx].item()])
    cell.xaxis.set_visible(False)
    cell.yaxis.set_visible(False)
    # imshow expects (height, width, channel)
    cell.imshow(x[idx].permute(1, 2, 0))

## Simple CNN

We will use a simple CNN as our benchmark for this dataset, we don't use an MLP because the model would be too large (the input dimension would be 3 * 224 * 224) and training would be too slow.

In [40]:
# Three strided conv + pool stages: 224 -> 112 -> 56 -> 28 -> 14 -> 7 -> 3, leaving
# 16 feature maps of 3x3, i.e. 144 features for the classifier head (9 fish classes).
simple_cnn = nn.Sequential(
    nn.Conv2d(3, 6, 5, 2, 2),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(6, 12, 5, 2, 2),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(12, 16, 5, 2, 2),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Flatten(),
    nn.Linear(144, 64),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(64, 9)
).to(device)

# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
simple_cnn_lr = 0.001

simple_cnn_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 20,
    'optim': torch.optim.Adam(simple_cnn.parameters(), lr=simple_cnn_lr),
    'lr': simple_cnn_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'test_loader': test_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    }
}

In [41]:
train_model(simple_cnn, simple_cnn_config)

In [42]:
plot(simple_cnn_config)

In [43]:
my_resnet = nn.Sequential(
    # the base block is a little bit special compared to the rest of the network:
    # it takes in the three RGB channels and is the only block that uses max pooling
    nn.Sequential(
        nn.Conv2d(3, 64, 7, 2, 3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(3, 2, 1)
    ),
    # the resnet blocks, each made of 2 layers
    # Note: the first layer of each resnet block uses a stride of 2
    # to reduce the size of the feature maps for subsequent layers
    ResidualBlock(64, 64, 2),
    ResidualBlock(64, 128, 2),
    ResidualBlock(128, 256, 2),
    ResidualBlock(256, 512, 2),

    # global average pooling averages every feature map down to a single value, which
    # replaces a large fully connected layer and cuts the number of parameters dramatically
    nn.AdaptiveAvgPool2d((1,1)),
    nn.Flatten(),
    # 9 outputs, matching the other fish models in this section (this head previously
    # had 10 outputs, carried over from the f-mnist model)
    nn.Linear(512, 9)
).to(device)

# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
resnet_lr = 0.001

resnet_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 20,
    'optim': torch.optim.Adam(my_resnet.parameters(), lr=resnet_lr),
    'lr': resnet_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'test_loader': test_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    },
    # add early stopping criteria to the model config;
    # the best checkpoint is written to 'resnet.pt' in the working directory
    'early_stop': {
        'name': 'resnet',
        'patience': 3,
        'trials': 1
    }
}

train_model(my_resnet, resnet_config)
plot(resnet_config)

In [44]:
test_eval(my_resnet, resnet_config)

## Pretrained CNN

Now, we will use pytorch's pretrained ResNet to train on the same dataset under the same conditions to see the power of a pretrained network.

First we need to set up the model and download its weights; then we freeze every layer and assign a new head to the model. By default, the new linear layer will have its weights' "requires_grad" set to True.

In [45]:
import torchvision.models as models

# ResNet-18 with pretrained weights, used as a fixed feature extractor.
resnet18 = models.resnet18(pretrained=True)

# Turn off gradient tracking for every existing parameter in one call.
resnet18.requires_grad_(False)

# The replacement head is created after freezing, so its parameters keep the
# default requires_grad=True and are the only ones that will be trained.
resnet18.fc = nn.Linear(512, 9)

In [46]:
# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
resnet_lr = 0.001

resnet_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 20,
    # only the new classification head is optimised
    'optim': torch.optim.Adam(resnet18.fc.parameters(), lr=resnet_lr),
    'lr': resnet_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'test_loader': test_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    },
    # add early stopping criteria to the model config;
    # the best checkpoint is written to 'resnet.pt' in the working directory
    'early_stop': {
        'name': 'resnet',
        'patience': 3,
        'trials': 1
    }
}

train_model(resnet18.to(device), resnet_config)
plot(resnet_config)

In [47]:
print('Pretrained ResNet')
test_eval(resnet18, resnet_config)
print('\nMy ResNet')
test_eval(my_resnet, resnet_config)
print('\nSimple CNN')
test_eval(simple_cnn, simple_cnn_config)

# CIFAR-10

The last dataset we will experiment on is the CIFAR-10 Dataset. 

In [48]:
batch_size = 64
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

cifar_train = torchvision.datasets.CIFAR10("data", train=True, transform=transform, download=True)
# NOTE: the validation loader is built from CIFAR-10's held-out split (train=False)
cifar_valid = torchvision.datasets.CIFAR10("data", train=False, transform=transform, download=True)

# The training batches must be reshuffled every epoch; without shuffle=True the model
# would see the samples in the same order in every epoch.
train_loader = Data.DataLoader(cifar_train, batch_size=batch_size, shuffle=True)
valid_loader = Data.DataLoader(cifar_valid, batch_size=batch_size)

A look at a few images of the dataset.


In [49]:
# get a single batch from the loader
x, y = next(iter(train_loader))

# plot 9 random images
fig, ax = plt.subplots(3, 3, figsize=(6, 6))

labels = np.array([
    'plane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck'
])

# Undo the Normalize step applied in the loader's transform (same mean/std values) so the
# images are back in [0, 1]; otherwise imshow clips them and the colours look wrong.
cifar_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
cifar_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

for cell in ax.flat:
    # randrange excludes the upper bound; randint(0, 64) could return 64, which is
    # out of range for a batch of 64 images
    idx = random.randrange(len(y))
    image = (x[idx] * cifar_std + cifar_mean).clamp(0, 1)
    cell.set_title(labels[y[idx].item()])
    cell.xaxis.set_visible(False)
    cell.yaxis.set_visible(False)
    # imshow expects (height, width, channel)
    cell.imshow(image.permute(1, 2, 0))

This time we will use Pytorch's built in ResNet class and then we will compare with another popular architecture.

In [50]:
# torchvision's ResNet-18 with randomly initialised weights and a 10-class head
resnet18 = torchvision.models.resnet18()
resnet18.fc = nn.Linear(512, 10)

resnet18.to(device)

# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
resnet_lr = 0.001

resnet_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 30,
    'optim': torch.optim.Adam(resnet18.parameters(), lr=resnet_lr),
    'lr': resnet_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    },
    # add early stopping criteria to the model config;
    # the best checkpoint is written to 'resnet.pt' in the working directory
    'early_stop': {
        'name': 'resnet',
        'patience': 5,
        'trials': 2
    }
}

train_model(resnet18, resnet_config)

In [51]:
plot(resnet_config)

Next we will try a VGG16 model, but we will use a pretrained base instead of training from scratch.

In [52]:
vgg16 = torchvision.models.vgg16(pretrained=True)

# Only the classifier is handed to the optimizer below, so the convolutional base is
# never updated; freezing it makes that explicit and avoids computing unused gradients.
for param in vgg16.features.parameters():
    param.requires_grad = False

# fresh classifier head with 10 outputs for CIFAR-10
vgg16.classifier = nn.Sequential(
    nn.Linear(25088, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 10)
)

vgg16.to(device)

# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
vgg16_lr = 0.001

vgg16_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 30,
    'optim': torch.optim.Adam(vgg16.classifier.parameters(), lr=vgg16_lr),
    'lr': vgg16_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    },
    # add early stopping criteria to the model config; the checkpoint gets its own file
    # name so it does not overwrite the ResNet checkpoint ('resnet.pt')
    'early_stop': {
        'name': 'vgg16',
        'patience': 5,
        'trials': 2
    }
}

train_model(vgg16, vgg16_config)

In [53]:
plot(vgg16_config)

Now we try Resnet with pretrained base

In [54]:
resnet18 = torchvision.models.resnet18(pretrained=True)

# Only the new head is handed to the optimizer below, so the pretrained base is never
# updated; freezing it (as in the fish section) makes that explicit and avoids computing
# unused gradients.
for param in resnet18.parameters():
    param.requires_grad = False

# created after freezing, so the head's parameters keep requires_grad=True
resnet18.fc = nn.Linear(512, 10)

resnet18.to(device)

# Single source of truth for the learning rate: previously 'lr' was stored in the config
# but never handed to the optimizer, so editing it had no effect.
resnet_lr = 0.001

resnet_config = {
    'criterion': nn.CrossEntropyLoss(),
    'epochs': 30,
    'optim': torch.optim.Adam(resnet18.fc.parameters(), lr=resnet_lr),
    'lr': resnet_lr,
    'train_loader': train_loader,
    'valid_loader': valid_loader,
    'device': device,

    # per-epoch metrics appended by train_model
    'history': {
        'train_loss': [], 'valid_loss': [], 'valid_acc': []
    },
    # add early stopping criteria to the model config;
    # the best checkpoint is written to 'resnet.pt' in the working directory
    'early_stop': {
        'name': 'resnet',
        'patience': 3,
        'trials': 1
    }
}

train_model(resnet18, resnet_config)

In [55]:
plot(resnet_config)