In [None]:
import os
import shutil
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, models, transforms
from torchvision.utils import save_image
from torch.utils.tensorboard import SummaryWriter

In [None]:
import numpy as np
import torch
import torch.nn as nn


def cutmix(batch, alpha):
    """Apply CutMix to an already collated batch.

    One rectangular box is drawn for the whole batch; inside that box every
    image is overwritten with the pixels of another image of the same batch
    (chosen through a random permutation).

    Args:
        batch: ``(data, targets)`` pair. ``data`` is indexed as
            ``[sample, channel, y, x]``; ``targets`` must be indexable by a
            permutation of the batch dimension.
        alpha: Value used for both shape parameters of the Beta distribution
            from which the mixing ratio is drawn.

    Returns:
        ``(data, (targets, shuffled_targets, lam))``. ``data`` is the input
        tensor, modified in place. ``lam`` is the fraction of each image that
        still comes from the original sample, i.e. the weight that belongs to
        ``targets``; ``1 - lam`` belongs to ``shuffled_targets``.
    """
    data, targets = batch

    # Advanced indexing copies, so the patch source is not affected by the
    # in-place write below.
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)

    # Box with area ratio (1 - lam), centred at a uniformly drawn point and
    # clipped to the image borders.
    image_h, image_w = data.shape[2:]
    cx = np.random.uniform(0, image_w)
    cy = np.random.uniform(0, image_h)
    w = image_w * np.sqrt(1 - lam)
    h = image_h * np.sqrt(1 - lam)
    x0 = int(np.round(max(cx - w / 2, 0)))
    x1 = int(np.round(min(cx + w / 2, image_w)))
    y0 = int(np.round(max(cy - h / 2, 0)))
    y1 = int(np.round(min(cy + h / 2, image_h)))

    data[:, :, y0:y1, x0:x1] = shuffled_data[:, :, y0:y1, x0:x1]

    # Clipping and rounding usually make the pasted box smaller than the
    # sampled ratio, so recompute lam from the pixels that were really replaced.
    # Otherwise the loss weights disagree with the image content.
    lam = 1.0 - ((x1 - x0) * (y1 - y0)) / float(image_h * image_w)
    targets = (targets, shuffled_targets, lam)

    return data, targets


class CutMixCollator:
    """Collate callable that stacks samples and then applies ``cutmix``."""

    def __init__(self, alpha):
        # Beta-distribution parameter handed to ``cutmix`` on every call.
        self.alpha = alpha

    def __call__(self, batch):
        """Collate ``batch`` with the default collate function and CutMix the result."""
        collated = torch.utils.data.dataloader.default_collate(batch)
        return cutmix(collated, self.alpha)


class CutMixCriterion:
    """Cross-entropy loss for CutMix targets.

    ``targets`` is a sequence whose first two items are class-index tensors
    and whose third item is the mixing weight ``lam``.
    """

    def __init__(self, reduction):
        self.criterion = nn.CrossEntropyLoss(reduction=reduction)

    def __call__(self, preds, targets):
        """Return ``lam * CE(preds, targets[0]) + (1 - lam) * CE(preds, targets[1])``."""
        primary, pasted, lam = targets[0], targets[1], targets[2]
        primary_loss = self.criterion(preds, primary)
        pasted_loss = self.criterion(preds, pasted)
        return lam * primary_loss + (1 - lam) * pasted_loss

**Importing Car Dataset**

In [None]:
# Training: random crop + horizontal flip as augmentation, then normalization.
# Validation: deterministic resize + centre crop, then the same normalization.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Stanford Cars: the 'train' split is used for training, the 'test' split for validation.
train_dataset = datasets.StanfordCars(root='./data', split='train', transform=data_transforms["train"], download=True)
valid_dataset = datasets.StanfordCars(root='./data', split='test', transform=data_transforms["val"], download=True)

# Number of images in the training and validation datasets.
N_train = len(train_dataset)
N_valid = len(valid_dataset)
print(f"Training set size: {N_train} images")
print(f"Valdiation set size: {N_valid} images")

# Training batches go through the CutMix collator, so their targets arrive as a
# (targets, shuffled_targets, lam) tuple; validation batches use the default collate.
cc = CutMixCollator(1.0)
d_cc = torch.utils.data.dataloader.default_collate
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=32, shuffle=True, num_workers=4,
    collate_fn=cc, pin_memory=True, drop_last=True,
)
# Validation has to score every image: keep the last partial batch and do not
# shuffle (the order does not influence the metrics).
valid_loader = torch.utils.data.DataLoader(
    valid_dataset, batch_size=32, shuffle=False, num_workers=4,
    collate_fn=d_cc, pin_memory=True, drop_last=False,
)

In [None]:
# Show the training dataset's summary representation as the cell output.
train_dataset

In [None]:
# Read the class names exposed by the training dataset and print the full list.
# `class_names` is indexed by class id when building plot titles.
class_names = train_dataset.classes
print(class_names)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

**Data Visualization**

In [None]:
# Plot helper: shows (up to) the first 32 images of a batch in a 4x8 grid.
def show_grid(data, titles=None):
    """Display up to 32 normalized images of a batch in a 4x8 grid.

    Args:
        data: Image batch tensor indexed as ``[sample, channel, y, x]`` that
            was normalized with the mean/std used in ``data_transforms``;
            the normalization is undone before plotting.
        titles: Optional sequence of per-image titles. Images without a
            matching title are drawn untitled.
    """
    max_images = 32  # capacity of the 4x8 grid

    # detach()/cpu() make this work for GPU tensors and tensors that track gradients.
    images = data.detach().cpu().numpy().transpose((0, 2, 3, 1))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # Undo the normalization and clamp to the displayable [0, 1] range.
    images = np.clip(std * images + mean, 0, 1)

    plt.figure(figsize=(8*2, 4*2))

    # Never index past the end of a batch that holds fewer than 32 images.
    n_images = min(len(images), max_images)
    for i in range(n_images):
        plt.subplot(4, 8, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
        if titles is not None and i < len(titles):
            plt.title(titles[i], fontsize=7)
    plt.tight_layout()
    plt.show()
            
# Get one batch of training data and display it.
# The training loader applies CutMix, so `classes` is a
# (targets, shuffled_targets, lam) tuple. Each title names the class of the base
# image and the class of the pasted patch, instead of the patch class only.
inputs, classes = next(iter(train_loader))
titles = [
    f"{class_names[int(base)]}\n+ {class_names[int(patch)]}"
    for base, patch in zip(classes[0], classes[1])
]

show_grid(inputs, titles=titles)

**Testing/Training Functions**

In [None]:
# Function TRAIN_EPOCH
# One optimization pass over the training loader.

def train_epoch(model, train_loader, optimizer, criterion, epoch, device):
    """Train ``model`` for one epoch.

    Args:
        model: Network, called as ``model(images)``.
        train_loader: Iterable of ``(images, labels)`` batches. ``labels`` is
            either a tensor or a 3-item tuple/list ``(targets_a, targets_b,
            lam)`` as produced by the CutMix collator.
        optimizer: Optimizer that updates the parameters of ``model``.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. It must match the label format of ``train_loader``.
            ``None`` selects ``CutMixCriterion(reduction="mean")``.
        epoch: Index of the current epoch (accepted for the caller's
            convenience; not used here).
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, loss_list)``: the mean of the per-batch losses and the
        list of per-batch loss values.
    """
    # Honour the criterion chosen by the caller; previously it was always
    # replaced, which made the plain-tensor label branch below unusable.
    if criterion is None:
        criterion = CutMixCriterion(reduction="mean")

    loss_list = []
    for images, labels in train_loader:
        images = images.to(device)

        if isinstance(labels, (tuple, list)):
            # CutMix targets: move both label tensors, keep the scalar weight.
            targets_a, targets_b, lam = labels
            labels = (targets_a.to(device), targets_b.to(device), lam)
        else:
            labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss_list.append(loss.item())

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

    mean_loss = np.mean(loss_list)
    return mean_loss, loss_list

# Function EVAL_MODEL
# Scores a model on an evaluation loader without tracking gradients.

@torch.no_grad()
def eval_model(model, eval_loader, criterion, device):
    """Compute accuracy and mean cross-entropy loss over ``eval_loader``.

    Args:
        model: Network, called as ``model(images)``; expected to return one
            row of class scores per sample.
        eval_loader: Iterable of ``(images, labels)`` batches with tensor labels.
        criterion: Accepted for interface symmetry with the training
            functions but not used: a plain ``nn.CrossEntropyLoss`` with
            mean reduction is always applied.
        device: Device the batches are moved to.

    Returns:
        ``(accuracy, loss)``: accuracy in percent over all samples seen and
        the mean of the per-batch losses.
    """
    ce_loss = nn.CrossEntropyLoss(reduction='mean')
    n_correct, n_seen = 0, 0
    batch_losses = []

    for images, labels in eval_loader:
        images, labels = images.to(device), labels.to(device)

        logits = model(images)
        batch_losses.append(ce_loss(logits, labels).item())

        # The predicted class is the index of the largest score.
        hits = torch.argmax(logits, dim=1) == labels
        n_correct += int(hits.sum().item())
        n_seen += len(labels)

    accuracy = n_correct / n_seen * 100
    return accuracy, np.mean(batch_losses)

# Function TRAIN_MODEL
# Full training loop: per epoch, validate first and then train for one epoch.

def train_model(model, optimizer, scheduler, criterion, train_loader, valid_loader, num_epochs, tboard=None, start_epoch=0):
    """Train ``model`` for ``num_epochs`` epochs and collect statistics.

    In every epoch the model is evaluated on ``valid_loader`` *before* it is
    trained, so the validation numbers stored for an epoch describe the
    weights the epoch started with.

    Args:
        model: Network to train.
        optimizer: Optimizer for the parameters of ``model``.
        scheduler: Learning-rate scheduler; ``scheduler.step()`` is called
            once per epoch.
        criterion: Loss passed on to ``train_epoch`` and ``eval_model``.
        train_loader: Loader with the training batches.
        valid_loader: Loader with the validation batches.
        num_epochs: Number of epochs to run.
        tboard: Optional writer offering ``add_scalar(tag, value,
            global_step=...)``. Nothing is logged when it is ``None``.
        start_epoch: Offset added to the epoch index used as logging step.

    Returns:
        ``(train_loss, val_loss, loss_iters, valid_acc)``: per-epoch training
        loss, per-epoch validation loss, per-iteration training loss and
        per-epoch validation accuracy.
    """
    # Use the device the model already lives on instead of a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    train_loss = []
    val_loss = []
    valid_acc = []
    loss_iters = []
    for epoch in range(num_epochs):
        step = epoch + start_epoch

        # Validation pass; eval() matters for dropout and batch-norm layers.
        model.eval()
        accuracy, loss = eval_model(
            model=model, eval_loader=valid_loader,
            criterion=criterion, device=run_device
        )
        valid_acc.append(accuracy)
        val_loss.append(loss)

        # Log through the writer that was passed in (not a global one).
        if tboard is not None:
            tboard.add_scalar('Accuracy/Valid', accuracy, global_step=step)
            tboard.add_scalar('Loss/Valid', loss, global_step=step)

        # Training pass; train() re-enables dropout and batch-norm updates.
        model.train()
        mean_loss, cur_loss_iters = train_epoch(
            model=model, train_loader=train_loader, optimizer=optimizer,
            criterion=criterion, epoch=epoch, device=run_device
        )

        # Advance the learning-rate schedule once per epoch.
        scheduler.step()
        train_loss.append(mean_loss)
        if tboard is not None:
            tboard.add_scalar('Loss/Train', mean_loss, global_step=step)

        loss_iters.extend(cur_loss_iters)

        # Report every fifth epoch and the last one.
        if epoch % 5 == 0 or epoch == num_epochs - 1:
            print(f"Epoch {epoch+1}/{num_epochs}")
            print(f"    Train loss: {round(mean_loss, 5)}")
            print(f"    Valid loss: {round(loss, 5)}")
            print(f"    Accuracy: {accuracy}%")
            print("\n")

    print("Training completed")
    return train_loss, val_loss, loss_iters, valid_acc

In [None]:
# Persist the training state to disk.
def save_model(model, optimizer, epoch, stats):
    """Write a checkpoint to ``models/checkpoint_epoch_{epoch}.pth``.

    The checkpoint is a dict holding the epoch, the model and optimizer
    state dicts and the ``stats`` object. The ``models`` directory is
    created in the current working directory when it does not exist.
    """
    out_dir = "models"
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'stats': stats
    }
    torch.save(checkpoint, f"models/checkpoint_epoch_{epoch}.pth")

# Restore a training state written by save_model.
def load_model(model, optimizer, savepath, map_location="cpu"):
    """Load a checkpoint into ``model`` and ``optimizer``.

    Args:
        model: Module that receives ``checkpoint['model_state_dict']``.
        optimizer: Optimizer that receives ``checkpoint['optimizer_state_dict']``.
        savepath: Path of the checkpoint file.
        map_location: Forwarded to ``torch.load``. The default ``"cpu"``
            lets a checkpoint written on a GPU machine be read on a machine
            without CUDA; ``load_state_dict`` then copies the values into
            the existing parameters of ``model``.

    Returns:
        ``(model, optimizer, epoch, stats)`` with ``epoch`` and ``stats``
        taken from the checkpoint.
    """
    checkpoint = torch.load(savepath, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint["epoch"]
    stats = checkpoint["stats"]

    return model, optimizer, epoch, stats

# Curve-smoothing helper.
def smooth(f, K=5):
    """Smooth a 1-D sequence with a moving-average (mean) filter of size K.

    The sequence is padded with its own first ``K // 2`` and last
    ``-(-K // 2)`` values before filtering, and the padded positions are cut
    off again, so the result has the same length as ``f``.
    """
    head = K // 2
    tail = -K // 2  # (-K) // 2: equals -(K // 2) for even K and -(K // 2) - 1 for odd K
    window = np.ones(K) / K

    padded = np.concatenate([f[:head], f, f[tail:]])  # to account for boundaries
    filtered = np.convolve(padded, window, mode="same")
    return filtered[head:tail]  # removing boundary-fixes

# Seed every random number generator the notebook relies on.
def set_random_seed(random_seed=None):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices).

    Args:
        random_seed: Seed value; ``None`` selects the default seed 13.

    ``PYTHONHASHSEED`` is also exported to the environment.
    NOTE(review): per the Python docs this variable is read at interpreter
    start-up, so it presumably only affects processes started afterwards.
    """
    # Local import: the notebook's import cells do not bring in `random`.
    import random

    if random_seed is None:
        random_seed = 13
    os.environ['PYTHONHASHSEED'] = str(random_seed)
    random.seed(random_seed)  # previously left unseeded
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)
    return

# Seed everything with the default seed.
set_random_seed()

**2.1. Fine-Tuning**

In [None]:
# Download ResNet-152 (152 layers) with ImageNet-pretrained weights.
# `weights=` replaces the deprecated `pretrained=True` flag; IMAGENET1K_V1 is
# the checkpoint that `pretrained=True` used to select.
model = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)  # https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

In [None]:
# Two-layer classification head ending in the 196 car classes.
# ResNet-152 is built from bottleneck blocks, so its final layer receives 2048
# features (512 is the width of ResNet-18/34); a 512-input layer here would
# fail with a shape mismatch on the first forward pass.
model.fc = nn.Sequential(
        nn.Linear(2048, 256),  # input_features: 2048, output_features: 256
        nn.ReLU(),
        nn.Linear(256, 196)  # input_features: 256, output_features: 196
)

In [None]:
# Rebuild the pretrained network and replace its classification head with a
# single linear layer for the 196 car classes.
# `weights=` replaces the deprecated `pretrained=True` flag; IMAGENET1K_V1 is
# the checkpoint that `pretrained=True` used to select.
model = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)
num_nuerons = model.fc.in_features  # input width of the original head
model.fc = nn.Linear(num_nuerons, 196)

model = model.to(device)

In [None]:
# Loss for CutMix batches: cross-entropy against both target sets, mixed by lam.
criterion = CutMixCriterion(reduction='mean')
# All parameters of the model are handed to the optimizer (nothing is frozen).
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

# Decay LR by a factor of 0.1 every 7 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
# TensorBoard log directory for the fine-tuned CNN run.
# Logs of an earlier run are removed so the writer starts from an empty directory.
TBOARD_LOGS = os.path.join(os.getcwd(), "tboard_logs", "CNN_tuned")
if os.path.exists(TBOARD_LOGS):
    shutil.rmtree(TBOARD_LOGS)

writer = SummaryWriter(TBOARD_LOGS)

In [None]:
%load_ext tensorboard

In [None]:
# Fine-tune the ResNet-152 for 15 epochs. Returns the per-epoch training and
# validation losses, the per-iteration training losses and the per-epoch
# validation accuracy; `writer` is passed as the TensorBoard writer.
train_loss, val_loss, loss_iters, valid_acc = train_model(
        model=model, optimizer=optimizer, scheduler=scheduler, criterion=criterion,
        train_loader=train_loader, valid_loader=valid_loader, num_epochs=15, tboard=writer
    )

In [None]:
# Training curves: per-iteration loss, per-epoch loss curves and validation accuracy.
# The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in newer matplotlib
# releases, so pick whichever name this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
fig, ax = plt.subplots(1,3)
fig.set_size_inches(24,5)

# Left: raw per-iteration training loss and a moving average over 31 iterations.
smooth_loss = smooth(loss_iters, 31)
ax[0].plot(loss_iters, c="blue", label="Loss", linewidth=3, alpha=0.5)
ax[0].plot(smooth_loss, c="red", label="Smoothed Loss", linewidth=3, alpha=1)
ax[0].legend(loc="best")
ax[0].set_xlabel("Iteration")
ax[0].set_ylabel("CE Loss")
ax[0].set_title("Training Progress")

# Middle: training and validation loss per epoch.
epochs = np.arange(len(train_loss)) + 1
ax[1].plot(epochs, train_loss, c="red", label="Train Loss", linewidth=3)
ax[1].plot(epochs, val_loss, c="blue", label="Valid Loss", linewidth=3)
ax[1].legend(loc="best")
ax[1].set_xlabel("Epochs")
ax[1].set_ylabel("CE Loss")
ax[1].set_title("Loss Curves")

# Right: validation accuracy per epoch; the title reports the best epoch.
epochs = np.arange(len(val_loss)) + 1
ax[2].plot(epochs, valid_acc, c="red", label="Valid accuracy", linewidth=3)
ax[2].legend(loc="best")
ax[2].set_xlabel("Epochs")
ax[2].set_ylabel("Accuracy (%)")
ax[2].set_title(f"Valdiation Accuracy (max={round(np.max(valid_acc),2)}% @ epoch {np.argmax(valid_acc)+1})")

plt.show()

A) The training-progress plot shows the cross-entropy loss decreasing gradually as the number of iterations increases.

B) The loss curves (training loss and validation loss) show a similar pattern: the cross-entropy loss decreases over the course of training.

C) The validation accuracy, on the other hand, increases over the course of training.

In [None]:
%tensorboard --logdir tboard_logs

**2.2: Select a good training recipe: augmentations, optimizer, learning rate scheduling, classifier, loss function**

In [None]:
# Installing Optuna
!pip install --quiet optuna

In [None]:
import optuna
import torch.optim as optim
from optuna.trial import TrialState
optuna.__version__

In [None]:
# Learning-rate scheduler wrapper.
class LRScheduler():
    """
    Plateau-based learning-rate scheduler.

    Wraps ``torch.optim.lr_scheduler.ReduceLROnPlateau`` in 'min' mode: when
    the reported validation loss has not decreased for ``patience`` epochs,
    the learning rate is multiplied by ``factor``.
    """
    def __init__(self, optimizer, patience=5, min_lr=1e-6, factor=0.5):
        """
        new_lr = old_lr * factor
        :param optimizer: the optimizer whose learning rate is adjusted
        :param patience: how many epochs to wait before updating the lr
        :param min_lr: lower bound for the learning rate
        :param factor: factor the learning rate is multiplied by
        """
        self.optimizer = optimizer
        self.patience = patience
        self.min_lr = min_lr
        self.factor = factor

        plateau_options = dict(
            mode='min',
            patience=patience,
            factor=factor,
            min_lr=min_lr,
            verbose=True,
        )
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, **plateau_options
        )

    def __call__(self, val_loss):
        """Report one validation loss to the wrapped scheduler."""
        self.lr_scheduler.step(val_loss)


In [None]:
# Objective function for the Optuna hyperparameter search: trains one candidate
# model and returns its validation accuracy.
def objective(trial):
    """Train and validate one Optuna trial.

    The trial chooses the network (through ``define_model``), the optimizer
    type and the learning rate. The model is trained on ``train_loader``
    (CutMix batches) for 15 epochs; after every epoch it is validated on
    ``valid_loader``, the accuracy is reported to the trial and the trial may
    be pruned.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Validation accuracy of the last epoch as a fraction in ``[0, 1]``.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner stops the trial.
    """
    # Generate the model.
    model = define_model(trial).to(device)

    # Sample optimizer type and learning rate.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    LR = trial.suggest_float("lr", 1e-5, 1e-1, log=True)

    # Optimizer of *this* trial's model. The earlier code stepped the
    # notebook-level `optimizer`, so the trial model was never updated.
    trial_optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=LR)
    lr_scheduler = LRScheduler(trial_optimizer)

    # Training batches carry CutMix targets, validation batches plain labels.
    train_criterion = CutMixCriterion(reduction='mean')
    val_criterion = nn.CrossEntropyLoss(reduction='mean')

    accuracy = 0.0
    for epoch in range(15):
        model.train()
        for imgs, labels in train_loader:
            imgs = imgs.to(device)
            targets_a, targets_b, lam = labels
            labels = (targets_a.to(device), targets_b.to(device), lam)

            # The model is an MLP, so every image is flattened to a vector.
            preds = model(imgs.flatten(start_dim=1))
            loss = train_criterion(preds, labels)

            trial_optimizer.zero_grad()
            loss.backward()
            trial_optimizer.step()

        # Validation of the model (inference mode, no gradients).
        model.eval()
        n_correct = 0
        n_seen = 0
        val_losses = []
        with torch.no_grad():
            for imgs, labels in valid_loader:
                imgs = imgs.to(device)
                labels = labels.to(device)

                preds = model(imgs.flatten(start_dim=1))
                val_losses.append(val_criterion(preds, labels).item())

                pred_labels = torch.argmax(preds, dim=-1)
                n_correct += int((pred_labels == labels).sum().item())
                n_seen += labels.size(0)

        # Divide by the samples that were actually evaluated, not by the
        # dataset size, so the result is right for any loader configuration.
        accuracy = n_correct / max(n_seen, 1)

        # The plateau scheduler counts epochs: step it once per epoch with the
        # mean validation loss rather than once per validation batch.
        if val_losses:
            lr_scheduler(float(np.mean(val_losses)))

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return accuracy

In [None]:
# The search space is kept narrow so that the trials have a good chance of
# producing useful values. Depths between 1 and 5 layers were tried; 1 to 3
# layers worked best, since deeper networks overfit more easily: there are not
# enough samples to train a raw MLP (which, unlike a CNN, cannot extract
# image-specific features from the training data).

CLASSES = 196
def define_model(trial):
    """Build the MLP for one Optuna trial.

    The trial chooses the number of hidden layers ("n_layers", 1 to 2) and
    the width of every hidden layer ("n_units_l<i>", 300 to 400). Each hidden
    layer is a ``Linear`` followed by ``ReLU``; the network takes flattened
    224x224x3 images and ends in a ``Linear`` layer with ``CLASSES`` outputs.
    """
    depth = trial.suggest_int("n_layers", 1, 2)

    modules = []
    width_in = 224 * 224 * 3
    for idx in range(depth):
        width_out = trial.suggest_int("n_units_l{}".format(idx), 300, 400)
        modules += [nn.Linear(width_in, width_out), nn.ReLU()]
        width_in = width_out

    # Output layer returns raw class scores (no softmax).
    modules.append(nn.Linear(width_in, CLASSES))
    return nn.Sequential(*modules)

In [None]:
# Run the hyperparameter search: 5 trials, maximizing the value returned by
# `objective` (the validation accuracy).
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=5)

# Best trial of the study; later cells read `trial` as well.
trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

In [None]:
# Summarize the study: how many trials were run, pruned and completed.
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# `trial` is the best trial selected in the cell that ran the study.
print("Best trial:")
print("  Accuracy Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
# Optimization history of the study.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Slice plot of the study's hyperparameters.
optuna.visualization.plot_slice(study)

In [None]:
# Contour plot over the listed hyperparameters.
# NOTE(review): 'n_units_l1' is only sampled by define_model when n_layers == 2;
# confirm the study contains such trials before relying on this plot.
optuna.visualization.plot_contour(study, params=['optimizer','lr','n_layers', 'n_units_l1'])

In [None]:
# Empirical distribution function (EDF) plot of the study.
optuna.visualization.plot_edf(study)

In [None]:
# Intermediate values of the trials (the per-epoch accuracies passed to trial.report).
optuna.visualization.plot_intermediate_values(study)

In [None]:
# Parallel-coordinate plot of the study's hyperparameters.
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
# Hyperparameter importances of the study.
optuna.visualization.plot_param_importances(study)

In [None]:
# A Pareto front needs 2 or 3 objectives. This study optimizes a single
# objective (accuracy), so plotting it unconditionally raises an error; only
# draw the plot when the study is multi-objective.
if len(study.directions) in (2, 3):
    optuna.visualization.plot_pareto_front(study).show()
else:
    print("Pareto front skipped: the study has a single objective.")