## Library Imports

In [1]:
from time import time
# Wall-clock reference taken in the first cell; the final cell subtracts it to report total runtime.
notebook_start_time = time()

In [2]:
import os
import re
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader as DL
from torch.nn.utils import weight_norm as WN
from torchvision import models, transforms

from time import time
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings("ignore")

## Constants and Utilities

In [3]:
# Seed passed to the K-fold split, the DataLoader shuffle generator and model initialisation.
SEED = 49
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory holding the per-backbone "<model_name>_features.npy" arrays.
FEATURES_PATH = "../input/petfinder-pf-cc-ua-all-dataset"
# Directory holding "train.csv"; its last column is read as the regression target.
TARGETS_PATH  = "../input/petfinder-pawpularity-score"

# verbose: when True the helper functions print progress and per-fold metrics.
# DEBUG:   when True main() switches to a smaller, faster configuration.
verbose = False
DEBUG = False

# Module-level target scaler; train() refits it on the training targets of every fold.
sc_y = StandardScaler()

In [4]:
def breaker(num=50, char="*") -> None:
    """Print a separator line: `char` repeated `num` times, with a blank line on each side."""
    rule = num * char
    print("\n" + rule + "\n")


def head(x, no_of_ele=5) -> None:
    """Print the first `no_of_ele` items of a sliceable object `x`."""
    leading = x[:no_of_ele]
    print(leading)


def get_targets(path: str) -> np.ndarray:
    """Read `<path>/train.csv` and return its last column as a NumPy array.

    Args:
        path: directory that contains `train.csv`.

    Returns:
        A copy of the values of the final CSV column.
    """
    csv_file = os.path.join(path, "train.csv")
    frame = pd.read_csv(csv_file, engine="python")
    last_column = frame.iloc[:, -1]
    return last_column.copy().values


def show_graphs(L: list) -> None:
    """Plot the per-epoch training and validation loss curves.

    Args:
        L: one dict per epoch, each with a "train" and a "valid" loss entry.
    """
    train_curve = [epoch_loss["train"] for epoch_loss in L]
    valid_curve = [epoch_loss["valid"] for epoch_loss in L]
    epoch_axis = np.arange(1, len(L) + 1)

    plt.figure()
    # Red for training, blue for validation.
    for curve, fmt, tag in ((train_curve, "r", "train"), (valid_curve, "b", "valid")):
        plt.plot(epoch_axis, curve, fmt, label=tag)
    plt.grid()
    plt.legend()
    plt.title("MSE Loss")
    plt.show()

## Dataset Template

In [5]:
class DS(Dataset):
    """Array-backed dataset yielding float32 tensors.

    Args:
        features: array indexed along its first axis; one row per sample.
        targets: optional array aligned with `features`. When omitted, items
            are feature tensors only (the form the "test" prediction loop
            iterates over).
    """

    def __init__(self, features=None, targets=None):
        self.features = features
        self.targets  = targets

    def __len__(self):
        return self.features.shape[0]

    def __getitem__(self, idx):
        # torch.tensor(..., dtype=float32) converts by value. The legacy
        # torch.FloatTensor constructor interprets an integer scalar as a
        # size and returns uninitialised memory.
        sample = torch.tensor(self.features[idx], dtype=torch.float32)
        if self.targets is None:
            return sample
        return sample, torch.tensor(self.targets[idx], dtype=torch.float32)

## Build Dataloaders

In [6]:
def build_dataloaders(tr_features: np.ndarray, va_features: np.ndarray,
                      tr_targets: np.ndarray, va_targets: np.ndarray,
                      batch_size: int, seed: int):
    """Wrap the fold's train/validation arrays in DataLoaders.

    Args:
        tr_features, tr_targets: training split.
        va_features, va_targets: validation split.
        batch_size: batch size used by both loaders.
        seed: seed for the shuffle generator of the training loader.

    Returns:
        Dict with a shuffled "train" loader and an unshuffled "valid" loader.
    """
    if verbose:
        breaker()
        print("Building Train and Validation DataLoaders ...")

    train_set = DS(features=tr_features, targets=tr_targets)
    valid_set = DS(features=va_features, targets=va_targets)

    # torch.manual_seed reseeds the global RNG and returns its generator,
    # which is then handed to the training loader for shuffling.
    shuffle_rng = torch.manual_seed(seed)
    train_loader = DL(train_set, batch_size=batch_size, shuffle=True, generator=shuffle_rng)
    valid_loader = DL(valid_set, batch_size=batch_size, shuffle=False)

    return {"train": train_loader, "valid": valid_loader}

## Build Model

In [7]:
def build_model(IL: int, seed: int):
    """Create a freshly initialised regression head for `IL` input features.

    Args:
        IL: number of input features.
        seed: value passed to torch.manual_seed right before the model is built.

    Returns:
        An `ANN` instance (BatchNorm1d followed by a weight-normalised Linear
        layer with a single output).
    """
    class ANN(nn.Module):
        """BatchNorm1d over the inputs, then a weight-normalised Linear layer to one output."""

        def __init__(self, IL=None):
            super().__init__()

            # Sub-module names "BN" and "FC" become part of the state-dict keys.
            layers = nn.Sequential()
            layers.add_module("BN", nn.BatchNorm1d(num_features=IL, eps=1e-5))
            layers.add_module("FC", WN(nn.Linear(in_features=IL, out_features=1)))
            self.predictor = layers

        def get_optimizer(self, lr=1e-3, wd=0):
            """Return Adam over the parameters that require gradients."""
            trainable = list(filter(lambda p: p.requires_grad, self.parameters()))
            return optim.Adam(trainable, lr=lr, weight_decay=wd)

        def get_plateau_scheduler(self, optimizer=None, patience=5, eps=1e-8):
            """Return a ReduceLROnPlateau scheduler attached to `optimizer`."""
            return optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=patience, eps=eps, verbose=True)

        def forward(self, x):
            return self.predictor(x)

    if verbose:
        breaker()
        print("Building Model ...")
        print("\n{} -> 1".format(IL))

    # Seed immediately before construction so every fold starts from the same weights.
    torch.manual_seed(seed)
    return ANN(IL=IL)

In [8]:
def fit(model=None, optimizer=None, scheduler=None, 
        epochs=None, early_stopping_patience=None,
        dataloaders=None, model_name=None, fold=None, 
        verbose=False) -> tuple:
    """Train `model` with MSE loss and checkpoint the best validation epoch.

    Args:
        model: module to train; it is moved to DEVICE.
        optimizer: optimizer stepped after every training batch.
        scheduler: optional scheduler; stepped with the epoch's validation loss.
        epochs: maximum number of epochs.
        early_stopping_patience: if truthy, training stops once the number of
            consecutive epochs without a validation improvement exceeds it.
        dataloaders: mapping with "train" and "valid" iterables of (X, y) batches.
        model_name, fold: used to build the checkpoint file name.
        verbose: print per-epoch progress and a final summary.

    Returns:
        (Losses, BLE, name):
        - Losses: one {"train", "valid"} dict of mean batch losses per epoch.
        - BLE: the 1-based epoch with the best validation loss, or None if no
          epoch improved on infinity (e.g. NaN losses or zero epochs). In that
          case no checkpoint is written.
        - name: the checkpoint path.
    """
    name = "./{}_Fold_{}_state.pt".format(model_name, fold)

    if verbose:
        breaker()
        print("Training Fold {}...".format(fold))
        breaker()

    criterion = torch.nn.MSELoss()
    Losses = []
    bestLoss = {"train" : np.inf, "valid" : np.inf}
    # Initialised up front so they are defined even if no epoch ever improves.
    BLE = None
    early_stopping_step = 0

    model.to(DEVICE)
    start_time = time()
    for e in range(epochs):
        e_st = time()
        epochLoss = {"train" : np.inf, "valid" : np.inf}

        for phase in ["train", "valid"]:
            is_train = phase == "train"
            model.train(is_train)

            lossPerPass = []
            for X, y in dataloaders[phase]:
                X, y = X.to(DEVICE), y.to(DEVICE)

                if is_train:
                    optimizer.zero_grad()
                with torch.set_grad_enabled(is_train):
                    loss = criterion(model(X), y)
                    if is_train:
                        loss.backward()
                        optimizer.step()
                lossPerPass.append(loss.item())
            # Unweighted mean over batches (a smaller last batch counts as much as a full one).
            epochLoss[phase] = np.mean(np.array(lossPerPass))
        Losses.append(epochLoss)

        if epochLoss["valid"] < bestLoss["valid"]:
            # New best: checkpoint and reset the early-stopping counter.
            bestLoss = epochLoss
            BLE = e + 1
            torch.save({"model_state_dict": model.state_dict(),
                        "optim_state_dict": optimizer.state_dict()},
                       name)
            early_stopping_step = 0
        elif early_stopping_patience:
            early_stopping_step += 1
            if early_stopping_step > early_stopping_patience:
                if verbose:
                    print("\nEarly Stopping at Epoch {}".format(e + 1))
                break

        if scheduler:
            scheduler.step(epochLoss["valid"])

        if verbose:
            print("Epoch: {} | Train Loss: {:.5f} | Valid Loss: {:.5f} | Time: {:.2f} seconds".format(e+1, epochLoss["train"], epochLoss["valid"], time()-e_st))

    if verbose:
        breaker()
        print("Best Validation Loss at Epoch {}".format(BLE))
        breaker()
        print("Time Taken [{} Epochs] : {:.2f} minutes".format(len(Losses), (time()-start_time)/60))
        breaker()
        print("Training Completed")
        breaker()

    return Losses, BLE, name

#####################################################################################################

def predict_batch(model=None, dataloader=None, mode="test", path=None) -> np.ndarray:
    """Load a checkpoint into `model` and predict over every batch of `dataloader`.

    Args:
        model: module whose weights are replaced by the checkpoint's
            "model_state_dict".
        dataloader: iterable of batches. In "valid" mode each batch is a
            (features, targets) pair and the targets are ignored; in "test"
            mode each batch is the feature tensor itself.
        mode: string starting with "valid" or "test" (case-insensitive).
        path: checkpoint file written by torch.save.

    Returns:
        float32 array of shape (n_samples, 1); shape (0, 1) for an empty loader.

    Raises:
        ValueError: if `mode` starts with neither "valid" nor "test".
    """
    has_targets = bool(re.match(r"valid", mode, re.IGNORECASE))
    if not has_targets and not re.match(r"test", mode, re.IGNORECASE):
        raise ValueError("mode must start with 'valid' or 'test', got {!r}".format(mode))

    model.load_state_dict(torch.load(path, map_location=DEVICE)["model_state_dict"])
    model.to(DEVICE)
    model.eval()

    # Collect per-batch outputs and concatenate once instead of growing a tensor per batch.
    outputs = []
    with torch.no_grad():
        for batch in dataloader:
            X = batch[0] if has_targets else batch
            outputs.append(model(X.to(DEVICE)).view(-1, 1))

    if not outputs:
        return np.zeros((0, 1), dtype=np.float32)
    return torch.cat(outputs, dim=0).detach().cpu().numpy()

In [9]:
def train(features: np.ndarray, targets: np.ndarray,
          n_splits: int, batch_size: int, lr: float, wd: float, 
          epochs: int, early_stopping: int,
          model_name: str, 
          patience=None, eps=None) -> tuple:
    """Run seeded K-fold cross-validation for one feature set.

    For every fold the targets are standardised with the module-level `sc_y`
    (fit on the training part only), a fresh model is trained with `fit`, and
    the best checkpoint is scored on the validation part in original target units.

    Args:
        features: per-sample feature matrix.
        targets: per-sample regression targets.
        n_splits: number of folds.
        batch_size, lr, wd, epochs: training hyper-parameters.
        early_stopping: patience forwarded to `fit`.
        model_name: label stored in the metrics and used in checkpoint names.
        patience, eps: when given as an int and a float respectively, a
            ReduceLROnPlateau scheduler is used; otherwise no scheduler.

    Returns:
        (metrics, minutes): a list with one {"Model", "Fold", "RMSE"} dict per
        fold, and the elapsed wall-clock time in minutes.
    """
    metrics = []

    KFold_start_time = time()
    if verbose:
        breaker()
        print("\t\t{}".format(model_name))
        breaker()
        print("Performing {} Fold CV ...".format(n_splits))

    splitter = KFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    for fold, (tr_idx, va_idx) in enumerate(splitter.split(features), start=1):

        tr_features, va_features = features[tr_idx], features[va_idx]
        # Column vectors: required by the scaler and matching the model's (batch, 1) output.
        tr_targets = targets[tr_idx].reshape(-1, 1)
        va_targets = targets[va_idx].reshape(-1, 1)

        # Fit the scaler on the training targets only, then reuse it for validation.
        tr_targets = sc_y.fit_transform(tr_targets)
        va_targets = sc_y.transform(va_targets)

        dataloaders = build_dataloaders(tr_features, va_features, 
                                        tr_targets, va_targets, 
                                        batch_size, SEED)
        model = build_model(IL=tr_features.shape[1], seed=SEED)

        optimizer = model.get_optimizer(lr=lr, wd=wd)
        scheduler = None
        if isinstance(patience, int) and isinstance(eps, float):
            scheduler = model.get_plateau_scheduler(optimizer, patience, eps)

        L, _, name = fit(model=model, optimizer=optimizer, scheduler=scheduler, 
                         epochs=epochs, early_stopping_patience=early_stopping,
                         dataloaders=dataloaders, model_name=model_name, fold=fold, verbose=False)
        y_pred = predict_batch(model=model, dataloader=dataloaders["valid"], mode="valid", path=name)

        # Undo the standardisation so the RMSE is reported in original target units.
        y_true_raw = sc_y.inverse_transform(va_targets)
        y_pred_raw = sc_y.inverse_transform(y_pred)
        RMSE = np.sqrt(mean_squared_error(y_true_raw, y_pred_raw))
        if verbose:
            print("\nValidation RMSE [Fold {}]: {:.5f}".format(fold, RMSE))
            breaker()
            show_graphs(L)

        metrics.append({"Model" : model_name, "Fold" : fold, "RMSE" : RMSE})

    elapsed_minutes = (time() - KFold_start_time) / 60
    if verbose:
        breaker()
        print("Total Time to {} Fold CV : {:.2f} minutes".format(n_splits, elapsed_minutes))

    return metrics, elapsed_minutes

## Main

In [10]:
def main():
    """Cross-validate the regression head on every backbone's feature set.

    Loads the targets once, runs `train` for each name in MODEL_NAMES,
    prints the time taken per feature set (when not verbose) and pickles the
    collected per-fold metrics to "./complete_metrics.pkl".
    """

    ########### Params ###########

    if DEBUG:
        n_splits = 3
        batch_size = 512
        lr, wd = 1e-3, 1e-1
        patience, eps = 5, 1e-8
        epochs, early_stopping = 5, 5
        MODEL_NAMES = ["resnet18", "resnet34"]
    else:
        n_splits = 10
        batch_size = 32
        lr, wd = 5e-4, 1e-1
        patience, eps = 5, 1e-8
        epochs, early_stopping = 100, 8
        MODEL_NAMES  = ["resnet18", "resnet34", "resnet50", "resnet101", "resnet152",
                        "resnext50", "resnext101",
                        "wresnet50", "wresnet101",
                        "vgg11", "vgg13", "vgg16", "vgg19",
                        "densenet121", "densenet161", "densenet169", "densenet201",
                        "mobilenet"]

    ##############################

    complete_metrics = []

    breaker()
    if verbose:
        print("Loading Data ...")

    # The targets are the same for every backbone, so read train.csv once.
    targets = get_targets(TARGETS_PATH)

    for model_name in MODEL_NAMES:

        features = np.load(os.path.join(FEATURES_PATH, "{}_features.npy".format(model_name)))

        # Without scheduler. To train with the plateau scheduler instead,
        # pass patience=patience, eps=eps (train still returns (metrics, minutes)).
        metrics, time_taken = train(features, targets, n_splits, batch_size, lr, wd, epochs, early_stopping, model_name=model_name, patience=None, eps=None)

        complete_metrics.append(metrics)

        if not verbose:
            print("{} Features -> {:.2f} minutes".format(model_name.capitalize(), time_taken))

    if verbose:
        breaker()
        for per_model in complete_metrics:
            for fold_metrics in per_model:
                print(fold_metrics)

    with open("./complete_metrics.pkl", "wb") as fp:
        pickle.dump(complete_metrics, fp)

    breaker()

In [11]:
# Run the full experiment: main() prints per-backbone timings and pickles the metrics.
main()


**************************************************

Resnet18 Features -> 2.56 minutes
Resnet34 Features -> 2.46 minutes
Resnet50 Features -> 2.62 minutes
Resnet101 Features -> 2.56 minutes
Resnet152 Features -> 2.55 minutes
Resnext50 Features -> 2.71 minutes
Resnext101 Features -> 2.61 minutes
Wresnet50 Features -> 2.71 minutes
Wresnet101 Features -> 2.59 minutes
Vgg11 Features -> 2.83 minutes
Vgg13 Features -> 2.70 minutes
Vgg16 Features -> 2.83 minutes
Vgg19 Features -> 2.71 minutes
Densenet121 Features -> 2.48 minutes
Densenet161 Features -> 2.41 minutes
Densenet169 Features -> 2.33 minutes
Densenet201 Features -> 2.53 minutes
Mobilenet Features -> 2.49 minutes

**************************************************



In [12]:
# Report the single best (backbone, fold) pair from the metrics pickled by main().
if not verbose:
    # NOTE: this file is produced by main() in this notebook; never unpickle untrusted files.
    with open("./complete_metrics.pkl", "rb") as fp:
        params = pickle.load(fp)

    # Keep (model index, fold index, RMSE) together so the winner can be located
    # without assuming how many folds each model was trained with.
    scored = [(i, j, fold_metrics["RMSE"])
              for i, per_model in enumerate(params)
              for j, fold_metrics in enumerate(per_model)]
    rmse = [entry[2] for entry in scored]

    best_index = rmse.index(min(rmse))
    best_index_1, best_index_2 = scored[best_index][:2]

    breaker()
    print("Best --> {}".format(params[best_index_1][best_index_2]))
    breaker()
    breaker()


**************************************************

Best --> {'Model': 'densenet201', 'Fold': 8, 'RMSE': 16.733944546854936}

**************************************************



In [13]:
# Total wall-clock time since the first cell started the timer.
elapsed_minutes = (time() - notebook_start_time) / 60
breaker()
print("Notebook Runtime : {:.2f} minutes".format(elapsed_minutes))
breaker()


**************************************************

Notebook Runtime : 47.43 minutes

**************************************************

