## Get Data

In [1]:
from IPython.display import clear_output
from google.colab import files
files.upload()

!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

!kaggle datasets download -d jackstapleton/petfinder-pf-nc-ua-all-dataset

!mkdir ~/.data
!unzip -q petfinder-pf-nc-ua-all-dataset.zip -d /.data

clear_output()

In [2]:
from google.colab import drive
drive.mount("/content/gdrive")

clear_output()

## Library Imports

In [1]:
import os
import gc
import re
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader as DL
from torch.nn.utils import weight_norm as WN
from torchvision import models, transforms

from time import time
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error
# from sklearn.preprocessing import StandardScaler

## Constants and Utilities

In [2]:
SEED = 49
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
PATH = "/.data"

verbose = False
DEBUG = False

# sc_y = StandardScaler()

In [3]:
def breaker(num=50, char="*") -> None:
    print("\n" + num*char + "\n")


def get_targets() -> np.ndarray:
    df = pd.read_csv("/content/gdrive/My Drive/train.csv", engine="python")
    targets = df["Pawpularity"].copy().values
    classify_targets = targets.copy() 
    regress_targets  = targets.copy()
    for i in range(10):
        classify_targets[np.logical_and(targets >= (i*10)+1, targets <= (i+1) * 10)] = i
    regress_targets = regress_targets % 10
    return targets, classify_targets, regress_targets


def show_graphs(L: list, title=None) -> None:
    TL, VL = [], []
    for i in range(len(L)):
        TL.append(L[i]["train"])
        VL.append(L[i]["valid"])
    x_Axis = np.arange(1, len(L) + 1)
    plt.figure()
    plt.plot(x_Axis, TL, "r", label="train")
    plt.plot(x_Axis, VL, "b", label="valid")
    plt.grid()
    plt.legend()
    if title:
        plt.title("{} Loss".format(title))
    else:
        plt.title("Loss")
    plt.show()

## Dataset Template and BuildDataloader

In [4]:
class DS(Dataset):
    def __init__(self, features=None, classify_targets=None, regress_targets=None):
        self.features = features
        self.classify_targets = classify_targets
        self.regress_targets = regress_targets
        
    def __len__(self):
        return self.features.shape[0]
    
    def __getitem__(self, idx):
        return torch.FloatTensor(self.features[idx]), torch.LongTensor(self.classify_targets[idx]), torch.FloatTensor(self.regress_targets[idx])
    

def build_dataloaders(tr_features: np.ndarray, va_features: np.ndarray,
                      tr_classify_targets: np.ndarray, va_classify_targets: np.ndarray,
                      tr_regress_targets: np.ndarray, va_regress_targets: np.ndarray,
                      batch_size: int, seed: int):

    if verbose:
        breaker()
        print("Building Train and Validation DataLoaders ...")
    
    tr_data_setup = DS(features=tr_features, classify_targets=tr_classify_targets, regress_targets=tr_regress_targets)
    va_data_setup = DS(features=va_features, classify_targets=va_classify_targets, regress_targets=va_regress_targets)
    
    dataloaders = {
        "train" : DL(tr_data_setup, batch_size=batch_size, shuffle=True, generator=torch.manual_seed(seed)),
        "valid" : DL(va_data_setup, batch_size=batch_size, shuffle=False)
    }
    
    return dataloaders

## Build Model

In [5]:
def build_model(IL: int, seed: int):
    class ANN(nn.Module):
        def __init__(self, IL=None):
            super(ANN, self).__init__()

            self.classifier = nn.Sequential()
            self.classifier.add_module("BN", nn.BatchNorm1d(num_features=IL, eps=1e-5))
            self.classifier.add_module("FC", WN(nn.Linear(in_features=IL, out_features=10)))
            self.classifier.add_module("AN", nn.LogSoftmax(dim=1))

            self.regressor = nn.Sequential()
            self.regressor.add_module("BN", nn.BatchNorm1d(num_features=IL, eps=1e-5))
            self.regressor.add_module("FC", WN(nn.Linear(in_features=IL, out_features=1)))

        def get_optimizer(self, lr=1e-3, wd=0):
            params = [p for p in self.parameters() if p.requires_grad]
            return optim.Adam(params, lr=lr, weight_decay=wd)

        def get_plateau_scheduler(self, optimizer=None, patience=5, eps=1e-8):
            return optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=patience, eps=eps, verbose=True)

        def forward(self, x1, x2=None):
            if x2 is not None:
                return self.classifier(x1), self.regressor(x1), self.classifier(x2), self.regressor(x2)
            else:
                return self.classifier(x1), self.regressor(x1)
    
    if verbose:
        breaker()
        print("Building Model ...")
        print("\n{} -> 1".format(IL))
    
    torch.manual_seed(seed)
    model = ANN(IL=IL)
    
    return model

## Fit and Predict Helpers

In [6]:
def fit(model=None, optimizer=None, scheduler=None, 
        epochs=None, early_stopping_patience=None,
        dataloaders=None, fold=None, verbose=False) -> tuple:
    
    name = "./Fold_{}_state.pt".format(fold)
    
    if verbose:
        breaker()
        print("Training Fold {}...".format(fold))
        breaker()
    else:
        print("Training Fold {}...".format(fold))

    Losses = []
    bestLoss = {"train" : np.inf, "valid" : np.inf}

    start_time = time()
    for e in range(epochs):
        e_st = time()
        epochLoss = {"train" : np.inf, "valid" : np.inf}

        for phase in ["train", "valid"]:
            if phase == "train":
                model.train()
            else:
                model.eval()
            
            lossPerPass = []

            for X, y1, y2 in dataloaders[phase]:
                X, y1, y2 = X.to(DEVICE), y1.to(DEVICE).view(-1), y2.to(DEVICE)

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == "train"):
                    classify_output, regress_output = model(X)
                    loss = torch.nn.NLLLoss()(classify_output, y1) + torch.nn.MSELoss()(regress_output, y2)
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                lossPerPass.append(loss.item())
            epochLoss[phase] = np.mean(np.array(lossPerPass))
        Losses.append(epochLoss)

        if early_stopping_patience:
            if epochLoss["valid"] < bestLoss["valid"]:
                bestLoss = epochLoss
                BLE = e + 1
                torch.save({"model_state_dict": model.state_dict(),
                            "optim_state_dict": optimizer.state_dict()},
                           name)
                early_stopping_step = 0
            else:
                early_stopping_step += 1
                if early_stopping_step > early_stopping_patience:
                    if verbose:
                        print("\nEarly Stopping at Epoch {}".format(e))
                    break
        
        if epochLoss["valid"] < bestLoss["valid"]:
            bestLoss = epochLoss
            BLE = e + 1
            torch.save({"model_state_dict": model.state_dict(),
                        "optim_state_dict": optimizer.state_dict()},
                       name)
        
        if scheduler:
            scheduler.step(epochLoss["valid"])
        
        if verbose:
            print("Epoch: {} | Train Loss: {:.5f} | Valid Loss: {:.5f} | Time: {:.2f} seconds".format(e+1, epochLoss["train"], epochLoss["valid"], time()-e_st))
    
    if verbose:
        breaker()
        print("Best Validation Loss at Epoch {}".format(BLE))
        breaker()
        print("Time Taken [{} Epochs] : {:.2f} minutes".format(len(Losses), (time()-start_time)/60))
        breaker()
        print("Training Completed")
        breaker()

    return Losses, BLE, name

#####################################################################################################

def predict_batch(model=None, dataloader=None, mode="test", path=None) -> np.ndarray:    
    model.load_state_dict(torch.load(path, map_location=DEVICE)["model_state_dict"])
    model.to(DEVICE)
    model.eval()

    y_pred_classify = torch.zeros(1, 1).to(DEVICE)
    y_pred_regress  = torch.zeros(1, 1).to(DEVICE)

    if re.match(r"valid", mode, re.IGNORECASE):
        for X, _, _ in dataloader:
            X = X.to(DEVICE)
            with torch.no_grad():
                classify_output, regress_output = model(X)
            classify_output = torch.argmax(classify_output, dim=1)
            y_pred_classify = torch.cat((y_pred_classify, classify_output.view(-1, 1)), dim=0)
            y_pred_regress = torch.cat((y_pred_regress, regress_output.view(-1, 1)), dim=0)

    elif re.match(r"test", mode, re.IGNORECASE):
        for X in dataloader:
            X = X.to(DEVICE)
            with torch.no_grad():
                classify_output, regress_output = model(X)
            classify_output = torch.argmax(classify_output, dim=1)
            y_pred_classify = torch.cat((y_pred_classify, classify_output.view(-1, 1)), dim=0)
            y_pred_regress = torch.cat((y_pred_regress, regress_output.view(-1, 1)), dim=0)
    
    y_pred_classify = y_pred_classify[1:].detach().cpu().numpy()
    y_pred_regress = y_pred_regress[1:].detach().cpu().numpy()

    y_pred_classify = y_pred_classify * 10
    y_pred = y_pred_classify + y_pred_regress
    
    return y_pred

## Train

In [7]:
def train(features: np.ndarray, targets: np.ndarray,
          classify_targets: np.ndarray, regress_targets: np.ndarray,
          n_splits: int, batch_size: int, lr: float, wd: float, 
          epochs: int, early_stopping: int, 
          patience=None, eps=None) -> list:        
    
    metrics = []
        
    KFold_start_time = time()
    breaker()
    print("Performing {} Fold CV ...".format(n_splits))
    breaker()
    fold = 1
    for tr_idx, va_idx in StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED).split(features, classify_targets):

        tr_features, va_features = features[tr_idx], features[va_idx]
        tr_targets, va_targets = targets[tr_idx], targets[va_idx]
        tr_classify_targets, va_classify_targets = classify_targets[tr_idx], classify_targets[va_idx]
        tr_regress_targets, va_regress_targets = regress_targets[tr_idx], regress_targets[va_idx]

        tr_classify_targets, va_classify_targets = tr_classify_targets.reshape(-1, 1), va_classify_targets.reshape(-1, 1)
        tr_regress_targets, va_regress_targets = tr_regress_targets.reshape(-1, 1), va_regress_targets.reshape(-1, 1)

        dataloaders = build_dataloaders(tr_features, va_features,
                                        tr_classify_targets, va_classify_targets, 
                                        tr_regress_targets, va_regress_targets, 
                                        batch_size, SEED)
        model = build_model(IL=tr_features.shape[1], seed=SEED).to(DEVICE)
        optimizer = model.get_optimizer(lr=lr, wd=wd)
        scheduler = None
        if isinstance(patience, int) and isinstance(eps, float):
            scheduler = model.get_plateau_scheduler(optimizer, patience, eps)

        L, _, name = fit(model=model, optimizer=optimizer, scheduler=scheduler, 
                         epochs=epochs, early_stopping_patience=early_stopping,
                         dataloaders=dataloaders, fold=fold, verbose=verbose)
        y_pred = predict_batch(model=model, dataloader=dataloaders["valid"], mode="valid", path=name)
        RMSE = np.sqrt(mean_squared_error(y_pred, va_targets))
        if verbose:
            print("Validation RMSE [Fold {}]: {:.5f}".format(fold, RMSE))
            breaker()
            show_graphs(L)
        
        metrics_dict = {"Fold" : fold, "RMSE" : RMSE}
        metrics.append(metrics_dict)
        
        fold += 1

    breaker()
    print("Total Time to {} Fold CV : {:.2f} minutes".format(n_splits, (time() - KFold_start_time)/60))

    return metrics, (time() - KFold_start_time)/60

## Main


In [8]:
def main():
    breaker()
    print("Clean Memory , {} Objects Collected ...".format(gc.collect()))

    ########### Params ###########
    
    if DEBUG:
        n_splits = 3
        patience, eps = 5, 1e-8
        epochs, early_stopping = 5, 5

        batch_size = 128
        lr = 1e-3
        wd = 1e-1
    else:
        n_splits = 10
        patience, eps = 5, 1e-8
        epochs, early_stopping = 100, 8

        batch_size = 128
        lr = 1e-3
        wd = 1e-1
    
    ##############################

    if verbose:
        breaker()
        print("Loading Data ...")
    
    features = np.load(os.path.join(PATH, "densenet169_features.npy"))
    targets, classify_targets, regress_targets = get_targets()

    # Without Scheduler
    metrics, _ = train(features, targets, classify_targets, regress_targets, n_splits, batch_size, lr, wd, epochs, early_stopping, patience=None, eps=None)

    # With Scheduler
    # train(images, targets, n_splits, batch_size, lr, wd, epochs, early_stopping, pretrained_ann_path, patience=patience, eps=eps)

    if verbose:
        pass
    else:
        breaker()
    rmse = []
    for i in range(len(metrics)):
        print("Fold {}, RMSE {:.5f}".format(metrics[i]["Fold"], metrics[i]["RMSE"]))
        rmse.append(metrics[i]["RMSE"])
    breaker()

    best_index = rmse.index(min(rmse))

    print("Best RMSE : {:.5f}".format(metrics[best_index]["RMSE"]))
    print("Avg RMSE  : {:.5f}".format(sum(rmse) / len(rmse)))

    with open("metrics.pkl", "wb") as fp:
        pickle.dump(metrics, fp)

    breaker()

In [9]:
main()


**************************************************

Clean Memory , 255 Objects Collected ...

**************************************************

Performing 10 Fold CV ...

**************************************************

Training Fold 1...
Training Fold 2...
Training Fold 3...
Training Fold 4...
Training Fold 5...
Training Fold 6...
Training Fold 7...
Training Fold 8...
Training Fold 9...
Training Fold 10...

**************************************************

Total Time to 10 Fold CV : 2.54 minutes

**************************************************

Fold 1, RMSE 23.69315
Fold 2, RMSE 22.04580
Fold 3, RMSE 23.46380
Fold 4, RMSE 24.57623
Fold 5, RMSE 22.85770
Fold 6, RMSE 22.71320
Fold 7, RMSE 24.42093
Fold 8, RMSE 22.53111
Fold 9, RMSE 23.82031
Fold 10, RMSE 24.42035

**************************************************

Best RMSE : 22.04580
Avg RMSE  : 23.45426

**************************************************

