## Get Data

In [1]:
from IPython.display import clear_output
from google.colab import files
files.upload()

!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

!kaggle datasets download -d jackstapleton/petfinder-pf-cc-ua-all-dataset
!kaggle datasets download -d jackstapleton/petfinder-pf-d169-nc-a-dataset

!mkdir ~/.data_ua
!mkdir ~/.data_a

!unzip -q petfinder-pf-cc-ua-all-dataset.zip -d /.data_ua
!unzip -q petfinder-pf-d169-nc-a-dataset.zip -d /.data_a

clear_output()

In [2]:
# Mount Google Drive under /content/gdrive; get_targets() later reads
# "/content/gdrive/My Drive/train.csv" from this mount.
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [1]:
import os
import re
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader as DL
from torch.nn.utils import weight_norm as WN
from torchvision import models, transforms

from time import time
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
# Seed shared by the KFold splitter, the DataLoader generator and model init.
SEED = 49
# Run on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Directories the two Kaggle archives are unzipped into by the setup cell.
PATH_UA = "/.data_ua"
PATH_A = "/.data_a"

# When True, the training helpers print per-epoch progress and loss curves.
verbose = False
# NOTE(review): DEBUG is not read anywhere in the visible cells.
DEBUG = False

# First five entries (alphabetical order) of PATH_A; main() loads each with np.load.
names = sorted(os.listdir(PATH_A))[:5]

# Target scaler shared by the CV routines; it is re-fitted on every fold's
# training targets, so it always holds the statistics of the most recent fold.
sc_y = StandardScaler()

In [3]:
def breaker(num=50, char="*") -> None:
    """Print a horizontal rule of ``num`` copies of ``char`` framed by blank lines."""
    rule = char * num
    print("".join(("\n", rule, "\n")))


def get_targets() -> np.ndarray:
    """Load the ``Pawpularity`` column of train.csv as a column vector.

    Returns:
        Array of shape ``(n_rows, 1)`` holding a copy of the column values.
    """
    frame = pd.read_csv("/content/gdrive/My Drive/train.csv", engine="python")
    pawpularity = frame["Pawpularity"].copy()
    column_vector = pawpularity.values.reshape(-1, 1)
    return column_vector


def show_graphs(L: list, title=None) -> None:
    """Plot the per-epoch train and valid losses on one figure.

    Args:
        L: One dict per epoch, each with a ``"train"`` and a ``"valid"`` entry.
        title: Optional prefix; the figure is titled ``"<title> Loss"`` when
            given and ``"Loss"`` otherwise.
    """
    train_curve = [record["train"] for record in L]
    valid_curve = [record["valid"] for record in L]
    epoch_axis = np.arange(1, len(L) + 1)

    plt.figure()
    # Red for train, blue for valid.
    for curve, colour, tag in ((train_curve, "r", "train"), (valid_curve, "b", "valid")):
        plt.plot(epoch_axis, curve, colour, label=tag)
    plt.grid()
    plt.legend()
    plt.title("{} Loss".format(title) if title else "Loss")
    plt.show()

## Independent and Sum Losses

In [4]:
def do_independent(features_1: np.ndarray, features_2: np.ndarray, features_3: np.ndarray, 
                   features_4: np.ndarray, features_5: np.ndarray, features_6: np.ndarray,
                   targets: np.ndarray, n_splits: int = 10, batch_size: int = 128,
                   lr: float = 1e-3, wd: float = 1e-1, epochs: int = 100,
                   early_stopping: int = 8):
    """Cross-validate one shared regression head on six row-aligned feature arrays.

    Row ``i`` of every feature array is paired with ``targets[i]``.  During
    training all six arrays are pushed through the same BatchNorm1d +
    weight-normalised Linear head and the six MSE losses are summed.  The
    per-fold validation RMSE is computed from predictions on ``features_1``
    only, after mapping predictions back through the module-level ``sc_y``.

    Side effects: prints progress and the per-fold / best / average RMSE,
    writes one checkpoint per fold (``./Independent_Fold_<fold>_state.pt``)
    and pickles the metrics list to ``metrics_1.pkl``.

    Args:
        features_1 .. features_6: 2-D arrays indexed by sample along axis 0;
            ``features_1.shape[1]`` sets the model input width.
        targets: Array of shape ``(n_samples, 1)``.
        n_splits: Number of KFold folds.
        batch_size: Batch size of the train and valid loaders.
        lr: Adam learning rate.
        wd: Adam weight decay.
        epochs: Maximum number of epochs per fold.
        early_stopping: Stop a fold after more than this many consecutive
            epochs without a better validation loss; falsy disables it.
    """

    class DS(Dataset):
        """Dataset yielding six feature tensors followed by the target tensor."""

        def __init__(self, features_1, features_2, features_3, features_4, features_5, features_6, targets):
            self.feature_sets = (features_1, features_2, features_3, features_4, features_5, features_6)
            self.targets = targets

        def __len__(self):
            return self.feature_sets[0].shape[0]
        
        def __getitem__(self, idx):
            tensors = tuple(torch.FloatTensor(block[idx]) for block in self.feature_sets)
            return tensors + (torch.FloatTensor(self.targets[idx]),)


    def build_dataloaders(tr_features: tuple, va_features: tuple,
                          tr_targets: np.ndarray, va_targets: np.ndarray, batch_size: int, seed: int):
        """Return ``{"train": ..., "valid": ...}`` loaders; only train is shuffled."""

        if verbose:
            breaker()
            print("Building Train and Valid Dataloaders ...")

        tr_data_setup = DS(*tr_features, tr_targets)
        va_data_setup = DS(*va_features, va_targets)

        dataloaders = {
            # torch.manual_seed re-seeds and returns the default generator,
            # so the shuffle order is the same for every fold.
            "train" : DL(tr_data_setup, batch_size=batch_size, shuffle=True, generator=torch.manual_seed(seed)),
            "valid" : DL(va_data_setup, batch_size=batch_size, shuffle=False)
        }

        return dataloaders


    def build_model(IL: int, seed: int):
        """Build the ``IL -> 1`` head after seeding torch for reproducible init."""

        class ANN(nn.Module):
            def __init__(self, IL=None):
                super(ANN, self).__init__()

                self.predictor = nn.Sequential()
                self.predictor.add_module("BN", nn.BatchNorm1d(num_features=IL, eps=1e-5))
                self.predictor.add_module("FC", WN(nn.Linear(in_features=IL, out_features=1)))

            def get_optimizer(self, lr=1e-3, wd=0):
                params = [p for p in self.parameters() if p.requires_grad]
                return optim.Adam(params, lr=lr, weight_decay=wd)

            def get_plateau_scheduler(self, optimizer=None, patience=5, eps=1e-8):
                # NOTE(review): `verbose` is deprecated in recent PyTorch
                # releases - confirm against the installed version.
                return optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=patience, eps=eps, verbose=True)

            def forward(self, x1, x2=None, x3=None, x4=None, x5=None, x6=None):
                # All six inputs -> six outputs from the shared head (training);
                # otherwise only x1 is scored (prediction).
                if x2 is not None and x3 is not None and x4 is not None and x5 is not None and x6 is not None:
                    return self.predictor(x1), self.predictor(x2), self.predictor(x3), self.predictor(x4), self.predictor(x5), self.predictor(x6)
                else:
                    return self.predictor(x1)
        
        if verbose:
            breaker()
            print("Building Model ...")
            print("\n{} -> 1".format(IL))
        
        torch.manual_seed(seed)
        model = ANN(IL=IL)
        
        return model


    def fit(model=None, optimizer=None, scheduler=None, 
            epochs=None, early_stopping_patience=None,
            dataloaders=None, fold=None, lr=None, wd=None, verbose=False) -> tuple:
        """Train one fold and checkpoint the state with the best validation loss.

        Returns:
            ``(Losses, BLE, name)``: the per-epoch loss dicts, the 1-based
            epoch of the best validation loss (0 if no epoch improved on
            ``inf``) and the checkpoint path.
        """

        # One checkpoint file per fold.  The original format call passed
        # (lr, wd, fold) to a single placeholder, so `lr` was substituted and
        # every fold overwrote the same file.  lr / wd remain parameters only
        # for call-site compatibility.
        name = "./Independent_Fold_{}_state.pt".format(fold)
        
        if verbose:
            breaker()
            print("Training Fold {}...".format(fold))
            breaker()
        else:
            print("Training Fold {}...".format(fold))

        def save_checkpoint():
            torch.save({"model_state_dict": model.state_dict(),
                        "optim_state_dict": optimizer.state_dict()},
                       name)

        criterion = nn.MSELoss()
        Losses = []
        bestLoss = {"train" : np.inf, "valid" : np.inf}
        # Initialised up front so a first epoch that fails to improve
        # (e.g. a NaN loss) cannot hit an unbound local.
        BLE = 0
        early_stopping_step = 0

        start_time = time()
        for e in range(epochs):
            e_st = time()
            epochLoss = {"train" : np.inf, "valid" : np.inf}

            for phase in ["train", "valid"]:
                if phase == "train":
                    model.train()
                else:
                    model.eval()
                
                lossPerPass = []

                for *inputs, y in dataloaders[phase]:
                    inputs = [x.to(DEVICE) for x in inputs]
                    y = y.to(DEVICE)

                    optimizer.zero_grad()
                    with torch.set_grad_enabled(phase == "train"):
                        outputs = model(*inputs)
                        # Sum of the six per-feature-set MSE losses.
                        loss = sum(criterion(output, y) for output in outputs)
                        if phase == "train":
                            loss.backward()
                            optimizer.step()
                    lossPerPass.append(loss.item())
                epochLoss[phase] = np.mean(np.array(lossPerPass))
            Losses.append(epochLoss)

            if epochLoss["valid"] < bestLoss["valid"]:
                bestLoss = epochLoss
                BLE = e + 1
                save_checkpoint()
                early_stopping_step = 0
            elif early_stopping_patience:
                early_stopping_step += 1
                if early_stopping_step > early_stopping_patience:
                    if verbose:
                        # Reported 1-based, like every other epoch message.
                        print("\nEarly Stopping at Epoch {}".format(e + 1))
                    break
            
            if scheduler:
                scheduler.step(epochLoss["valid"])
            
            if verbose:
                print("Epoch: {} | Train Loss: {:.5f} | Valid Loss: {:.5f} | Time: {:.2f} seconds".format(e+1, epochLoss["train"], epochLoss["valid"], time()-e_st))

        if BLE == 0:
            # No epoch improved: still write this run's state so `name` never
            # points at a missing or stale checkpoint.
            save_checkpoint()
        
        if verbose:
            breaker()
            print("Best Validation Loss at Epoch {}".format(BLE))
            breaker()
            print("Time Taken [{} Epochs] : {:.2f} minutes".format(len(Losses), (time()-start_time)/60))
            breaker()
            print("Training Completed")
            breaker()

        return Losses, BLE, name

    #####################################################################################################

    def predict_batch(model=None, dataloader=None, mode="test", path=None) -> np.ndarray:    
        """Load the checkpoint at ``path`` and return predictions of shape ``(n, 1)``.

        ``mode`` starting with "valid" expects 7-tuple batches and scores the
        first feature tensor; ``mode`` starting with "test" expects batches
        that are a single tensor.  Any other mode yields an empty array.
        """
        model.load_state_dict(torch.load(path, map_location=DEVICE)["model_state_dict"])
        model.to(DEVICE)
        model.eval()

        chunks = []
        if re.match(r"valid", mode, re.IGNORECASE):
            for X, _, _, _, _, _, _ in dataloader:
                X = X.to(DEVICE)
                with torch.no_grad():
                    output = model(X)
                chunks.append(output.view(-1, 1))
        elif re.match(r"test", mode, re.IGNORECASE):
            for X in dataloader:
                X = X.to(DEVICE)
                with torch.no_grad():
                    output = model(X)
                chunks.append(output.view(-1, 1))

        if not chunks:
            return torch.zeros(0, 1).numpy()
        return torch.cat(chunks, dim=0).detach().cpu().numpy()


    def train(features_1: np.ndarray, features_2: np.ndarray, features_3: np.ndarray,
              features_4: np.ndarray, features_5: np.ndarray, features_6: np.ndarray,
              targets: np.ndarray, n_splits: int, 
              batch_size: int, lr: float, wd: float, 
              epochs: int, early_stopping: int, 
              patience=None, eps=None):
        """Run the K-fold loop; return ``(metrics, elapsed_minutes)``."""

        metrics = []
        feature_sets = (features_1, features_2, features_3, features_4, features_5, features_6)
            
        KFold_start_time = time()
        breaker()
        if verbose:
            print("Performing {} Fold CV ...".format(n_splits))

        splitter = KFold(n_splits=n_splits, shuffle=True, random_state=SEED)
        for fold, (tr_idx, va_idx) in enumerate(splitter.split(features_1), start=1):

            tr_features = tuple(block[tr_idx] for block in feature_sets)
            va_features = tuple(block[va_idx] for block in feature_sets)

            tr_targets, va_targets = targets[tr_idx], targets[va_idx]

            # Standardise targets with statistics of the training split only.
            tr_targets = sc_y.fit_transform(tr_targets)
            va_targets = sc_y.transform(va_targets)

            dataloaders = build_dataloaders(tr_features, va_features,
                                            tr_targets, va_targets, 
                                            batch_size, SEED)
            model = build_model(IL=tr_features[0].shape[1], seed=SEED).to(DEVICE)
            optimizer = model.get_optimizer(lr=lr, wd=wd)
            scheduler = None
            if isinstance(patience, int) and isinstance(eps, float):
                scheduler = model.get_plateau_scheduler(optimizer, patience, eps)

            L, _, name = fit(model=model, optimizer=optimizer, scheduler=scheduler, 
                            epochs=epochs, early_stopping_patience=early_stopping,
                            dataloaders=dataloaders, fold=fold, lr=lr, wd=wd, verbose=verbose)
            y_pred = predict_batch(model=model, dataloader=dataloaders["valid"], mode="valid", path=name)
            # RMSE in the original target units.
            RMSE = np.sqrt(mean_squared_error(sc_y.inverse_transform(va_targets), sc_y.inverse_transform(y_pred)))
            if verbose:
                print("Validation RMSE [Fold {}]: {:.5f}".format(fold, RMSE))
                breaker()
                show_graphs(L)
            
            metrics.append({"Fold" : fold, "RMSE" : RMSE})
        
        if verbose:
            breaker()
            print("Total Time to {} Fold CV : {:.2f} minutes".format(n_splits, (time() - KFold_start_time)/60))
        
        return metrics, (time() - KFold_start_time)/60
    
    metrics, _ = train(features_1, features_2, features_3, features_4, features_5, features_6, targets,
                       n_splits, batch_size, lr, wd, epochs, early_stopping)

    if not verbose:
        breaker()
    rmse = []
    for entry in metrics:
        print("Fold {}, RMSE {:.5f}".format(entry["Fold"], entry["RMSE"]))
        rmse.append(entry["RMSE"])
    breaker()

    print("Best RMSE : {:.5f}".format(min(rmse)))
    print("Avg RMSE  : {:.5f}".format(sum(rmse) / len(rmse)))

    with open("metrics_1.pkl", "wb") as fp:
        pickle.dump(metrics, fp)

    breaker() 

**Main**

## Concatenate into a Single Dataset

In [5]:
def do_concat(features: np.ndarray, targets: np.ndarray, n_splits: int = 10,
              batch_size: int = 128, lr: float = 1e-3, wd: float = 1e-1,
              epochs: int = 100, early_stopping: int = 8):
    """Cross-validate the regression head on a single stacked feature array.

    A BatchNorm1d + weight-normalised Linear head is trained with MSE loss on
    ``features`` / ``targets`` under K-fold CV.  The per-fold validation RMSE
    is computed after mapping predictions back through the module-level
    ``sc_y``.

    Side effects: prints progress and the per-fold / best / average RMSE,
    writes one checkpoint per fold (``./Concat_Fold_<fold>_state.pt``) and
    pickles the metrics list to ``metrics_2.pkl``.

    NOTE(review): ``main`` builds ``features`` by stacking six feature arrays
    along axis 0 with the targets repeated six times.  If those arrays are
    different views of the same samples, a shuffled KFold over rows can put
    views of one sample in both train and valid - confirm this is intended.

    Args:
        features: 2-D array indexed by sample along axis 0;
            ``features.shape[1]`` sets the model input width.
        targets: Array of shape ``(n_samples, 1)``.
        n_splits: Number of KFold folds.
        batch_size: Batch size of the train and valid loaders.
        lr: Adam learning rate.
        wd: Adam weight decay.
        epochs: Maximum number of epochs per fold.
        early_stopping: Stop a fold after more than this many consecutive
            epochs without a better validation loss; falsy disables it.
    """

    class DS(Dataset):
        """Dataset yielding ``(feature_tensor, target_tensor)`` pairs."""

        def __init__(self, features, targets):
            self.features = features
            self.targets = targets

        def __len__(self):
            return self.features.shape[0]
        
        def __getitem__(self, idx):
            return torch.FloatTensor(self.features[idx]), torch.FloatTensor(self.targets[idx])


    def build_dataloaders(tr_features: np.ndarray, va_features: np.ndarray,
                          tr_targets: np.ndarray, va_targets: np.ndarray, 
                          batch_size: int, seed: int):
        """Return ``{"train": ..., "valid": ...}`` loaders; only train is shuffled."""

        if verbose:
            breaker()
            print("Building Train and Valid Dataloaders ...")

        tr_data_setup = DS(tr_features, tr_targets)
        va_data_setup = DS(va_features, va_targets)

        dataloaders = {
            # torch.manual_seed re-seeds and returns the default generator,
            # so the shuffle order is the same for every fold.
            "train" : DL(tr_data_setup, batch_size=batch_size, shuffle=True, generator=torch.manual_seed(seed)),
            "valid" : DL(va_data_setup, batch_size=batch_size, shuffle=False)
        }

        return dataloaders


    def build_model(IL: int, seed: int):
        """Build the ``IL -> 1`` head after seeding torch for reproducible init."""

        class ANN(nn.Module):
            def __init__(self, IL=None):
                super(ANN, self).__init__()

                self.predictor = nn.Sequential()
                self.predictor.add_module("BN", nn.BatchNorm1d(num_features=IL, eps=1e-5))
                self.predictor.add_module("FC", WN(nn.Linear(in_features=IL, out_features=1)))

            def get_optimizer(self, lr=1e-3, wd=0):
                params = [p for p in self.parameters() if p.requires_grad]
                return optim.Adam(params, lr=lr, weight_decay=wd)

            def get_plateau_scheduler(self, optimizer=None, patience=5, eps=1e-8):
                # NOTE(review): `verbose` is deprecated in recent PyTorch
                # releases - confirm against the installed version.
                return optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=patience, eps=eps, verbose=True)

            def forward(self, x1, x2=None):
                if x2 is not None:
                    return self.predictor(x1), self.predictor(x2)
                else:
                    return self.predictor(x1)
        
        if verbose:
            breaker()
            print("Building Model ...")
            print("\n{} -> 1".format(IL))
        
        torch.manual_seed(seed)
        model = ANN(IL=IL)
        
        return model


    def fit(model=None, optimizer=None, scheduler=None, 
            epochs=None, early_stopping_patience=None,
            dataloaders=None, fold=None, lr=None, wd=None, verbose=False) -> tuple:
        """Train one fold and checkpoint the state with the best validation loss.

        Returns:
            ``(Losses, BLE, name)``: the per-epoch loss dicts, the 1-based
            epoch of the best validation loss (0 if no epoch improved on
            ``inf``) and the checkpoint path.
        """

        # One checkpoint file per fold.  The original format call passed
        # (lr, wd, fold) to a single placeholder, so `lr` was substituted and
        # every fold overwrote the same file.  lr / wd remain parameters only
        # for call-site compatibility.
        name = "./Concat_Fold_{}_state.pt".format(fold)
        
        if verbose:
            breaker()
            print("Training Fold {}...".format(fold))
            breaker()
        else:
            print("Training Fold {}...".format(fold))

        def save_checkpoint():
            torch.save({"model_state_dict": model.state_dict(),
                        "optim_state_dict": optimizer.state_dict()},
                       name)

        criterion = nn.MSELoss()
        Losses = []
        bestLoss = {"train" : np.inf, "valid" : np.inf}
        # Initialised up front so a first epoch that fails to improve
        # (e.g. a NaN loss) cannot hit an unbound local.
        BLE = 0
        early_stopping_step = 0

        start_time = time()
        for e in range(epochs):
            e_st = time()
            epochLoss = {"train" : np.inf, "valid" : np.inf}

            for phase in ["train", "valid"]:
                if phase == "train":
                    model.train()
                else:
                    model.eval()
                
                lossPerPass = []

                for X, y in dataloaders[phase]:
                    X, y = X.to(DEVICE), y.to(DEVICE)

                    optimizer.zero_grad()
                    with torch.set_grad_enabled(phase == "train"):
                        output = model(X)
                        loss = criterion(output, y)
                        if phase == "train":
                            loss.backward()
                            optimizer.step()
                    lossPerPass.append(loss.item())
                epochLoss[phase] = np.mean(np.array(lossPerPass))
            Losses.append(epochLoss)

            if epochLoss["valid"] < bestLoss["valid"]:
                bestLoss = epochLoss
                BLE = e + 1
                save_checkpoint()
                early_stopping_step = 0
            elif early_stopping_patience:
                early_stopping_step += 1
                if early_stopping_step > early_stopping_patience:
                    if verbose:
                        # Reported 1-based, like every other epoch message.
                        print("\nEarly Stopping at Epoch {}".format(e + 1))
                    break
            
            if scheduler:
                scheduler.step(epochLoss["valid"])
            
            if verbose:
                print("Epoch: {} | Train Loss: {:.5f} | Valid Loss: {:.5f} | Time: {:.2f} seconds".format(e+1, epochLoss["train"], epochLoss["valid"], time()-e_st))

        if BLE == 0:
            # No epoch improved: still write this run's state so `name` never
            # points at a missing or stale checkpoint.
            save_checkpoint()
        
        if verbose:
            breaker()
            print("Best Validation Loss at Epoch {}".format(BLE))
            breaker()
            print("Time Taken [{} Epochs] : {:.2f} minutes".format(len(Losses), (time()-start_time)/60))
            breaker()
            print("Training Completed")
            breaker()

        return Losses, BLE, name

    #####################################################################################################

    def predict_batch(model=None, dataloader=None, mode="test", path=None) -> np.ndarray:    
        """Load the checkpoint at ``path`` and return predictions of shape ``(n, 1)``.

        ``mode`` starting with "valid" expects ``(X, y)`` batches; ``mode``
        starting with "test" expects batches that are a single tensor.  Any
        other mode yields an empty array.
        """
        model.load_state_dict(torch.load(path, map_location=DEVICE)["model_state_dict"])
        model.to(DEVICE)
        model.eval()

        chunks = []
        if re.match(r"valid", mode, re.IGNORECASE):
            for X, _ in dataloader:
                X = X.to(DEVICE)
                with torch.no_grad():
                    output = model(X)
                chunks.append(output.view(-1, 1))
        elif re.match(r"test", mode, re.IGNORECASE):
            for X in dataloader:
                X = X.to(DEVICE)
                with torch.no_grad():
                    output = model(X)
                chunks.append(output.view(-1, 1))

        if not chunks:
            return torch.zeros(0, 1).numpy()
        return torch.cat(chunks, dim=0).detach().cpu().numpy()


    def train(features: np.ndarray, targets: np.ndarray, n_splits: int, 
              batch_size: int, lr: float, wd: float, 
              epochs: int, early_stopping: int, 
              patience=None, eps=None):
        """Run the K-fold loop; return ``(metrics, elapsed_minutes)``."""

        metrics = []
            
        KFold_start_time = time()
        breaker()
        if verbose:
            print("Performing {} Fold CV ...".format(n_splits))

        splitter = KFold(n_splits=n_splits, shuffle=True, random_state=SEED)
        for fold, (tr_idx, va_idx) in enumerate(splitter.split(features), start=1):

            tr_features, va_features = features[tr_idx], features[va_idx]
            tr_targets, va_targets = targets[tr_idx], targets[va_idx]

            # Standardise targets with statistics of the training split only.
            tr_targets = sc_y.fit_transform(tr_targets)
            va_targets = sc_y.transform(va_targets)

            dataloaders = build_dataloaders(tr_features, va_features,
                                            tr_targets, va_targets, 
                                            batch_size, SEED)
            model = build_model(IL=tr_features.shape[1], seed=SEED).to(DEVICE)
            optimizer = model.get_optimizer(lr=lr, wd=wd)
            scheduler = None
            if isinstance(patience, int) and isinstance(eps, float):
                scheduler = model.get_plateau_scheduler(optimizer, patience, eps)

            L, _, name = fit(model=model, optimizer=optimizer, scheduler=scheduler, 
                            epochs=epochs, early_stopping_patience=early_stopping,
                            dataloaders=dataloaders, fold=fold, lr=lr, wd=wd, verbose=verbose)
            y_pred = predict_batch(model=model, dataloader=dataloaders["valid"], mode="valid", path=name)
            # RMSE in the original target units.
            RMSE = np.sqrt(mean_squared_error(sc_y.inverse_transform(va_targets), sc_y.inverse_transform(y_pred)))
            if verbose:
                print("Validation RMSE [Fold {}]: {:.5f}".format(fold, RMSE))
                breaker()
                show_graphs(L)
            
            metrics.append({"Fold" : fold, "RMSE" : RMSE})
        
        if verbose:
            breaker()
            print("Total Time to {} Fold CV : {:.2f} minutes".format(n_splits, (time() - KFold_start_time)/60))
        
        return metrics, (time() - KFold_start_time)/60
    
    metrics, _ = train(features, targets, n_splits, batch_size, lr, wd, epochs, early_stopping)

    if not verbose:
        breaker()
    rmse = []
    for entry in metrics:
        print("Fold {}, RMSE {:.5f}".format(entry["Fold"], entry["RMSE"]))
        rmse.append(entry["RMSE"])
    breaker()

    print("Best RMSE : {:.5f}".format(min(rmse)))
    print("Avg RMSE  : {:.5f}".format(sum(rmse) / len(rmse)))

    with open("metrics_2.pkl", "wb") as fp:
        pickle.dump(metrics, fp)

    breaker() 

## Main

In [6]:
def main():
    """Load the six feature arrays and run both CV experiments.

    The first array is ``densenet169_features.npy`` from ``PATH_UA``; the
    other five are the files listed in ``names`` under ``PATH_A``.  The
    "independent" experiment receives the six arrays separately; the
    "concat" experiment receives them stacked along axis 0 together with
    the targets repeated once per array.
    """
    base_features = np.load(os.path.join(PATH_UA, "densenet169_features.npy"))
    extra_features = [np.load(os.path.join(PATH_A, names[k])) for k in range(5)]
    feature_sets = [base_features] + extra_features

    features = np.concatenate(tuple(feature_sets), axis=0)
    targets = get_targets()

    breaker()
    print("\t --- INDEPENDENT ---")

    do_independent(*feature_sets, targets)

    print("\t --- CONCAT ---")
    # Repeat the (n, 1) target column six times so it lines up with the
    # row-stacked feature array.
    targets = np.tile(targets, (6, 1))
    do_concat(features, targets)

In [7]:
# Run both experiments; the fold-by-fold RMSE summaries are printed below.
main()


**************************************************

	 --- INDEPENDENT ---

**************************************************

Training Fold 1...
Training Fold 2...
Training Fold 3...
Training Fold 4...
Training Fold 5...
Training Fold 6...
Training Fold 7...
Training Fold 8...
Training Fold 9...
Training Fold 10...

**************************************************

Fold 1, RMSE 18.91639
Fold 2, RMSE 18.79880
Fold 3, RMSE 19.02069
Fold 4, RMSE 18.14201
Fold 5, RMSE 19.41679
Fold 6, RMSE 19.05259
Fold 7, RMSE 19.27267
Fold 8, RMSE 17.07658
Fold 9, RMSE 18.60525
Fold 10, RMSE 18.47649

**************************************************

Best RMSE : 17.07658
Avg RMSE  : 18.67783

**************************************************

	 --- CONCAT ---

**************************************************

Training Fold 1...
Training Fold 2...
Training Fold 3...
Training Fold 4...
Training Fold 5...
Training Fold 6...
Training Fold 7...
Training Fold 8...
Training Fold 9...
Training Fold 10..