In [1]:
import warnings, random, os, sys, tqdm, time
sys.path.append("../")
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler, MinMaxScaler, QuantileTransformer, RobustScaler

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection import KFold

import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.modules.loss import _WeightedLoss
from torch.optim.lr_scheduler import ReduceLROnPlateau

from pytorch_tabnet.tab_model import TabNetRegressor
# Ask CUDA to run kernel launches synchronously, a debugging aid that makes
# GPU errors surface at the call that caused them.
# NOTE(review): this is set after `import torch`; presumably it still takes
# effect because CUDA is initialised lazily, and it slows down GPU work —
# confirm it is still wanted in this notebook.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Let pandas show up to 1200 columns / rows before truncating a display.
pd.set_option("display.max_columns", 1200)
pd.set_option("display.max_rows", 1200)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline



In [9]:
def metric(y_true, y_pred):
    """Return the mean column-wise log loss of ``y_pred`` against ``y_true``.

    Every column is scored on its own with ``sklearn.metrics.log_loss`` and
    the per-column losses are averaged with ``np.mean``.

    Parameters
    ----------
    y_true : 2-D array supporting ``y_true[:, i]`` indexing
        Ground-truth labels, one column per target.
        Assumed to hold binary 0/1 indicators — TODO confirm against the
        target file used by the caller.
    y_pred : 2-D array with the same layout as ``y_true``
        Predicted probability of the positive class for each target.

    Returns
    -------
    float
        Average of the per-column log losses.
    """
    column_losses = [
        # Passing the label set explicitly keeps log_loss from raising
        # "y_true contains only one label" when a target column happens to be
        # all-0 (or all-1) in the rows being scored.
        log_loss(y_true[:, col], y_pred[:, col], labels=[0, 1])
        for col in range(y_true.shape[1])
    ]
    return np.mean(column_losses)

def seed_everything(seed_value):
    """Seed every random number generator this notebook touches.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch, and
    exports ``PYTHONHASHSEED``. When CUDA is available it also seeds the CUDA
    generators and configures cuDNN for reproducible execution.

    Parameters
    ----------
    seed_value : int
        Seed applied to all generators.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    # Exported for interpreters started after this point; the hash seed of
    # the already-running interpreter was fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        # benchmark mode lets cuDNN auto-tune and pick different convolution
        # algorithms from run to run, which defeats the determinism requested
        # on the previous line, so it has to be switched off.
        torch.backends.cudnn.benchmark = False


seed_everything(42)
        
    
def make_scaler(flag, seed):
    """Build the feature scaler selected by ``flag``.

    Parameters
    ----------
    flag : str
        One of "quantile", "gauss", "standard", "minmax" or "robust".
    seed : int
        Random state, used only by the "quantile" transformer.

    Returns
    -------
    A freshly constructed scaler, or ``None`` when ``flag`` matches none of
    the known names.

    NOTE(review): ``GaussRankScaler`` is neither defined nor imported in the
    visible cells, so the "gauss" option presumably relies on a definition
    elsewhere — confirm before using it.
    """
    # Factories are wrapped in lambdas so a scaler is only constructed (and
    # its class name only resolved) for the entry that is actually selected.
    factories = (
        ("quantile", lambda: QuantileTransformer(n_quantiles=100,
                                                 random_state=seed,
                                                 output_distribution="normal")),
        ("gauss", lambda: GaussRankScaler()),
        ("standard", lambda: StandardScaler()),
        ("minmax", lambda: MinMaxScaler()),
        ("robust", lambda: RobustScaler()),
    )
    for name, build in factories:
        if flag == name:
            return build()
    return None
    
# Seven consecutive seeds, 7 through 13 inclusive.
# NOTE(review): neither `seeds` nor `SCALE` is used in the visible cells —
# presumably they are consumed by training code elsewhere; confirm.
seeds = list(range(7, 14))
# Scaler key; "quantile" is one of the flags make_scaler understands.
SCALE = "quantile"
    


In [7]:
# Load the saved train-set predictions of the two experiments and the scored
# targets, keeping only the value matrix of each (the "sig_id" column is dropped).
# NOTE(review): rows of the three arrays are later compared purely by position;
# this assumes all files list the same sig_ids in the same order — TODO confirm.
resnet_preds = pd.read_csv("../ex8-resnet/train_preds.csv").drop(columns="sig_id").values
dnn_preds = pd.read_csv("../ex7-DNN-FE/train_preds.csv").drop(columns="sig_id").values
y = pd.read_csv("../../../Data/Raw/train_targets_scored.csv").drop(columns="sig_id").values

In [10]:
# Mean column-wise log loss of the ResNet predictions on their own.
print(metric(y_true=y, y_pred=resnet_preds))

0.01570424092946404


In [11]:
# Mean column-wise log loss of the DNN predictions on their own.
print(metric(y_true=y, y_pred=dnn_preds))

0.015633094022344574


In [16]:
# Sweep the blend in 5-point steps, from 0 % DNN / 100 % ResNet up to
# 100 % DNN / 0 % ResNet, printing both percentages and the blended score.
for dnn_pct in range(0, 101, 5):
    resnet_pct = 100 - dnn_pct
    blended = dnn_preds * (dnn_pct / 100) + resnet_preds * (resnet_pct / 100)
    print(dnn_pct, resnet_pct, metric(y, blended))

0 100 0.01570424092946404
5 95 0.01567932699717163
10 90 0.015659274050407627
15 85 0.01564233348926401
20 80 0.015627857969459546
25 75 0.015615489726489915
30 70 0.015605003702396442
35 65 0.015596250128449486
40 60 0.015589127877835856
45 55 0.015583570488814466
50 50 0.01557953841920421
55 45 0.015577014840007434
60 40 0.015576003770105602
65 35 0.015576530086617735
70 30 0.015578641433644026
75 25 0.015582412568621654
80 20 0.015587953547366664
85 15 0.015595425110973556
90 10 0.015605070374208418
95 5 0.015617295522042152
100 0 0.015633094022344574


In [None]:
# NOTE(review): this cell was never executed ("In [None]") and looks like an
# unfinished draft — as visible here it only prints an empty line for each
# i in 0, 5, ..., 95. TODO: complete it or delete the cell.
for i in range(0,100, 5):
    print()