In [2]:
import warnings, random, os, sys, tqdm, time
sys.path.append("../")
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler, MinMaxScaler, QuantileTransformer, RobustScaler

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection import KFold

import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.modules.loss import _WeightedLoss
from torch.optim.lr_scheduler import ReduceLROnPlateau

from pytorch_tabnet.tab_model import TabNetRegressor
# Export CUDA_LAUNCH_BLOCKING=1 into this process's environment.
# NOTE(review): this is CUDA's switch for synchronous kernel launches, normally
# a debugging aid that slows GPU work — confirm it is still wanted here.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Raise pandas' display limits to 1200 columns and 1200 rows.
pd.set_option("display.max_columns", 1200)
pd.set_option("display.max_rows", 1200)
%matplotlib inline



In [3]:
def metric(y_true, y_pred):
    """Return the mean of the per-column log losses.

    Parameters
    ----------
    y_true : 2-D array
        Ground-truth targets, indexed as ``y_true[:, i]``; one column per
        target. Assumed to hold binary 0/1 labels.
    y_pred : 2-D array
        Predictions with the same column layout as ``y_true``; each column is
        passed to ``log_loss`` as the predicted values for that target.

    Returns
    -------
    float
        ``np.mean`` of ``log_loss`` evaluated column by column.
    """
    # ``labels`` is passed explicitly so that a column in which only one class
    # occurs (common on small folds) can still be scored; without it
    # ``log_loss`` raises because it cannot infer both classes from y_true.
    column_losses = [
        log_loss(y_true[:, col], y_pred[:, col], labels=[0, 1])
        for col in range(y_true.shape[1])
    ]
    return np.mean(column_losses)

def seed_everything(seed_value):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and torch, and stores
    the seed in the ``PYTHONHASHSEED`` environment variable. When CUDA is
    available the CUDA generators are seeded as well and cuDNN is switched to
    its deterministic configuration.

    Parameters
    ----------
    seed_value : int
        Seed handed to every generator.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        # Benchmark mode lets cuDNN auto-tune and pick algorithms at run time,
        # which can differ between runs; it must be off for reproducibility.
        torch.backends.cudnn.benchmark = False


seed_everything(42)
        
    
def make_scaler(flag, seed):
    """Build the feature scaler selected by ``flag``.

    Parameters
    ----------
    flag : str
        One of ``"quantile"``, ``"gauss"``, ``"standard"``, ``"minmax"`` or
        ``"robust"``.
    seed : int
        Random state; only used by the ``"quantile"`` scaler.

    Returns
    -------
    A freshly constructed, unfitted scaler object.

    Raises
    ------
    ValueError
        If ``flag`` is not one of the supported names. Previously the function
        fell through and returned ``None``, which only failed later when the
        caller tried to use the scaler.
    """
    if flag == "quantile":
        return QuantileTransformer(n_quantiles=100, random_state=seed, output_distribution="normal")
    if flag == "gauss":
        # NOTE(review): GaussRankScaler is neither defined nor imported in the
        # visible part of this notebook; this branch raises NameError unless it
        # is provided elsewhere.
        return GaussRankScaler()
    if flag == "standard":
        return StandardScaler()
    if flag == "minmax":
        return MinMaxScaler()
    if flag == "robust":
        return RobustScaler()
    raise ValueError(
        f"Unknown scaler flag {flag!r}; expected one of "
        "'quantile', 'gauss', 'standard', 'minmax', 'robust'"
    )
    
# Seed values 7 through 13 (inclusive).
seeds = list(range(7, 14))
# Scaler flag; "quantile" is one of the names make_scaler understands.
SCALE = "quantile"
    


In [4]:
# Read each model's train_preds.csv and the scored targets, and reduce every
# table to a bare value matrix by dropping its "sig_id" column.
# NOTE(review): the matrices are later compared purely by position, so all four
# files are assumed to share the same row and column order — confirm.
resnet_preds = pd.read_csv("../ex8-resnet/train_preds.csv").drop("sig_id", axis=1).values
dnn_preds = pd.read_csv("../ex7-DNN-FE/train_preds.csv").drop("sig_id", axis=1).values
tabnet_preds = pd.read_csv("../ex5-TabNet-FE/train_preds.csv").drop("sig_id", axis=1).values
y = pd.read_csv("../../../Data/Raw/train_targets_scored.csv").drop("sig_id", axis=1).values

In [5]:
# Mean column-wise log loss of the ResNet train predictions on their own.
resnet_score = metric(y, resnet_preds)
print(resnet_score)

0.01570424092946404


In [6]:
# Mean column-wise log loss of the DNN train predictions on their own.
dnn_score = metric(y, dnn_preds)
print(dnn_score)

0.015633094022344574


In [7]:
# Mean column-wise log loss of the TabNet train predictions on their own.
tabnet_score = metric(y, tabnet_preds)
print(tabnet_score)

0.01568500708434628


In [15]:
# Grid search over blend weights in 5% steps. i, j and k are the percentage
# weights of the DNN, ResNet and TabNet predictions respectively and always sum
# to 100. Every time a new best (lowest) score is found it is printed.
best_i, best_j, best_k = 0, 0, 0
best_score = 10
for i in range(0, 105, 5):
    for j in range(0, 105, 5):
        k = 100 - i - j
        if k < 0:
            # Any larger j would push k further below zero, so stop this row.
            break
        blend = dnn_preds*(i/100) + resnet_preds*(j/100) + tabnet_preds*(k/100)
        score = metric(y, blend)
        if not score < best_score:
            continue
        best_i, best_j, best_k, best_score = i, j, k, score
        print(best_i, best_j, best_k, best_score)

0 0 100 0.01568500708434628
0 5 95 0.01566692664294131
0 10 90 0.01565249699865784
0 15 85 0.015640680321951785
0 20 80 0.01563105035187033
0 25 75 0.015623351289185124
0 30 70 0.015617411351904538
0 35 65 0.015613109974208693
0 40 60 0.015610362069616187
0 45 55 0.015609109645654436
5 20 75 0.015603408275155951
5 25 70 0.015597498499506335
5 30 65 0.015593181361653573
5 35 60 0.015590376473746026
5 40 55 0.015589028699396653
10 15 75 0.015588626361806066
10 20 70 0.01558248599324071
10 25 65 0.015577944157282968
10 30 60 0.015574913989977082
10 35 55 0.015573335935325507
10 40 50 0.015573172413015337
15 15 70 0.015570765999093175
15 20 65 0.01556588266271164
15 25 60 0.015562532031675562
15 30 55 0.015560645879857324
15 35 50 0.015560180453571928
20 15 65 0.015556402539768923
20 20 60 0.015552653092742723
20 25 55 0.015550395712851307
20 30 50 0.015549577411778382
25 10 65 0.015549207331975718
25 15 60 0.01554497939210508
25 20 55 0.015542288653627107
25 25 50 0.015541068131873139
30 

In [16]:
# Score the blend with fixed weights: 0.5 DNN, 0.1 ResNet, 0.4 TabNet.
preds2 = 0.5*dnn_preds + 0.1*resnet_preds + 0.4*tabnet_preds
blend_score = metric(y, preds2)
print(blend_score)

0.015522844392748546


In [19]:
# Read the three per-model submission files and keep only their value matrices
# (the "sig_id" column is dropped).
resnet_sub, dnn_sub, tabnet_sub = (
    pd.read_csv(path).drop("sig_id", axis=1).values
    for path in (
        "../ex8-resnet/submission.csv",
        "../ex7-DNN-FE/submission.csv",
        "../ex5-TabNet-FE/submission.csv",
    )
)

# Blend with the same fixed weights as the train-prediction blend:
# 0.5 DNN, 0.1 ResNet, 0.4 TabNet.
preds2 = 0.5*dnn_sub + 0.1*resnet_sub + 0.4*tabnet_sub

# The sample submission supplies the "sig_id" column and the column layout.
# Alternative location of the same file: ../input/lish-moa/sample_submission.csv
sub_df = pd.read_csv("../../../Data/Raw/sample_submission.csv")
cols = [name for name in sub_df.columns if name != "sig_id"]

# Overwrite every non-id column with the blended predictions and save.
sub_df[cols] = preds2
sub_df.to_csv("submission.csv", index=False)