In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Echo the full path of every file found under the competition input folder.
for folder, _subdirs, names in os.walk('/kaggle/input/lish-moa'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# TabNet: install pytorch-tabnet from the wheel under /kaggle/input.
# --no-index makes pip ignore the package index and look only at --find-links.
!pip install --no-index --find-links /kaggle/input/pytorchtabnet/pytorch_tabnet-2.0.0-py3-none-any.whl pytorch-tabnet


In [None]:
import random
import matplotlib.pyplot as plt
import copy
import seaborn as sns

from sklearn.metrics import log_loss
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import roc_auc_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.modules.loss import _Loss, _WeightedLoss

from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor

from scipy.optimize import dual_annealing, minimize

In [None]:
def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch random generators with `seed`.

    Also exports PYTHONHASHSEED and, when CUDA is available, seeds the GPU
    generators and puts cuDNN in deterministic, non-benchmarking mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # CPU-side generators.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return

    # GPU-side generators and cuDNN behaviour.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(seed=42)

In [None]:
# Read every competition table in one pass; the names on the left are bound
# in the same order as the paths listed on the right.
(
    train_features,
    train_drug,
    train_targets_scored,
    train_targets_nonscored,
    test_features,
    sample_submission,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/lish-moa/train_features.csv',
        '../input/lish-moa/train_drug.csv',
        '../input/lish-moa/train_targets_scored.csv',
        '../input/lish-moa/train_targets_nonscored.csv',
        '../input/lish-moa/test_features.csv',
        '../input/lish-moa/sample_submission.csv',
    )
)

In [None]:
# Inner-join train_drug with the feature table on sig_id (train_drug's columns
# come first). NOTE(review): this rebinds train_features, so re-running the
# cell merges train_drug a second time - run it once per kernel.
train_features = pd.merge(train_drug, train_features, on='sig_id')

In [None]:
# Group the feature-table columns by the token their name contains
# (substring match, not prefix match).
g_columns = list(filter(lambda name: 'g-' in name, train_features.columns))
c_columns = list(filter(lambda name: 'c-' in name, train_features.columns))

drug_id_columns = list(filter(lambda name: 'drug_id' in name, train_features.columns))

cp_type_columns = list(filter(lambda name: 'cp_type' in name, train_features.columns))
cp_time_columns = list(filter(lambda name: 'cp_time' in name, train_features.columns))
cp_dose_columns = list(filter(lambda name: 'cp_dose' in name, train_features.columns))

In [None]:
def one_hot_encode(train, test, cols):
    """One-hot encode `cols` consistently across the train and test frames.

    The indicators are built on the stacked train+test rows, so both frames
    receive the same indicator columns even when a category appears in only
    one of them.

    Args:
        train: training DataFrame containing `cols`.
        test: test DataFrame containing `cols`.
        cols: list of column names to encode.

    Returns:
        (train, test, dummy_columns): new frames with the indicator columns
        appended (the original `cols` are kept), and the list of the new
        column names.
    """
    # Capture the split point before `train` is rebound below.
    n_train = train.shape[0]

    # ignore_index=False keeps each frame's own row labels, which the
    # axis=1 concats below rely on to align the indicator rows.
    full_data = pd.concat([train, test], ignore_index=False)

    # Pin the dtype: pandas >= 2.0 defaults to bool indicators, and a frame
    # mixing bool and float columns yields an object array from `.values`,
    # which cannot be converted to a float tensor. uint8 was the old default.
    dummy = pd.get_dummies(full_data[cols], columns=cols, dtype=np.uint8)
    dummy_columns = list(dummy.columns)

    train = pd.concat([train, dummy.iloc[:n_train]], axis=1)
    test = pd.concat([test, dummy.iloc[n_train:]], axis=1)

    return train, test, dummy_columns

In [None]:
# Append one-hot indicators for cp_time and cp_dose to both feature tables.
train_features, test_features, dummy_columns = one_hot_encode(
    train=train_features,
    test=test_features,
    cols=['cp_time', 'cp_dose'],
)

In [None]:
# RankGauss: map every g-/c- feature onto a normal distribution using
# quantiles learned from the training table only.
rankgauss_cols = g_columns + c_columns
transformer = QuantileTransformer(output_distribution = 'normal', n_quantiles=100, random_state=42)

train_gauss = transformer.fit_transform(train_features[rankgauss_cols].values)
train_features[rankgauss_cols] = train_gauss

test_gauss = transformer.transform(test_features[rankgauss_cols].values)
test_features[rankgauss_cols] = test_gauss

In [None]:
def pcaprocess(train, test, cols, pct, seed = 42):
    """Fit PCA on train[cols] and append the component scores to both frames.

    `pct` is passed to PCA as `n_components` and `seed` as `random_state`.
    The new columns are named pca-0, pca-1, ... in component order.

    Returns:
        (train, test, pca_columns): the frames with the score columns
        appended, and the list of the new column names.
    """
    reducer = PCA(n_components=pct, random_state=seed)

    def _with_scores(frame, scores):
        # Keep the frame's own index so the axis=1 concat lines up row by row.
        scores_df = pd.DataFrame(scores, index = frame.index, columns=pca_columns)
        return pd.concat([frame, scores_df], axis=1)

    train_scores = reducer.fit_transform(train[cols])
    pca_columns = [f"pca-{i}" for i in range(train_scores.shape[1])]

    train = _with_scores(train, train_scores)
    test = _with_scores(test, reducer.transform(test[cols]))

    return train, test, pca_columns

In [None]:
# PCA over the gene features, with n_components=0.95.
train_features, test_features, g_pca_columns = pcaprocess(train_features, test_features, g_columns, 0.95)

# Prefix the new components with 'g-' in both tables, then in the name list.
for frame in (train_features, test_features):
    frame.columns = ['g-' + col if col in g_pca_columns else col for col in frame.columns]
g_pca_columns = ['g-' + col for col in g_pca_columns]

print(len(g_pca_columns))

In [None]:
# PCA over the cell features, with n_components=0.95.
train_features, test_features, c_pca_columns = pcaprocess(train_features, test_features, c_columns, 0.95)

# Prefix the new components with 'c-' in both tables, then in the name list.
for frame in (train_features, test_features):
    frame.columns = ['c-' + col if col in c_pca_columns else col for col in frame.columns]
c_pca_columns = ['c-' + col for col in c_pca_columns]

print(len(c_pca_columns))

In [None]:
def varprocess(train, test, cols, threshold):
    """Return the names in `cols` that VarianceThreshold(threshold) keeps.

    The selector is fitted on train[cols] only. `test` is accepted but not
    used, and neither frame is modified.
    """
    selector = VarianceThreshold(threshold)
    selector.fit(train[cols])

    # Boolean mask aligned with `cols`; True marks a column that is kept.
    keep_mask = selector.get_support()
    return [name for name, keep in zip(cols, keep_mask) if keep]

In [None]:
# Keep only the PCA components that pass the 0.8 variance threshold.
pca_columns = varprocess(
    train=train_features,
    test=test_features,
    cols=g_pca_columns + c_pca_columns,
    threshold=0.8,
)
print(len(pca_columns))

In [None]:
# Input columns of the dense network: raw g-/c- features, the selected PCA
# components, then the one-hot indicators.
feature_cols = [*g_columns, *c_columns, *pca_columns, *dummy_columns]
print(len(feature_cols))

In [None]:
# Join features with the scored targets, then drop the 'ctl_vehicle' rows
# from both tables and renumber the rows from zero.
train = (
    train_features
    .merge(train_targets_scored, on='sig_id')
    .loc[lambda frame: frame['cp_type'].ne('ctl_vehicle')]
    .reset_index(drop=True)
)

test = (
    test_features
    .loc[test_features['cp_type'].ne('ctl_vehicle')]
    .reset_index(drop=True)
)

In [None]:
# Every column of the scored-targets table except the first one.
target_cols = list(train_targets_scored.columns[1:])
len(target_cols)

**Basic NN**

In [None]:
# Dimensions of the dense network.
num_features, num_targets = len(feature_cols), len(target_cols)
hidden_size = 1500

# Inference setup: folds per seed, loader batch size, device, and the seeds
# used in the checkpoint and fold file names.
NFOLDS, BATCH_SIZE = 7, 128
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
SEED = list(range(7))


In [None]:
class Model(nn.Module):
    """Three-stage fully connected network with one raw output per target.

    Each stage is BatchNorm -> (Dropout) -> weight-normalised Linear. The two
    hidden stages are followed by leaky ReLU; the last stage has no
    activation.
    """

    def __init__(self, num_features, num_targets, hidden_size):
        super().__init__()

        # Stage 1: no dropout on the raw inputs.
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))

        # Stage 2.
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(p=0.2)
        self.dense2 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))

        # Stage 3: output layer.
        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(p=0.2)
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_size, num_targets))

    def recalibrate_layer(self, layer):
        """Replace NaNs in a weight-normalised layer with a tiny constant.

        Works around NaN values that weight_norm can leave in `weight_v` or
        `weight`: NaN entries become zero and 1e-7 is added to the tensor.
        """
        v_is_nan = torch.isnan(layer.weight_v)
        if v_is_nan.any():
            print ('recalibrate layer.weight_v')
            cleaned_v = torch.where(v_is_nan, torch.zeros_like(layer.weight_v), layer.weight_v)
            layer.weight_v = torch.nn.Parameter(cleaned_v)
            layer.weight_v = torch.nn.Parameter(layer.weight_v + 1e-7)

        w_is_nan = torch.isnan(layer.weight)
        if w_is_nan.any():
            print ('recalibrate layer.weight')
            layer.weight = torch.where(w_is_nan, torch.zeros_like(layer.weight), layer.weight)
            layer.weight += 1e-7

    def forward(self, x):
        # (norm, dropout or None, dense, apply leaky ReLU?) for each stage.
        stages = (
            (self.batch_norm1, None, self.dense1, True),
            (self.batch_norm2, self.dropout2, self.dense2, True),
            (self.batch_norm3, self.dropout3, self.dense3, False),
        )
        for norm, dropout, dense, activate in stages:
            x = norm(x)
            if dropout is not None:
                x = dropout(x)
            self.recalibrate_layer(dense)
            x = dense(x)
            if activate:
                x = F.leaky_relu(x)

        return x


In [None]:
class TestDataset:
    """Map-style dataset over a 2-D feature array, without labels."""

    def __init__(self, features):
        self.features = features

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        # One row as a float tensor under the key 'x'.
        row = self.features[idx, :]
        return {'x': torch.tensor(row, dtype=torch.float)}

In [None]:
def inference_fn(model, dataloader, device):
    """Run `model` in eval mode over `dataloader` and return sigmoid outputs.

    Each batch is a dict whose 'x' entry is moved to `device` and passed to
    the model. The per-batch results are concatenated into one NumPy array.
    """
    model.eval()
    batch_preds = []

    with torch.no_grad():
        for batch in dataloader:
            logits = model(batch['x'].to(device))
            batch_preds.append(torch.sigmoid(logits).detach().cpu().numpy())

    return np.concatenate(batch_preds)

In [None]:
def run_predict(fold, seed):
    """Score one (seed, fold) checkpoint of the dense network.

    Loads the saved weights for this seed and fold, predicts the rows of the
    global `train` whose 'kfold' equals `fold`, and predicts every row of the
    global `test`.

    Args:
        fold: fold number, matched against train['kfold'] and used in the
            checkpoint file name.
        seed: seed used for seeding and in the checkpoint file name.

    Returns:
        (oof, predictions): `oof` has shape (len(train), len(target_cols))
        and is zero everywhere except the validation rows; `predictions`
        holds the test-set probabilities.
    """
    seed_everything(seed)

    # Positional boolean mask: OOF rows are placed by position, so this no
    # longer depends on `train` carrying a default 0..n-1 index.
    val_mask = (train['kfold'] == fold).values
    x_valid = train.loc[val_mask, feature_cols].values
    valid_dataset = TestDataset(x_valid)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

    x_test = test[feature_cols].values
    testdataset = TestDataset(x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)

    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size,
    )

    model.load_state_dict(torch.load(f"../input/moatest1/nn-base-2/SEED{seed}_FOLD{fold}_.pth", 
                                     map_location=torch.device(DEVICE)))
    model.to(DEVICE)

    oof = np.zeros((len(train), len(target_cols)))
    oof[val_mask] = inference_fn(model, validloader, DEVICE)

    # inference_fn already returns a full array, so no buffer is pre-allocated.
    predictions = inference_fn(model, testloader, DEVICE)

    return oof, predictions

In [None]:
def run_k_fold(NFOLDS, seed):
    """Combine run_predict over folds 0..NFOLDS-1 for one seed.

    Out-of-fold predictions are summed across folds and test predictions are
    averaged. Whichever `run_predict` is defined when this is called is used.
    """
    n_targets = len(target_cols)
    predictions = np.zeros((len(test), n_targets))
    oof = np.zeros((len(train), n_targets))

    for fold in range(NFOLDS):
        fold_oof, fold_pred = run_predict(fold, seed)

        oof += fold_oof
        predictions += fold_pred / NFOLDS

    return oof, predictions

In [None]:
def logloss(y_true, y_pred):
    """Mean column-wise log loss between two (n_samples, n_targets) arrays.

    The number of targets is taken from `y_true` itself rather than from the
    global `target_cols`, so any pair of matching 2-D inputs can be scored.

    Args:
        y_true: 2-D array-like of binary labels.
        y_pred: 2-D array-like of predicted probabilities, same shape.

    Returns:
        The average over columns of sklearn's `log_loss` for each column.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    n_targets = y_true.shape[1]

    score = 0
    for i in range(n_targets):
        # Accumulate the mean term by term, one column at a time.
        score += log_loss(y_true[:, i], y_pred[:, i]) / n_targets
    return score

In [None]:

oof_base = np.zeros(shape=(len(train), len(target_cols)))
predictions_base = np.zeros(shape=(len(test), len(target_cols)))
cv_loss = []

# NOTE(review): averaging OOF rows across seeds assumes every
# SEED{seed}_FOLDS.csv lists sig_id in the same order - TODO confirm.
for seed in SEED:
    # Swap in this seed's fold assignment, discarding any earlier 'kfold'.
    train = train.drop(columns='kfold', errors='ignore')
    folds = pd.read_csv(f'../input/moatest1/SEED{seed}_FOLDS.csv')
    train = folds.merge(train, on='sig_id')

    oof_, predictions_ = run_k_fold(NFOLDS, seed)

    loss_ = logloss(train[target_cols].values, oof_)
    print(f"SEED: {seed}, LOSS: {loss_}")
    cv_loss.append(loss_)

    # Running average over seeds.
    oof_base += oof_ / len(SEED)
    predictions_base += predictions_ / len(SEED)


In [None]:
# CV log loss of the seed-averaged dense-network OOF predictions.
print(logloss(y_true=train[target_cols].values, y_pred=oof_base))

**4 Heads with ResNet-style skip connections**

In [None]:
class TestDataset:
    """Map-style dataset that serves four aligned 2-D feature arrays."""

    def __init__(self, gen_features, gen_pca_features, cell_features, cell_pca_features):
        self.gen_features = gen_features
        self.gen_pca_features = gen_pca_features
        self.cell_features = cell_features
        self.cell_pca_features = cell_pca_features

    def __len__(self):
        # The length is taken from the gene-feature array.
        n_rows = self.gen_features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        # Output key -> source array, in the order the keys are emitted.
        sources = (
            ('gen_x', self.gen_features),
            ('gen_pca_x', self.gen_pca_features),
            ('cell_x', self.cell_features),
            ('cell_pca_x', self.cell_pca_features),
        )
        return {key: torch.tensor(array[idx, :], dtype=torch.float) for key, array in sources}
    

In [None]:
def inference_fn(model, dataloader, device):
    """Run the four-input `model` in eval mode and return sigmoid outputs.

    Each batch is a dict with 'gen_x', 'gen_pca_x', 'cell_x' and 'cell_pca_x'
    tensors, which are moved to `device` and passed to the model in that
    order. The per-batch results are concatenated into one NumPy array.
    """
    input_keys = ('gen_x', 'gen_pca_x', 'cell_x', 'cell_pca_x')

    model.eval()
    batch_preds = []

    with torch.no_grad():
        for batch in dataloader:
            inputs = [batch[key].to(device) for key in input_keys]
            logits = model(*inputs)
            batch_preds.append(torch.sigmoid(logits).detach().cpu().numpy())

    return np.concatenate(batch_preds)

In [None]:
class Model(nn.Module):
    """Four-input network: gene, gene-PCA, cell and cell-PCA features.

    The gene branch runs two dense stages on the raw gene features,
    concatenates the result with the gene-PCA features, runs two more stages,
    and averages the representations from before and after the PCA stages.
    The cell branch does the same at a smaller width. The two branch outputs
    are concatenated and passed through two final stages that return one raw
    output per target, with no final activation.

    Every stage is BatchNorm -> (Dropout) -> weight-normalised Linear.
    """

    def __init__(self, num_gen_features, num_gen_pca_features, 
                 num_cell_features, num_cell_pca_features, num_targets):
        super().__init__()
        gen_width, cell_width = 256, 64

        # Gene branch, raw features.
        self.gen_batch_norm1 = nn.BatchNorm1d(num_gen_features)
        self.gen_dense1 = nn.utils.weight_norm(nn.Linear(num_gen_features, 512))
        self.gen_batch_norm2 = nn.BatchNorm1d(512)
        self.gen_dropout2 = nn.Dropout(p=0.1)
        self.gen_dense2 = nn.utils.weight_norm(nn.Linear(512, gen_width))

        # Gene branch, after concatenating the PCA features.
        self.gen_pca_batch_norm1 = nn.BatchNorm1d(gen_width + num_gen_pca_features)
        self.gen_pca_dropout1 = nn.Dropout(p=0.1)
        self.gen_pca_dense1 = nn.utils.weight_norm(nn.Linear(gen_width + num_gen_pca_features, gen_width))
        self.gen_pca_batch_norm2 = nn.BatchNorm1d(gen_width)
        self.gen_pca_dense2 = nn.utils.weight_norm(nn.Linear(gen_width, gen_width))

        # Cell branch, raw features.
        self.cell_batch_norm1 = nn.BatchNorm1d(num_cell_features)
        self.cell_dense1 = nn.utils.weight_norm(nn.Linear(num_cell_features, 128))
        self.cell_batch_norm2 = nn.BatchNorm1d(128)
        self.cell_dropout2 = nn.Dropout(p=0.1)
        self.cell_dense2 = nn.utils.weight_norm(nn.Linear(128, cell_width))

        # Cell branch, after concatenating the PCA features.
        self.cell_pca_batch_norm1 = nn.BatchNorm1d(cell_width + num_cell_pca_features)
        self.cell_pca_dropout1 = nn.Dropout(p=0.1)
        self.cell_pca_dense1 = nn.utils.weight_norm(nn.Linear(cell_width + num_cell_pca_features, cell_width))
        self.cell_pca_batch_norm2 = nn.BatchNorm1d(cell_width)
        self.cell_pca_dense2 = nn.utils.weight_norm(nn.Linear(cell_width, cell_width))

        # Joint stages on the concatenated branch outputs.
        self.batch_norm3 = nn.BatchNorm1d(gen_width + cell_width)
        self.dense3 = nn.utils.weight_norm(nn.Linear(gen_width + cell_width, 256))
        self.batch_norm4 = nn.BatchNorm1d(256)
        self.dropout4 = nn.Dropout(p=0.1)
        self.dense4 = nn.utils.weight_norm(nn.Linear(256, num_targets))

    def recalibrate_layer(self, layer):
        """Replace NaNs in a weight-normalised layer with a tiny constant.

        NaN entries of `weight_v` and `weight` become zero, and 1e-7 is added
        to the affected tensor.
        """
        v_is_nan = torch.isnan(layer.weight_v)
        if v_is_nan.any():
            print ('recalibrate layer.weight_v')
            cleaned_v = torch.where(v_is_nan, torch.zeros_like(layer.weight_v), layer.weight_v)
            layer.weight_v = torch.nn.Parameter(cleaned_v)
            layer.weight_v = torch.nn.Parameter(layer.weight_v + 1e-7)

        w_is_nan = torch.isnan(layer.weight)
        if w_is_nan.any():
            print ('recalibrate layer.weight')
            layer.weight = torch.where(w_is_nan, torch.zeros_like(layer.weight), layer.weight)
            layer.weight += 1e-7

    def _stage(self, x, norm, dense, dropout=None, activate=True):
        """Apply norm -> optional dropout -> recalibrated dense -> optional leaky ReLU."""
        x = norm(x)
        if dropout is not None:
            x = dropout(x)
        self.recalibrate_layer(dense)
        x = dense(x)
        return F.leaky_relu(x) if activate else x

    def forward(self, gen_x, gen_pca_x, cell_x, cell_pca_x):
        # Gene branch.
        gen_x = self._stage(gen_x, self.gen_batch_norm1, self.gen_dense1)
        gen_x = self._stage(gen_x, self.gen_batch_norm2, self.gen_dense2, self.gen_dropout2)

        gen_pca_x = torch.cat((gen_x, gen_pca_x), dim=1)
        gen_pca_x = self._stage(gen_pca_x, self.gen_pca_batch_norm1, self.gen_pca_dense1, self.gen_pca_dropout1)
        gen_pca_x = self._stage(gen_pca_x, self.gen_pca_batch_norm2, self.gen_pca_dense2)

        # Average the representations from before and after the PCA stages.
        gen = (gen_x + gen_pca_x) / 2

        # Cell branch.
        cell_x = self._stage(cell_x, self.cell_batch_norm1, self.cell_dense1)
        cell_x = self._stage(cell_x, self.cell_batch_norm2, self.cell_dense2, self.cell_dropout2)

        cell_pca_x = torch.cat((cell_x, cell_pca_x), dim=1)
        cell_pca_x = self._stage(cell_pca_x, self.cell_pca_batch_norm1, self.cell_pca_dense1, self.cell_pca_dropout1)
        cell_pca_x = self._stage(cell_pca_x, self.cell_pca_batch_norm2, self.cell_pca_dense2)

        cell = (cell_x + cell_pca_x) / 2

        # Joint stages.
        x = torch.cat((gen, cell), dim=1)
        x = self._stage(x, self.batch_norm3, self.dense3)
        x = self._stage(x, self.batch_norm4, self.dense4, self.dropout4, activate=False)
        return x


In [None]:
# First 160 gene and first 10 cell PCA components for the 4-head network.
# NOTE(review): presumably these counts must match the input sizes of the
# saved checkpoints - confirm.
g_pca_columns_sub, c_pca_columns_sub = g_pca_columns[:160], c_pca_columns[:10]

In [None]:
def run_predict(fold, seed):
    """Score one (seed, fold) checkpoint of the 4-head network.

    Loads the saved weights for this seed and fold, predicts the rows of the
    global `train` whose 'kfold' equals `fold`, and predicts every row of the
    global `test`.

    Args:
        fold: fold number, matched against train['kfold'] and used in the
            checkpoint file name.
        seed: seed used for seeding and in the checkpoint file name.

    Returns:
        (oof, predictions): `oof` has shape (len(train), len(target_cols))
        and is zero everywhere except the validation rows; `predictions`
        holds the test-set probabilities.
    """
    seed_everything(seed)

    # Column groups for the four heads, in the order TestDataset and Model
    # take them: gene, gene-PCA, cell, cell-PCA. Built once and reused.
    head_columns = (
        g_columns + dummy_columns,
        g_pca_columns_sub + dummy_columns,
        c_columns + dummy_columns,
        c_pca_columns_sub + dummy_columns,
    )

    # Positional boolean mask: OOF rows are placed by position, so this no
    # longer depends on `train` carrying a default 0..n-1 index.
    val_mask = (train['kfold'] == fold).values
    valid_df = train.loc[val_mask]

    valid_dataset = TestDataset(*[valid_df[cols].values for cols in head_columns])
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

    testdataset = TestDataset(*[test[cols].values for cols in head_columns])
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)

    model = Model(
        num_gen_features=len(head_columns[0]),
        num_gen_pca_features=len(head_columns[1]),
        num_cell_features=len(head_columns[2]),
        num_cell_pca_features=len(head_columns[3]),
        num_targets=len(target_cols)
    )

    model.load_state_dict(torch.load(f"../input/moatest1/nn-mh-2/SEED{seed}_FOLD{fold}_MH.pth",
                                    map_location=torch.device(DEVICE)))
    model.to(DEVICE)

    oof = np.zeros((len(train), len(target_cols)))
    oof[val_mask] = inference_fn(model, validloader, DEVICE)

    # inference_fn already returns a full array, so no buffer is pre-allocated.
    predictions = inference_fn(model, testloader, DEVICE)

    return oof, predictions

In [None]:

oof_mh = np.zeros(shape=(len(train), len(target_cols)))
predictions_mh = np.zeros(shape=(len(test), len(target_cols)))
cv_loss_mh = []

# NOTE(review): averaging OOF rows across seeds assumes every
# SEED{seed}_FOLDS.csv lists sig_id in the same order - TODO confirm.
for seed in SEED:
    # Swap in this seed's fold assignment, discarding any earlier 'kfold'.
    train = train.drop(columns='kfold', errors='ignore')
    folds = pd.read_csv(f'../input/moatest1/SEED{seed}_FOLDS.csv')
    train = folds.merge(train, on='sig_id')

    oof_, predictions_ = run_k_fold(NFOLDS, seed)

    loss_ = logloss(train[target_cols].values, oof_)
    print(f"SEED: {seed}, LOSS: {loss_}")
    cv_loss_mh.append(loss_)

    # Running average over seeds.
    oof_mh += oof_ / len(SEED)
    predictions_mh += predictions_ / len(SEED)


In [None]:
# CV log loss of the seed-averaged 4-head OOF predictions.
print(logloss(y_true=train[target_cols].values, y_pred=oof_mh))

**TabNet**

In [None]:
# NOTE(review): presumably a debugging aid for CUDA errors - confirm it is
# still wanted in the final notebook.
os.environ.update({"CUDA_LAUNCH_BLOCKING": '1'})

In [None]:
# TabNet inputs: one-hot indicators, raw g-/c- features, then the first 160
# gene and first 10 cell PCA components.
g_pca_columns_tabnet, c_pca_columns_tabnet = g_pca_columns[:160], c_pca_columns[:10]

tabnet_cols = [
    *dummy_columns,
    *g_columns,
    *c_columns,
    *g_pca_columns_tabnet,
    *c_pca_columns_tabnet,
]
len(tabnet_cols)

In [None]:
# Test matrix for TabNet, built once and reused by every fold.
X_test = test.loc[:, tabnet_cols].values

In [None]:
def run_predict(fold, seed):
    """Score one (seed, fold) saved TabNet model.

    Packs the saved `model_params.json` and `network.pt` for this seed and
    fold into `tabnet.zip` in the working directory, loads that archive into
    a TabNetRegressor, and predicts the validation rows of the global `train`
    and the global `X_test`. A sigmoid is applied to the model outputs.

    Unlike the earlier shell-based version, the two files are read straight
    from the saved-model folder and are not copied into the working
    directory, so a missing file raises instead of silently reusing the
    previous fold's copy.

    Args:
        fold: fold number, matched against train['kfold'] and used in the
            saved-model folder name.
        seed: seed used for seeding, for the TabNet `seed` parameter, and in
            the saved-model folder name.

    Returns:
        (oof, predictions): `oof` has shape (len(train), len(target_cols))
        and is zero everywhere except the validation rows; `predictions`
        holds the test-set probabilities.

    Raises:
        FileNotFoundError: if either saved file is missing from the folder.
    """
    import zipfile  # local import: only this loader needs it

    seed_everything(seed)

    # Positional boolean mask: OOF rows are placed by position. Only the
    # validation features are needed here - nothing is trained.
    val_mask = (train['kfold'] == fold).values
    X_val = train.loc[val_mask, tabnet_cols].values

    tabnet_params = dict(
        n_d = 24,
        n_a = 48,
        n_steps = 1,
        n_independent = 2,
        n_shared = 1,
        momentum = 0.02,
        gamma = 1.3,
        lambda_sparse = 0,
        optimizer_fn = optim.Adam,
        optimizer_params = dict(lr = 2.5e-2, weight_decay = 1e-5),
        mask_type = "entmax",
        scheduler_params = dict(
            mode = "min", patience = 5, min_lr = 1e-5, factor = 0.9, verbose=1),
        scheduler_fn = torch.optim.lr_scheduler.ReduceLROnPlateau,
        seed = seed,
        verbose = 10
    )

    ### Model ###
    model = TabNetRegressor(**tabnet_params)

    # Build a fresh archive ('w' truncates any earlier tabnet.zip) holding
    # the two saved files under their bare names.
    dir_name = f"../input/moatest1/tabnet/SEED{seed}_FOLD{fold}_tabnet"
    with zipfile.ZipFile('tabnet.zip', 'w') as archive:
        for member in ('model_params.json', 'network.pt'):
            archive.write(os.path.join(dir_name, member), arcname=member)
    model.load_model('tabnet.zip')

    ### Predict on validation ###
    preds_val = model.predict(X_val)
    # Apply sigmoid to the predictions
    preds = 1 / (1 + np.exp(-preds_val))

    ### Save OOF for CV ###
    oof = np.zeros((len(train), len(target_cols)))
    oof[val_mask] = preds

    ### Predict on test ###
    preds_test = model.predict(X_test)
    predictions = 1 / (1 + np.exp(-preds_test))

    return oof, predictions

In [None]:

oof_tabnet = np.zeros(shape=(len(train), len(target_cols)))
predictions_tabnet = np.zeros(shape=(len(test), len(target_cols)))
cv_loss_tabnet = []

# NOTE(review): averaging OOF rows across seeds assumes every
# SEED{seed}_FOLDS.csv lists sig_id in the same order - TODO confirm.
for seed in SEED:
    # Swap in this seed's fold assignment, discarding any earlier 'kfold'.
    train = train.drop(columns='kfold', errors='ignore')
    folds = pd.read_csv(f'../input/moatest1/SEED{seed}_FOLDS.csv')
    train = folds.merge(train, on='sig_id')

    oof_, predictions_ = run_k_fold(NFOLDS, seed)

    loss_ = logloss(train[target_cols].values, oof_)
    print(f"SEED: {seed}, LOSS: {loss_}")
    cv_loss_tabnet.append(loss_)

    # Running average over seeds.
    oof_tabnet += oof_ / len(SEED)
    predictions_tabnet += predictions_ / len(SEED)

In [None]:
# Per-seed TabNet losses, then the loss of the seed-averaged OOF predictions.
print(cv_loss_tabnet)
print(logloss(y_true=train[target_cols].values, y_pred=oof_tabnet))

In [None]:
# CV log loss of each model family, in order: dense NN, 4-head NN, TabNet.
for model_oof in (oof_base, oof_mh, oof_tabnet):
    print(logloss(train[target_cols].values, model_oof))

In [None]:
def loss_to_min(weights):
    """Log loss of the three-model blend for the currently selected target.

    `weights[0]` is the dense-network weight and `weights[1]` the TabNet
    weight; the 4-head network gets the remainder so the three sum to one.
    The per-target OOF vectors and the target column name are read from the
    globals oof_base_i, oof_mh_i, oof_tabnet_i and target_cols_i.
    """
    w_base, w_tabnet = weights[0], weights[1]
    w_mh = 1 - w_base - w_tabnet

    oof_blend = w_base * oof_base_i + w_mh * oof_mh_i + w_tabnet * oof_tabnet_i
    return log_loss(train[target_cols_i].values, oof_blend)

In [None]:
# Fit one pair of blend weights (dense NN, TabNet) per target.
ws = []
bnds = [(0, 0.5), (0, 0.5)]
init_guess = [0.4, 0.2]

for i, target_cols_i in enumerate(target_cols):
    # loss_to_min reads these module-level slices, so rebind them each pass.
    oof_base_i, oof_mh_i, oof_tabnet_i = oof_base[:, i], oof_mh[:, i], oof_tabnet[:, i]

    opt = minimize(loss_to_min, init_guess, bounds = bnds, method = 'L-BFGS-B')
    ws.append(opt.x)

In [None]:
# Apply each target's fitted weights to the OOF and test predictions.
oof_blend = np.zeros(shape=(len(train), len(target_cols)))
pred_blend = np.zeros(shape=(len(test), len(target_cols)))

for i in range(len(target_cols)):
    w_base, w_tabnet = ws[i][0], ws[i][1]
    w_mh = 1 - w_base - w_tabnet  # the 4-head network gets the remainder

    oof_blend[:, i] = w_base * oof_base[:, i] + w_mh * oof_mh[:, i] + w_tabnet * oof_tabnet[:, i]
    pred_blend[:, i] = (
        w_base * predictions_base[:, i]
        + w_mh * predictions_mh[:, i]
        + w_tabnet * predictions_tabnet[:, i]
    )

In [None]:
# CV log loss of the blended OOF predictions.
print(logloss(y_true=train[target_cols].values, y_pred=oof_blend))

In [None]:
# Test rows with the blended predictions attached as target columns.
mytest = pd.concat(
    [test, pd.DataFrame(pred_blend, index=test.index, columns=target_cols)],
    axis=1,
)

In [None]:
# Left-join the predictions onto every test sig_id. Rows without predictions
# (the ctl_vehicle rows filtered out earlier) are filled with 0.
submission_cols = ['sig_id'] + target_cols
sub = pd.merge(test_features[['sig_id']], mytest[submission_cols], how='left', on='sig_id')
sub = sub.fillna(0)
sub.to_csv('submission.csv', index=False)