In [None]:
!pip install -q --no-index --find-links /kaggle/input/pytorchtabnet/pytorch_tabnet-2.0.0-py3-none-any.whl pytorch-tabnet

In [None]:
!pip install optuna -q

In [None]:
import os
import random
import sys

sys.path.append('../input/iterative-stratification/iterative-stratification-master')

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from time import time

import optuna

import torch
import torch.nn.functional as F
from torch import nn, optim, Tensor
from torch.utils.data import DataLoader
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor

from sklearn.decomposition import PCA
from sklearn.metrics import log_loss
from sklearn.preprocessing import QuantileTransformer

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Sets ``PYTHONHASHSEED`` and seeds Python's ``random``, NumPy's global
    generator and PyTorch (CPU generator, current CUDA device, all CUDA
    devices). The cuDNN ``benchmark`` / ``deterministic`` flags are not
    changed here.

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Every library-level seeding function receives the same value.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

In [None]:
def preprocess(df):
    """Encode the experiment-design columns as numbers and drop ``sig_id``.

    * ``cp_type``: ``'trt_cp'`` -> 0, ``'ctl_vehicle'`` -> 1
    * ``cp_dose``: ``'D1'`` -> 0, ``'D2'`` -> 1
    * ``cp_time``: ``int(x / 24 - 2)``, i.e. 24/48/72 -> -1/0/1

    The columns are replaced with plain column assignment rather than
    ``df.loc[:, col] = ...``. The ``.loc`` form writes into the existing
    (string) column on recent pandas versions and leaves it with object
    dtype, which makes ``df.values`` an object array that cannot be turned
    into a float tensor.

    Args:
        df: feature frame containing ``sig_id``, ``cp_type``, ``cp_dose`` and
            ``cp_time``. It is modified in place.

    Returns:
        The same ``df`` object, for convenience.
    """
    df['cp_type'] = df['cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})
    df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
    df['cp_time'] = df['cp_time'].map(lambda x: int(x / 24 - 2))  # -1, 0, 1
    del df['sig_id']
    return df

In [None]:
def mapping_filter(train, test, scored, nonscored, drug):
    """Preprocess the feature frames and keep only compound-treated rows.

    ``train`` and ``test`` are passed through ``preprocess``. Rows of
    ``train``, ``scored``, ``nonscored`` and ``drug`` are then restricted to
    those where the preprocessed ``train['cp_type']`` equals 0, and their
    indices are reset. ``sig_id`` is removed from the two target frames.
    ``test`` is not filtered.

    Returns:
        ``(train, test, scored, nonscored, drug)`` after the steps above.
    """
    train, test = preprocess(train), preprocess(test)

    # One boolean mask, computed before any frame is filtered.
    is_treated = train['cp_type'] == 0

    def _keep_treated(frame):
        return frame.loc[is_treated].reset_index(drop=True)

    scored, nonscored, drug, train = (
        _keep_treated(frame) for frame in (scored, nonscored, drug, train)
    )

    for target_frame in (scored, nonscored):
        del target_frame['sig_id']

    return train, test, scored, nonscored, drug

In [None]:
def qt_transform(train, test, SEED = 42):
    """Quantile-transform two positional column groups to a normal distribution.

    The groups are ``train.columns[3:775]`` and ``train.columns[775:875]``
    (presumably the gene-expression and cell-viability features; confirm
    against the column layout). For each group the transformer is fitted on
    ``train`` only and then applied to both frames, which are modified in
    place.

    Args:
        train: training feature frame.
        test: test feature frame with the same columns.
        SEED: ``random_state`` of the ``QuantileTransformer``.

    Returns:
        ``(train, test)``, the same objects that were passed in.
    """
    transformer = QuantileTransformer(
        n_quantiles=100,
        output_distribution='normal',
        random_state=SEED,
    )

    column_groups = (train.columns[3:775], train.columns[775:875])

    for cols in column_groups:
        # Fit on train only; the test frame reuses the fitted quantiles.
        transformer.fit(train[cols])
        for frame in (train, test):
            frame[cols] = transformer.transform(frame[cols])

    return train, test

In [None]:
class MoADataset:
    """Indexable pairing of a feature matrix with a target matrix.

    Both arrays are indexed as ``array[idx, :]``, so they must be 2-D and
    share their row order.
    """

    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        """Number of rows in the feature matrix."""
        return self.features.shape[0]

    def __getitem__(self, idx):
        """Return row ``idx`` as float tensors under the keys ``'x'`` and ``'y'``."""
        feature_row = self.features[idx, :]
        target_row = self.targets[idx, :]
        return {
            'x': torch.tensor(feature_row, dtype=torch.float),
            'y': torch.tensor(target_row, dtype=torch.float),
        }

class TestDataset:
    """Indexable wrapper around a feature matrix with no targets."""

    def __init__(self, features):
        self.features = features

    def __len__(self):
        """Number of rows in the feature matrix."""
        return self.features.shape[0]

    def __getitem__(self, idx):
        """Return row ``idx`` as a float tensor under the key ``'x'``."""
        feature_row = self.features[idx, :]
        return {'x': torch.tensor(feature_row, dtype=torch.float)}

In [None]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Run one training pass over ``dataloader``.

    For every batch: zero the gradients, compute the loss on the batch moved
    to ``device``, back-propagate, then step the optimizer and the scheduler
    (the scheduler is stepped once per batch, not once per epoch).

    Args:
        model: module to train; switched to training mode.
        optimizer: optimizer updating ``model``'s parameters.
        scheduler: learning-rate scheduler exposing ``step()``.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: iterable of dicts with ``'x'`` and ``'y'`` tensors.
        device: device the batches are moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []

    for batch in dataloader:
        features = batch['x'].to(device)
        labels = batch['y'].to(device)

        optimizer.zero_grad()
        batch_loss = loss_fn(model(features), labels)
        batch_loss.backward()
        optimizer.step()
        scheduler.step()

        batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(dataloader)

def valid_fn(model, loss_fn, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: module to evaluate; switched to eval mode.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: iterable of dicts with ``'x'`` and ``'y'`` tensors.
        device: device the batches are moved to.

    Returns:
        ``(final_loss, valid_preds)``: the sum of per-batch losses divided by
        ``len(dataloader)``, and a NumPy array with the sigmoid of the model
        outputs concatenated over all batches in iteration order.
    """
    model.eval()
    final_loss = 0
    valid_preds = []

    # Nothing here calls backward(), so do not build an autograd graph.
    with torch.no_grad():
        for data in dataloader:
            inputs, targets = data['x'].to(device), data['y'].to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            final_loss += loss.item()
            valid_preds.append(outputs.sigmoid().detach().cpu().numpy())

    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)
    return final_loss, valid_preds

def inference_fn(model, dataloader, device):
    """Predict probabilities for every batch of ``dataloader``.

    Args:
        model: module to run; switched to eval mode.
        dataloader: iterable of dicts with an ``'x'`` tensor.
        device: device the batches are moved to.

    Returns:
        NumPy array with the sigmoid of the model outputs, concatenated over
        all batches in iteration order.
    """
    model.eval()

    with torch.no_grad():
        batch_probs = [
            model(batch['x'].to(device)).sigmoid().detach().cpu().numpy()
            for batch in dataloader
        ]

    return np.concatenate(batch_probs)

In [None]:
class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are smoothed towards 0.5: ``t * (1 - smoothing) + 0.5 * smoothing``.

    Args:
        weight: optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``'mean'``, ``'sum'``, or any other value to get the
            unreduced element-wise loss.
        smoothing: smoothing strength, ``0 <= smoothing < 1``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        """Return the smoothed targets; ``n_labels`` is accepted but unused."""
        assert 0 <= smoothing < 1

        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing

        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE-with-logits loss with ``self.reduction``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)

        # Ask for the element-wise loss. The functional defaults to
        # reduction='mean', which would hand back a scalar and turn the
        # 'sum' / unreduced options below into the mean.
        loss = F.binary_cross_entropy_with_logits(
            inputs, targets, self.weight, reduction='none')

        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'mean':
            return loss.mean()
        return loss

In [None]:
class Model_2l(nn.Module):
    """MLP with two hidden layers (2400 and 800 units).

    Each stage is BatchNorm -> (Dropout) -> weight-normalised Linear. The
    first stage has no dropout. Hidden activations are leaky ReLU and the
    output is raw logits. Attribute names define the checkpoint keys and must
    not change.
    """

    def __init__(self, num_features, num_targets):
        super().__init__()
        self.hidden_size = [2400, 800]
        self.dropout_rate = 0.27
        width1, width2 = self.hidden_size
        drop_p = self.dropout_rate

        # Stage 1: input -> width1 (no dropout on the raw features).
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, width1))

        # Stage 2: width1 -> width2.
        self.batch_norm2 = nn.BatchNorm1d(width1)
        self.dropout2 = nn.Dropout(drop_p)
        self.dense2 = nn.utils.weight_norm(nn.Linear(width1, width2))

        # Output stage: width2 -> logits.
        self.batch_norm3 = nn.BatchNorm1d(width2)
        self.dropout3 = nn.Dropout(drop_p)
        self.dense3 = nn.utils.weight_norm(nn.Linear(width2, num_targets))

    def forward(self, x):
        """Map a ``(batch, num_features)`` tensor to ``(batch, num_targets)`` logits."""
        hidden = F.leaky_relu(self.dense1(self.batch_norm1(x)))
        hidden = F.leaky_relu(self.dense2(self.dropout2(self.batch_norm2(hidden))))
        return self.dense3(self.dropout3(self.batch_norm3(hidden)))

In [None]:
class Model_3l(nn.Module):
    """MLP with three hidden layers (1700, 600 and 1000 units).

    Each stage is BatchNorm -> Dropout -> weight-normalised Linear. Hidden
    activations are leaky ReLU and the output is raw logits. Attribute names
    define the checkpoint keys and must not change.
    """

    def __init__(self, num_features, num_targets):
        super().__init__()
        self.hidden_size = [1700, 600, 1000]
        self.dropout_rate = 0.21
        width1, width2, width3 = self.hidden_size
        drop_p = self.dropout_rate

        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(drop_p)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, width1))

        self.batch_norm2 = nn.BatchNorm1d(width1)
        self.dropout2 = nn.Dropout(drop_p)
        self.dense2 = nn.utils.weight_norm(nn.Linear(width1, width2))

        self.batch_norm3 = nn.BatchNorm1d(width2)
        self.dropout3 = nn.Dropout(drop_p)
        self.dense3 = nn.utils.weight_norm(nn.Linear(width2, width3))

        self.batch_norm4 = nn.BatchNorm1d(width3)
        self.dropout4 = nn.Dropout(drop_p)
        self.dense4 = nn.utils.weight_norm(nn.Linear(width3, num_targets))

    def recalibrate_layer(self, layer):
        """Replace NaN entries in ``layer.weight_v`` and ``layer.weight``.

        For each of the two attributes that contains a NaN, a message is
        printed, NaNs become 0 and 1e-7 is added to every entry.
        ``weight_v`` is re-wrapped as a new ``Parameter``; ``weight`` is
        assigned as a plain tensor.
        """
        direction = layer.weight_v
        if torch.isnan(direction).any():
            print ('recalibrate layer.weight_v')
            cleaned = torch.where(torch.isnan(direction), torch.zeros_like(direction), direction)
            layer.weight_v = torch.nn.Parameter(cleaned + 1e-7)

        weight = layer.weight
        if torch.isnan(weight).any():
            print ('recalibrate layer.weight')
            layer.weight = torch.where(torch.isnan(weight), torch.zeros_like(weight), weight) + 1e-7

    def forward(self, x):
        """Map a ``(batch, num_features)`` tensor to ``(batch, num_targets)`` logits."""
        hidden_stages = (
            (self.batch_norm1, self.dropout1, self.dense1),
            (self.batch_norm2, self.dropout2, self.dense2),
            (self.batch_norm3, self.dropout3, self.dense3),
        )
        for norm, drop, dense in hidden_stages:
            x = drop(norm(x))
            self.recalibrate_layer(dense)
            x = F.leaky_relu(dense(x))

        # The output stage is not recalibrated.
        return self.dense4(self.dropout4(self.batch_norm4(x)))

In [None]:
class Model_4l(nn.Module):
    """MLP with four hidden layers (1400, 700, 300 and 700 units).

    Each stage is BatchNorm -> Dropout -> weight-normalised Linear. Hidden
    activations are leaky ReLU and the output is raw logits. Attribute names
    define the checkpoint keys and must not change.
    """

    def __init__(self, num_features, num_targets):
        super().__init__()
        self.hidden_size = [1400, 700, 300, 700]
        self.dropout_rate = 0.16
        width1, width2, width3, width4 = self.hidden_size
        drop_p = self.dropout_rate

        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(drop_p)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, width1))

        self.batch_norm2 = nn.BatchNorm1d(width1)
        self.dropout2 = nn.Dropout(drop_p)
        self.dense2 = nn.utils.weight_norm(nn.Linear(width1, width2))

        self.batch_norm3 = nn.BatchNorm1d(width2)
        self.dropout3 = nn.Dropout(drop_p)
        self.dense3 = nn.utils.weight_norm(nn.Linear(width2, width3))

        self.batch_norm4 = nn.BatchNorm1d(width3)
        self.dropout4 = nn.Dropout(drop_p)
        self.dense4 = nn.utils.weight_norm(nn.Linear(width3, width4))

        self.batch_norm5 = nn.BatchNorm1d(width4)
        self.dropout5 = nn.Dropout(drop_p)
        self.dense5 = nn.utils.weight_norm(nn.Linear(width4, num_targets))

    def recalibrate_layer(self, layer):
        """Replace NaN entries in ``layer.weight_v`` and ``layer.weight``.

        For each of the two attributes that contains a NaN, a message is
        printed, NaNs become 0 and 1e-7 is added to every entry.
        ``weight_v`` is re-wrapped as a new ``Parameter``; ``weight`` is
        assigned as a plain tensor.
        """
        direction = layer.weight_v
        if torch.isnan(direction).any():
            print ('recalibrate layer.weight_v')
            cleaned = torch.where(torch.isnan(direction), torch.zeros_like(direction), direction)
            layer.weight_v = torch.nn.Parameter(cleaned + 1e-7)

        weight = layer.weight
        if torch.isnan(weight).any():
            print ('recalibrate layer.weight')
            layer.weight = torch.where(torch.isnan(weight), torch.zeros_like(weight), weight) + 1e-7

    def forward(self, x):
        """Map a ``(batch, num_features)`` tensor to ``(batch, num_targets)`` logits."""
        hidden_stages = (
            (self.batch_norm1, self.dropout1, self.dense1),
            (self.batch_norm2, self.dropout2, self.dense2),
            (self.batch_norm3, self.dropout3, self.dense3),
            (self.batch_norm4, self.dropout4, self.dense4),
        )
        for norm, drop, dense in hidden_stages:
            x = drop(norm(x))
            self.recalibrate_layer(dense)
            x = F.leaky_relu(dense(x))

        # The output stage is not recalibrated.
        return self.dense5(self.dropout5(self.batch_norm5(x)))

In [None]:
class Model_5l(nn.Module):
    """MLP with five hidden layers (1100, 1900, 100, 200 and 2000 units).

    Each stage is BatchNorm -> Dropout -> weight-normalised Linear. Hidden
    activations are leaky ReLU and the output is raw logits. Attribute names
    define the checkpoint keys and must not change.
    """

    def __init__(self, num_features, num_targets):
        super().__init__()
        self.hidden_size = [1100, 1900, 100, 200, 2000]
        self.dropout_rate = 0.18
        width1, width2, width3, width4, width5 = self.hidden_size
        drop_p = self.dropout_rate

        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(drop_p)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, width1))

        self.batch_norm2 = nn.BatchNorm1d(width1)
        self.dropout2 = nn.Dropout(drop_p)
        self.dense2 = nn.utils.weight_norm(nn.Linear(width1, width2))

        self.batch_norm3 = nn.BatchNorm1d(width2)
        self.dropout3 = nn.Dropout(drop_p)
        self.dense3 = nn.utils.weight_norm(nn.Linear(width2, width3))

        self.batch_norm4 = nn.BatchNorm1d(width3)
        self.dropout4 = nn.Dropout(drop_p)
        self.dense4 = nn.utils.weight_norm(nn.Linear(width3, width4))

        self.batch_norm5 = nn.BatchNorm1d(width4)
        self.dropout5 = nn.Dropout(drop_p)
        self.dense5 = nn.utils.weight_norm(nn.Linear(width4, width5))

        self.batch_norm6 = nn.BatchNorm1d(width5)
        self.dropout6 = nn.Dropout(drop_p)
        self.dense6 = nn.utils.weight_norm(nn.Linear(width5, num_targets))

    def recalibrate_layer(self, layer):
        """Replace NaN entries in ``layer.weight_v`` and ``layer.weight``.

        For each of the two attributes that contains a NaN, a message is
        printed, NaNs become 0 and 1e-7 is added to every entry.
        ``weight_v`` is re-wrapped as a new ``Parameter``; ``weight`` is
        assigned as a plain tensor.
        """
        direction = layer.weight_v
        if torch.isnan(direction).any():
            print ('recalibrate layer.weight_v')
            cleaned = torch.where(torch.isnan(direction), torch.zeros_like(direction), direction)
            layer.weight_v = torch.nn.Parameter(cleaned + 1e-7)

        weight = layer.weight
        if torch.isnan(weight).any():
            print ('recalibrate layer.weight')
            layer.weight = torch.where(torch.isnan(weight), torch.zeros_like(weight), weight) + 1e-7

    def forward(self, x):
        """Map a ``(batch, num_features)`` tensor to ``(batch, num_targets)`` logits."""
        hidden_stages = (
            (self.batch_norm1, self.dropout1, self.dense1),
            (self.batch_norm2, self.dropout2, self.dense2),
            (self.batch_norm3, self.dropout3, self.dense3),
            (self.batch_norm4, self.dropout4, self.dense4),
            (self.batch_norm5, self.dropout5, self.dense5),
        )
        for norm, drop, dense in hidden_stages:
            x = drop(norm(x))
            self.recalibrate_layer(dense)
            x = F.leaky_relu(dense(x))

        # The output stage is not recalibrated.
        return self.dense6(self.dropout6(self.batch_norm6(x)))

In [None]:
class Model_rs(nn.Module):
    """Two-hidden-layer MLP (700 and 900 units) with concatenation skips.

    The input of each stage is concatenated onto that stage's activation, so
    stage 2 sees ``700 + num_features`` columns and the output stage sees
    ``900 + 700 + num_features`` columns. Each stage is BatchNorm -> Dropout
    -> weight-normalised Linear and the output is raw logits. Attribute names
    define the checkpoint keys and must not change.
    """

    def __init__(self, num_features, num_targets):
        super().__init__()
        self.hidden_size = [700, 900]
        self.dropout_rate = 0.34
        width1, width2 = self.hidden_size
        drop_p = self.dropout_rate

        # Input widths grow because earlier inputs are concatenated back in.
        stage2_in = width1 + num_features
        stage3_in = width2 + width1 + num_features

        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(drop_p)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, width1))

        self.batch_norm2 = nn.BatchNorm1d(stage2_in)
        self.dropout2 = nn.Dropout(drop_p)
        self.dense2 = nn.utils.weight_norm(nn.Linear(stage2_in, width2))

        self.batch_norm3 = nn.BatchNorm1d(stage3_in)
        self.dropout3 = nn.Dropout(drop_p)
        self.dense3 = nn.utils.weight_norm(nn.Linear(stage3_in, num_targets))

    def recalibrate_layer(self, layer):
        """Replace NaN entries in ``layer.weight_v`` and ``layer.weight``.

        For each of the two attributes that contains a NaN, a message is
        printed, NaNs become 0 and 1e-7 is added to every entry.
        ``weight_v`` is re-wrapped as a new ``Parameter``; ``weight`` is
        assigned as a plain tensor.
        """
        direction = layer.weight_v
        if torch.isnan(direction).any():
            print ('recalibrate layer.weight_v')
            cleaned = torch.where(torch.isnan(direction), torch.zeros_like(direction), direction)
            layer.weight_v = torch.nn.Parameter(cleaned + 1e-7)

        weight = layer.weight
        if torch.isnan(weight).any():
            print ('recalibrate layer.weight')
            layer.weight = torch.where(torch.isnan(weight), torch.zeros_like(weight), weight) + 1e-7

    def forward(self, x):
        """Map a ``(batch, num_features)`` tensor to ``(batch, num_targets)`` logits."""
        # Stage 1: activation followed by the raw input.
        hidden = self.dropout1(self.batch_norm1(x))
        self.recalibrate_layer(self.dense1)
        hidden = F.leaky_relu(self.dense1(hidden))
        stage1_out = torch.cat((hidden, x), dim=1)

        # Stage 2: activation followed by everything stage 2 received.
        hidden = self.dropout2(self.batch_norm2(stage1_out))
        self.recalibrate_layer(self.dense2)
        hidden = F.leaky_relu(self.dense2(hidden))
        stage2_out = torch.cat((hidden, stage1_out), dim=1)

        # The output stage is not recalibrated.
        return self.dense3(self.dropout3(self.batch_norm3(stage2_out)))

In [None]:
def mean_log_loss(y_true, y_pred):
    """Average of the per-column binary log losses.

    Predictions are clipped to ``[1e-15, 1 - 1e-15]`` and ``log_loss`` is then
    evaluated on every column of ``y_true`` / ``y_pred``. The number of
    columns is taken from ``y_true`` rather than being fixed at 206.
    ``labels=[0, 1]`` is passed so a column whose true values are all one
    class is still scored instead of raising.

    Args:
        y_true: 2-D array of 0/1 targets, shape ``(n_samples, n_targets)``.
        y_pred: 2-D array of predicted probabilities with the same shape.

    Returns:
        Mean of the column-wise log losses.
    """
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    n_targets = y_true.shape[1]
    return np.mean([
        log_loss(y_true[:, target], y_pred[:, target], labels=[0, 1])
        for target in range(n_targets)
    ])

In [None]:
class LogitsLogLoss(Metric):
    """
    Log loss computed from raw scores: a sigmoid is applied to the
    predictions before the loss is evaluated.
    """

    def __init__(self):
        self._maximize = False
        self._name = "logits_ll"

    def __call__(self, y_true, y_pred):
        """
        Compute the mean binary log loss of sigmoid(``y_pred``).

        Parameters
        ----------
        y_true: np.ndarray
            Target matrix or vector
        y_pred: np.ndarray
            Raw (pre-sigmoid) score matrix or vector

        Returns
        -------
            float
            LogLoss of predictions vs targets.
        """
        eps = 1e-15  # keeps both logarithms finite
        probs = 1 / (1 + np.exp(-y_pred))
        negative_term = (1 - y_true) * np.log(1 - probs + eps)
        positive_term = y_true * np.log(probs + eps)
        return np.mean(-(negative_term + positive_term))

In [None]:
# Competition inputs, read from the lish-moa dataset directory.
train_features = pd.read_csv('../input/lish-moa/train_features.csv')
test_features = pd.read_csv('../input/lish-moa/test_features.csv')
# Target tables and the per-row drug ids used for fold assignment below.
scored = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
nonscored = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')
drug = pd.read_csv('../input/lish-moa/train_drug.csv')

# Submission template; only referenced by commented-out code in the visible cells.
sample_submission = pd.read_csv('../input/lish-moa/sample_submission.csv')

In [None]:
# Shapes of the raw feature tables, before any preprocessing.
train_features.shape, test_features.shape

In [None]:
# NOTE(review): identical to the previous cell; one of the two can be removed.
train_features.shape, test_features.shape

In [None]:
# Seed for fold assignment; NOTE the inference loops below reuse the name SEED as a loop variable.
SEED = 42
# Number of cross-validation folds; one checkpoint exists per (seed, fold).
FOLDS = 7
# Seeds of the pre-trained checkpoints that are loaded and averaged.
SEEDS = [1, 2, 3, 4, 5]

# Drug and MultiLabel Stratification
[https://www.kaggle.com/c/lish-moa/discussion/195195](https://www.kaggle.com/c/lish-moa/discussion/195195)

In [None]:
# Every column of `scored` after the leading sig_id column is a target.
targets = scored.columns[1:]
scored = scored.merge(drug, on='sig_id', how='left')

# LOCATE DRUGS: split the drug ids by row count (18 rows or fewer vs. more).
vc = scored.drug_id.value_counts()
vc1 = vc.loc[vc <= 18].index.sort_values()
vc2 = vc.loc[vc > 18].index.sort_values()

dct1 = {}
dct2 = {}

# STRATIFY DRUGS 18X OR LESS: each drug goes to one fold as a whole,
# stratified on the drug's mean target vector (dct1: drug_id -> fold).
skf = MultilabelStratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
tmp = scored.groupby('drug_id')[targets].mean().loc[vc1]
for fold, (idxT, idxV) in enumerate(skf.split(tmp, tmp[targets])):
    dd = {k: fold for k in tmp.index[idxV].values}
    dct1.update(dd)

# STRATIFY DRUGS MORE THAN 18X: these are split row by row
# (dct2: sig_id -> fold).
skf = MultilabelStratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
tmp = scored.loc[scored.drug_id.isin(vc2)].reset_index(drop=True)
for fold, (idxT, idxV) in enumerate(skf.split(tmp, tmp[targets])):
    dd = {k: fold for k in tmp.sig_id[idxV].values}
    dct2.update(dd)

# ASSIGN FOLDS: per-drug folds first, then per-row folds where still missing.
scored['fold'] = scored.drug_id.map(dct1)
scored.loc[scored.fold.isna(), 'fold'] = (
    scored.loc[scored.fold.isna(), 'sig_id'].map(dct2)
)
scored['fold'] = scored.fold.astype('int8')

# Delete ctl_vehicle rows and preprocess

In [None]:
# Encode the design columns and drop the control (ctl_vehicle) training rows.
(train_features, test_features, scored, nonscored, drug) = mapping_filter(
    train_features, test_features, scored, nonscored, drug
)

# re-assign fold index

In [None]:
# Positional row indices per fold, computed on the filtered `scored` frame.
fold_array = scored.fold.values

train_index = [np.where(fold_array != fold)[0] for fold in range(FOLDS)]
valid_index = [np.where(fold_array == fold)[0] for fold in range(FOLDS)]

In [None]:
# Remove the fold bookkeeping columns so only targets remain. Rebinding
# (instead of inplace=True) with errors='ignore' keeps the cell re-runnable.
scored = scored.drop(columns=['drug_id', 'fold'], errors='ignore')

# Gauss rank scaling (quantile transform to a normal distribution)

In [None]:
# Quantile-transform the feature columns; the transformer is fitted on train only.
train_features, test_features = qt_transform(train_features, test_features)

In [None]:
# Plain NumPy arrays for the dataset wrappers and the metric.
train = train_features.to_numpy()
test = test_features.to_numpy()
train_targets = scored.to_numpy()

In [None]:
# Names of the scored target columns, as a plain list.
TARGET_COLUMNS = list(targets)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS = 25  # only referenced by commented-out training code in the visible cells
BATCH_SIZE = 128  # batch size of every DataLoader below
FEATURES = train.shape[1]  # number of model input columns
NTARGETS = train_targets.shape[1]  # number of target columns
EARLY_STOP = True  # NOTE(review): not referenced in the visible cells

In [None]:
# One entry per model family is appended by each inference cell below.
# NOTE(review): re-running an inference cell appends a duplicate entry.
blend_oof_pred = []
blend_test_preds = []

In [None]:
%%time
# 2l_nn
oof_pred = []
oof_score = []
test_preds = np.zeros((len(SEEDS), FOLDS, test.shape[0], NTARGETS))

for j, SEED in enumerate(SEEDS):
    set_seed(SEED)
    
    models = []

    oof_pred_ = np.zeros((train.shape[0], NTARGETS))
    for fold in range(FOLDS):

        valid_X = train[valid_index[fold]]
        valid_y = train_targets[valid_index[fold]]

        valid_ds = MoADataset(valid_X, valid_y)

        validloader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False)

        test_ds = TestDataset(test)
        testloader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
    
        filepath = '../input/moa-torch-train-2l-nn/'
        filename = f'2l_nn_seed_{SEED}_fold_{fold}.pth'
        checkpoint = os.path.join(filepath, filename)
    
        model = Model_2l(num_features=FEATURES,
                         num_targets=NTARGETS)
        
        model.load_state_dict(torch.load(checkpoint))

        model.to(DEVICE)

        models.append(model)
        
        loss_fn = nn.BCEWithLogitsLoss()
        
        valid_loss, valid_preds = valid_fn(models[fold], loss_fn, validloader, DEVICE)
        
        oof_pred_[valid_index[fold]] = valid_preds
        test_preds[j, fold, :, :] = inference_fn(models[fold], testloader, DEVICE)

    oof_score_ = mean_log_loss(train_targets, oof_pred_)
    oof_score.append(oof_score_)
    oof_pred.append(oof_pred_)

oof_pred = np.mean(oof_pred, axis = 0, dtype=np.float64)
mean_preds = np.mean(test_preds, axis = (0, 1), dtype=np.float64)
    
seed_log_loss = mean_log_loss(train_targets, oof_pred)

blend_oof_pred.append(oof_pred)
blend_test_preds.append(mean_preds)

print('2L_NN')
for j, SEED in enumerate(SEEDS):
    print(f'SEED:{SEED} Our out of folds mean log loss score is {oof_score[j]}')
    
print(f'Our out of folds log loss for our seed blend model is {seed_log_loss}')

In [None]:
%%time
# 3l_nn
oof_pred = []
oof_score = []
test_preds = np.zeros((len(SEEDS), FOLDS, test.shape[0], NTARGETS))

for j, SEED in enumerate(SEEDS):
    set_seed(SEED)
    
    models = []

    oof_pred_ = np.zeros((train.shape[0], NTARGETS))
    for fold in range(FOLDS):

        valid_X = train[valid_index[fold]]
        valid_y = train_targets[valid_index[fold]]

        valid_ds = MoADataset(valid_X, valid_y)

        validloader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False)

        test_ds = TestDataset(test)
        testloader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
    
        filepath = '../input/moa-torch-train-3l-nn/'
        filename = f'3l_nn_seed_{SEED}_fold_{fold}.pth'
        checkpoint = os.path.join(filepath, filename)
    
        model = Model_3l(num_features=FEATURES,
                         num_targets=NTARGETS)
        
        model.load_state_dict(torch.load(checkpoint))

        model.to(DEVICE)

        models.append(model)
        
        loss_fn = nn.BCEWithLogitsLoss()
        
        valid_loss, valid_preds = valid_fn(models[fold], loss_fn, validloader, DEVICE)
        
        oof_pred_[valid_index[fold]] = valid_preds
        test_preds[j, fold, :, :] = inference_fn(models[fold], testloader, DEVICE)

    oof_score_ = mean_log_loss(train_targets, oof_pred_)
    oof_score.append(oof_score_)
    oof_pred.append(oof_pred_)

oof_pred = np.mean(oof_pred, axis = 0, dtype=np.float64)
mean_preds = np.mean(test_preds, axis = (0, 1), dtype=np.float64)
    
seed_log_loss = mean_log_loss(train_targets, oof_pred)

blend_oof_pred.append(oof_pred)
blend_test_preds.append(mean_preds)

print('3L_NN')
for j, SEED in enumerate(SEEDS):
    print(f'SEED:{SEED} Our out of folds mean log loss score is {oof_score[j]}')
    
print(f'Our out of folds log loss for our seed blend model is {seed_log_loss}')

In [None]:
%%time
# 4l_nn
oof_pred = []
oof_score = []
test_preds = np.zeros((len(SEEDS), FOLDS, test.shape[0], NTARGETS))

for j, SEED in enumerate(SEEDS):
    set_seed(SEED)
    
    models = []

    oof_pred_ = np.zeros((train.shape[0], NTARGETS))
    for fold in range(FOLDS):

        valid_X = train[valid_index[fold]]
        valid_y = train_targets[valid_index[fold]]

        valid_ds = MoADataset(valid_X, valid_y)

        validloader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False)

        test_ds = TestDataset(test)
        testloader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
    
        filepath = '../input/moa-torch-train-4l-nn/'
        filename = f'4l_nn_seed_{SEED}_fold_{fold}.pth'
        checkpoint = os.path.join(filepath, filename)
    
        model = Model_4l(num_features=FEATURES,
                         num_targets=NTARGETS)
        
        model.load_state_dict(torch.load(checkpoint))

        model.to(DEVICE)

        models.append(model)
        
        loss_fn = nn.BCEWithLogitsLoss()
        
        valid_loss, valid_preds = valid_fn(models[fold], loss_fn, validloader, DEVICE)
        
        oof_pred_[valid_index[fold]] = valid_preds
        test_preds[j, fold, :, :] = inference_fn(models[fold], testloader, DEVICE)

    oof_score_ = mean_log_loss(train_targets, oof_pred_)
    oof_score.append(oof_score_)
    oof_pred.append(oof_pred_)

oof_pred = np.mean(oof_pred, axis = 0, dtype=np.float64)
mean_preds = np.mean(test_preds, axis = (0, 1), dtype=np.float64)
    
seed_log_loss = mean_log_loss(train_targets, oof_pred)

blend_oof_pred.append(oof_pred)
blend_test_preds.append(mean_preds)

print('4L_NN')
for j, SEED in enumerate(SEEDS):
    print(f'SEED:{SEED} Our out of folds mean log loss score is {oof_score[j]}')
    
print(f'Our out of folds log loss for our seed blend model is {seed_log_loss}')

In [None]:
%%time
# 5l_nn
oof_pred = []
oof_score = []
test_preds = np.zeros((len(SEEDS), FOLDS, test.shape[0], NTARGETS))

for j, SEED in enumerate(SEEDS):
    set_seed(SEED)
    
    models = []

    oof_pred_ = np.zeros((train.shape[0], NTARGETS))
    for fold in range(FOLDS):

        valid_X = train[valid_index[fold]]
        valid_y = train_targets[valid_index[fold]]

        valid_ds = MoADataset(valid_X, valid_y)

        validloader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False)

        test_ds = TestDataset(test)
        testloader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
    
        filepath = '../input/moa-torch-train-5l-nn/'
        filename = f'5l_nn_seed_{SEED}_fold_{fold}.pth'
        checkpoint = os.path.join(filepath, filename)
    
        model = Model_5l(num_features=FEATURES,
                         num_targets=NTARGETS)
        
        model.load_state_dict(torch.load(checkpoint))

        model.to(DEVICE)

        models.append(model)
        
        loss_fn = nn.BCEWithLogitsLoss()
        
        valid_loss, valid_preds = valid_fn(models[fold], loss_fn, validloader, DEVICE)
        
        oof_pred_[valid_index[fold]] = valid_preds
        test_preds[j, fold, :, :] = inference_fn(models[fold], testloader, DEVICE)

    oof_score_ = mean_log_loss(train_targets, oof_pred_)
    oof_score.append(oof_score_)
    oof_pred.append(oof_pred_)

oof_pred = np.mean(oof_pred, axis = 0, dtype=np.float64)
mean_preds = np.mean(test_preds, axis = (0, 1), dtype=np.float64)
    
seed_log_loss = mean_log_loss(train_targets, oof_pred)

blend_oof_pred.append(oof_pred)
blend_test_preds.append(mean_preds)

print('4L_NN')
for j, SEED in enumerate(SEEDS):
    print(f'SEED:{SEED} Our out of folds mean log loss score is {oof_score[j]}')
    
print(f'Our out of folds log loss for our seed blend model is {seed_log_loss}')

In [None]:
%%time
# rs_nn
oof_pred = []
oof_score = []
test_preds = np.zeros((len(SEEDS), FOLDS, test.shape[0], NTARGETS))

for j, SEED in enumerate(SEEDS):
    set_seed(SEED)
    
    models = []

    oof_pred_ = np.zeros((train.shape[0], NTARGETS))
    for fold in range(FOLDS):

        valid_X = train[valid_index[fold]]
        valid_y = train_targets[valid_index[fold]]

        valid_ds = MoADataset(valid_X, valid_y)

        validloader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False)

        test_ds = TestDataset(test)
        testloader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
    
        filepath = '../input/moa-torch-train-rs-nn/'
        filename = f'rs_nn_seed_{SEED}_fold_{fold}.pth'
        checkpoint = os.path.join(filepath, filename)
    
        model = Model_rs(num_features=FEATURES,
                         num_targets=NTARGETS)
        
        model.load_state_dict(torch.load(checkpoint))

        model.to(DEVICE)

        models.append(model)
        
        loss_fn = nn.BCEWithLogitsLoss()
        
        valid_loss, valid_preds = valid_fn(models[fold], loss_fn, validloader, DEVICE)
        
        oof_pred_[valid_index[fold]] = valid_preds
        test_preds[j, fold, :, :] = inference_fn(models[fold], testloader, DEVICE)

    oof_score_ = mean_log_loss(train_targets, oof_pred_)
    oof_score.append(oof_score_)
    oof_pred.append(oof_pred_)

oof_pred = np.mean(oof_pred, axis = 0, dtype=np.float64)
mean_preds = np.mean(test_preds, axis = (0, 1), dtype=np.float64)
    
seed_log_loss = mean_log_loss(train_targets, oof_pred)

blend_oof_pred.append(oof_pred)
blend_test_preds.append(mean_preds)


print('RS_NN')
for j, SEED in enumerate(SEEDS):
    print(f'SEED:{SEED} Our out of folds mean log loss score is {oof_score[j]}')
    
print(f'Our out of folds log loss for our seed blend model is {seed_log_loss}')

In [None]:
%%time
# TabNet
oof_pred = []
oof_score = []
test_preds = np.zeros((len(SEEDS), FOLDS, test.shape[0], NTARGETS))

for j, SEED in enumerate(SEEDS):
    set_seed(SEED)
    
    models = []

    oof_pred_ = np.zeros((train.shape[0], NTARGETS))
    for fold in range(FOLDS):

#         train_X = train[train_index[fold]]
#         train_y = train_targets[train_index[fold]]

        valid_X = train[valid_index[fold]]
        valid_y = train_targets[valid_index[fold]]

        tabnet_params = dict(
            n_d = 32,
            n_a = 32,
            n_steps = 1,
            gamma = 1.3,
            lambda_sparse = 0,
            mask_type = "entmax",
            optimizer_fn = optim.Adam,
            optimizer_params = dict(lr = 2e-2, weight_decay = 1e-5),
            scheduler_params = dict(
                mode = "min", patience = 40, min_lr = 1e-5, factor = 0.9),
            scheduler_fn = optim.lr_scheduler.ReduceLROnPlateau,

            seed = SEED,
            verbose = 10
        )

#         model = TabNetRegressor(**tabnet_params)

#         models.append(model)

#         print(f'{"="*50} seed: {SEED} - fold: {fold+1} of {FOLDS}')
#         models[fold].fit(X_train = train_X,
#                          y_train = train_y,
#                          eval_set = [(valid_X, valid_y)],
#                          eval_name = ["val"],
#                          eval_metric = ["logits_ll"],
#                          max_epochs = EPOCHS,
#                          patience = 100,
#                          batch_size = 1024,
#                          virtual_batch_size = 32,
#                          num_workers = 1,
#                          drop_last = False,
#                          loss_fn = F.binary_cross_entropy_with_logits)
        filepath = '../input/moa-torch-train-tabnet/'
        filename = f'tn_nn_seed_{SEED}_fold_{fold}.zip'
        checkpoint = os.path.join(filepath, filename)
#         models[fold].save_model(filename)

        model = TabNetRegressor(**tabnet_params)
        model.load_model(checkpoint)

        models.append(model)

        models[fold] = model

        valid_preds = models[fold].predict(valid_X)
#         valid_preds = 1 / (1 + np.exp(-valid_preds))
        valid_preds = torch.sigmoid(torch.as_tensor(valid_preds)).detach().cpu().numpy()
        oof_pred_[valid_index[fold]] = valid_preds

#         test_preds[j, fold, :, :] = models[fold].predict(test)
        test_pred = models[fold].predict(test)
        test_pred = torch.sigmoid(torch.as_tensor(test_pred)).detach().cpu().numpy()
        test_preds[j, fold, :, :] = test_pred

    oof_score_ = mean_log_loss(train_targets, oof_pred_)
    oof_score.append(oof_score_)
    oof_pred.append(oof_pred_)

oof_pred = np.mean(oof_pred, axis = 0, dtype=np.float64)
mean_preds = np.mean(test_preds, axis = (0, 1), dtype=np.float64)
    
seed_log_loss = mean_log_loss(train_targets, oof_pred)

blend_oof_pred.append(oof_pred)
blend_test_preds.append(mean_preds)

print('TabNet')
for j, SEED in enumerate(SEEDS):
    print(f'SEED:{SEED} Our out of folds mean log loss score is {oof_score[j]}')
    
print(f'Our out of folds log loss for our seed blend model is {seed_log_loss}')

In [None]:
# blend_mean_oof_pred = np.mean(blend_oof_pred, axis = 0, dtype=np.float64)
# blend_mean_preds = np.mean(blend_test_preds, axis = 0, dtype=np.float64)

In [None]:
def objective(trial):
    """Optuna objective: out-of-fold mean log loss of a weighted blend.

    One weight in ``[0, 1]`` is sampled per entry of ``blend_oof_pred`` under
    the parameter names ``w0``, ``w1``, ... The weights are normalised by
    their sum and used to average the out-of-fold predictions, which are then
    scored against ``train_targets``.

    Args:
        trial: the ``optuna`` trial used to sample the weights.

    Returns:
        Mean log loss of the blended out-of-fold predictions.

    Raises:
        optuna.TrialPruned: if every sampled weight is zero, because the
            weights cannot be normalised in that case.
    """
    # Follow the number of collected models instead of assuming six.
    n_models = len(blend_oof_pred)

    w = np.zeros(n_models, dtype=np.float64)
    for i in range(n_models):
        w[i] = trial.suggest_float(f'w{i}', 0, 1)

    sum_w = sum(w)
    if sum_w == 0:
        # 0 / 0 would give NaN predictions and make the metric fail.
        raise optuna.TrialPruned()

    weighted_oof = 0
    for i in range(n_models):
        weighted_oof += blend_oof_pred[i] * w[i] / sum_w

    # Named `blend_loss` so the imported sklearn `log_loss` is not shadowed.
    blend_loss = mean_log_loss(train_targets, weighted_oof)

    return blend_loss

In [None]:
# Search blend weights that minimise the out-of-fold log loss. The sampler
# is seeded so repeated runs explore the same sequence of weights.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=500)

In [None]:
# Best (un-normalised) blend weights found by the study, keyed w0, w1, ...
study.best_params

In [None]:
# Objective value (out-of-fold mean log loss) of the best trial.
study.best_value

In [None]:
# Full record of the best trial.
study.best_trial

In [None]:
# blend_log_loss = mean_log_loss(train_targets, blend_mean_oof_pred)
# print(f'Our out of folds log loss for our ensemble model is {blend_log_loss}')

In [None]:
# #submit
# df_test = pd.read_csv('../input/lish-moa/test_features.csv')
# submission = pd.DataFrame(df_test['sig_id'])
# submission[TARGET_COLUMNS] = blend_mean_preds
# # 'cp_type': 'ctl_vehicle': 1
# submission.loc[df_test['cp_type']=='ctl_vehicle', TARGET_COLUMNS] = 0
# submission.to_csv('submission.csv', index=False)