- incorporate drug_id validation in pytorch mlp (mlp version 37)
- tabnet version 45
- svm version 7
- all models are based on new validation scheme
- decide weight ratio by optimization

In [1]:
!pip install --no-index --find-links /kaggle/input/pytorchtabnet/pytorch_tabnet-2.0.0-py3-none-any.whl pytorch-tabnet

Looking in links: /kaggle/input/pytorchtabnet/pytorch_tabnet-2.0.0-py3-none-any.whl
Processing /kaggle/input/pytorchtabnet/pytorch_tabnet-2.0.0-py3-none-any.whl
Installing collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-2.0.0


In [2]:
import sys
!cp ../input/rapids/rapids.0.15.0 /opt/conda/envs/rapids.tar.gz
!cd /opt/conda/envs/ && tar -xzvf rapids.tar.gz > /dev/null
sys.path = ["/opt/conda/envs/rapids/lib/python3.7/site-packages"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib/python3.7"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib"] + sys.path
!cp /opt/conda/envs/rapids/lib/libxgboost.so /opt/conda/lib/

In [3]:
import sys
import os
import pickle
import warnings
import numpy as np
import pandas as pd 
from sklearn import preprocessing
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from tqdm import tqdm_notebook as tqdm
from category_encoders import CountEncoder
from xgboost import XGBClassifier
from sklearn.feature_selection import VarianceThreshold
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import KFold, StratifiedKFold
sys.path.append('../input/multilabelstraifier/')
from ml_stratifiers import MultilabelStratifiedKFold
warnings.filterwarnings('ignore')
from scipy.optimize import minimize, fsolve

import time
import torch
import random
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import tensorflow as tf
from torch.nn.modules.loss import _WeightedLoss

from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor

from sklearn.linear_model import LogisticRegression
from cuml.svm import SVC, SVR

# Preprocess

In [4]:
DATA_DIR = '/kaggle/input/lish-moa/'
train = pd.read_csv(DATA_DIR + 'train_features.csv')
targets = pd.read_csv(DATA_DIR + 'train_targets_scored.csv')
test = pd.read_csv(DATA_DIR + 'test_features.csv')
drug = pd.read_csv(DATA_DIR + 'train_drug.csv')

In [5]:
target_feats = [ i for i in targets.columns if i != "sig_id"]
g_feats = [i for i in train.columns if "g-" in i]
c_feats = [i for i in train.columns if "c-" in i]

In [6]:
cons_train_index = train[train.cp_type!="ctl_vehicle"].index
noncons_test_index = test[test.cp_type=="ctl_vehicle"].index
cons_test_index = test[test.cp_type!="ctl_vehicle"].index

test = test[test.index.isin(cons_test_index)].reset_index(drop=True)
train = train[train.index.isin(cons_train_index)].reset_index(drop=True)
y = targets.drop("sig_id", axis=1).copy()
targets = targets[targets.index.isin(cons_train_index)].reset_index(drop=True)
fn_targets = targets.copy().drop("sig_id", axis=1).to_numpy()

In [7]:
def make_fold(NB_SPLITS, seed):   
    folds = []
    # LOAD FILES
    train_score = targets.merge(drug, on='sig_id', how='left') 

    # LOCATE DRUGS
    vc = train_score.drug_id.value_counts()
    vc1 = vc.loc[vc <= 19].index.sort_values()
    vc2 = vc.loc[vc > 19].index.sort_values()
    
    # STRATIFY DRUGS 18X OR LESS
    dct1 = {}; dct2 = {}
    skf = MultilabelStratifiedKFold(n_splits = NB_SPLITS, shuffle = True, random_state = seed)
    tmp = train_score.groupby('drug_id')[target_feats].mean().loc[vc1]
    for fold,(idxT,idxV) in enumerate(skf.split(tmp,tmp[target_feats])):
        dd = {k:fold for k in tmp.index[idxV].values}
        dct1.update(dd)

    # STRATIFY DRUGS MORE THAN 18X
    skf = MultilabelStratifiedKFold(n_splits = NB_SPLITS, shuffle = True, random_state = seed)
    tmp = train_score.loc[train_score.drug_id.isin(vc2)].reset_index(drop = True)
    for fold,(idxT,idxV) in enumerate(skf.split(tmp,tmp[target_feats])):
        dd = {k:fold for k in tmp.sig_id[idxV].values}
        dct2.update(dd)

    # ASSIGN FOLDS
    train_score['fold'] = train_score.drug_id.map(dct1)
    train_score.loc[train_score.fold.isna(),'fold'] = train_score.loc[train_score.fold.isna(),'sig_id'].map(dct2)
    train_score.fold = train_score.fold.astype('int8')
    folds.append(train_score.fold.values)
    
    return np.array(folds).reshape(-1,1)

In [8]:
mlp_fold = make_fold(7,34)
tab_fold = make_fold(5,14)

In [9]:
check_dist = targets.copy()
check_dist["mlp_fold"] = np.array(mlp_fold).reshape(-1,1)

unbalanced_feats = []
for i in target_feats:
    if np.max(check_dist.loc[:,[i]+["mlp_fold"]].groupby("mlp_fold").sum()).values[0] == check_dist[i].sum() and check_dist[i].sum()>=5:
        unbalanced_feats.append(i)

# Feature engineering

In [10]:
def fe_simple(df, remove_features):
    tmp = df.copy()
    tmp.loc[:, 'cp_dose'] = tmp.loc[:, 'cp_dose'].map({'D1': 0, 'D2': 1})
    tmp.drop(remove_features, axis=1, inplace=True)
    return tmp

def fe_simple2(df):
    tmp = df.copy()
    tmp = pd.get_dummies(tmp, columns=['cp_time','cp_dose'])
    tmp.drop(["cp_type", "sig_id"], axis=1, inplace=True) 
    return tmp

def fe_mlp(df_train, df_test):
    tmp_train = df_train.copy()
    tmp_test = df_test.copy()
    X = tmp_train.iloc[:,4:].copy().values
    select = VarianceThreshold(threshold=0.7)
    X_new = select.fit_transform(X)
    drop_feats = list(np.array(tmp_train.iloc[:,4:].columns)[select.get_support()==False])
    
    tmp_train.drop(drop_feats, axis=1, inplace=True)
    tmp_test.drop(drop_feats, axis=1, inplace=True)

    modg_feats = [i for i in tmp_train.columns if "g-" in i]
    modc_feats = [i for i in tmp_train.columns if "c-" in i]
    
    for i in modc_feats + modg_feats:
        ss = preprocessing.QuantileTransformer(n_quantiles=1000, random_state=0, output_distribution="normal")
        ss.fit(tmp_train[i].values.reshape(-1,1))
        tmp_train[i] = ss.transform(tmp_train[i].values.reshape(-1,1))
        tmp_test[i] = ss.transform(tmp_test[i].values.reshape(-1,1))
    
    c_num = 10
    pca_c_cols = ["pca-c"+str(i+1) for i in range(c_num)]
    pca = PCA(n_components=c_num,random_state=42)
    c_train = pca.fit_transform(tmp_train[modc_feats])
    c_test = pca.transform(tmp_test[modc_feats])
    c_train = pd.DataFrame(c_train, columns=pca_c_cols)
    c_test = pd.DataFrame(c_test, columns=pca_c_cols)

    g_num = 60
    pca_g_cols = ["pca-g"+str(i+1) for i in range(g_num)]
    pca = PCA(n_components=g_num, random_state=42)
    g_train = pca.fit_transform(tmp_train[modg_feats])
    g_test = pca.transform(tmp_test[modg_feats])
    g_train = pd.DataFrame(g_train, columns=pca_g_cols)
    g_test = pd.DataFrame(g_test, columns=pca_g_cols)

    tmp_train = pd.concat([tmp_train, c_train],axis=1)
    tmp_test = pd.concat([tmp_test, c_test],axis=1)
    tmp_train = pd.concat([tmp_train, g_train],axis=1)
    tmp_test = pd.concat([tmp_test, g_test],axis=1)
    
    return tmp_train, tmp_test

def fe_stats(df):
    tmp = df.copy()
    modg_feats = [i for i in tmp.columns if "g-" in i]
    modc_feats = [i for i in tmp.columns if "c-" in i]
    tmp['g_kurt'] = tmp[modg_feats].kurtosis(axis = 1)
    tmp['g_skew'] = tmp[modg_feats].skew(axis = 1)
    tmp['c_kurt'] = tmp[modc_feats].kurtosis(axis = 1)
    tmp['c_skew'] = tmp[modc_feats].skew(axis = 1)
    return tmp

remove_features = ["cp_type" , "sig_id"]

In [11]:
fn_train = fe_simple(train, remove_features)
fn_test = fe_simple(test, remove_features)

# pytorch mlp -----------------------------------
mlp_train, mlp_test = fe_mlp(train, test)
mlp_train = fe_stats(mlp_train)
mlp_test = fe_stats(mlp_test)
mlp_train = fe_simple2(mlp_train)
mlp_test = fe_simple2(mlp_test)
mlp_train["fold"] = mlp_fold
mlp_train = mlp_train.to_numpy()
mlp_test = mlp_test.to_numpy()

# pytorch tabnet ----------------------------------
tab_train = fn_train.copy()
tab_test = fn_test.copy()
tab_train["fold"] = tab_fold

tab_train= tab_train.to_numpy()
tab_test = tab_test.to_numpy()

# svm-----------------------
for i in c_feats + g_feats:
    ss = preprocessing.StandardScaler()
    ss.fit(fn_train[i].values.reshape(-1,1))
    fn_train[i] = ss.transform(fn_train[i].values.reshape(-1,1))
    fn_test[i] = ss.transform(fn_test[i].values.reshape(-1,1))
fn_train["fold"] = tab_fold
fn_train = fn_train.to_numpy()
fn_test = fn_test.to_numpy()
    
print(mlp_train.shape, mlp_test.shape)
print(tab_train.shape, tab_test.shape)
print(fn_train.shape, fn_test.shape)

(21948, 919) (3624, 918)
(21948, 875) (3624, 874)
(21948, 875) (3624, 874)


# 1st mlp

In [12]:
class SmoothCrossEntropyLoss(_WeightedLoss):
    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets, n_classes, smoothing=0.0):
        assert 0 <= smoothing <= 1
        with torch.no_grad():
            targets = targets * (1 - smoothing) + torch.ones_like(targets).to(device) * smoothing / n_classes
        return targets

    def forward(self, inputs, targets):
        targets = SmoothCrossEntropyLoss()._smooth(targets, inputs.shape[1], self.smoothing)

        if self.weight is not None:
            inputs = inputs * self.weight.unsqueeze(0)

        loss = F.binary_cross_entropy_with_logits(inputs, targets)

        return loss

In [13]:
device = "cuda" if torch.cuda.is_available() else "cpu"
batch_size = 128
n_folds=7
train_epochs = 20
smoothing = 0.001
p_min = smoothing
p_max = 1 - smoothing

def mean_log_loss(y_true, y_pred):
    metrics = []
    for i, target in enumerate(target_feats):
        metrics.append(log_loss(y_true[:, i], y_pred[:, i].astype(float), labels=[0,1]))
    return np.mean(metrics)

def seed_everything(seed=1234): 
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
class MoaModel(nn.Module):
    def __init__(self, num_columns, last_num):
        super(MoaModel, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_columns)
        self.dropout1 = nn.Dropout(0.1)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_columns, 1024))
        self.relu1 = nn.LeakyReLU()
        
        self.batch_norm2 = nn.BatchNorm1d(1024)
        self.dropout2 = nn.Dropout(0.1)
        self.dense2 = nn.utils.weight_norm(nn.Linear(1024, 1024))
        self.relu2 = nn.LeakyReLU()
        
        self.batch_norm3 = nn.BatchNorm1d(1024)
        self.dropout3 = nn.Dropout(0.1)
        self.dense3 = nn.utils.weight_norm(nn.Linear(1024, last_num))
        
    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = self.relu1(self.dense1(x))
        
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.relu2(self.dense2(x))
        
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.dense3(x)
        
        return x
    
def modelling_torch(tr, target, te, sample_seed, init_num, last_num):
    seed_everything(seed=sample_seed) 
    X_train = tr.copy()
    y_train = target.copy()
    X_test = te.copy()
    test_len = X_test.shape[0]
    
    mskf=MultilabelStratifiedKFold(n_splits = n_folds, shuffle=True, random_state=224)
    metric = lambda inputs, targets : F.binary_cross_entropy((torch.clamp(torch.sigmoid(inputs), p_min, p_max)), targets)

    models = []
    
    X_test2 = torch.tensor(X_test, dtype=torch.float32)
    test = torch.utils.data.TensorDataset(X_test2) 
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)
    
    oof = np.zeros([len(X_train),y_train.shape[1]])
    oof_targets = np.zeros([len(X_train),y_train.shape[1]])
    pred_value = np.zeros([test_len, y_train.shape[1]])
    
    scores = []
    for fold in range(n_folds):
        valid_index = X_train[:,-1] == fold
        train_index = X_train[:,-1] != fold
        print("Fold "+str(fold+1))
        X_train2 = torch.tensor(X_train[train_index,:], dtype=torch.float32)
        X_valid2 = torch.tensor(X_train[valid_index,:], dtype=torch.float32)
        X_train2 = X_train2[:,:-1]
        X_valid2 = X_valid2[:,:-1]
        
        y_train2 = torch.tensor(y_train[train_index], dtype=torch.float32)
        y_valid2 = torch.tensor(y_train[valid_index], dtype=torch.float32)
        
        train = torch.utils.data.TensorDataset(X_train2, y_train2)
        valid = torch.utils.data.TensorDataset(X_valid2, y_valid2)
        
        train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True) 
        valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)
            
        clf = MoaModel(init_num, last_num)
        loss_fn = SmoothCrossEntropyLoss(smoothing=smoothing)

        optimizer = optim.Adam(clf.parameters(), lr = 0.001, weight_decay=1e-5) 
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
                                              max_lr=1e-2, epochs=train_epochs, steps_per_epoch=len(train_loader))
        
        clf.to(device)
        
        best_val_loss = np.inf
        stop_counts = 0
        for epoch in range(train_epochs):
            start_time = time.time()
            clf.train()
            avg_loss = 0.
            sm_avg_loss = 0.
            
            for x_batch, y_batch in tqdm(train_loader, disable=True):
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)
                y_pred = clf(x_batch) 
                loss = loss_fn(y_pred, y_batch)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()
                avg_loss += loss.item() / len(train_loader)  
                sm_avg_loss += metric(y_pred, y_batch) / len(train_loader) 
                
            clf.eval()
            avg_val_loss = 0.
            sm_avg_val_loss = 0.
            for i, (x_batch, y_batch) in enumerate(valid_loader): 
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)
                y_pred = clf(x_batch).detach()
                avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
                sm_avg_val_loss += metric(y_pred, y_batch) / len(valid_loader)

            elapsed_time = time.time() - start_time 
            #scheduler.step() #avg_val_loss # maybe mistake
                    
            if sm_avg_val_loss < best_val_loss:
                best_val_loss = sm_avg_val_loss
                print('Best: Epoch {} \t loss={:.5f} \t val_loss={:.5f} \t sm_loss={:.5f} \t sm_val_loss={:.5f} \t time={:.2f}s'.format(
                    epoch + 1, avg_loss, avg_val_loss, sm_avg_loss, sm_avg_val_loss, elapsed_time))
                torch.save(clf.state_dict(), 'best-model-parameters.pt')
            else:
                stop_counts += 1
        
        pred_model = MoaModel(init_num, last_num)
        pred_model.load_state_dict(torch.load('best-model-parameters.pt'))         
        pred_model.eval()
        
        # validation check ----------------
        oof_epoch = np.zeros([X_valid2.size(0), y_train.shape[1]])
        target_epoch = np.zeros([X_valid2.size(0), y_train.shape[1]])
        for i, (x_batch, y_batch) in enumerate(valid_loader): 
                y_pred = pred_model(x_batch).detach()
                oof_epoch[i * batch_size:(i+1) * batch_size,:] = torch.clamp(torch.sigmoid(y_pred.cpu()), p_min, p_max)
                target_epoch[i * batch_size:(i+1) * batch_size,:] = y_batch.cpu().numpy()
        print("Fold {} log loss: {}".format(fold+1, mean_log_loss(target_epoch, oof_epoch)))
        scores.append(mean_log_loss(target_epoch, oof_epoch))
        oof[valid_index,:] = oof_epoch
        oof_targets[valid_index,:] = target_epoch
        #-----------------------------------
        
        # test predcition --------------
        test_preds = np.zeros([test_len, y_train.shape[1]])
        for i, (x_batch,) in enumerate(test_loader): 
            y_pred = pred_model(x_batch).detach()
            test_preds[i * batch_size:(i+1) * batch_size, :] = torch.clamp(torch.sigmoid(y_pred.cpu()), p_min, p_max)
        pred_value += test_preds / n_folds
        # ------------------------------
        
    print("Seed {}".format(seed_))
    for i, ele in enumerate(scores):
        print("Fold {} log loss: {}".format(i+1, scores[i]))
    print("Std of log loss: {}".format(np.std(scores)))
    print("Total log loss: {}".format(mean_log_loss(oof_targets, oof)))
    
    return oof, pred_value

In [14]:
seeds = [0,1,2,3,4]
mlp1_oof = np.zeros([len(mlp_train),fn_targets.shape[1]])
mlp1_test = np.zeros([len(mlp_test),fn_targets.shape[1]])

for seed_ in seeds:
    oof, pytorch_pred = modelling_torch(mlp_train, fn_targets, mlp_test, seed_, mlp_train.shape[1]-1, fn_targets.shape[1])
    mlp1_oof += oof / len(seeds)
    mlp1_test += pytorch_pred / len(seeds)

Fold 1
Best: Epoch 1 	 loss=0.41376 	 val_loss=0.02255 	 sm_loss=0.41373 	 sm_val_loss=0.02252 	 time=1.11s
Best: Epoch 2 	 loss=0.02017 	 val_loss=0.01888 	 sm_loss=0.02011 	 sm_val_loss=0.01882 	 time=0.87s
Best: Epoch 3 	 loss=0.01819 	 val_loss=0.01834 	 sm_loss=0.01820 	 sm_val_loss=0.01839 	 time=0.85s
Best: Epoch 4 	 loss=0.01749 	 val_loss=0.01800 	 sm_loss=0.01757 	 sm_val_loss=0.01803 	 time=1.06s
Best: Epoch 5 	 loss=0.01735 	 val_loss=0.01791 	 sm_loss=0.01746 	 sm_val_loss=0.01790 	 time=0.88s
Best: Epoch 9 	 loss=0.01720 	 val_loss=0.01767 	 sm_loss=0.01733 	 sm_val_loss=0.01772 	 time=0.86s
Best: Epoch 11 	 loss=0.01691 	 val_loss=0.01770 	 sm_loss=0.01704 	 sm_val_loss=0.01771 	 time=0.87s
Best: Epoch 12 	 loss=0.01667 	 val_loss=0.01755 	 sm_loss=0.01682 	 sm_val_loss=0.01760 	 time=0.85s
Best: Epoch 13 	 loss=0.01645 	 val_loss=0.01740 	 sm_loss=0.01661 	 sm_val_loss=0.01742 	 time=0.91s
Best: Epoch 14 	 loss=0.01620 	 val_loss=0.01738 	 sm_loss=0.01637 	 sm_val_loss=

In [15]:
check_mlp = np.zeros([y.shape[0], y.shape[1]])
check_mlp[cons_train_index,:] = mlp1_oof
print('OOF log loss: ', log_loss(np.ravel(y), np.ravel(check_mlp)))

OOF log loss:  0.01571810721478206


In [16]:
aucs = []
for task_id in range(y.shape[1]):
    aucs.append(roc_auc_score(y_true=y.iloc[:, task_id].values,
                              y_score=check_mlp[:, task_id]))
print(f"Overall AUC : {np.mean(aucs)}")

Overall AUC : 0.6741695449519476


# 1st tabnet

In [17]:
class LogitsLogLoss(Metric):
    def __init__(self):
        self._name = "logits_ll"
        self._maximize = False

    def __call__(self, y_true, y_pred):
        logits = 1 / (1 + np.exp(-y_pred))
        
        aux = (1-y_true)*np.log(1-logits+1e-15) + y_true*np.log(logits+1e-15)
        return np.mean(-aux)

In [18]:
MAX_EPOCH=200

def seed_tabnet_everything(seed_value):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    
    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        
def modelling_tabnet(tr, target, te, sample_seed):
    seed_tabnet_everything(sample_seed) 
    tabnet_params = dict(n_d=12, n_a=12, n_steps=1, gamma=1.3, seed = sample_seed,
                     lambda_sparse=0, optimizer_fn=torch.optim.Adam,
                     optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
                     mask_type='entmax',
                     scheduler_params=dict(mode="min",
                                           patience=5,
                                           min_lr=1e-5,
                                           factor=0.9,),
                     scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
                     verbose=10,
                     )
    test_cv_preds = []
    
    NB_SPLITS = 5
    mskf = MultilabelStratifiedKFold(n_splits=NB_SPLITS, random_state=0, shuffle=True)
    oof_preds = np.zeros([len(tr),target.shape[1]])
    for fold_nb in range(NB_SPLITS):
        print("FOLDS : ", fold_nb+1)
        
        ## model
        val_idx = tr[:,-1] == fold_nb
        train_idx = tr[:,-1] != fold_nb
        X_train, y_train = tr[train_idx, :], target[train_idx, :]
        X_val, y_val = tr[val_idx, :], target[val_idx, :]
        X_train = np.delete(X_train, -1, 1)
        X_val = np.delete(X_val, -1, 1)
        
        model = TabNetRegressor(**tabnet_params)
        
        model.fit(X_train=X_train,
              y_train=y_train,
              eval_set=[(X_val, y_val)],
              eval_name = ["val"],
              eval_metric = ["logits_ll"],
              max_epochs=MAX_EPOCH,
              patience=20, batch_size=1024, virtual_batch_size=128,
              num_workers=1, drop_last=False,
              # use binary cross entropy as this is not a regression problem
              loss_fn=torch.nn.functional.binary_cross_entropy_with_logits)
        
        preds_val = model.predict(X_val)
        preds =  1 / (1 + np.exp(-preds_val))
        oof_preds[val_idx,:] = preds
        
        # preds on test
        preds_test = model.predict(te)
        test_cv_preds.append(1 / (1 + np.exp(-preds_test)))

    test_preds_all = np.stack(test_cv_preds)
    return oof_preds, test_preds_all

In [19]:
tabnet1_oof = np.zeros([len(tab_train),fn_targets.shape[1]])
tabnet1_test = np.zeros([len(tab_test),fn_targets.shape[1]])
seeds = [0]
for seed_ in seeds:
    oof_preds, test_preds_all = modelling_tabnet(tab_train, fn_targets, tab_test, seed_)
    tabnet1_oof += oof_preds / len(seeds)
    tabnet1_test += test_preds_all.mean(axis=0) / len(seeds)

FOLDS :  1
Device used : cuda
epoch 0  | loss: 0.56822 | val_logits_ll: 0.32031 |  0:00:01s
epoch 10 | loss: 0.02073 | val_logits_ll: 0.02057 |  0:00:11s
epoch 20 | loss: 0.01911 | val_logits_ll: 0.01923 |  0:00:21s
epoch 30 | loss: 0.01795 | val_logits_ll: 0.01885 |  0:00:32s
epoch 40 | loss: 0.01726 | val_logits_ll: 0.01974 |  0:00:43s
epoch 50 | loss: 0.01696 | val_logits_ll: 0.01791 |  0:00:54s
epoch 60 | loss: 0.01648 | val_logits_ll: 0.01779 |  0:01:04s
epoch 70 | loss: 0.01624 | val_logits_ll: 0.01918 |  0:01:15s
epoch 80 | loss: 0.01608 | val_logits_ll: 0.01816 |  0:01:24s
epoch 90 | loss: 0.01591 | val_logits_ll: 0.01762 |  0:01:36s

Early stopping occured at epoch 92 with best_epoch = 72 and best_val_logits_ll = 0.01751
Best weights from best epoch are automatically used!
FOLDS :  2
Device used : cuda
epoch 0  | loss: 0.56335 | val_logits_ll: 0.30359 |  0:00:00s
epoch 10 | loss: 0.02052 | val_logits_ll: 0.02025 |  0:00:11s
epoch 20 | loss: 0.01904 | val_logits_ll: 0.01894 |  

In [20]:
check_tabnet = np.zeros([y.shape[0], y.shape[1]])
check_tabnet[cons_train_index,:] = tabnet1_oof
print('OOF log loss: ', log_loss(np.ravel(y), np.ravel(check_tabnet)))

OOF log loss:  0.016317946358678424


# 1st svm

In [21]:
N_STARTS = 1
N_SPLITS = 5

svm0_oof = np.zeros([len(fn_train), fn_targets.shape[1]])
svm0_test = np.zeros([len(fn_test), fn_targets.shape[1]])

svm1_test = np.zeros([len(fn_test),fn_targets.shape[1]])
svm1_oof = np.zeros([fn_targets.shape[0],fn_targets.shape[1]]) 

for ind in tqdm(range(fn_targets.shape[1])):
    ind_target_sum = fn_targets[:, ind].sum()
    if ind_target_sum >= N_SPLITS and ind not in unbalanced_feats:               
        for seed in range(N_STARTS):
            for n in range(N_SPLITS):
                val_index = fn_train[:,-1] == n
                train_index = fn_train[:,-1] != n
                x_tr, x_val = fn_train[train_index], fn_train[val_index]
                y_tr, y_val = fn_targets[train_index,ind], fn_targets[val_index,ind]
                x_tr = np.delete(x_tr, -1, 1)
                x_val = np.delete(x_val, -1, 1)

                model = SVC(C = 40, cache_size = 2000)
                model.fit(x_tr, y_tr)
                svm0_test[:, ind] += model.decision_function(fn_test) / (N_SPLITS * N_STARTS)
                svm0_oof[val_index, ind] += model.decision_function(x_val) / N_STARTS

        for seed in range(N_STARTS):
            for n in range(N_SPLITS):
                val_index = fn_train[:,-1] == n
                train_index = fn_train[:,-1] != n            

                x_tr, x_val = svm0_oof[train_index, ind].reshape(-1, 1), svm0_oof[val_index, ind].reshape(-1, 1)
                y_tr, y_val = fn_targets[train_index,ind], fn_targets[val_index,ind]

                model = LogisticRegression(C = 35, max_iter = 1000)
                model.fit(x_tr, y_tr)
                svm1_test[:, ind] += model.predict_proba(svm0_test[:, ind].reshape(-1, 1))[:, 1] / (N_SPLITS * N_STARTS)
                svm1_oof[val_index, ind] += model.predict_proba(x_val)[:, 1] / N_STARTS
        score1 = log_loss(fn_targets[:, ind], svm0_oof[:, ind])
        score2 = log_loss(fn_targets[:, ind], svm1_oof[:, ind])
        print('SVM Target ind {} score {}: {}'.format(ind, score1, score2))

HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))

SVM Target ind 0 score 0.0267522871657328: 0.0048122212123293704
SVM Target ind 1 score 0.028325951116658196: 0.006770445869106968
SVM Target ind 2 score 0.0377679348222106: 0.008648690214872171
SVM Target ind 3 score 0.2990344481290532: 0.048714773921495466
SVM Target ind 4 score 0.4736868044966427: 0.07136280954991497
SVM Target ind 5 score 0.11487746841755518: 0.02234595137756862
SVM Target ind 6 score 0.0849778533499726: 0.01722326624283357
SVM Target ind 7 score 0.15107877203184092: 0.027914981102869423
SVM Target ind 8 score 0.018891922657283466: 0.004205393265934258
SVM Target ind 9 score 0.4117833910145857: 0.06133994484728045
SVM Target ind 10 score 0.5665533919108652: 0.08282704855719118
SVM Target ind 11 score 0.09173731824932935: 0.018122995986820847



RuntimeError: Exception occured! file=/opt/conda/envs/rapids/conda-bld/libcuml_1598469299551/work/cpp/src/svm/svc_impl.cuh line=66: Only binary classification is implemented at the moment
Obtained 64 stack frames
#0 in /opt/conda/envs/rapids/lib/python3.7/site-packages/cuml/common/../../../../libcuml++.so(_ZN8MLCommon9Exception16collectCallStackEv+0x3e) [0x7fa14940e66e]
#1 in /opt/conda/envs/rapids/lib/python3.7/site-packages/cuml/common/../../../../libcuml++.so(_ZN8MLCommon9ExceptionC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x71) [0x7fa14940f1e1]
#2 in /opt/conda/envs/rapids/lib/python3.7/site-packages/cuml/common/../../../../libcuml++.so(_ZN2ML3SVM6svcFitIdEEvRKNS_10cumlHandleEPT_iiS6_RKNS0_12svmParameterERN8MLCommon6Matrix12KernelParamsERNS0_8svmModelIS5_EEPKS5_+0x1628) [0x7fa149733718]
#3 in /opt/conda/envs/rapids/lib/python3.7/site-packages/cuml/svm/svc.cpython-37m-x86_64-linux-gnu.so(+0x24994) [0x7fa125713994]
#4 in /opt/conda/envs/rapids/lib/python3.7/site-packages/cuml/svm/svc.cpython-37m-x86_64-linux-gnu.so(+0x28c49) [0x7fa125717c49]
#5 in /opt/conda/bin/python(PyObject_Call+0x6e) [0x55764c6b275e]
#6 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x1f6a) [0x55764c759d6a]
#7 in /opt/conda/bin/python(_PyEval_EvalCodeWithName+0x5da) [0x55764c6a0bda]
#8 in /opt/conda/bin/python(_PyFunction_FastCallKeywords+0x387) [0x55764c6ef9e7]
#9 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x4cb9) [0x55764c75cab9]
#10 in /opt/conda/bin/python(_PyEval_EvalCodeWithName+0x2f9) [0x55764c6a08f9]
#11 in /opt/conda/bin/python(PyEval_EvalCodeEx+0x44) [0x55764c6a1824]
#12 in /opt/conda/bin/python(PyEval_EvalCode+0x1c) [0x55764c6a184c]
#13 in /opt/conda/bin/python(+0x1dcafd) [0x55764c767afd]
#14 in /opt/conda/bin/python(_PyMethodDef_RawFastCallKeywords+0xe9) [0x55764c6f0069]
#15 in /opt/conda/bin/python(_PyCFunction_FastCallKeywords+0x21) [0x55764c6f0301]
#16 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x4904) [0x55764c75c704]
#17 in /opt/conda/bin/python(_PyGen_Send+0x2a2) [0x55764c6f8212]
#18 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x1add) [0x55764c7598dd]
#19 in /opt/conda/bin/python(_PyGen_Send+0x2a2) [0x55764c6f8212]
#20 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x1add) [0x55764c7598dd]
#21 in /opt/conda/bin/python(_PyGen_Send+0x2a2) [0x55764c6f8212]
#22 in /opt/conda/bin/python(_PyMethodDef_RawFastCallKeywords+0x8d) [0x55764c6f000d]
#23 in /opt/conda/bin/python(_PyMethodDescr_FastCallKeywords+0x4f) [0x55764c6f704f]
#24 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x4e0d) [0x55764c75cc0d]
#25 in /opt/conda/bin/python(_PyFunction_FastCallKeywords+0xfb) [0x55764c6ef75b]
#26 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x416) [0x55764c758216]
#27 in /opt/conda/bin/python(_PyFunction_FastCallKeywords+0xfb) [0x55764c6ef75b]
#28 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x6a0) [0x55764c7584a0]
#29 in /opt/conda/bin/python(_PyEval_EvalCodeWithName+0x2f9) [0x55764c6a08f9]
#30 in /opt/conda/bin/python(_PyFunction_FastCallDict+0x400) [0x55764c6a1c60]
#31 in /opt/conda/bin/python(_PyObject_Call_Prepend+0x63) [0x55764c6bfe03]
#32 in /opt/conda/bin/python(PyObject_Call+0x6e) [0x55764c6b275e]
#33 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x1f6a) [0x55764c759d6a]
#34 in /opt/conda/bin/python(_PyEval_EvalCodeWithName+0x5da) [0x55764c6a0bda]
#35 in /opt/conda/bin/python(_PyFunction_FastCallKeywords+0x387) [0x55764c6ef9e7]
#36 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x14e7) [0x55764c7592e7]
#37 in /opt/conda/bin/python(+0x16ccd9) [0x55764c6f7cd9]
#38 in /opt/conda/bin/python(_PyMethodDef_RawFastCallKeywords+0xe9) [0x55764c6f0069]
#39 in /opt/conda/bin/python(_PyCFunction_FastCallKeywords+0x21) [0x55764c6f0301]
#40 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x4904) [0x55764c75c704]
#41 in /opt/conda/bin/python(_PyEval_EvalCodeWithName+0xab8) [0x55764c6a10b8]
#42 in /opt/conda/bin/python(_PyFunction_FastCallKeywords+0x387) [0x55764c6ef9e7]
#43 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x6a0) [0x55764c7584a0]
#44 in /opt/conda/bin/python(+0x16ccd9) [0x55764c6f7cd9]
#45 in /opt/conda/bin/python(_PyMethodDef_RawFastCallKeywords+0xe9) [0x55764c6f0069]
#46 in /opt/conda/bin/python(_PyCFunction_FastCallKeywords+0x21) [0x55764c6f0301]
#47 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x4904) [0x55764c75c704]
#48 in /opt/conda/bin/python(_PyEval_EvalCodeWithName+0xab8) [0x55764c6a10b8]
#49 in /opt/conda/bin/python(_PyFunction_FastCallKeywords+0x387) [0x55764c6ef9e7]
#50 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x416) [0x55764c758216]
#51 in /opt/conda/bin/python(+0x16ccd9) [0x55764c6f7cd9]
#52 in /opt/conda/bin/python(_PyMethodDef_RawFastCallKeywords+0xe9) [0x55764c6f0069]
#53 in /opt/conda/bin/python(_PyCFunction_FastCallKeywords+0x21) [0x55764c6f0301]
#54 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x4904) [0x55764c75c704]
#55 in /opt/conda/bin/python(_PyEval_EvalCodeWithName+0xab8) [0x55764c6a10b8]
#56 in /opt/conda/bin/python(_PyFunction_FastCallDict+0x1d5) [0x55764c6a1a35]
#57 in /opt/conda/bin/python(_PyObject_Call_Prepend+0x63) [0x55764c6bfe03]
#58 in /opt/conda/bin/python(PyObject_Call+0x6e) [0x55764c6b275e]
#59 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x1f6a) [0x55764c759d6a]
#60 in /opt/conda/bin/python(_PyGen_Send+0x14c) [0x55764c6f80bc]
#61 in /opt/conda/bin/python(_PyMethodDef_RawFastCallKeywords+0x8d) [0x55764c6f000d]
#62 in /opt/conda/bin/python(_PyMethodDescr_FastCallKeywords+0x4f) [0x55764c6f704f]
#63 in /opt/conda/bin/python(_PyEval_EvalFrameDefault+0x4e0d) [0x55764c75cc0d]


In [22]:
check_svm = np.zeros([y.shape[0], y.shape[1]])
check_svm[cons_train_index,:] = svm1_oof
print('OOF log loss: ', log_loss(np.ravel(y), np.ravel(check_svm)))

OOF log loss:  0.10983873120223939


# submission

In [23]:
from numba import njit
from scipy.optimize import minimize, fsolve

def log_loss_numpy(y_pred):
    y_true_ravel = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    loss = np.where(y_true_ravel == 1, - np.log(y_pred), - np.log(1 - y_pred))
    return loss.mean()

def func_numpy_metric(weights):
    oof_blend = np.tensordot(weights, oof, axes = ((0), (0)))
    return log_loss_numpy(oof_blend)

def grad_func(weights):
    oof_clip = np.clip(oof, 1e-15, 1 - 1e-15)
    gradients = np.zeros(oof.shape[0])
    for i in range(oof.shape[0]):
        a, b, c = y_true, oof_clip[i], np.zeros((oof.shape[1], oof.shape[2]))
        for j in range(oof.shape[0]):
            if j != i:
                c += weights[j] * oof_clip[j]
        gradients[i] = -np.mean((-a*b+(b**2)*weights[i]+b*c)/((b**2)*(weights[i]**2)+2*b*c*weights[i]-b*weights[i]+(c**2)-c))
    return gradients

@njit
def grad_func_jit(weights):
    oof_clip = np.minimum(1 - 1e-15, np.maximum(oof, 1e-15))
    gradients = np.zeros(oof.shape[0])
    for i in range(oof.shape[0]):
        a, b, c = y_true, oof_clip[i], np.zeros((oof.shape[1], oof.shape[2]))
        for j in range(oof.shape[0]):
            if j != i:
                c += weights[j] * oof_clip[j]
        gradients[i] = -np.mean((-a*b+(b**2)*weights[i]+b*c)/((b**2)*(weights[i]**2)+2*b*c*weights[i]-b*weights[i]+(c**2)-c))
    return gradients

In [24]:
y_true = pd.read_csv('../input/lish-moa/train_targets_scored.csv', index_col = 'sig_id').values

oof_dict = {'Model 1': check_mlp, 
            'Model 2': check_tabnet, 
            'Model 3': check_svm
           }

oof = np.zeros((len(oof_dict), y_true.shape[0], y_true.shape[1]))
for i in range(oof.shape[0]):
    oof[i] = list(oof_dict.values())[i]

In [25]:
tol = 1e-10
init_guess = [1 / oof.shape[0]] * oof.shape[0]
bnds = [(0, 1) for _ in range(oof.shape[0])]
cons = {'type': 'eq', 
        'fun': lambda x: np.sum(x) - 1, 
        'jac': lambda x: [1] * len(x)}

print('Inital Blend OOF:', func_numpy_metric(init_guess))
res_scipy = minimize(fun = func_numpy_metric, 
                     x0 = init_guess, 
                     method = 'SLSQP', 
                     jac = grad_func_jit, # grad_func 
                     bounds = bnds, 
                     constraints = cons, 
                     tol = tol)
print('Optimised Blend OOF:', res_scipy.fun)
print('Optimised Weights:', res_scipy.x)

Inital Blend OOF: 0.01602207441414796
Optimised Blend OOF: 0.01560899941528477
Optimised Weights: [0.72694778 0.27305222 0.        ]


In [26]:
check = 0
for i in range(oof.shape[0]):
    check += res_scipy.x[i] * list(oof_dict.values())[i]
print('OOF log loss: ', log_loss(np.ravel(y), np.ravel(check)))

OOF log loss:  0.01560899941528477


In [27]:
test_dict = {'Model 1': mlp1_test, 
            'Model 2': tabnet1_test, 
            'Model 3': svm1_test
           }
final = 0
for i in range(oof.shape[0]):
    final += res_scipy.x[i] * list(test_dict.values())[i]

sub = pd.read_csv(DATA_DIR + 'sample_submission.csv')
sub.loc[cons_test_index,target_feats] = final #0.15 * svm1_test + 0.35 * tabnet1_test + 0.5 * mlp1_test 
sub.loc[noncons_test_index,target_feats] = 0
sub.to_csv('submission.csv', index=False)