# Can we use non-scored targets to create synthetic variables?
 
I will use RAPIDS SVC models trained on the non-scored targets, and their predictions will be concatenated to the other features (g-, c-, PCA and categorical). A neural network (NN) model will then be trained exclusively on the scored targets.

Referenced notebook for rapids and beginning of this notebook is forked from:
https://www.kaggle.com/gogo827jz/rapids-svm-on-gpu-6000-models-in-1-hour

NN referenced notebooks:
https://www.kaggle.com/kushal1506/moa-pytorch-0-01859-rankgauss-pca-nn
https://www.kaggle.com/riadalmadani/pytorch-cv-0-0145-lb-0-01839

predictors
https://www.kaggle.com/demetrypascal/t-test-pca-rfe-logistic-regression


V3 - No controls were included in the complete analysis. Only a selection of the predicted non-scored features, those with a log-loss lower than or equal to 0.02, was concatenated.

In v5, I will concatenate all possible non-scored predicted features.

# RAPIDS SVC for MoA

RAPIDS cuML is a great library that allows training sklearn-style models on GPU. Available classification models include Logistic Regression, SVC, Random Forest, KNN, etc.

Konrad has tried to train SVR models in [SVR Models][1]. In this notebook, I try training 3090 SVC models in 2 hours on GPU, which would take forever on CPU...

[1]: https://www.kaggle.com/konradb/build-model-svm

# Try to use the nonscored targets to make synthetic variables

In [None]:
import warnings, sys
warnings.filterwarnings("ignore")

# Thanks to Chris's RAPIDS dataset, it only takes around 1 min to install offline
!cp ../input/rapids/rapids.0.15.0 /opt/conda/envs/rapids.tar.gz
!cd /opt/conda/envs/ && tar -xzvf rapids.tar.gz > /dev/null
sys.path = ["/opt/conda/envs/rapids/lib/python3.7/site-packages"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib/python3.7"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib"] + sys.path 
!cp /opt/conda/envs/rapids/lib/libxgboost.so /opt/conda/lib/

In [None]:
#libraries for SVC
import os
import gc
import pickle
import datetime
import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold

from tqdm.notebook import tqdm
from time import time

In [None]:
#libraries for neural network

import random

import matplotlib.pyplot as plt

import copy
import seaborn as sns
import collections


from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler,MinMaxScaler,QuantileTransformer,PowerTransformer,RobustScaler,Normalizer

sys.path.append('../input/rank-gauss')
from gauss_rank_scaler import GaussRankScaler

from sklearn.decomposition import PCA

from sklearn.feature_selection import VarianceThreshold

from joblib import dump, load

sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.loss import _WeightedLoss
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

# Data Preparation

Make everything without controls: SVC of nonscored targets and NN integrating synthetic variables

In [None]:
# Load the raw competition tables: features, scored targets, test features
# and non-scored targets.
train_features = pd.read_csv('../input/lish-moa/train_features.csv')
train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
test_features = pd.read_csv('../input/lish-moa/test_features.csv')
train_targets_nonscored = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')

# Working copies; the raw frames above are kept unmodified because they are
# read again later (e.g. test_features when the submission is assembled).
df_train=train_features.copy()
df_train_targets=train_targets.copy()
df_test=test_features.copy()
df_train_targets_nonscored=train_targets_nonscored.copy()
ss = pd.read_csv('../input/lish-moa/sample_submission.csv')

# Every sample-submission column except the row identifier.
cols = [c for c in ss.columns.values if c != 'sig_id']

In [None]:
#elimination of controls
# Remove control rows (cp_type == 'ctl_vehicle') from the features and keep
# the two target tables row-aligned with the remaining training samples.
# Rows are selected with boolean masks taken from the feature table instead of
# `targets.iloc[features.index]`, which mixes index labels with row positions
# and is only correct while the index is the default 0..n-1 range.
train_is_treated = (df_train['cp_type'] != 'ctl_vehicle').values
test_is_treated = (df_test['cp_type'] != 'ctl_vehicle').values

df_train = df_train[train_is_treated].reset_index(drop=True)
df_test = df_test[test_is_treated].reset_index(drop=True)

# The target tables are assumed to list samples in the same row order as the
# feature table (the positional selection used before relied on this as well).
# 'sig_id' is an identifier, not a target, so it is dropped from both.
df_train_targets = (
    df_train_targets[train_is_treated]
    .reset_index(drop=True)
    .drop(columns='sig_id')
)
df_train_targets_nonscored = (
    df_train_targets_nonscored[train_is_treated]
    .reset_index(drop=True)
    .drop(columns='sig_id')
)

In [None]:
def preprocess(df):
    """Integer-encode the treatment columns and drop the identifier.

    The frame is modified in place and the same object is returned:
    'cp_type' and 'cp_dose' are replaced by their integer codes and the
    'sig_id' column is removed.
    """
    encodings = {
        'cp_type': {'trt_cp': 0, 'ctl_vehicle': 1},
        'cp_dose': {'D1': 0, 'D2': 1},
    }
    for column, codes in encodings.items():
        df.loc[:, column] = df.loc[:, column].map(codes)
    del df['sig_id']
    return df

def log_loss_metric(y_true, y_pred):
    """Return the mean per-column log loss over the scored targets.

    Args:
        y_true: DataFrame of ground-truth labels, one column per target.
        y_pred: DataFrame of predictions with the same column names.

    Returns:
        The average of the per-target log losses.
    """
    # train_targets is the raw CSV table and still carries the 'sig_id'
    # identifier (it is only deleted from the df_train_targets copy), so that
    # column must be skipped: it is not a target and cannot be scored.
    target_names = [c for c in train_targets.columns if c != 'sig_id']
    per_target = [
        # labels=[0, 1] keeps log_loss defined for columns with a single class.
        log_loss(y_true.loc[:, name], y_pred.loc[:, name].astype(float), labels=[0, 1])
        for name in target_names
    ]
    return np.mean(per_target)

def log_loss_metric_n(y_true, y_pred):
    """Return the mean per-column log loss over the non-scored targets.

    Columns are taken from df_train_targets_nonscored; both arguments are
    DataFrames indexed by those column names.
    """
    per_target = [
        log_loss(
            y_true.loc[:, name],
            y_pred.loc[:, name].astype(float),
            labels=[0, 1],
        )
        for name in df_train_targets_nonscored.columns
    ]
    return np.mean(per_target)


# preprocess() modifies its argument and returns it, so `train` / `test` are
# the same objects as `df_train` / `df_test` (now encoded and without 'sig_id').
train = preprocess(df_train)
test = preprocess(df_test)

#del train_targets['sig_id']
#del train_targets_nonscored['sig_id']


In [None]:
top_feats = [  0,   1,   2,   3,   5,   6,   8,   9,  10,  11,  12,  14,  15,
        16,  18,  19,  20,  21,  23,  24,  25,  27,  28,  29,  30,  31,
        32,  33,  34,  35,  36,  37,  39,  40,  41,  42,  44,  45,  46,
        48,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,
        63,  64,  65,  66,  68,  69,  70,  71,  72,  73,  74,  75,  76,
        78,  79,  80,  81,  82,  83,  84,  86,  87,  88,  89,  90,  92,
        93,  94,  95,  96,  97,  99, 100, 101, 103, 104, 105, 106, 107,
       108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
       121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 132, 133, 134,
       135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147,
       149, 150, 151, 152, 153, 154, 155, 157, 159, 160, 161, 163, 164,
       165, 166, 167, 168, 169, 170, 172, 173, 175, 176, 177, 178, 180,
       181, 182, 183, 184, 186, 187, 188, 189, 190, 191, 192, 193, 195,
       197, 198, 199, 202, 203, 205, 206, 208, 209, 210, 211, 212, 213,
       214, 215, 218, 219, 220, 221, 222, 224, 225, 227, 228, 229, 230,
       231, 232, 233, 234, 236, 238, 239, 240, 241, 242, 243, 244, 245,
       246, 248, 249, 250, 251, 253, 254, 255, 256, 257, 258, 259, 260,
       261, 263, 265, 266, 268, 270, 271, 272, 273, 275, 276, 277, 279,
       282, 283, 286, 287, 288, 289, 290, 294, 295, 296, 297, 299, 300,
       301, 302, 303, 304, 305, 306, 308, 309, 310, 311, 312, 313, 315,
       316, 317, 320, 321, 322, 324, 325, 326, 327, 328, 329, 330, 331,
       332, 333, 334, 335, 338, 339, 340, 341, 343, 344, 345, 346, 347,
       349, 350, 351, 352, 353, 355, 356, 357, 358, 359, 360, 361, 362,
       363, 364, 365, 366, 368, 369, 370, 371, 372, 374, 375, 376, 377,
       378, 379, 380, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391,
       392, 393, 394, 395, 397, 398, 399, 400, 401, 403, 405, 406, 407,
       408, 410, 411, 412, 413, 414, 415, 417, 418, 419, 420, 421, 422,
       423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435,
       436, 437, 438, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450,
       452, 453, 454, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465,
       466, 468, 469, 471, 472, 473, 474, 475, 476, 477, 478, 479, 482,
       483, 485, 486, 487, 488, 489, 491, 492, 494, 495, 496, 500, 501,
       502, 503, 505, 506, 507, 509, 510, 511, 512, 513, 514, 516, 517,
       518, 519, 521, 523, 525, 526, 527, 528, 529, 530, 531, 532, 533,
       534, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547,
       549, 550, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563,
       564, 565, 566, 567, 569, 570, 571, 572, 573, 574, 575, 577, 580,
       581, 582, 583, 586, 587, 590, 591, 592, 593, 595, 596, 597, 598,
       599, 600, 601, 602, 603, 605, 607, 608, 609, 611, 612, 613, 614,
       615, 616, 617, 619, 622, 623, 625, 627, 630, 631, 632, 633, 634,
       635, 637, 638, 639, 642, 643, 644, 645, 646, 647, 649, 650, 651,
       652, 654, 655, 658, 659, 660, 661, 662, 663, 664, 666, 667, 668,
       669, 670, 672, 674, 675, 676, 677, 678, 680, 681, 682, 684, 685,
       686, 687, 688, 689, 691, 692, 694, 695, 696, 697, 699, 700, 701,
       702, 703, 704, 705, 707, 708, 709, 711, 712, 713, 714, 715, 716,
       717, 723, 725, 727, 728, 729, 730, 731, 732, 734, 736, 737, 738,
       739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751,
       752, 753, 754, 755, 756, 758, 759, 760, 761, 762, 763, 764, 765,
       766, 767, 769, 770, 771, 772, 774, 775, 780, 781, 782, 783, 784,
       785, 787, 788, 790, 793, 795, 797, 799, 800, 801, 805, 808, 809,
       811, 812, 813, 816, 819, 820, 821, 822, 823, 825, 826, 827, 829,
       831, 832, 833, 834, 835, 837, 838, 839, 840, 841, 842, 844, 845,
       846, 847, 848, 850, 851, 852, 854, 855, 856, 858, 860, 861, 862,
       864, 867, 868, 870, 871, 873, 874]

print(len(top_feats))

# 3090 CuML SVC Models

In [None]:
# Standardise the selected columns: the scaler is fitted on the training rows
# and the same fitted transform is applied to the test rows.
# NOTE(review): top_feats holds column positions into train.values — confirm
# they match the column layout left by preprocess().
scaler = StandardScaler()
X = scaler.fit_transform(train.values[:, top_feats])
x_tt = scaler.transform(test.values[:, top_feats])

In [None]:
from cuml.svm import SVC

N_STARTS = 1  # 3
N_SPLITS = 5  # 5
# Minimum number of positives in a training fold for fitting a probabilistic SVC.
MIN_POSITIVES_FOR_PROBA = 5

nonscored_cols = df_train_targets_nonscored.columns

# Out-of-fold predictions for the training rows, one column per non-scored
# target. Created as float so fractional predictions can be accumulated
# without relying on an implicit int -> float upcast.
res = pd.DataFrame(0.0, index=df_train_targets_nonscored.index, columns=nonscored_cols)

# Averaged test-set predictions, same columns as the non-scored targets.
ss_non = pd.DataFrame(np.zeros((df_test.shape[0], len(nonscored_cols))), columns=nonscored_cols)

collect_tar_failure = []  # targets where at least one fold fell back to hard predictions
collect_tar_log012 = []   # targets without fallback whose OOF log loss is <= 0.02
collect_tar = []          # all trained targets without fallback

for tar in tqdm(range(df_train_targets_nonscored.shape[1])):
    print(tar)

    start_time = time()
    tar_name = nonscored_cols[tar]
    targets = df_train_targets_nonscored.values[:, tar]

    if targets.sum() < N_SPLITS:
        # Not enough positives to stratify: nothing is trained and the
        # predictions stay at zero. No score exists for this target, so
        # nothing is reported (previously an undefined or stale `score`
        # was printed here).
        print(f'Target {tar}: fewer than {N_SPLITS} positives, skipped.')
        continue

    for seed in range(N_STARTS):

        skf = StratifiedKFold(n_splits=N_SPLITS, random_state=seed, shuffle=True)

        for n, (tr, te) in enumerate(skf.split(targets, targets)):

            x_tr, x_val = X[tr], X[te]
            y_tr = targets[tr]

            if y_tr.sum() >= MIN_POSITIVES_FOR_PROBA:
                model = SVC(probability=True, cache_size=2000)
                model.fit(x_tr, y_tr)
                fold_test_pred = model.predict_proba(x_tt)[:, 1]
                fold_val_pred = model.predict_proba(x_val)[:, 1]
            else:
                # Too few positives for probability calibration: fall back to
                # hard class predictions and remember the target as a failure.
                print(f'Target {tar}: Seed {seed}: Fold {n}: SVC probabilistic output failure.')
                if tar not in collect_tar_failure:
                    collect_tar_failure.append(tar)
                model = SVC(cache_size=2000)
                model.fit(x_tr, y_tr)
                fold_test_pred = model.predict(x_tt)
                fold_val_pred = model.predict(x_val)

            # Test predictions are averaged over folds and seeds; each training
            # row is validated once per seed, so OOF is averaged over seeds only.
            ss_non.loc[:, tar_name] += fold_test_pred / (N_SPLITS * N_STARTS)
            res.loc[te, tar_name] += fold_val_pred / N_STARTS

    score = log_loss(df_train_targets_nonscored.loc[:, tar_name], res.loc[:, tar_name])
    if tar not in collect_tar_failure:
        collect_tar.append(tar)
        if score <= 0.02:
            collect_tar_log012.append(tar)

    print(f'[{str(datetime.timedelta(seconds = time() - start_time))[2:7]}] Target {tar}:', score)

In [None]:
print(f'Model OOF Metric: {log_loss_metric_n(df_train_targets_nonscored, res)}')


In [None]:
print(len(collect_tar_log012))

In [None]:
ss_non.shape,res.shape

In [None]:
# Synthetic variables from SVC: one 'svc-<k>' column per selected non-scored
# target, taken from the OOF predictions (train) and averaged predictions (test).
# NOTE(review): the selection uses collect_tar_log012 (targets with OOF
# log loss <= 0.02); the notebook introduction says v5 concatenates all
# non-scored predictions — confirm which list is intended (collect_tar?).
df_train_svc = pd.DataFrame(np.array(res.iloc[:,collect_tar_log012]), columns=[f'svc-{i}' for i in range(len(collect_tar_log012))])
df_test_svc = pd.DataFrame(np.array(ss_non.iloc[:,collect_tar_log012]), columns=[f'svc-{i}' for i in range(len(collect_tar_log012))])

Prepare data for Neural Network.
Make analysis without controls

In [None]:

# svc datasets
# Select the SVC feature rows by the positions listed in the df_train /
# df_test indexes, then renumber them from zero.
# NOTE(review): df_train / df_test were already reset to a 0..n-1 index when
# the controls were removed, so this selection presumably keeps every row
# unchanged — confirm and simplify if so.
df_train_svc=df_train_svc.iloc[df_train.index]
df_test_svc=df_test_svc.iloc[df_test.index]
df_train_svc.reset_index(drop=True, inplace=True)
df_test_svc.reset_index(drop=True, inplace=True)

# Shape check: row counts of the feature, target and SVC frames should agree.
print(df_train.shape,df_train_targets.shape,df_test.shape,df_train_svc.shape, df_test_svc.shape)

In [None]:
df_train.head()

In [None]:
df_test.head()

In [None]:
df_train_targets.head()

In [None]:
data_all = pd.concat([df_train, df_test], ignore_index=True)
print(data_all.shape)
data_all.head()

In [None]:
def process_data(data):
    """Return a copy of ``data`` with 'cp_time' and 'cp_dose' one-hot encoded.

    'cp_type' is left as is: this is the no-controls variant of the encoding.
    """
    categorical_columns = ['cp_time', 'cp_dose']
    encoded = pd.get_dummies(data, columns=categorical_columns)
    return encoded

In [None]:
#data_all = preprocess(data_all)
#one_hot
data_all=process_data(data_all)
data_all.head()

In [None]:
#change 

In [None]:
#change numericals for predictors tstudent significant
numerical_features=list(train_features.columns[4:])

print(numerical_features[:10],numerical_features[860:])

In [None]:
data_all_numerical=data_all.loc[:,numerical_features]

In [None]:
data_all_numerical.head()

Apply a variance threshold of 0.9 to the numerical data, then scale it with a scaler, and then apply PCA.

In [None]:
def variance_threshold(dataframe):
    """Return the columns of ``dataframe`` whose variance exceeds 0.90.

    The number of dropped columns is printed. The result is a DataFrame
    holding the surviving columns in their original order.
    """
    selector = VarianceThreshold(threshold=0.90)
    selector.fit(dataframe)
    keep_mask = selector.get_support(indices=False)
    kept = dataframe.iloc[:, keep_mask]
    n_removed = dataframe.shape[1] - kept.shape[1]
    print('number of features removed', n_removed)
    return kept

def scaling_data(scaler_name, dataframe):
    """Scale every column of ``dataframe`` in place with the named scaler.

    Args:
        scaler_name: One of 'rank_gauss', 'standard', 'min_max',
            'gaussian_yeo', 'robust_scaler', 'quantile_normal', 'normal_l2'.
        dataframe: DataFrame containing numerical columns only. Its values
            are overwritten with the scaled values.

    Returns:
        The same ``dataframe`` object that was passed in (not a copy), so
        every other name bound to that frame sees the scaled values too.

    Raises:
        ValueError: If ``scaler_name`` is not one of the supported names.
    """
    # Factories are lazy so that only the requested scaler is instantiated.
    # The last two entries used to be unreachable: their branches compared the
    # not-yet-assigned local `scaler` instead of `scaler_name`.
    scaler_factories = {
        'rank_gauss': lambda: GaussRankScaler(),
        'standard': lambda: StandardScaler(),
        'min_max': lambda: MinMaxScaler(),
        'gaussian_yeo': lambda: PowerTransformer(
            method='yeo-johnson', standardize=True, copy=False),
        'robust_scaler': lambda: RobustScaler(
            with_centering=True, with_scaling=True,
            quantile_range=(25.0, 75.0), copy=False),
        'quantile_normal': lambda: QuantileTransformer(
            n_quantiles=1000, output_distribution='normal',
            random_state=None, copy=False),
        'normal_l2': lambda: Normalizer(norm='l2', copy=False),
    }

    try:
        build_scaler = scaler_factories[scaler_name]
    except KeyError:
        raise ValueError(
            f'unknown scaler_name {scaler_name!r}; '
            f'expected one of {sorted(scaler_factories)}'
        ) from None

    scaler = build_scaler()
    dataframe.loc[:, :] = scaler.fit_transform(dataframe.loc[:, :])

    return dataframe

In [None]:
#features
# NOTE(review): data_all_var is not referenced again in the visible source;
# the scaler below runs on the unfiltered data_all_numerical — confirm
# whether the variance-filtered frame was meant to be scaled instead.
data_all_var=variance_threshold(data_all_numerical)
scaler_name='rank_gauss'
#scaler_name='standard'
# scaling_data() writes into the frame it receives and returns that same
# frame, so data_all_sc is the same object as data_all_numerical and will
# change with any later in-place scaling of data_all_numerical.
data_all_sc=scaling_data(scaler_name,data_all_numerical)


In [None]:
data_all_sc.head()

In [None]:
#check if there are nan values
#np.isnan(data_all_sc).any()
data_all_sc.isnull().values.any()

In [None]:
def PCA_descriptors(data,ncompo_genes,ncompo_cells):
    """Build PCA features from the gene ('g-') and cell ('c-') columns.

    Two separate PCAs (random_state=42) are fitted, one per column family.
    Returns a DataFrame with columns 'pca_g-<i>' followed by 'pca_c-<i>';
    the input frame is not modified.
    """
    frame = data.copy()

    def _columns_with_prefix(prefix):
        # Column family is identified purely by the name prefix.
        return [name for name in frame.columns if name.startswith(prefix)]

    def _project(columns, n_components):
        # Fit a PCA on the given columns and return the component scores.
        return PCA(n_components=n_components, random_state=42).fit_transform(frame[columns])

    gene_scores = _project(_columns_with_prefix('g-'), ncompo_genes)
    cell_scores = _project(_columns_with_prefix('c-'), ncompo_cells)

    gene_frame = pd.DataFrame(gene_scores, columns=[f'pca_g-{i}' for i in range(ncompo_genes)])
    cell_frame = pd.DataFrame(cell_scores, columns=[f'pca_c-{i}' for i in range(ncompo_cells)])

    return pd.concat([gene_frame, cell_frame], axis=1)

In [None]:
#2 add PCA features

scaler_name='gaussian_yeo'

# scaling_data() overwrites the frame it is given and returns that same
# object. data_all_sc is bound to data_all_numerical, so scaling
# data_all_numerical directly here would silently replace the features held
# in data_all_sc with the yeo-johnson output as well. Scaling a copy keeps
# the two feature sets independent.
data_all_g=scaling_data(scaler_name,data_all_numerical.copy())

ncompo_genes = 70#70
ncompo_cells = 15

data_pca=PCA_descriptors(data_all_g,ncompo_genes,ncompo_cells)

In [None]:
data_pca.head()

In [None]:
#one hot categorical data
# NOTE(review): 873 is a hard-coded column position; presumably every column
# from there on is one-hot output of process_data() — confirm against
# data_all.columns (selecting by name prefix would be less fragile).
data_cat=data_all.iloc[:,873:]
data_cat.head()

In [None]:
#join all data
data_all_new=pd.concat([data_cat,data_all_sc, data_pca],axis=1)

data_all_new.head()

In [None]:
# Preparation for the neural network: the combined table holds the training
# rows first and the test rows after them, so it is split at the train length.
rows_train = df_train.shape[0]
train = data_all_new[:rows_train]
test = data_all_new[rows_train:].reset_index(drop=True)

In [None]:
#join scv data
train=pd.concat([train,df_train_svc],axis=1)

test=pd.concat([test,df_test_svc],axis=1)
train.tail()

In [None]:
test.head()

In [None]:
train.shape,df_train_targets.shape

Add training model (NN) -Riad  & Kushal-

In [None]:
#preparation of the data. concatenate targets to the train data 

sample_submission = pd.read_csv('../input/lish-moa/sample_submission.csv')
#del df_train_targets['sig_id']
# Append the scored targets column-wise; rows are matched by index label.
train=pd.concat([train,df_train_targets],axis=1)
# View of the target columns only, in the order of df_train_targets.
target = train[df_train_targets.columns]

# Plain Python list of target column names.
target_cols = target.columns.values.tolist()


In [None]:
train.head()

In [None]:
len(target_cols)

In [None]:
train.shape,target.shape

In [None]:
#cv folds
# `folds` is an independent copy of train with an extra 'kfold' column holding
# the number of the fold in which each row is used for validation.
folds = train.copy()

# NOTE(review): n_splits is hard-coded to 7; run_k_fold() later iterates over
# range(NFOLDS), so the two values must agree — confirm when changing either.
mskf = MultilabelStratifiedKFold(n_splits=7)

for f, (t_idx, v_idx) in enumerate(mskf.split(X=train, y=target)):
    folds.loc[v_idx, 'kfold'] = int(f)

# The column is created by partial assignment above; cast it to int.
folds['kfold'] = folds['kfold'].astype(int)
folds

In [None]:

print(train.shape)
print(folds.shape)
print(test.shape)
print(target.shape)

print(sample_submission.shape)

In [None]:
def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also exports PYTHONHASHSEED and switches cuDNN to deterministic mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=42)

In [None]:
class MoADataset:
    """Indexable dataset pairing feature rows with target rows.

    Both arrays are indexed as ``array[idx, :]``; each item is a dict with
    float tensors under the keys 'x' (features) and 'y' (targets).
    """

    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    @staticmethod
    def _row_tensor(array, idx):
        # One row of `array` as a float tensor.
        return torch.tensor(array[idx, :], dtype=torch.float)

    def __getitem__(self, idx):
        return {
            'x': self._row_tensor(self.features, idx),
            'y': self._row_tensor(self.targets, idx),
        }
    
class TestDataset:
    """Indexable dataset of feature rows only (no targets).

    Each item is a dict with a single float tensor under the key 'x'.
    """

    def __init__(self, features):
        self.features = features

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        row = self.features[idx, :]
        return {'x': torch.tensor(row, dtype=torch.float)}
    

In [None]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Run one training pass over ``dataloader`` and return the mean batch loss.

    Each batch is a dict with 'x' (inputs) and 'y' (targets). The optimizer
    and the scheduler are both stepped once per batch.
    """
    model.train()
    batch_losses = []

    for batch in dataloader:
        optimizer.zero_grad()
        features = batch['x'].to(device)
        labels = batch['y'].to(device)
        batch_loss = loss_fn(model(features), labels)
        batch_loss.backward()
        optimizer.step()
        scheduler.step()
        batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(dataloader)

def valid_fn(model, loss_fn, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: Network returning logits.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: Iterable of dicts with 'x' (inputs) and 'y' (targets).
        device: Device the batches are moved to.

    Returns:
        A tuple ``(mean_batch_loss, predictions)`` where ``predictions`` is a
        NumPy array of sigmoid-activated outputs for all batches, in order.
    """
    model.eval()
    final_loss = 0
    valid_preds = []

    # Validation needs no gradients; disabling autograd avoids building a
    # graph for every batch (less memory and time, identical outputs).
    with torch.no_grad():
        for data in dataloader:
            inputs, targets = data['x'].to(device), data['y'].to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            final_loss += loss.item()
            valid_preds.append(outputs.sigmoid().cpu().numpy())

    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)

    return final_loss, valid_preds

def inference_fn(model, dataloader, device):
    """Return sigmoid-activated predictions for every batch in ``dataloader``.

    Batches are dicts with the inputs under 'x'. The model is put in eval
    mode and the per-batch results are concatenated into one NumPy array.
    """
    model.eval()
    chunks = []

    for batch in dataloader:
        features = batch['x'].to(device)
        with torch.no_grad():
            logits = model(features)
        probabilities = logits.sigmoid().detach().cpu().numpy()
        chunks.append(probabilities)

    return np.concatenate(chunks)


In [None]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are moved towards 0.5 by ``smoothing`` before the loss is taken:
    ``t * (1 - smoothing) + 0.5 * smoothing``.

    Args:
        weight: Optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: 'mean', 'sum' or 'none'.
        smoothing: Smoothing factor in ``[0, 1)``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The base class stores both `weight` and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        """Return the smoothed targets; ``n_labels`` is accepted but unused."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Return the smoothed BCE loss reduced according to ``self.reduction``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        # The reduction is passed to the functional call. Previously the call
        # used its default ('mean') and the later .sum()/.mean() acted on an
        # already-reduced scalar, so 'sum' and 'none' behaved like 'mean'.
        return F.binary_cross_entropy_with_logits(
            inputs, targets, self.weight, reduction=self.reduction)

In [None]:
class Model(nn.Module):
    """Three-layer fully connected network returning raw logits.

    Each stage is batch-norm -> (dropout) -> weight-normalised linear layer;
    the first two linear layers are followed by a leaky ReLU.
    """

    def __init__(self, num_features, num_targets, hidden_size):
        super().__init__()
        drop_rate = 0.26

        # Stage 1: input normalisation and projection to the hidden width.
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))

        # Stage 2: hidden -> hidden.
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(drop_rate)
        self.dense2 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))

        # Stage 3: hidden -> one logit per target.
        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(drop_rate)
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_size, num_targets))

    def forward(self, x):
        hidden = F.leaky_relu(self.dense1(self.batch_norm1(x)))

        hidden = self.dropout2(self.batch_norm2(hidden))
        hidden = F.leaky_relu(self.dense2(hidden))

        hidden = self.dropout3(self.batch_norm3(hidden))
        return self.dense3(hidden)
    

class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a smoothed one-hot target distribution.

    The true class receives ``1 - smoothing``; every other class receives
    ``smoothing / (classes - 1)``.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        super().__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        log_probs = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # Start from the off-class mass everywhere, then write the
            # confidence at each sample's class index.
            off_value = self.smoothing / (self.cls - 1)
            true_dist = torch.full_like(log_probs, off_value)
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        per_sample = torch.sum(-true_dist * log_probs, dim=self.dim)
        return torch.mean(per_sample)

In [None]:
#to change

# Model inputs: every column of `folds` that is neither a target column nor
# one of the bookkeeping columns ('kfold', 'sig_id').
feature_cols = [c for c in folds.columns if c not in target_cols]
feature_cols = [c for c in feature_cols if c not in ['kfold','sig_id']]
len(feature_cols)

In [None]:
# HyperParameters

DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS =25 #25
BATCH_SIZE = 128
# NOTE(review): run_training() passes this to Adam but also attaches a
# OneCycleLR scheduler with max_lr=1e-2 — confirm how much this value matters.
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
# Number of folds iterated by run_k_fold(); the 'kfold' column of `folds`
# must contain exactly these fold numbers.
NFOLDS = 7           
# Only consulted when EARLY_STOP is True.
EARLY_STOPPING_STEPS = 10
EARLY_STOP = False

# Network dimensions derived from the prepared column lists.
num_features=len(feature_cols)
num_targets=len(target_cols)
hidden_size=1300
print(num_features,num_targets)

In [None]:
len(train)

In [None]:
def run_training(fold, seed):
    """Train the network on one CV fold and predict the test set.

    Rows of the global ``folds`` frame with ``kfold == fold`` form the
    validation set; all other rows are used for training. The weights with
    the lowest validation loss are saved to ``FOLD{fold}_.pth`` and reloaded
    for the test-set prediction.

    Args:
        fold: Fold number used for validation.
        seed: Seed passed to ``seed_everything`` before anything else runs.

    Returns:
        A tuple ``(oof, predictions)``: ``oof`` has one row per training
        sample and is zero except for this fold's validation rows, which
        hold the predictions of the best epoch; ``predictions`` holds the
        test-set predictions of the best checkpoint.
    """
    seed_everything(seed)

    train = folds
    test_ = test

    # One mask serves both splits; val_idx keeps the positions of the
    # validation rows in the full frame so OOF predictions land in place.
    in_valid = train['kfold'] == fold
    val_idx = train[in_valid].index

    train_df = train[~in_valid].reset_index(drop=True)
    valid_df = train[in_valid].reset_index(drop=True)

    x_train, y_train = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid = valid_df[feature_cols].values, valid_df[target_cols].values

    train_dataset = MoADataset(x_train, y_train)
    valid_dataset = MoADataset(x_valid, y_valid)
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size,
    )

    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    # The scheduler is stepped once per batch inside train_fn, hence
    # steps_per_epoch=len(trainloader).
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3,
                                              max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))

    # Training uses lightly smoothed labels; validation is scored with the
    # plain BCE so the reported loss refers to the unsmoothed targets.
    loss_fn = nn.BCEWithLogitsLoss()
    loss_tr = SmoothBCEwLogits(smoothing =0.001)

    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0

    oof = np.zeros((len(train), target.shape[1]))
    best_loss = np.inf
    for epoch in range(EPOCHS):

        train_loss = train_fn(model, optimizer,scheduler, loss_tr, trainloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
        valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, valid_loss: {valid_loss}")

        if valid_loss < best_loss:
            # New best epoch: keep its validation predictions and weights.
            best_loss = valid_loss
            oof[val_idx] = valid_preds
            torch.save(model.state_dict(), f"FOLD{fold}_.pth")

        elif EARLY_STOP:
            # Counts every non-improving epoch; the counter is not reset
            # when a later epoch improves.
            early_step += 1
            if early_step >= early_stopping_steps:
                break

    #--------------------- PREDICTION---------------------
    x_test = test_[feature_cols].values
    testdataset = TestDataset(x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)

    # Fresh model loaded with the best checkpoint of this fold.
    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size,
    )

    model.load_state_dict(torch.load(f"FOLD{fold}_.pth"))
    model.to(DEVICE)

    predictions = inference_fn(model, testloader, DEVICE)

    return oof, predictions

In [None]:
def run_k_fold(NFOLDS, seed):
    """Train all ``NFOLDS`` folds for one seed and combine their outputs.

    Returns ``(oof, predictions)``: the fold-wise OOF arrays are summed (each
    fold fills only its own validation rows) and the test predictions are
    averaged over the folds.
    """
    n_targets = len(target_cols)
    oof = np.zeros((len(train), n_targets))
    predictions = np.zeros((len(test), n_targets))

    for fold in range(NFOLDS):
        fold_oof, fold_pred = run_training(fold, seed)
        predictions += fold_pred / NFOLDS
        oof += fold_oof

    return oof, predictions

In [None]:
# Averaging on multiple SEEDS

SEED = [0,1,2] #<-- Update


# Accumulators for the seed-averaged OOF and test predictions.
oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))
#SEED = [0,1,2,3,4,5,6]
for seed in SEED:
    
    # One full k-fold run per seed; each contributes an equal share.
    oof_, predictions_ = run_k_fold(NFOLDS, seed)
    oof += oof_ / len(SEED)
    predictions += predictions_ / len(SEED)

# The target columns of `train` are overwritten with the OOF predictions and
# the same columns are written to `test` with the test predictions. Training
# itself reads its labels from `folds`, which was copied from `train` earlier.
train[target_cols] = oof
test[target_cols] = predictions

In [None]:
oof.shape

In [None]:
df_train_targets[target_cols].shape

In [None]:
test[target_cols].shape

In [None]:
#valid_results = df_train_targets.drop(columns=target_cols).merge(train[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)
def log_loss_metric(y_true, y_pred):
    """Return the mean per-column log loss over the columns of df_train_targets.

    Both arguments are DataFrames indexed by those column names.
    """
    per_target = [
        log_loss(
            y_true.loc[:, name],
            y_pred.loc[:, name].astype(float),
            labels=[0, 1],
        )
        for name in df_train_targets.columns
    ]
    return np.mean(per_target)


# Ground truth comes from df_train_targets (the target columns of `train`
# now hold predictions); predictions are the seed-averaged OOF array.
y_true = df_train_targets[target_cols].values
y_pred = oof

# CV score: per-target log loss averaged over all targets.
score = 0
for i in range(len(target_cols)):
    # NOTE(review): unlike log_loss_metric, no labels=[0, 1] is passed here —
    # confirm every target column contains both classes.
    score_ = log_loss(y_true[:, i], y_pred[:, i])
    print(i,target_cols[i],score_ )
    score += score_ / target.shape[1]
    
print("CV log_loss: ", score)

In [None]:
# submit
# Take an explicit copy of the prediction columns so that adding 'sig_id'
# below does not assign into a slice of `test`.
test_pred = test[target_cols].copy()

# Identifiers of the non-control test rows, renumbered from zero so they line
# up with the rows of `test`.
sig_id = test_features[test_features['cp_type']!='ctl_vehicle'].sig_id.reset_index(drop=True)

test_pred['sig_id'] = sig_id

# Left-merge onto the full list of test ids: rows without a prediction (the
# control rows removed earlier) come out as NaN and are filled with 0.
sub = pd.merge(test_features[['sig_id']], test_pred, on='sig_id', how='left')
sub.fillna(0, inplace=True)

sub.to_csv('submission.csv', index=False)

In [None]:
sub.shape

In [None]:
sub