In [1]:
import sys
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import seaborn as sns
import pickle

from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from tqdm.notebook import tqdm
from notebook import notebookapp
import urllib
import json
import ipykernel
from pathlib import Path

from scipy.stats import norm
# from sklearn.naive_bayes import ComplementNB
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.kernel_ridge import KernelRidge
# from scipy.stats import ks_2samp
import pickle
import gc


import warnings
warnings.filterwarnings('ignore')

import bz2
import pickle
import _pickle as cPickle

pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

TEST_PRIVATE = False
TEST_LOCAL = False

### Functions

In [2]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds Python's
    ``random`` module, NumPy's global generator and PyTorch (CPU and CUDA),
    and asks cuDNN to pick deterministic algorithms.

    :param seed: integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The individual seeders are independent of each other, so one loop suffices.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    torch.backends.cudnn.deterministic = True

def log_loss_score(actual, predicted,  eps=1e-15):
    """
    Mean binary log loss for one target.

    :param actual:      The binary labels. Either 0 or 1.
    :param predicted:   The predicted probabilities as floats between 0-1
    :param eps:         Small offset added inside each logarithm so that a
                        probability of exactly 0 or 1 never produces log(0).
    :return:            The negated mean log-likelihood of the labels under
                        the predicted probabilities.
    """
    # Negative-class term first, then positive-class term (same summation
    # order as the element-wise p0 + p1 formulation).
    log_likelihood = (1 - actual) * np.log(1 - predicted + eps) + actual * np.log(predicted + eps)
    return -log_likelihood.mean()
    
def log_loss_multi(y_true, y_pred):
    """Average of the per-column log losses of a multi-label prediction.

    :param y_true: 2-D array of binary labels, one column per target.
    :param y_pred: 2-D array of predicted probabilities, indexed the same way.
    :return:       Unweighted mean of ``log_loss_score`` over the columns.
    """
    n_targets = y_true.shape[1]
    # Collected into a float64 buffer, matching an np.zeros-backed accumulator.
    per_target = np.fromiter(
        (log_loss_score(y_true[:, j], y_pred[:, j]) for j in range(n_targets)),
        dtype=float,
        count=n_targets,
    )
    return per_target.mean()

### DataBase

In [3]:
class CFG:
    """Notebook-wide switches and hyper-parameters, read as class attributes."""

    # --- cross-validation ---
    num_folds = 10

    # --- variance filter / quantile transforms ---
    use_var_enc = True
    variance_thres = 0.60
    quantile_transform = True
    use_rankgauss = True

    # --- positional selection of "top" features ---
    selec_top = True
    original_feats = True  # author's note: False removes the top features as well

    # --- PCA features ---
    use_pca = True
    pca_comp_genes = 29  # author's note: 30
    pca_comp_cells = 4   # author's note: 18

    # --- FastICA features ---
    use_fastica = False
    fastica_comp_genes = 10
    fastica_comp_cells = 5

    # --- optional model-derived features ---
    use_kridge = True
    use_xgb = False
    use_bayes = False


### Load Databases

In [4]:
# Load the competition CSVs and derive the base train/test frames used by
# the rest of the notebook.
seed_everything(seed=42)

train_features = pd.read_csv('../input/lish-moa/train_features.csv')
train_targets_scored = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
train_targets_nonscored = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')
train_drug = pd.read_csv('../input/lish-moa/train_drug.csv')

# Lookup from sig_id to drug_id.
drug_dict = dict(zip(train_drug['sig_id'],train_drug['drug_id']))

test_features = pd.read_csv('../input/lish-moa/test_features.csv')

if TEST_PRIVATE:
    # Quadruple the test set: the original rows plus three copies whose
    # sig_id carries an 'A', 'B' or 'C' suffix.
    # NOTE(review): presumably emulates the larger private test set — confirm.
    id1 = [test_features['sig_id'].values]
    for other_index in ['A', 'B', 'C']:
        id1.append(id1[0]+other_index)
    test_features = pd.concat([test_features, test_features, test_features, test_features],sort=False).reset_index(drop=True)
    test_features['sig_id'] = np.concatenate(id1)
    print(test_features.shape)

sample_submission = pd.read_csv('../input/lish-moa/sample_submission.csv')

# Features + scored targets (inner join on sig_id), then drug ids (left join).
train_orig = train_features.merge(train_targets_scored, on='sig_id')
train_orig = train_orig.merge(train_drug, on='sig_id', how='left')

# Split control-vehicle rows from the rest; cp_type is dropped afterwards and
# the index is reset so every frame has a fresh 0..n-1 index.
train_noctl = train_orig[train_orig['cp_type']!='ctl_vehicle'].drop('cp_type', axis=1).reset_index(drop=True)
train_ctl = train_orig[train_orig['cp_type']=='ctl_vehicle'].drop('cp_type', axis=1).reset_index(drop=True)

test_noctl = test_features[test_features['cp_type']!='ctl_vehicle'].drop('cp_type', axis=1).reset_index(drop=True)
test_ctl = test_features[test_features['cp_type']=='ctl_vehicle'].drop('cp_type', axis=1).reset_index(drop=True)

# Non-control rows restricted to the columns of the scored-targets file
# (this still includes sig_id).
target = train_noctl[train_targets_scored.columns]

# Column-name groups, identified by their 'g-' / 'c-' prefixes.
GENES = [col for col in train_features.columns if col.startswith('g-')]
CELLS = [col for col in train_features.columns if col.startswith('c-')]
target_cols = target.drop('sig_id', axis=1).columns.values.tolist()
# Names of engineered feature columns; starts empty and is extended later
# (e.g. by the PCA cell).
other_feats = []

# Fold assignment is disabled here, so `folds` has no 'kfold' column.
# folds = train_noctl.copy()
# mskf = MultilabelStratifiedKFold(n_splits=CFG.num_folds, shuffle=True, random_state=43)

# for f, (t_idx, v_idx) in enumerate(mskf.split(X=train_noctl, y=target)):
#     folds.loc[v_idx, 'kfold'] = int(f)

# folds['kfold'] = folds['kfold'].astype(int)

folds = train_noctl.copy()

print(train_orig.shape)
print(target.shape)
print(sample_submission.shape)
print(folds.shape, test_noctl.shape)

(23814, 1083)
(21948, 207)
(3982, 207)
(21948, 1082) (3624, 875)


### Create Kridge Features

In [5]:
# Rebuild the kernel-ridge ("kridge") meta-features for the test set from
# pre-trained models. The train-side features are read from a precomputed CSV.
min_num_ones = 20  # a target needs at least this many positive rows to get a kridge feature
folds_kridge = pd.read_csv('../input/kridgefeats25oct/folds.csv')
test_kridge = test_noctl[['sig_id']].copy()

colsk_selec = []
for col_target in tqdm(target_cols):
    name_feat = f'kridge_{col_target}'
    if folds[col_target].sum() < min_num_ones:
        continue
    colsk_selec.append(name_feat)

    # Each archive holds two consecutive pickles: the fitted model, then the
    # list of feature columns it was trained on.
    # NOTE(review): unpickling executes arbitrary code; only load archives
    # from a trusted dataset.
    with bz2.BZ2File(f'../input/kridgefeats25oct/feats__{name_feat}.pbz2','rb') as file:
        model = cPickle.load(file)
        feats_trn_uncorr = cPickle.load(file)

#         with open(f'../input/kridgefeats/model__{name_feat}__{col_target}.pkl', 'rb') as file:
#             model = pickle.load(file)
#             feats_trn_uncorr = pickle.load(file)

    # Standardise with statistics from the training rows, then apply the same
    # scaling to the test rows.
    scaler = StandardScaler()
    X_trn = scaler.fit_transform(folds[feats_trn_uncorr].astype(float).values)
    X_test = scaler.transform(test_noctl[feats_trn_uncorr].astype(float).values)

    # The stored model had its training matrix removed to save memory;
    # restore it so predict() can evaluate the kernel against it.
    model.X_fit_ = X_trn
    #     model = KernelRidge(alpha = alpha, kernel = 'rbf')
    #     model.fit(X_trn, y)
    test_kridge[name_feat] = model.predict(X_test)

kridge_cols = ['sig_id']+colsk_selec
print(folds_kridge[kridge_cols].shape, test_kridge[kridge_cols].shape)

# Check against the originally exported test features.
if TEST_LOCAL and not TEST_PRIVATE:
    test_feat_orig = pd.read_csv('../input/kridgefeats25oct/test.csv')
    # Sum of absolute differences: signed differences could cancel out and
    # hide a real mismatch.
    print(np.sum(np.abs(test_feat_orig[colsk_selec].values - test_kridge[colsk_selec].values)))
    del test_feat_orig
    gc.collect()

HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


(21948, 154) (3624, 154)


In [6]:
# min_num_ones = 20
# folds_kridge = pd.read_csv('../input/kridgefeats/folds.csv')
# test_kridge = test_noctl['sig_id'].copy()

# colsk_selec = []
# min_num_ones = 20
# for ncol, col_target in enumerate(tqdm(target_cols)):
#     name_feat = f'kridge_{col_target}'
#     if np.sum(folds[col_target].sum())>=min_num_ones:
#         colsk_selec.append(name_feat)
#         # Save all
#         with open(f'../input/kridgefeats/model__{name_feat}__{col_target}.pkl', 'rb') as file:
#             model = pickle.load(file)
#             feats_trn_uncorr = pickle.load(file)

#         # With Test DB
#         X_trn = folds[feats_trn_uncorr].astype(float).values
#         X_test = test_noctl[feats_trn_uncorr].astype(float).values
#         y = folds[col_target].astype(float).values 

#         scaler = StandardScaler()
#         X_trn = scaler.fit_transform(X_trn)
#         X_test = scaler.transform(X_test)

#     #     model = KernelRidge(alpha = alpha, kernel = 'rbf')
#     #     model.fit(X_trn, y)
#         test_kridge[name_feat] = model.predict(X_test)

# kridge_cols = ['sig_id']+colsk_selec
# print(folds_kridge[kridge_cols].shape, test_kridge[kridge_cols].shape)

# # Check against the original
# if False:
#     test_feat_orig = pd.read_csv('../input/kridgefeats/test.csv')
#     print(np.sum(test_feat_orig[colsk_selec].values - test_kridge[colsk_selec].values))
#     del test_feat_orig
#     gc.collect()

In [7]:
# Create KRIDGE Features
# def trimm_correlated(df_in, threshold):
#     df_corr = df_in.corr(method='pearson', min_periods=1)
#     df_not_correlated = ~(df_corr.mask(np.tril(np.ones([len(df_corr)]*2, dtype=bool))).abs() > threshold).any()
#     un_corr_idx = df_not_correlated.loc[df_not_correlated[df_not_correlated.index] == True].index
#     return un_corr_idx

# from scipy.stats import norm
# from sklearn.naive_bayes import ComplementNB
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import roc_auc_score
# from sklearn.preprocessing import MinMaxScaler, StandardScaler
# from sklearn.kernel_ridge import KernelRidge
# from scipy.stats import ks_2samp
# import pickle

# folds_feat = folds.copy()
# test_feat = test_noctl.copy()

# folds_feat['cp_dose'] = (folds_feat['cp_dose']=='D2').astype(float)
# folds_feat['cp_time'] /= 72.0

# test_feat['cp_dose'] = (test_feat['cp_dose']=='D2').astype(float)
# test_feat['cp_time'] /= 72.0

# # C_logistic = 0.01
# thr= 0.05 #p-value thr
# thr2 = 0.93 #Correlation Thr

# # for thr in [0.05]: #1, 0.03, 0.05, 0.07, 0.09]:
# #     for thr2 in [0.90, 0.93, 0.95, 0.97, 1.00]:
# res = []
# res_best = []
# features_models = []
# list_target_cols = []
# list_new_cols = []
# scaler = StandardScaler()
# list_AUC = []
# list_loss = []

# for ncol, col_target in enumerate(tqdm(target_cols)):
# #             if ncol<=50: #00:
#     list_target_cols.append(col_target)
#     name_feat = f'kridge_{col_target}'
#     list_new_cols.append(name_feat)

#     # Compare the distributions of positives and negatives with the Kolmogorov-Smirnov test
#     pvalues = []
#     for col_feat in GENES+CELLS:
#         values_orig = train_noctl[col_feat].values
#         target_orig = train_noctl[col_target].values
#         target = target_orig[values_orig> -4]
#         values = values_orig[values_orig> -4]
#         positive = values[target==1]
#         negative = values[target==0]
#         if len(positive)>0:
#             pvalue = ks_2samp(positive, negative)[1]
#         else:
#             pvalue = 1.0
#         pvalues.append(pvalue)
#     pvalues = np.array(pvalues)

#     # Search best model with different thresholds
#     best_auc = 0.0
#     best_loss = 999999.9
#     best_thr = 0.0 # thr=pvalue
#     best_thr2 = 0.0 #th2=correlation
#     best_lenfeats = 0
#     best_lenfeats_uncorr = 0
#     best_list_feats = []
#     alpha = 100
#     folds_feat[name_feat] = 0.0
#     feats_trn = np.array(GENES+CELLS)[pvalues<thr]
#     feats_trn_uncorr = trimm_correlated(folds_feat[feats_trn], thr2).tolist()
#     if len(feats_trn_uncorr)>0:
#         X_trn = folds_feat.loc[:, feats_trn_uncorr].astype(float).values
#         X_trn = scaler.fit_transform(X_trn)

#         for fold in range(CFG.num_folds):
#             X_trn_fold = X_trn[folds_feat['kfold']!=fold, :]
#             X_val_fold = X_trn[folds_feat['kfold']==fold, :]
#             y_trn_fold = folds_feat.loc[folds_feat['kfold']!=fold, col_target].astype(float).values
#             if np.sum(y_trn_fold)>10:
#                 model = KernelRidge(alpha = alpha, kernel = 'rbf')
# #                 model = LogisticRegression(C=C_logistic)
#                 model.fit(X_trn_fold, y_trn_fold)
# #                 folds_feat.loc[folds_feat['kfold']==fold, name_feat] = model.predict_proba(X_val_fold)[:,1]
#                 folds_feat.loc[folds_feat['kfold']==fold, name_feat] = model.predict(X_val_fold)

#         auc = roc_auc_score(folds_feat[col_target].values, folds_feat[name_feat].values)
#         list_AUC.append(auc)
#         loss = log_loss(folds_feat[col_target].values, folds_feat[name_feat].values)
#         list_loss.append(loss)
#         lenfeats = len(feats_trn)
#         lenfeats_uncorr = len(feats_trn_uncorr)
#         res.append(dict({'ncol':ncol, 'col':col_target, 'thr':thr, 'thr2':thr2, \
#                          'lenfeats':lenfeats, 'lenfeats_uncorr':lenfeats_uncorr, \
#                          'sumones':folds_feat[col_target].sum(), 'AUC':auc, 'Mean_AUC':np.mean(list_AUC), \
#                          'loss':loss, 'Mean_loss':np.mean(list_loss)}))
# #                         print(f'{ncol+1}/{len(target_cols)}: col={col_target} thr={thr} thr2={thr2} lenfeats={lenfeats} lenfeats_uncorr={lenfeats_uncorr} sum_ones={folds_feat[col_target].sum()} loss={loss} AUC={auc} Mean_AUC={np.mean(list_AUC)}')
#         if loss < best_loss:
#             best_auc = auc
#             best_loss = loss
#             best_thr = thr
#             best_thr2 = thr2
#             best_lenfeats = lenfeats
#             best_lenfeats_uncorr = lenfeats_uncorr
#             preds = folds_feat[name_feat].values
#             best_list_feats = feats_trn_uncorr

#         # Get best
#         loss = best_loss
#         auc = best_auc
#         thr = best_thr
#         thr2 = best_thr2
#         lenfeats = best_lenfeats
#         lenfeats_uncorr = best_lenfeats_uncorr
#         folds_feat[name_feat] = preds
#         feats_trn_uncorr = best_list_feats

#         # Provisional mlogloss
#         valid_results_score = train_targets_scored[['sig_id']].merge(folds_feat[['sig_id']+list_new_cols], on='sig_id', how='left').fillna(0)
#         y_true = train_targets_scored[list_target_cols].values
#         valid_results_score.columns = ['sig_id']+list_target_cols
#         y_pred = valid_results_score[list_target_cols].values
#         score = 0
#         for i in range(len(list_target_cols)):
#             score_ = log_loss(y_true[:, i], y_pred[:, i])
#             score += score_ 
#         score /= len(list_target_cols)
# #         print(C_logistic, thr, thr2, np.mean(list_AUC))
#         print(f'BEST {ncol+1}/{len(target_cols)}: col={col_target} thr={thr} thr2={thr2} lenfeats={lenfeats} lenfeats_uncorr={lenfeats_uncorr} sum_ones={folds_feat[col_target].sum()} loss={loss} AUC={auc} Mean_AUC={np.mean(list_AUC)} mlogloss={score}')
        
#         # With Test DB
#         X_trn = folds_feat[feats_trn_uncorr].astype(float).values
#         X_test = test_feat[feats_trn_uncorr].astype(float).values
#         y = folds_feat[col_target].astype(float).values 

#         scaler = StandardScaler()
#         X_trn = scaler.fit_transform(X_trn)
#         X_test = scaler.transform(X_test)

#         model = KernelRidge(alpha = alpha, kernel = 'rbf')
# #         model = LogisticRegression(C=C_logistic)
#         model.fit(X_trn, y)
# #         test_feat[name_feat] = model.predict_proba(X_test)[:,1]
#         test_feat[name_feat] = model.predict(X_test)

#         # Save all
#         with open(output_dir / f'model__{name_feat}__{col_target}.pkl', 'wb') as file:
#             pickle.dump(model, file)
#             pickle.dump(feats_trn_uncorr, file)
        
#         folds_feat.to_csv( output_dir / 'folds.csv',index=False)
#         test_feat.to_csv(output_dir / 'test.csv',index=False)
        
#         res_best.append(dict({'ncol':ncol, 'col':col_target, 'thr':thr, 'thr2':thr2, \
#                      'lenfeats':lenfeats, 'lenfeats_uncorr':lenfeats_uncorr, \
#                      'sumones':folds_feat[col_target].sum(), 'AUC':auc, 'loss':loss, 'mlogloss':score}))
#         res_df = pd.DataFrame(res_best)
#         res_df.to_csv(output_dir / 'res.csv',index=False)

### RankGauss

In [8]:
# Adapted from: https://www.kaggle.com/kushal1506/moa-pytorch-0-01859-rankgauss-pca-nn
# Work on copies so the untransformed frames stay available to other cells.
folds_cp = folds.copy()
test_noctl_cp = test_noctl.copy()

if CFG.use_rankgauss:
    for col in (GENES + CELLS):
        # Column vectors shaped (n_rows, 1), as the transformer expects.
        train_column = folds_cp[col].values.reshape(-1, 1)
        test_column = test_noctl_cp[col].values.reshape(-1, 1)

        # One transformer per column, fitted on the training rows only.
        transformer = QuantileTransformer(n_quantiles=100,random_state=42, output_distribution="normal")
        transformer.fit(train_column)

        folds_cp[col] = transformer.transform(train_column).ravel()
        test_noctl_cp[col] = transformer.transform(test_column).ravel()

### KMeans Features

In [9]:
from sklearn.cluster import KMeans
import gc

def create_cluster(train, test, features, kind = 'g', n_clusters = 35):
    """Append KMeans centroid-distance features to the train and test frames.

    The selected ``features`` are standardised with statistics from ``train``,
    a KMeans model is fitted on the scaled training rows, and the distance of
    every row to each centroid is appended as ``kmeans_{kind}_{i}`` columns.

    :param train:      training DataFrame; not modified in place.
    :param test:       test DataFrame; not modified in place.
    :param features:   column names fed to the scaler and to KMeans.
    :param kind:       tag embedded in the generated column names.
    :param n_clusters: number of centroids, i.e. number of columns added.
    :return:           ``(train, test)`` with the distance columns appended.
    """
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train[features])
    test_scaled = scaler.transform(test[features])

    model = KMeans(n_clusters = n_clusters, random_state = 123).fit(train_scaled)
    name_cols = [f'kmeans_{kind}_{i}' for i in range(n_clusters)]

    # Reuse the callers' indexes: a fresh RangeIndex would make the
    # column-wise concat misalign rows whenever the input index is not 0..n-1.
    train_dist = pd.DataFrame(model.transform(train_scaled), columns=name_cols, index=train.index)
    test_dist = pd.DataFrame(model.transform(test_scaled), columns=name_cols, index=test.index)

    train = pd.concat([train, train_dist], axis=1)
    test = pd.concat([test, test_dist], axis=1)
    print(train.shape, test.shape)
    return train, test
    
# Build the cluster-distance features on copies of the current frames,
# first for the gene columns and then for the cell columns.
folds_cp2, test_noctl_cp2 = folds.copy(), test_noctl.copy()
for cluster_kind, cluster_feats in (('g', GENES), ('c', CELLS)):
    folds_cp2, test_noctl_cp2 = create_cluster(folds_cp2, test_noctl_cp2, cluster_feats, kind = cluster_kind, n_clusters = 7)

# Keep only the columns from the first KMeans feature onwards.
first_kmeans_pos = list(folds_cp2.columns).index('kmeans_g_0')
kmeans_cols = list(folds_cp2.columns[first_kmeans_pos:])
folds_3models = folds_cp2[kmeans_cols]
test_noctl_3model = test_noctl_cp2[kmeans_cols]
del folds_cp2, test_noctl_cp2
gc.collect()

# train_features ,test_features=fe_cluster(train_features,test_features)

(21948, 1089) (3624, 882)
(21948, 1096) (3624, 889)


12

### PCA Features

In [10]:
if CFG.use_pca:
    seed_everything(seed=42)
    etiq = ['G','C']
    num_pca = [CFG.pca_comp_genes, CFG.pca_comp_cells]
    for tag, num_comp, cols in zip(etiq, num_pca, [GENES, CELLS]):
        # Register the new component names as engineered features.
        columns_pca = [f'pca_{tag}-{i}' for i in range(num_comp)]
        other_feats += columns_pca

        # Fit on the (rank-gauss) training copy only.
        pca = PCA(n_components=num_comp, random_state=42).fit(folds_cp[cols])

        # Train projection, appended column-wise to the working frame.
        train_pca = pd.DataFrame(pca.transform(folds_cp[cols]), columns=columns_pca)
        folds = pd.concat((folds, train_pca), axis=1)

        # Test projection, using the components fitted on train.
        test_pca = pd.DataFrame(pca.transform(test_noctl_cp[cols]), columns=columns_pca)
        test_noctl = pd.concat((test_noctl, test_pca), axis=1)
        print(folds.shape, test_noctl.shape)

(21948, 1111) (3624, 904)
(21948, 1115) (3624, 908)


In [11]:
# if CFG.use_pca:
#     seed_everything(seed=42)
#     etiq = ['G','C']
#     num_pca = [CFG.pca_comp_genes, CFG.pca_comp_cells]
#     for niter, cols in enumerate([GENES, CELLS]):
#         # PCA for train with folds
#         train_pca = []
#         train_pca_sig_id = []
#         num_comp = num_pca[niter]
#         columns_pca = [f'pca_{etiq[niter]}-{i}' for i in range(num_comp)]
#         other_feats += columns_pca
#         for fold in tqdm(range(CFG.num_folds)):
#             pca = PCA(n_components=num_comp, random_state=42).fit(folds.loc[folds['kfold']!=fold, cols])
#             train_pca.append(pca.transform(folds.loc[folds['kfold']==fold, cols]))
#             train_pca_sig_id.append(folds.loc[folds['kfold']==fold, 'sig_id'])
#         train_pca = np.concatenate(train_pca)
#         train_pca = pd.DataFrame(train_pca, columns=columns_pca)
#         train_pca['sig_id'] = np.concatenate(train_pca_sig_id)
#         folds = pd.merge(folds, train_pca, on='sig_id')

#         # PCA for test
#         pca = PCA(n_components=num_comp, random_state=42).fit(folds[cols])
#         test_pca = pca.transform(test_noctl[cols])
#         test_pca = pd.DataFrame(test_pca, columns=columns_pca)
#         test_noctl = pd.concat((test_noctl, test_pca), axis=1)
#         print(folds.shape, test_noctl.shape)

### Select Top Features

In [12]:
# from sklearn.utils import check_random_state  # type: ignore

# ### from eli5
# def iter_shuffled(X, columns_to_shuffle=None, pre_shuffle=False, random_state=None):
#     rng = check_random_state(random_state)

#     if columns_to_shuffle is None:
#         columns_to_shuffle = range(X.shape[1])

#     if pre_shuffle:
#         X_shuffled = X.copy()
#         rng.shuffle(X_shuffled)

#     X_res = X.copy()
#     for columns in tqdm(columns_to_shuffle):
#         if pre_shuffle:
#             X_res[:, columns] = X_shuffled[:, columns]
#         else:
#             rng.shuffle(X_res[:, columns])
#         yield X_res
#         X_res[:, columns] = X[:, columns]

# def get_score_importances(
#         score_func,  # type: Callable[[Any, Any], float]
#         X,
#         y,
#         n_iter=5,  # type: int
#         columns_to_shuffle=None,
#         random_state=None
#     ):
#     rng = check_random_state(random_state)
#     base_score = score_func(X, y)
#     scores_decreases = []
#     for i in range(n_iter):
#         scores_shuffled = _get_scores_shufled(
#             score_func, X, y, columns_to_shuffle=columns_to_shuffle,
#             random_state=rng, base_score=base_score
#         )
#         scores_decreases.append(scores_shuffled)

#     return base_score, scores_decreases

# def _get_scores_shufled(score_func, X, y, base_score, columns_to_shuffle=None,
#                         random_state=None):
#     Xs = iter_shuffled(X, columns_to_shuffle, random_state=random_state)
#     res = []
#     for X_shuffled in Xs:
#         res.append(-score_func(X_shuffled, y) + base_score)
#     return res

# def metric(y_true, y_pred):
#     metrics = []
#     for i in range(y_pred.shape[1]):
#         if y_true[:, i].sum() > 1:
#             metrics.append(log_loss(y_true[:, i], y_pred[:, i].astype(float)))
#     return np.mean(metrics)   

# perm_imp = np.zeros(train.shape[1])
# all_res = []
# for n, (tr, te) in enumerate(KFold(n_splits=7, random_state=0, shuffle=True).split(train_targets)):
#     print(f'Fold {n}')

#     model = create_model(len(train.columns))
#     checkpoint_path = f'repeat:{seed}_Fold:{n}.hdf5'
#     reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-4, mode='min')
#     cb_checkpt = ModelCheckpoint(checkpoint_path, monitor = 'val_loss', verbose = 0, save_best_only = True,
#                                      save_weights_only = True, mode = 'min')
#     model.fit(train.values[tr],
#                   train_targets.values[tr],
#                   validation_data=(train.values[te], train_targets.values[te]),
#                   epochs=35, batch_size=128,
#                   callbacks=[reduce_lr_loss, cb_checkpt], verbose=2
#                  )
        
#     model.load_weights(checkpoint_path)
        
#     def _score(X, y):
#         pred = model.predict(X)
#         return metric(y, pred)

#     base_score, local_imp = get_score_importances(_score, train.values[te], train_targets.values[te], n_iter=1, random_state=0)
#     all_res.append(local_imp)
#     perm_imp += np.mean(local_imp, axis=0)
#     print('')
    
# top_feats = np.argwhere(perm_imp < 0).flatten()
# top_feats


In [13]:
if CFG.selec_top:
    seed_everything(seed=42)
    # https://www.kaggle.com/simakov/keras-multilabel-neural-network-v1-2/data
    top_feats = [  1,   2,   3,   4,   5,   6,   7,   9,  11,  14,  15,  16,  17,
            18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  29,  30,  31,
            32,  33,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  46,
            47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  58,  59,  60,
            61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,
            74,  75,  76,  78,  79,  80,  81,  82,  83,  84,  86,  87,  88,
            89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101,
           102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
           115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 127, 128,
           129, 130, 131, 132, 133, 136, 137, 138, 139, 140, 141, 142, 143,
           144, 145, 146, 147, 149, 150, 151, 152, 153, 154, 155, 156, 157,
           158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
           171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
           184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 197,
           198, 199, 200, 202, 203, 204, 205, 206, 208, 209, 210, 211, 212,
           213, 214, 215, 216, 217, 218, 219, 220, 221, 223, 224, 225, 226,
           227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
           240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
           254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266,
           267, 268, 269, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280,
           281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 294,
           295, 296, 298, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309,
           310, 311, 312, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323,
           324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
           337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349,
           350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362,
           363, 364, 365, 366, 367, 368, 369, 370, 371, 374, 375, 376, 377,
           378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 390, 391,
           392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
           405, 406, 407, 408, 409, 411, 412, 413, 414, 415, 416, 417, 418,
           419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431,
           432, 434, 435, 436, 437, 438, 439, 440, 442, 443, 444, 445, 446,
           447, 448, 449, 450, 453, 454, 456, 457, 458, 459, 460, 461, 462,
           463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475,
           476, 477, 478, 479, 481, 482, 483, 484, 485, 486, 487, 488, 489,
           490, 491, 492, 493, 494, 495, 496, 498, 500, 501, 502, 503, 505,
           506, 507, 509, 510, 511, 512, 513, 514, 515, 518, 519, 520, 521,
           522, 523, 524, 525, 526, 527, 528, 530, 531, 532, 534, 535, 536,
           538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 549, 550, 551,
           552, 554, 557, 559, 560, 561, 562, 565, 566, 567, 568, 569, 570,
           571, 572, 573, 574, 575, 577, 578, 580, 581, 582, 583, 584, 585,
           586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 599,
           600, 601, 602, 606, 607, 608, 609, 611, 612, 613, 615, 616, 617,
           618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630,
           631, 632, 633, 634, 635, 636, 637, 638, 639, 641, 642, 643, 644,
           645, 646, 647, 648, 649, 650, 651, 652, 654, 655, 656, 658, 659,
           660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672,
           673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685,
           686, 687, 688, 689, 691, 692, 693, 694, 695, 696, 697, 699, 700,
           701, 702, 704, 705, 707, 708, 709, 710, 711, 713, 714, 716, 717,
           718, 720, 721, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732,
           733, 734, 735, 737, 738, 739, 740, 742, 743, 744, 745, 746, 747,
           748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 759, 760, 761,
           762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774,
           775, 776, 777, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788,
           789, 790, 792, 793, 794, 795, 796, 797, 798, 800, 801, 802, 803,
           804, 805, 806, 808, 809, 811, 813, 814, 815, 816, 817, 818, 819,
           821, 822, 823, 825, 826, 827, 828, 829, 830, 831, 832, 834, 835,
           837, 838, 839, 840, 841, 842, 845, 846, 847, 848, 850, 851, 852,
           854, 855, 856, 858, 859, 860, 861, 862, 864, 866, 867, 868, 869,
           870, 871, 872, 873, 874]
    # print(len(top_feats))
    # top_feats holds positions into this column layout:
    # cp_type, cp_time, cp_dose, followed by every gene and cell column.
    column_layout = np.array(['cp_type','cp_time', 'cp_dose']+GENES+CELLS)
    selected_features = column_layout[top_feats].tolist()
    # Narrow the gene / cell lists down to the selected columns.
    GENES = [name for name in selected_features if 'g-' in name]
    CELLS = [name for name in selected_features if 'c-' in name]
    numeric_cols = other_feats + GENES + CELLS
    # Both frames share the same leading columns; only train carries targets.
    leading_cols = ['sig_id','cp_time','cp_dose']
    folds = folds[leading_cols+numeric_cols+target_cols]
    test_noctl = test_noctl[leading_cols+numeric_cols]
    print(folds.shape, test_noctl.shape)

(21948, 1025) (3624, 819)


In [14]:
# Snapshot of both frames taken before the variance filter further down is applied.
folds_novar_enc = folds.copy()
test_noctl_novar_enc = test_noctl.copy()

### Reduce Dataset using Variance

In [15]:
class VarianceThreshold:
    """Drop continuous columns whose sample variance does not exceed a threshold.

    Note: this definition shadows the ``VarianceThreshold`` imported from
    ``sklearn.feature_selection`` and has a different interface
    (``fit``/``fit_transform`` take the list of candidate columns).
    """

    def __init__(self, threshold):
        """:param threshold: columns are kept only if their variance is strictly greater."""
        self.threshold = threshold

    def fit(self, df, cont_cols):
        """Compute per-column variances on ``df`` and decide what to drop.

        :param df:        DataFrame the variances are computed from.
        :param cont_cols: names of the candidate columns in ``df``.

        Sets ``var`` (variance Series), ``index`` / ``validcols`` (kept
        columns) and ``dropcols`` (rejected columns).
        """
        self.cont_cols = cont_cols
        # Use the frame that was passed in, not a notebook-level global.
        self.var = df[cont_cols].var()
        good_cols = self.var[self.var > self.threshold]
        self.index = good_cols.index.to_list()
        # Build the membership set once instead of once per candidate column.
        kept = set(self.index)
        self.dropcols = [x for x in cont_cols if x not in kept]
        self.validcols = [x for x in cont_cols if x in kept]

    def transform(self, df):
        """Return ``df`` without the columns rejected by ``fit``."""
        return df.drop(self.dropcols, axis=1)

    def fit_transform(self, df, cont_cols):
        """Fit on ``df`` and return ``(filtered_df, kept_column_names)``."""
        self.fit(df, cont_cols)
        return self.transform(df), self.validcols

In [16]:
# Candidate continuous columns: everything in the test frame except the
# id / treatment columns.
cont_cols_ini = [i for i in test_noctl.columns if i not in ['sig_id', 'cp_time', 'cp_dose']]
if CFG.use_var_enc:
    print('Variance Threshold:', CFG.variance_thres)
    print(folds.shape, test_noctl.shape)
    VarThres = VarianceThreshold(CFG.variance_thres)
    # The filter is fitted on the training frame and then applied to test.
    folds, cont_cols = VarThres.fit_transform(folds, cont_cols_ini)
    test_noctl = VarThres.transform(test_noctl)
    print(folds.shape, test_noctl.shape)
else:
    # Later cells use `cont_cols` unconditionally; without the filter every
    # candidate column is kept.
    cont_cols = cont_cols_ini

Variance Threshold: 0.6
(21948, 1025) (3624, 819)
(21948, 1011) (3624, 805)


### Include kridge features, excluding targets with a low number of ones

In [17]:
if CFG.use_kridge:
    # Inner-join the kridge feature columns onto the filtered frames ...
    folds = folds.merge(folds_kridge[kridge_cols], on='sig_id')
    test_noctl = test_noctl.merge(test_kridge[kridge_cols], on='sig_id')
    print(folds.shape, test_noctl.shape)

    # ... and onto the snapshots taken before the variance filter.
    folds_novar_enc = folds_novar_enc.merge(folds_kridge[kridge_cols], on='sig_id')
    test_noctl_novar_enc = test_noctl_novar_enc.merge(test_kridge[kridge_cols], on='sig_id')

    # The source frames are no longer needed; release them.
    del folds_kridge, test_kridge
    gc.collect()
    
# #     seed_everything(seed=42)
# #     folds_feat = pd.read_csv('../input/folds_feats.csv')
# #     test_feat = pd.read_csv('../input/test_feat.csv')
# #     kridge_cols = ['sig_id']+[col for col in folds_feat if 'kridge' in col]
# #     folds = pd.merge(folds, folds_feat[kridge_cols], on='sig_id')
# #     test_noctl = pd.merge(test_noctl, test_feat[kridge_cols], on='sig_id')
# #     print(folds.shape, test_noctl.shape)
    
#     min_num_ones = 20
#     folds_feat = pd.read_csv('../results/_29_Create_KRIDGE_feats_16oct_e30/folds.csv')
#     test_feat = pd.read_csv('../results/_29_Create_KRIDGE_feats_16oct_e30/test.csv')
    
#     colsk = [col.replace('kridge_','') for col in folds_feat if 'kridge' in col]
#     colsk_selec = []
#     target_selec = []
#     for colk in colsk:
#         if np.sum(folds[colk].sum())>=min_num_ones:
#             colsk_selec += ['kridge_'+colk]
#             target_selec += [colk]
#     kridge_cols = ['sig_id']+colsk_selec

#     folds = pd.merge(folds, folds_feat[kridge_cols], on='sig_id')
#     test_noctl = pd.merge(test_noctl, test_feat[kridge_cols], on='sig_id')
#     print(len(colsk), len(colsk_selec), folds.shape, test_noctl.shape)
    
#     kridge_cols = [col for col in folds if 'kridge_' in col]
# #     print('logloss=',log_loss_multi(folds[target_selec].values, folds[kridge_cols].values))    

(21948, 1164) (3624, 958)


### Quantile Transform of Features


In [18]:
if CFG.quantile_transform:
    # Fit the quantile mapping on the training rows, then reuse it for test.
    qt = QuantileTransformer(output_distribution='normal')
    train_gauss = qt.fit_transform(folds[cont_cols])
    test_gauss = qt.transform(test_noctl[cont_cols])
    folds[cont_cols] = train_gauss
    test_noctl[cont_cols] = test_gauss

# 1. ANN

### Training Functions

In [19]:
class MoADataset:
    """Map-style dataset that pairs each feature row with its target row.

    ``features`` and ``targets`` must support ``arr[i, :]`` indexing, and
    ``features`` must expose ``.shape``; the dataset length is the number of
    feature rows.
    """

    def __init__(self, features, targets):
        self.features, self.targets = features, targets

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        # Rows are converted to float32 tensors lazily, one sample at a time.
        feature_row = torch.tensor(self.features[idx, :], dtype=torch.float)
        target_row = torch.tensor(self.targets[idx, :], dtype=torch.float)
        return {'x': feature_row, 'y': target_row}
    
class TestDataset:
    """Map-style dataset over feature rows only (no targets).

    ``features`` must support ``arr[i, :]`` indexing and expose ``.shape``.
    """

    def __init__(self, features):
        self.features = features

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        # Each sample is a single-key dict so batches look like the training ones.
        feature_row = torch.tensor(self.features[idx, :], dtype=torch.float)
        return {'x': feature_row}
    

In [20]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5 before the loss is computed:
    ``t_smooth = t * (1 - smoothing) + 0.5 * smoothing``.

    Args:
        weight: optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``'mean'`` (default), ``'sum'``; any other value
            (e.g. ``'none'``) returns the unreduced element-wise loss.
        smoothing: smoothing factor, must satisfy ``0 <= smoothing < 1``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # _WeightedLoss already stores `weight` (as a buffer) and `reduction`,
        # so only the smoothing factor has to be kept here.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        """Return the smoothed targets.

        ``n_labels`` is accepted for interface compatibility but is not used.
        """
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE-with-logits loss for ``inputs`` vs ``targets``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        # Request the element-wise loss explicitly. The functional defaults to
        # reduction='mean', which would collapse the loss to a scalar and make
        # the reduction selected on this module a no-op.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight,
                                                  reduction='none')

        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'mean':
            return loss.mean()
        return loss

In [21]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Run one training pass of ``model`` over ``dataloader``.

    Every scheduler except ``ReduceLROnPlateau`` is stepped once per batch;
    a ``ReduceLROnPlateau`` scheduler is left untouched here.

    Returns:
        (mean of the per-batch ``loss_fn`` values,
         ``log_loss_multi`` between all targets and the sigmoid of all outputs)
    """
    model.train()
    step_every_batch = scheduler.__class__ != torch.optim.lr_scheduler.ReduceLROnPlateau
    loss_sum = 0
    target_chunks, prob_chunks = [], []

    for batch in dataloader:
        optimizer.zero_grad()
        inputs = batch['x'].to(device)
        targets = batch['y'].to(device)
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)
        batch_loss.backward()
        optimizer.step()
        if step_every_batch:
            scheduler.step()

        loss_sum += batch_loss.item()
        target_chunks.append(targets.detach().cpu().numpy())
        prob_chunks.append(logits.sigmoid().detach().cpu().numpy())

    all_targets = np.concatenate(target_chunks)
    all_probs = np.concatenate(prob_chunks)

    control_loss = log_loss_multi(all_targets, all_probs)
    return loss_sum / len(dataloader), control_loss

def valid_fn(model, loss_fn, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` without updating it.

    Returns:
        (mean of the per-batch ``loss_fn`` values,
         ``log_loss_multi`` between all targets and the sigmoid of all outputs,
         stacked targets as a NumPy array,
         stacked sigmoid predictions as a NumPy array)
    """
    model.eval()
    final_loss = 0
    y_true = []
    y_pred = []
    # Nothing is back-propagated during validation, so disable autograd to
    # avoid building (and holding in memory) a graph for every batch.
    with torch.no_grad():
        for data in dataloader:
            inputs, targets = data['x'].to(device), data['y'].to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            final_loss += loss.item()
            y_true.append(targets.cpu().detach().numpy())
            y_pred.append(outputs.sigmoid().cpu().detach().numpy())

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)
    control_loss = log_loss_multi(y_true, y_pred)
    final_loss /= len(dataloader)
    return final_loss, control_loss, y_true, y_pred

def inference_fn(model, dataloader, device):
    """Return the sigmoid of ``model``'s outputs for every batch, stacked row-wise.

    The model is switched to eval mode and run without gradient tracking.
    """
    model.eval()
    prob_chunks = []
    with torch.no_grad():
        for batch in dataloader:
            logits = model(batch['x'].to(device))
            prob_chunks.append(torch.sigmoid(logits).detach().cpu().numpy())
    return np.concatenate(prob_chunks)

### Model

In [22]:
class Model(nn.Module):
    """Feed-forward network with three BatchNorm -> Dropout -> Linear stages.

    All three linear layers are wrapped in ``nn.utils.weight_norm``.

    The dropout probabilities are not constructor arguments: they are read
    from the module-level names ``drop1_feat``, ``drop2_feat`` and
    ``drop3_feat`` when an instance is created, so those names must be defined
    before the model is instantiated.

    Args:
        num_features: width of the input vector.
        num_targets: width of the output vector.
        hidden_size: width of both hidden layers.
    """
    def __init__(self, num_features, num_targets, hidden_size):
        super(Model, self).__init__()
        # Stage 1: input -> hidden
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(drop1_feat) # probability comes from the global drop1_feat
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))
        
        # Stage 2: hidden -> hidden
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(drop2_feat) # probability comes from the global drop2_feat
        self.dense2 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))
        
        # Stage 3: hidden -> targets
        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(drop3_feat) # probability comes from the global drop3_feat
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_size, num_targets))
    
    def forward(self, x):
        """Return the output of the last linear layer; no final activation is applied here."""
        x = self.batch_norm1(x)
        x = self.dropout1(x)
#         x = F.relu(self.dense1(x))
        x = F.leaky_relu(self.dense1(x))
        
        x = self.batch_norm2(x)
        x = self.dropout2(x)
#         x = F.relu(self.dense2(x))
        x = F.leaky_relu(self.dense2(x))
        
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        # Raw outputs: the train/valid/inference helpers apply the sigmoid themselves.
        x = self.dense3(x)
        
        return x

### Add Kmeans Data

In [23]:
# Append the extra columns held in folds_3models / test_noctl_3model to the
# train and test frames (column-wise concat).
# NOTE(review): pd.concat(axis=1) aligns on the index - assumes each pair of
# frames shares the same row index; confirm upstream.
folds = pd.concat((folds, folds_3models), axis=1)
test_noctl = pd.concat((test_noctl, test_noctl_3model), axis=1)
print(folds.shape, test_noctl.shape)
# The source frames are no longer needed once merged; release their memory.
del folds_3models, test_noctl_3model
gc.collect()

(21948, 1178) (3624, 972)


40

### Preprocessing Steps

In [24]:
def process_data(data):
    """Return a copy of ``data`` with ``cp_time`` and ``cp_dose`` one-hot encoded.

    The two source columns are replaced by indicator columns named
    ``<column>_<value>``; the input frame itself is not modified.
    """
    categorical_cols = ['cp_time', 'cp_dose']
    return pd.get_dummies(data, columns=categorical_cols)

In [25]:
# One-hot encode cp_time / cp_dose in both the train and the test frame.
folds = process_data(folds)
test_noctl = process_data(test_noctl)

# Candidate features: every column of `folds` that is not a target column.
feature_cols = [c for c in folds.columns if c not in target_cols]
# When CFG.original_feats is off, also drop the columns listed in GENES and CELLS.
if not CFG.original_feats: feature_cols = [c for c in feature_cols if c not in GENES+CELLS]
# Final model inputs: the candidates minus the bookkeeping columns 'kfold' and 'sig_id'.
feature_cols_ini = [c for c in feature_cols if c not in ['kfold','sig_id']]#, 'cp_time_24', 'cp_time_48', 'cp_time_72', 'cp_dose_D1', 'cp_dose_D2','drug_id']] #,'cp_dose','cp_time']]
len(feature_cols_ini)

974

In [26]:
# Alternative feature list: additionally excludes the one-hot cp_time / cp_dose
# columns and 'drug_id', then every column whose name contains 'kmeans_'.
feature_cols_ini_nocps = [c for c in feature_cols if c not in ['kfold','sig_id', 'cp_time_24', 'cp_time_48', 'cp_time_72', 'cp_dose_D1', 'cp_dose_D2','drug_id']]
feature_cols_ini_nocps = [c for c in feature_cols_ini_nocps if 'kmeans_' not in c]
len(feature_cols_ini_nocps)

955

In [27]:
feature_cols_ini

['pca_G-0',
 'pca_G-1',
 'pca_G-2',
 'pca_G-3',
 'pca_G-4',
 'pca_G-5',
 'pca_G-6',
 'pca_G-7',
 'pca_G-8',
 'pca_G-9',
 'pca_G-10',
 'pca_G-11',
 'pca_G-12',
 'pca_G-13',
 'pca_G-14',
 'pca_G-15',
 'pca_G-16',
 'pca_G-17',
 'pca_G-18',
 'pca_G-19',
 'pca_G-20',
 'pca_G-21',
 'pca_G-22',
 'pca_G-23',
 'pca_G-24',
 'pca_G-25',
 'pca_G-26',
 'pca_G-27',
 'pca_G-28',
 'pca_C-0',
 'pca_C-1',
 'pca_C-2',
 'pca_C-3',
 'g-0',
 'g-1',
 'g-2',
 'g-3',
 'g-4',
 'g-6',
 'g-8',
 'g-11',
 'g-12',
 'g-13',
 'g-14',
 'g-16',
 'g-17',
 'g-19',
 'g-20',
 'g-21',
 'g-22',
 'g-24',
 'g-26',
 'g-27',
 'g-28',
 'g-29',
 'g-30',
 'g-32',
 'g-33',
 'g-34',
 'g-35',
 'g-36',
 'g-37',
 'g-38',
 'g-39',
 'g-40',
 'g-41',
 'g-43',
 'g-44',
 'g-45',
 'g-46',
 'g-47',
 'g-48',
 'g-49',
 'g-50',
 'g-51',
 'g-52',
 'g-53',
 'g-55',
 'g-56',
 'g-57',
 'g-58',
 'g-59',
 'g-60',
 'g-61',
 'g-62',
 'g-63',
 'g-64',
 'g-65',
 'g-66',
 'g-67',
 'g-68',
 'g-69',
 'g-70',
 'g-71',
 'g-72',
 'g-73',
 'g-75',
 'g-76',
 'g-77'

In [28]:
# Standardize every column in feature_cols_ini to zero mean / unit variance.
# The mean and std are computed on the training folds only and re-used for the
# test rows; constant columns (std == 0) get a small divisor (1e-5) so the
# division stays defined.
# NOTE(review): both frames are overwritten in place, so re-running this cell
# standardizes data that has already been standardized.
for colname in feature_cols_ini:
    valor_fold = folds[colname].values
    valor_tst = test_noctl[colname].values
    mean_v = np.mean(valor_fold)
    std_v = np.std(valor_fold)  # population std (NumPy default, ddof=0)
    if std_v==0:
        std_v=1e-5
    folds[colname] = (valor_fold-mean_v)/std_v
    test_noctl[colname] = (valor_tst-mean_v)/std_v
    print(colname, mean_v, std_v)

pca_G-0 -3.165934989349514e-05 1.000022942477717
pca_G-1 2.319063523304048e-05 0.9995899673134725
pca_G-2 -5.652831749957289e-05 0.9998962469475887
pca_G-3 0.0001490332009944081 1.0001572798115808
pca_G-4 3.974275730490028e-05 0.9997665499591255
pca_G-5 7.361036404477721e-05 0.9997785695567266
pca_G-6 -3.722530770014762e-05 0.999715481686792
pca_G-7 -4.176801412012035e-05 1.000241038168113
pca_G-8 -3.368941635790669e-05 0.9998038447404771
pca_G-9 5.615480974268078e-05 0.9997265629134643
pca_G-10 -9.040019011924138e-05 0.9996772931738682
pca_G-11 -6.078795307598129e-05 0.9997559892025472
pca_G-12 -5.8613588758447095e-05 0.9994258399589678
pca_G-13 -9.893185187003101e-05 0.9997849369040596
pca_G-14 -6.201536405669977e-05 0.9997168847993001
pca_G-15 -8.278464554968567e-05 0.999498292430778
pca_G-16 -3.8333825188847665e-06 1.0007720562770313
pca_G-17 -1.2060393554003437e-05 0.9995915837385392
pca_G-18 -6.393268281031218e-05 1.0000080139572307
pca_G-19 4.308745677197324e-05 0.99993752822325

### Train

In [29]:
# def run_training(fold, seed, display=2):
#     seed_everything(seed)
#     train = folds.copy()
#     test_ = test_noctl.copy()
    
#     trn_idx = train[train['kfold'] != fold].index
#     val_idx = train[train['kfold'] == fold].index
    
#     train_df = train[train['kfold'] != fold].reset_index(drop=True)
#     valid_df = train[train['kfold'] == fold].reset_index(drop=True)
    
#     x_train, y_train  = train_df[feature_cols].values, train_df[target_cols].values
#     x_valid, y_valid =  valid_df[feature_cols].values, valid_df[target_cols].values
    
#     train_dataset = MoADataset(x_train, y_train)
#     valid_dataset = MoADataset(x_valid, y_valid)
#     trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
#     validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
#     model = Model(
#         num_features=num_features,
#         num_targets=num_targets,
#         hidden_size=hidden_size,
#     )
#     model.to(DEVICE)
    
#     optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
#     scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
#                                               max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))
# #     scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,patience=3)
    
# #     loss_fn = nn.BCEWithLogitsLoss()
#     loss_val = nn.BCEWithLogitsLoss()
#     loss_tr = SmoothBCEwLogits(smoothing =0.001)
    
#     early_stopping_steps = EARLY_STOPPING_STEPS
#     early_step = 0
    
#     oof = np.zeros((len(train), num_targets))
#     best_loss = np.inf
#     best_epoch = 0
#     res = []
#     for epoch in range(EPOCHS):
        
#         train_loss, mloss_train = train_fn(model, optimizer, scheduler, loss_tr, trainloader, DEVICE)
                
# #         print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
#         valid_loss, mloss_valid, y_true, valid_preds = valid_fn(model, loss_val, validloader, DEVICE)
    
#         if mloss_valid < best_loss:
#             best_loss = mloss_valid
#             best_epoch = epoch
#             oof[val_idx] = valid_preds
#             torch.save(model.state_dict(), output_dir / f'ann_model_seed{seed}_fold__{fold}.pth')
            
#         elif(EARLY_STOP == True):
#             early_step += 1
#             if (early_step >= early_stopping_steps):
#                 break
#         if scheduler.__class__ !=  torch.optim.lr_scheduler.ReduceLROnPlateau:
#             lr = scheduler.get_last_lr()
#         else:
#             lr = [0.0]
        
#         # Save results
#         # ------------
#         res.append(dict({'epoch':epoch, 'lr':lr[0], 'trn_loss':mloss_train, 'val_loss':mloss_valid, 'best_epoch':best_epoch, 'best_loss':best_loss}))
        
#         res_df = pd.DataFrame(res)
#         res_df.to_csv(output_dir / f'res_seed{seed}_fold{fold}_.csv')
        
#         fig, ax = plt.subplots(nrows=1, ncols=1 )
#         min_val = res_df[['trn_loss','val_loss']].min().min()
#         ax.plot(res_df['trn_loss'])
#         ax.plot(res_df['val_loss'])
#         plt.ylim((min_val,0.020))
#         plt.title(f"logloss in fold={fold} min={res_df['val_loss'].min()}")
#         fig.savefig(output_dir / f'loss_seed{seed}_fold{fold}_.png')
#         plt.close(fig)
        
#         fig, ax = plt.subplots(nrows=1, ncols=1 )
#         ax.plot(res_df['lr'])
#         plt.title(f"lr in fold={fold}")
#         fig.savefig(output_dir / f'lr_seed{seed}_fold{fold}_.png')
#         plt.close(fig)
        
#         if display==2:
#             print(f"SEED:{seed} FOLD:{fold}, EPOCH:{epoch:2d}, lr:{lr[0]:.9f} trn_loss:{mloss_train:.6f}, val_loss:{mloss_valid:.6f}, best_epoch:{best_epoch}, best_loss:{best_loss:.6f}")

#     if display==1:
#         print(f"SEED:{seed} FOLD:{fold}, EPOCH:{epoch:2d}, lr:{lr[0]:.9f} trn_loss:{mloss_train:.6f}, val_loss:{mloss_valid:.6f}, best_epoch:{best_epoch}, best_loss:{best_loss:.6f}")
        
    
#     #--------------------- PREDICTION---------------------
#     x_test = test_[feature_cols].values
#     testdataset = TestDataset(x_test)
#     testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)
    
#     model = Model(
#         num_features=num_features,
#         num_targets=num_targets,
#         hidden_size=hidden_size,
#     )
    
#     model.load_state_dict(torch.load(output_dir / f'ann_model_seed{seed}_fold__{fold}.pth'))
#     model.to(DEVICE)
    
#     predictions_tst = np.zeros((len(test_), target.iloc[:, 1:].shape[1]))
#     predictions_tst = inference_fn(model, testloader, DEVICE)
    
#     return oof, predictions_tst, best_loss


In [30]:
def run_inference_ANN(fold, seedfold, seedrun, display=2):
    """Predict the test set with one pre-trained ANN checkpoint.

    The checkpoint is read from
    ``{directorio}/ann_model_seedfold{seedfold}_seedrun{seedrun}_fold__{fold}.pth``.
    The function relies on module-level state: ``test_noctl``, ``feature_cols``,
    ``BATCH_SIZE``, ``num_features``, ``num_targets``, ``hidden_size``,
    ``directorio``, ``DEVICE`` and whichever ``Model`` class is bound when the
    function is called.

    Args:
        fold: fold index encoded in the checkpoint file name.
        seedfold: fold-split seed encoded in the checkpoint file name.
        seedrun: run seed encoded in the checkpoint file name; also passed to
            ``seed_everything``.
        display: unused; kept so existing callers keep working.

    Returns:
        NumPy array with the sigmoid predictions for every row of ``test_noctl``.
    """
    seed_everything(seedrun)

    #--------------------- PREDICTION---------------------
    # Selecting columns already yields a new frame, so no explicit copy of the
    # whole test frame is needed.
    x_test = test_noctl[feature_cols].values
    testdataset = TestDataset(x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)

    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size,
    )

    # map_location lets checkpoints that were saved from GPU tensors load on a
    # CPU-only host as well; without it torch.load tries to restore them on CUDA.
    checkpoint_path = f'{directorio}/ann_model_seedfold{seedfold}_seedrun{seedrun}_fold__{fold}.pth'
    model.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE))
    model.to(DEVICE)

    return inference_fn(model, testloader, DEVICE)

In [31]:
def run_k_fold(NFOLDS, seed_fold, seed_run, display=2):
    """Average the test predictions of the ``NFOLDS`` fold models for one seed pair.

    Each fold's prediction is weighted by ``1 / NFOLDS`` and accumulated into
    an array of shape ``(len(test_noctl), len(target_cols))``.
    """
    fold_average = np.zeros((len(test_noctl), len(target_cols)))
    for fold_idx in range(NFOLDS):
        fold_pred = run_inference_ANN(fold_idx, seed_fold, seed_run, display)
        fold_average += fold_pred / NFOLDS
    return fold_average

In [32]:
len(feature_cols_ini)

974

> ### ANN models trained on 28 and 29 Nov (inference only)

In [33]:
# Directory holding the pre-trained ANN checkpoints read by run_inference_ANN;
# the alternatives tried earlier are kept in the trailing comment.
directorio = '../input/annrandom' #'../input/annstratkmean7' #'../input/annrandomnosmooth' #'../input/annstratnosmooth' #
folds_cp = folds.copy()
feature_cols = feature_cols_ini.copy()
NFOLDS = CFG.num_folds
num_features=len(feature_cols)
num_targets=len(target_cols)

# Module-level settings read by run_inference_ANN and the ANN Model class.
# NOTE(review): EPOCHS, LEARNING_RATE, WEIGHT_DECAY and the EARLY_* values are
# only referenced by the commented-out training routine in this part of the
# notebook - confirm whether later cells still need them.
DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS = 25
BATCH_SIZE = 128
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
EARLY_STOPPING_STEPS = 10
EARLY_STOP = False
hidden_size = 256
res = []
# Dropout probabilities picked up by Model.__init__ through these globals.
drop1_feat = 0.05
drop2_feat = 0.05
drop3_feat = 0.25


# Averaging on multiple SEEDS
oof_seed = np.zeros((len(folds), len(target_cols)))
predictions = np.zeros((len(test_noctl), len(target_cols)))
losses_list = []
# SEED = [[0,1],[1,1],[2,1],[3,1]]
# 16 combined seeds, each decoded into (seed_fold, seed_run) with both in 0..3.
SEED = np.arange(16)
for seed in tqdm(SEED):
    seed_fold = seed // 4
    seed_run = seed % 4
    predictions_ = run_k_fold(NFOLDS, seed_fold, seed_run, display=2)
    predictions += predictions_

# Store the seed-averaged test predictions in the target columns of test_noctl
# (the columns are created first, then overwritten with the averages).
for col in target_cols:
    test_noctl[col] = 0.0
test_noctl[target_cols] = predictions / len(SEED)

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




In [34]:
# Build the submission: keep the non-target columns of sample_submission,
# left-join the predictions on sig_id, and fill rows that have no prediction
# in test_noctl with 0.0.
submission = sample_submission.drop(columns=target_cols)\
.merge(test_noctl[['sig_id']+target_cols], on='sig_id', how='left')\
.fillna(0.0).reset_index(drop=True)
name_sub = 'submission.csv'
submission.to_csv(name_sub, index=False)
# print(name_sub)
print("ANN_1dic")
# NOTE(review): the copy is named "30nov" while the printed tag is "ANN_1dic" -
# confirm which label is intended.
submission_ANN_30nov = submission.copy()
submission.head()

ANN_1dic


Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_
antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gt
pase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.001293,0.001952,0.003056,0.010624,0.019323,0.005754,0.002078,0.008484,0.000576,0.009351,0.010256,0.00056,0.001248,0.000601,0.001181,0.001198,0.002772,0.007842,0.008646,0.002889,0.002346,0.00418,0.000809,0.002408,0.000691,0.001159,0.001085,0.001526,0.003318,0.002066,0.001038,0.005662,0.005636,0.000492,0.000659,0.001413,0.005596,0.000596,0.000802,0.000697,0.005372,0.010667,0.001946,0.006122,0.008474,0.007262,0.001139,0.003841,0.000831,0.003551,0.001926,0.004378,0.000727,0.000888,0.017088,0.0019,0.001913,0.002342,0.001705,0.003083,0.001676,0.006628,0.002108,0.000439,0.00461,0.000766,0.00241,0.003694,0.007166,0.000846,0.001487,0.048415,0.005257,0.00109,0.003756,0.000912,0.00377,0.013462,0.00728,0.01123,0.0006,0.001294,0.000645,0.023284,0.001801,0.005585,0.000867,0.001499,0.000746,0.000464,0.00127,0.001792,0.002579,0.021474,0.02141,0.00161,0.001968,0.001535,0.007545,0.02448,0.001743,0.000986,0.006838,0.002138,0.005685,0.007557,0.000707,0.001515,0.00558,0.000673,0.000918,0.000779,0.002087,0.002967,0.002203,0.002474,0.000828,0.003685,0.000802,0.000512,0.001015,0.000964,0.005826,0.001423,0.004561,0.001155,0.000983,0.000633,0.006477,0.002535,0.002024,0.004833,0.000998,0.000826,0.004893,0.000675,0.007152,0.001236,0.001405,0.001315,0.001397,0.00082,0.001595,0.002367,0.005865,0.001601,0.000766,0.001178,0.0027,0.000433,0.001566,0.017764,0.002714,0.001373,0.001391,0.002461,0.010842,0.001684,0.002669,0.017354,0.002287,0.005105,0.009152,0.000901,0.003887,0.000647,0.002988,0.001009,0.004401,0.001089,0.001049,0.002638,0.000733,0.00094,0.00104,0.002114,0.010523,0.00482,0.004235,0.003681,0.001466,0.001454,0.024195,0.003137,0.000704,0.000826,0.000792,0.001469,0.000347,0.000955,0.001216,0.001664,0.001052,0.00406,0.001427,0.001354,0.001092,0.003016,0.005292,0.001437,0.000815,0.001008,0.001007,0.001937,0.017152,0.001353
1,id_001897cda,0.0006,0.000982,0.002453,0.003012,0.001086,0.001342,0.003638,0.007281,0.007204,0.002598,0.005166,0.002789,0.000592,0.005871,0.000945,0.000797,0.000869,0.00149,0.001269,0.003112,0.004863,0.002779,0.000642,0.001386,0.000728,0.001441,0.000911,0.000661,0.001959,0.001709,0.000936,0.002872,0.001208,0.001603,0.000692,0.000495,0.002719,0.0038,0.003445,0.002121,0.001352,0.001164,0.000411,0.002364,0.000914,0.002018,0.000715,0.001252,0.001507,0.004954,0.00165,0.004989,0.015709,0.000555,0.004497,0.004327,0.004,0.001451,0.003282,0.000923,0.000905,0.004806,0.000964,0.004178,0.001686,0.0033,0.001144,0.003149,0.000794,0.000718,0.000902,0.005624,0.001985,0.000808,0.001497,0.00063,0.001146,0.001388,0.001172,0.00089,0.000806,0.000901,0.000775,0.003633,0.002827,0.006085,0.000545,0.002604,0.005746,0.003658,0.000798,0.000821,0.00103,0.001836,0.003731,0.002719,0.001368,0.001805,0.00117,0.005067,0.001279,0.00487,0.003908,0.001176,0.000699,0.002022,0.001197,0.003728,0.001324,0.000902,0.000896,0.006655,0.002116,0.001316,0.002518,0.003568,0.034316,0.007652,0.005062,0.001664,0.000701,0.000663,0.0041,0.001557,0.003264,0.000753,0.000986,0.001821,0.001725,0.001357,0.00068,0.001381,0.001155,0.002692,0.000874,0.001639,0.001186,0.000669,0.000736,0.001049,0.000645,0.000609,0.000932,0.001069,0.001701,0.001357,0.000784,0.00385,0.005033,0.002633,0.001905,0.019721,0.000686,0.012107,0.00376,0.002891,0.001715,0.062141,0.015617,0.001812,0.020423,0.001514,0.002731,0.000663,0.002592,0.000953,0.000756,0.001471,0.000798,0.000332,0.001317,0.001048,0.002002,0.031636,0.001611,0.002826,0.007226,0.000977,0.00054,0.000993,0.001112,0.00335,0.004546,0.000992,0.011445,0.000905,0.003275,0.0028,0.002143,0.00067,0.000481,0.001097,0.001606,0.002478,0.008366,0.001368,0.001116,0.000956,0.004205,0.000292,0.007264,0.000654,0.006777,0.001188,0.002213,0.003306
2,id_002429b5b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,id_00276f245,0.00143,0.001407,0.002114,0.012372,0.016655,0.004947,0.00431,0.003902,0.000672,0.022006,0.038018,0.002439,0.000812,0.004235,0.001726,0.002532,0.002995,0.004966,0.004777,0.002271,0.003049,0.001858,0.001313,0.003773,0.002277,0.001571,0.001835,0.002996,0.009067,0.003298,0.002663,0.002317,0.003209,0.001484,0.000963,0.000874,0.002707,0.001094,0.001079,0.001297,0.004623,0.007712,0.003841,0.014111,0.008289,0.007538,0.001247,0.001069,0.001107,0.003542,0.002335,0.000666,0.001524,0.001763,0.036773,0.0023,0.005779,0.002486,0.002852,0.001729,0.002065,0.007399,0.001546,0.001198,0.002893,0.000843,0.005719,0.004632,0.003475,0.001281,0.000969,0.013453,0.004872,0.002229,0.001535,0.000993,0.003872,0.020781,0.014512,0.050883,0.009712,0.001099,0.000981,0.005044,0.003914,0.00163,0.001457,0.001583,0.002478,0.001027,0.000863,0.001484,0.001692,0.004172,0.005392,0.001181,0.000718,0.001798,0.005102,0.0134,0.002115,0.0012,0.003953,0.001125,0.002991,0.035045,0.001751,0.002229,0.006921,0.002629,0.000798,0.001992,0.001131,0.003431,0.002777,0.002216,0.001318,0.001805,0.00062,0.001065,0.001221,0.001413,0.002288,0.001419,0.00273,0.001767,0.000726,0.001158,0.005411,0.003185,0.001172,0.008032,0.001218,0.002003,0.00347,0.00427,0.003867,0.001093,0.002983,0.001158,0.002968,0.001526,0.000808,0.006882,0.009011,0.0039,0.006757,0.001477,0.001657,0.001065,0.001197,0.006758,0.002499,0.005329,0.002835,0.004064,0.007908,0.001148,0.001728,0.002154,0.001294,0.002829,0.003569,0.000878,0.002523,0.001126,0.008218,0.002115,0.003892,0.001146,0.001726,0.00061,0.001181,0.001721,0.001719,0.001574,0.016589,0.033095,0.004247,0.003331,0.00468,0.001929,0.009061,0.002179,0.002163,0.001608,0.001651,0.004551,0.001321,0.002336,0.003241,0.00268,0.001107,0.001919,0.001484,0.001931,0.001049,0.001164,0.002884,0.007775,0.003948,0.00109,0.001821,0.002346,0.001199,0.003573
4,id_0027f1083,0.001982,0.001953,0.002447,0.010806,0.010668,0.003265,0.005332,0.002125,0.001034,0.008886,0.016536,0.002629,0.00068,0.001442,0.001742,0.001945,0.003516,0.004058,0.002043,0.002003,0.005007,0.00513,0.001067,0.003841,0.001354,0.001315,0.00132,0.001598,0.007314,0.003499,0.002384,0.004572,0.00336,0.000941,0.000789,0.000942,0.0022,0.001002,0.000893,0.001143,0.008206,0.008327,0.002908,0.011414,0.008019,0.008563,0.001021,0.003033,0.001026,0.003758,0.002237,0.001592,0.001402,0.001212,0.005894,0.002617,0.002878,0.002837,0.003195,0.00165,0.001487,0.005434,0.001963,0.002082,0.004264,0.00087,0.005746,0.002924,0.002568,0.001,0.00123,0.012123,0.006268,0.002338,0.002085,0.001158,0.005282,0.032889,0.004241,0.004579,0.00114,0.000837,0.00077,0.010795,0.002381,0.001761,0.001016,0.002507,0.001902,0.001325,0.000574,0.001457,0.000943,0.005665,0.004663,0.000594,0.001012,0.00275,0.003626,0.016677,0.001957,0.000984,0.004967,0.001748,0.002432,0.008318,0.000681,0.001464,0.004294,0.000853,0.00145,0.000737,0.003442,0.002124,0.003104,0.003309,0.004686,0.00248,0.00121,0.001034,0.001207,0.001191,0.002684,0.001371,0.002842,0.000844,0.000755,0.000969,0.006635,0.001691,0.001201,0.004352,0.000928,0.002775,0.003327,0.001673,0.006007,0.001044,0.003002,0.001679,0.001881,0.000966,0.001432,0.003259,0.004755,0.003669,0.001483,0.001594,0.002534,0.00153,0.001403,0.01052,0.001349,0.002463,0.001705,0.006297,0.002895,0.009822,0.002143,0.004684,0.002638,0.003661,0.003498,0.001336,0.00233,0.001447,0.005499,0.001754,0.00484,0.001983,0.001049,0.001115,0.001592,0.002834,0.004353,0.002216,0.008453,0.007754,0.00207,0.001912,0.001388,0.002598,0.014903,0.002363,0.001144,0.001743,0.001765,0.003861,0.001317,0.001322,0.0034,0.002431,0.001161,0.001767,0.004136,0.00115,0.001198,0.000859,0.003719,0.001564,0.001667,0.001086,0.002338,0.002116,0.000526,0.002297


## ANN from 30 November

In [35]:
# directorio = '../input/annrandomnosmooth' 
# folds_cp = folds.copy()
# feature_cols = feature_cols_ini.copy()
# NFOLDS = CFG.num_folds
# num_features=len(feature_cols)
# num_targets=len(target_cols)

# DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
# EPOCHS = 25
# BATCH_SIZE = 128
# LEARNING_RATE = 1e-3
# WEIGHT_DECAY = 1e-5
# EARLY_STOPPING_STEPS = 10
# EARLY_STOP = False
# hidden_size = 256
# res = []
# drop1_feat = 0.05
# drop2_feat = 0.05
# drop3_feat = 0.25


# # Averaging on multiple SEEDS
# oof_seed = np.zeros((len(folds), len(target_cols)))
# predictions = np.zeros((len(test_noctl), len(target_cols)))
# losses_list = []
# # SEED = [[0,1],[1,1],[2,1],[3,1]]
# SEED = np.arange(16)
# for seed in tqdm(SEED):
#     seed_fold = seed // 4
#     seed_run = seed % 4
#     predictions_ = run_k_fold(NFOLDS, seed_fold, seed_run, display=2)
#     predictions += predictions_

# # FINAL CV LOGLOSS
# for col in target_cols:
#     test_noctl[col] = 0.0
# test_noctl[target_cols] = predictions / len(SEED)

In [36]:
# submission = sample_submission.drop(columns=target_cols)\
# .merge(test_noctl[['sig_id']+target_cols], on='sig_id', how='left')\
# .fillna(0.0).reset_index(drop=True)
# name_sub = 'submission.csv'
# submission.to_csv(name_sub, index=False)
# print("ANN_30nov")
# submission_ANN_30nov = submission.copy()
# submission.head()

## ANN from 28 November

In [37]:
# directorio = '../input/annrandom28nov3'
# folds_cp = folds.copy()
# feature_cols = feature_cols_ini_nocps.copy()
# NFOLDS = CFG.num_folds
# num_features=len(feature_cols)
# num_targets=len(target_cols)

# DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
# EPOCHS = 25
# BATCH_SIZE = 128
# LEARNING_RATE = 1e-3
# WEIGHT_DECAY = 1e-5
# EARLY_STOPPING_STEPS = 10
# EARLY_STOP = False
# hidden_size = 256
# res = []
# drop1_feat = 0.05
# drop2_feat = 0.05
# drop3_feat = 0.25


# # Averaging on multiple SEEDS
# oof_seed = np.zeros((len(folds), len(target_cols)))
# predictions = np.zeros((len(test_noctl), len(target_cols)))
# losses_list = []
# # SEED = [[0,1],[1,1],[2,1],[3,1]]
# SEED = np.arange(16)
# for seed in tqdm(SEED):
#     seed_fold = seed // 4
#     seed_run = seed % 4
#     predictions_ = run_k_fold(NFOLDS, seed_fold, seed_run, display=2)
#     predictions += predictions_

# # FINAL CV LOGLOSS
# for col in target_cols:
#     test_noctl[col] = 0.0
# test_noctl[target_cols] = predictions / len(SEED)

In [38]:
# submission = sample_submission.drop(columns=target_cols)\
# .merge(test_noctl[['sig_id']+target_cols], on='sig_id', how='left')\
# .fillna(0.0).reset_index(drop=True)
# # sub.to_csv('submission.csv', index=False)
# # name_sub = 'submission.csv'
# # submission.to_csv(name_sub, index=False)
# print("ANN_28nov")
# submission_ANN_28nov = submission.copy()
# submission.head()

# 2. RESNET

In [39]:
# Layer widths and dropout probabilities read as globals by the Model class
# defined right after these assignments.
hidden_size1 = 1024
hidden_size2 = hidden_size1 // 2  # second width is half of the first
drop_h1 = 0.2
drop_h2 = 0.2
drop_h3 = 0.2

class Model(nn.Module):
    def __init__(self, num_features, num_targets):
        super(Model, self).__init__()
        # h1
        self.batch_norm1_h1 = nn.BatchNorm1d(num_features)
        self.dropout1_h1 = nn.Dropout(drop_h1) 
        self.dense1_h1 = nn.Linear(num_features, hidden_size1)
        
        self.batch_norm2_h1 = nn.BatchNorm1d(hidden_size1)
        self.dense2_h1 = nn.Linear(hidden_size1, hidden_size2)

        # h2
        self.batch_norm1_h2 = nn.BatchNorm1d(hidden_size2+num_features)
        self.dropout1_h2 = nn.Dropout(drop_h2) 
        self.dense1_h2 = nn.Linear(hidden_size2+num_features, hidden_size1)
        
        self.batch_norm2_h2 = nn.BatchNorm1d(hidden_size1)
        self.dense2_h2 = nn.Linear(hidden_size1,hidden_size1)
        
        self.batch_norm3_h2 = nn.BatchNorm1d(hidden_size1)
        self.dense3_h2 = nn.Linear(hidden_size1,hidden_size2)
               
        self.batch_norm4_h2 = nn.BatchNorm1d(hidden_size2)
        self.dense4_h2 = nn.Linear(hidden_size2,hidden_size2)
        
        # h3
#         self.batch_norm1_h3 = nn.BatchNorm1d(hidden_size2*2)
#         self.dropout1_h3 = nn.Dropout(drop_h3) 
#         self.dense1_h3 = nn.Linear(hidden_size2*2,hidden_size2)

        self.batch_norm1_h3 = nn.BatchNorm1d(hidden_size2)
        self.dropout1_h3 = nn.Dropout(drop_h3) 
        self.dense1_h3 = nn.Linear(hidden_size2,hidden_size2//2)
        
        self.batch_norm2_h3 = nn.BatchNorm1d(hidden_size2//2)
        self.dropout2_h3 = nn.Dropout(drop_h3) 
        self.dense2_h3 = nn.Linear(hidden_size2//2,num_targets)
        
#         self.batch_norm3_h3 = nn.BatchNorm1d(num_targets)
#         self.dense3_h3 = nn.Linear(num_targets,num_targets)
        
#     x2 = torch.log(1+x-torch.min(x))
#         x2 = self.batch_norm1B(x2)
#         x2 = self.dropout1B(x2)
#         x2 = self.dense1B(x2)
#         x2 = torch.exp(x2)
#         x2 = F.leaky_relu(x2)
        
        
    def forward(self, x):
        xmult = x-torch.min(x)
        
        h1 = self.batch_norm1_h1(x)
        h1 = self.dropout1_h1(h1)
#         h1 = F.elu(self.dense1_h1(h1))
        h1 = F.leaky_relu(self.dense1_h1(h1))
        h1 = self.batch_norm2_h1(h1)
#         h1 = F.elu(self.dense2_h1(h1))
        h1 = F.leaky_relu(self.dense2_h1(h1))
    
        combined = torch.cat((x.view(x.size(0), -1), h1.view(h1.size(0), -1)), dim=1)
        h2 = self.batch_norm1_h2(combined)
        h2 = self.dropout1_h2(h2)
#         h2 = F.relu(self.dense1_h2(h2))
        h2 = F.leaky_relu(self.dense1_h2(h2))
        h2 = self.batch_norm2_h2(h2)
#         h2 = F.elu(self.dense2_h2(h2))
        h2 = F.leaky_relu(self.dense2_h2(h2))
        h2 = self.batch_norm3_h2(h2)
#         h2 = F.relu(self.dense3_h2(h2))
        h2 = F.leaky_relu(self.dense3_h2(h2))
        h2 = self.batch_norm4_h2(h2)
#         h2 = F.elu(self.dense4_h2(h2))
        h2 = F.leaky_relu(self.dense4_h2(h2))
    
#         # Xmult
#         xmult = x-torch.min(x)
#         h1b = torch.log1p(xmult)
#         h1b = self.batch_norm1_h1(h1b)
#         h1b = self.dropout1_h1(h1b)
#         h1b = self.dense1_h1(h1b)
#         h1b = torch.expm1(h1b)
#         h1b = F.leaky_relu(h1b)
        
#         h1b = self.batch_norm2_h1(h1b)
# #         h1 = F.elu(self.dense2_h1(h1))
#         h1b = F.leaky_relu(self.dense2_h1(h1b))
    
#         combined_mult = torch.cat((x.view(x.size(0), -1), h1b.view(h1b.size(0), -1)), dim=1)
#         h2b = combined_mult-torch.min(combined_mult)
#         h2b = torch.log1p(h2b)
#         h2b = self.batch_norm1_h2(h2b)
#         h2b = self.dropout1_h2(h2b)
#         h2b = self.dense1_h2(h2b)
#         h2b = torch.expm1(h2b)
#         h2b = F.leaky_relu(h2b)

#         h2b = self.batch_norm2_h2(h2b)
# #         h2 = F.elu(self.dense2_h2(h2))
#         h2b = F.leaky_relu(self.dense2_h2(h2b))
#         h2b = self.batch_norm3_h2(h2b)
# #         h2 = F.relu(self.dense3_h2(h2))
#         h2b = F.leaky_relu(self.dense3_h2(h2b))
#         h2b = self.batch_norm4_h2(h2b)
# #         h2 = F.elu(self.dense4_h2(h2))
#         h2b = F.leaky_relu(self.dense4_h2(h2b))
    
    
#         combined_h1s = torch.cat((h1.view(h1.size(0), -1), h1b.view(h1b.size(0), -1)), dim=1)
#         combined_h2s = torch.cat((h2.view(h2.size(0), -1), h2b.view(h2b.size(0), -1)), dim=1)
    
#         average2heads= (combined_h1s+combined_h2s)/2.0
        average2heads = (h1+h2)/2.0
        h3 = self.batch_norm1_h3(average2heads)
#         h3 = self.dropout1_h3(h3)
        h3 = self.dense1_h3(h3)
        
        output = self.batch_norm2_h3(h3)
#         output = self.dropout2_h3(output)
        output = self.dense2_h3(output)
        return output

In [40]:
def run_inference_RESNET(fold, seedfold, seedrun, display=2):
    """Predict the test set with one saved checkpoint.

    Reads the notebook globals ``test_noctl``, ``feature_cols``,
    ``BATCH_SIZE``, ``num_features``, ``num_targets``, ``directorio`` and
    ``DEVICE``, and uses the helpers ``seed_everything``, ``TestDataset`` and
    ``inference_fn`` defined elsewhere in the notebook.

    Parameters
    ----------
    fold, seedfold, seedrun :
        Together they select the checkpoint file
        ``{directorio}/ann_model_seedfold{seedfold}_seedrun{seedrun}_fold__{fold}.pth``.
        ``seedrun`` is also passed to ``seed_everything``.
    display :
        Unused; kept so existing callers keep working.

    Returns
    -------
    Whatever ``inference_fn`` returns for the test loader.
    """
    seed_everything(seedrun)

    #--------------------- PREDICTION---------------------
    # Selecting a column list already yields a new frame, so the global test
    # frame does not need to be copied first.
    x_test = test_noctl[feature_cols].values
    testdataset = TestDataset(x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)

    model = Model(
        num_features=num_features,
        num_targets=num_targets,
    )

    checkpoint_path = f'{directorio}/ann_model_seedfold{seedfold}_seedrun{seedrun}_fold__{fold}.pth'
    # map_location lets a checkpoint that was saved from a GPU be loaded when
    # DEVICE is 'cpu'; without it torch.load tries to restore CUDA tensors.
    model.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE))
    model.to(DEVICE)

    return inference_fn(model, testloader, DEVICE)

In [41]:
def run_k_fold(NFOLDS, seed_fold, seed_run, display=2):
    """Average the test predictions of folds ``0 .. NFOLDS-1`` for one
    (seed_fold, seed_run) pair.

    Each fold's prediction comes from ``run_inference_RESNET`` and is divided
    by ``NFOLDS`` before being accumulated, so the result is the fold mean.
    The output has shape ``(len(test_noctl), len(target_cols))``.
    """
    n_rows = len(test_noctl)
    n_targets = len(target_cols)
    fold_mean = np.zeros((n_rows, n_targets))
    for fold_idx in range(NFOLDS):
        fold_pred = run_inference_RESNET(fold_idx, seed_fold, seed_run, display)
        # In-place accumulation of this fold's share of the mean.
        np.add(fold_mean, fold_pred / NFOLDS, out=fold_mean)
    return fold_mean

## Resnet del 30 de noviembre

In [42]:
# Checkpoint directory read by run_inference_RESNET; the alternatives tried
# earlier are kept in the trailing comment.
directorio = '../input/resnetrandom' #'../input/resnetstrat' #'../input/resnetrandomnostrat' #'../input/resnetstratnosmooth' #'../input/resnetrandomnostrat'
folds_cp = folds.copy()
# Globals below (feature_cols, num_features, num_targets, DEVICE, BATCH_SIZE)
# are read by run_inference_RESNET when it builds the loader and the model.
feature_cols = feature_cols_ini.copy()
NFOLDS = CFG.num_folds
num_features=len(feature_cols)
num_targets=len(target_cols)

DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): EPOCHS, LEARNING_RATE, WEIGHT_DECAY and the EARLY_* flags are
# not read in this cell -- presumably leftovers from the training notebook.
EPOCHS = 25
BATCH_SIZE = 128
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
EARLY_STOPPING_STEPS = 10
EARLY_STOP = False

res = []
# Sum the fold-averaged test predictions over every seed combination.
oof_seed = np.zeros((len(folds), len(target_cols)))
predictions = np.zeros((len(test_noctl), len(target_cols)))
losses_list = []
# Earlier variant that used a hand-picked subset of (seed_fold, seed_run) pairs:
# SEED = [[0,3],[1,3],[2,2],[3,0]]
# for seed_fold, seed_run in tqdm(SEED):
SEED = np.arange(16)
for seed in tqdm(SEED):
    # 16 = 4 x 4: split the flat index into the two seeds used in the
    # checkpoint file name (seed_fold in 0..3, seed_run in 0..3).
    seed_fold = seed // 4
    seed_run = seed % 4
    predictions_ = run_k_fold(NFOLDS, seed_fold, seed_run, display=2)
    predictions += predictions_

# Store the seed-averaged test predictions in test_noctl (no CV log-loss is
# computed here).  The target columns are created first, one by one, and then
# overwritten with the averaged predictions.
for col in target_cols:
    test_noctl[col] = 0.0
test_noctl[target_cols] = predictions / len(SEED)

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




In [43]:
# Re-attach the predictions to the full list of ids in sample_submission.
# The left merge keeps every sample_submission row; ids that are missing from
# test_noctl get 0.0 in all target columns (presumably the control rows --
# verify against how test_noctl was built).
submission = (
    sample_submission
    .drop(columns=target_cols)
    .merge(test_noctl[['sig_id'] + target_cols], on='sig_id', how='left')
    .fillna(0.0)
    .reset_index(drop=True)
)
name_sub = 'submission.csv'
submission.to_csv(name_sub, index=False)
print("RESNET 30nov")
# Keep a copy under a model-specific name; `submission` is reused by other cells.
submission_RESNET_30nov = submission.copy()
submission.head()

RESNET 30nov


Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_
antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gt
pase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.001017,0.00134,0.003699,0.010391,0.014362,0.004175,0.002167,0.006994,0.000471,0.008558,0.011338,0.000703,0.001393,0.000627,0.001432,0.001308,0.002041,0.005685,0.006751,0.002637,0.002864,0.004606,0.000822,0.00281,0.000802,0.001233,0.001154,0.001996,0.003463,0.002157,0.001031,0.005534,0.004967,0.000542,0.00072,0.001592,0.006512,0.000532,0.000987,0.000667,0.004276,0.010717,0.00144,0.007091,0.007154,0.005985,0.001434,0.003479,0.000593,0.002486,0.002146,0.003741,0.000531,0.000993,0.018831,0.001698,0.001932,0.002163,0.001586,0.00249,0.001722,0.005543,0.001892,0.000399,0.004165,0.000563,0.002747,0.005081,0.005517,0.000827,0.001082,0.055824,0.006146,0.001246,0.002956,0.000845,0.005736,0.017876,0.008564,0.012604,0.000449,0.001486,0.000673,0.017718,0.001815,0.007837,0.000986,0.001493,0.000565,0.000339,0.00153,0.001627,0.002181,0.02257,0.021721,0.002108,0.001585,0.001485,0.00805,0.023278,0.002046,0.00104,0.00945,0.002073,0.005162,0.007772,0.000768,0.001257,0.004546,0.001078,0.001151,0.000856,0.001545,0.002979,0.002125,0.003197,0.001527,0.0045,0.001415,0.000302,0.000996,0.000954,0.005075,0.001878,0.0034,0.001488,0.000834,0.000773,0.008768,0.002825,0.002147,0.004123,0.000906,0.000795,0.005056,0.000778,0.005332,0.001117,0.001277,0.001157,0.001245,0.000785,0.001269,0.002011,0.005175,0.001579,0.000524,0.001119,0.002616,0.000247,0.001464,0.015,0.002421,0.002055,0.001201,0.003048,0.010169,0.002934,0.003232,0.010857,0.002476,0.006286,0.008062,0.000679,0.005327,0.000757,0.003248,0.000908,0.003469,0.000726,0.001328,0.002779,0.000871,0.000891,0.001412,0.002197,0.00924,0.004762,0.004286,0.003893,0.001235,0.001594,0.021606,0.003533,0.000407,0.001003,0.000695,0.001402,0.000272,0.000825,0.001651,0.001649,0.001297,0.005084,0.000719,0.001336,0.001293,0.00383,0.00472,0.001045,0.000551,0.001057,0.000797,0.001933,0.022528,0.001695
1,id_001897cda,0.000407,0.000601,0.001691,0.001471,0.000495,0.000994,0.002743,0.006281,0.008441,0.003315,0.004406,0.003003,0.000363,0.013655,0.00071,0.000717,0.000469,0.001147,0.00101,0.003252,0.003597,0.001821,0.000511,0.000744,0.000663,0.0011,0.000747,0.000384,0.001268,0.001352,0.001032,0.001334,0.00084,0.001671,0.00065,0.000288,0.002014,0.002973,0.003937,0.001598,0.000694,0.000436,0.000308,0.002123,0.000708,0.000733,0.00059,0.000823,0.002519,0.003602,0.001301,0.005441,0.016899,0.000548,0.002724,0.002791,0.002578,0.000939,0.004493,0.000556,0.0007,0.002991,0.000683,0.006828,0.001535,0.003461,0.00062,0.004905,0.000463,0.000678,0.000746,0.003685,0.001439,0.000379,0.001138,0.000548,0.000583,0.000496,0.00087,0.000694,0.001402,0.000864,0.000772,0.001613,0.001947,0.006391,0.000414,0.00292,0.012295,0.004076,0.000734,0.000694,0.000529,0.000749,0.003137,0.001459,0.000905,0.001077,0.000607,0.003128,0.001188,0.00456,0.002481,0.001363,0.000451,0.001431,0.002153,0.00414,0.000911,0.001327,0.000966,0.007523,0.001119,0.001489,0.001252,0.004194,0.044226,0.007635,0.00451,0.002743,0.000737,0.000692,0.003319,0.001079,0.002228,0.000739,0.000593,0.002256,0.001408,0.000836,0.000627,0.001101,0.000697,0.003892,0.000541,0.000932,0.000626,0.000636,0.000514,0.000842,0.000467,0.00071,0.000481,0.000658,0.001284,0.001163,0.001139,0.005373,0.003838,0.004854,0.0017,0.014334,0.000578,0.017689,0.003486,0.002171,0.001593,0.044096,0.016302,0.000512,0.016848,0.000974,0.001644,0.001124,0.001922,0.000691,0.000472,0.001676,0.00052,0.000308,0.001339,0.002244,0.001683,0.029939,0.000776,0.002489,0.00611,0.000666,0.000474,0.000777,0.000705,0.002467,0.003256,0.000513,0.017514,0.000804,0.003687,0.002645,0.005267,0.000568,0.00019,0.000981,0.00151,0.002423,0.003256,0.001386,0.000968,0.000493,0.004298,0.000151,0.007047,0.000454,0.010448,0.001022,0.001647,0.002949
2,id_002429b5b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,id_00276f245,0.001022,0.001162,0.002062,0.01352,0.020921,0.004904,0.003508,0.004053,0.000358,0.009724,0.029755,0.001438,0.000756,0.001358,0.00159,0.001839,0.003046,0.004659,0.003904,0.002306,0.002427,0.00244,0.001117,0.003322,0.001398,0.001065,0.00218,0.002666,0.005919,0.002444,0.002285,0.001912,0.002486,0.001284,0.000821,0.000513,0.001771,0.000688,0.000927,0.00116,0.003974,0.006602,0.002038,0.014078,0.005252,0.005643,0.000745,0.000755,0.001155,0.003622,0.002113,0.000615,0.000985,0.001452,0.029783,0.001814,0.004903,0.001895,0.002465,0.001269,0.001471,0.011056,0.001364,0.000937,0.002805,0.000528,0.004899,0.00266,0.00375,0.001031,0.000727,0.017596,0.00404,0.001647,0.001194,0.000957,0.002332,0.008574,0.011224,0.050859,0.009088,0.000902,0.000872,0.006908,0.004541,0.001587,0.000801,0.001081,0.001717,0.001091,0.00051,0.001486,0.001258,0.003887,0.004566,0.001084,0.000747,0.001323,0.003743,0.013898,0.002092,0.000712,0.003692,0.003298,0.003862,0.028316,0.00185,0.003487,0.003707,0.00393,0.000459,0.001497,0.000844,0.002643,0.002487,0.001735,0.001068,0.001709,0.000828,0.001188,0.00099,0.001027,0.001603,0.001426,0.001811,0.001415,0.000637,0.001169,0.004296,0.003246,0.00113,0.006485,0.001302,0.001997,0.003545,0.004693,0.005342,0.000875,0.002535,0.000826,0.002527,0.001139,0.000582,0.006602,0.005396,0.003,0.004955,0.001323,0.001267,0.000998,0.00091,0.006163,0.00245,0.004112,0.001219,0.004898,0.007492,0.001373,0.001408,0.001883,0.000871,0.002596,0.004307,0.000921,0.002834,0.000825,0.002859,0.001657,0.003142,0.001943,0.001577,0.000531,0.001117,0.000838,0.001753,0.00107,0.014396,0.066897,0.002809,0.002519,0.004595,0.002637,0.006834,0.002389,0.001972,0.001254,0.000676,0.004087,0.000662,0.001963,0.002141,0.002729,0.001006,0.001301,0.000578,0.001704,0.001005,0.001203,0.002461,0.00878,0.003745,0.000889,0.001287,0.002433,0.000727,0.002304
4,id_0027f1083,0.002083,0.002052,0.00247,0.0133,0.01724,0.00419,0.00442,0.002925,0.000749,0.008443,0.0165,0.002361,0.000722,0.000941,0.001588,0.001805,0.004191,0.00473,0.00287,0.002344,0.004108,0.004911,0.001102,0.003474,0.001374,0.001213,0.001298,0.001466,0.006666,0.003671,0.002214,0.004057,0.003204,0.000744,0.000837,0.000816,0.002106,0.000732,0.000492,0.001103,0.007115,0.006929,0.002565,0.01031,0.007714,0.012284,0.000958,0.002306,0.000699,0.004259,0.0022,0.001942,0.00112,0.00121,0.007174,0.002586,0.002785,0.003191,0.002798,0.001519,0.001643,0.005524,0.002158,0.00222,0.004346,0.000725,0.005279,0.00254,0.003548,0.001109,0.00143,0.022745,0.005488,0.001814,0.002441,0.001187,0.00474,0.038158,0.004245,0.007525,0.001571,0.000957,0.0008,0.015376,0.001957,0.001182,0.000967,0.001969,0.001199,0.000773,0.000544,0.001634,0.00103,0.005865,0.006326,0.000647,0.001458,0.002379,0.004645,0.019463,0.001958,0.001104,0.003836,0.001301,0.003085,0.007978,0.000583,0.001352,0.003749,0.000769,0.001315,0.00051,0.002308,0.002411,0.003079,0.002502,0.002827,0.001736,0.000826,0.000726,0.00123,0.001189,0.002517,0.001429,0.00337,0.000727,0.000947,0.00056,0.005096,0.001869,0.001183,0.006197,0.001001,0.002191,0.00333,0.0015,0.005466,0.001178,0.002711,0.001454,0.001949,0.001121,0.001226,0.003458,0.005111,0.003284,0.001735,0.00139,0.002644,0.000963,0.001332,0.011893,0.001842,0.002887,0.001566,0.005145,0.003928,0.005571,0.001901,0.006283,0.001817,0.003387,0.00522,0.001077,0.002005,0.001177,0.004902,0.001572,0.005146,0.002506,0.000817,0.000997,0.001453,0.002477,0.002705,0.002101,0.010018,0.01045,0.00255,0.002274,0.001711,0.001821,0.016114,0.002086,0.000646,0.001416,0.001754,0.003305,0.001053,0.001394,0.003251,0.002524,0.001195,0.001596,0.002692,0.001512,0.001169,0.001012,0.004528,0.001957,0.001566,0.001249,0.001583,0.002423,0.000587,0.001927


## Resnet del 28 de noviembre

In [44]:
# directorio = '../input/resnetrandom28nov'
# folds_cp = folds.copy()
# feature_cols = feature_cols_ini_nocps.copy()
# NFOLDS = CFG.num_folds
# num_features=len(feature_cols)
# num_targets=len(target_cols)

# DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
# EPOCHS = 25
# BATCH_SIZE = 128
# LEARNING_RATE = 1e-3
# WEIGHT_DECAY = 1e-5
# EARLY_STOPPING_STEPS = 10
# EARLY_STOP = False

# res = []
# # Averaging on multiple SEEDS
# oof_seed = np.zeros((len(folds), len(target_cols)))
# predictions = np.zeros((len(test_noctl), len(target_cols)))
# losses_list = []
# # SEED = [[0,3],[1,3],[2,2],[3,0]]
# # for seed_fold, seed_run in tqdm(SEED):
# SEED = np.arange(16)
# for seed in tqdm(SEED):
#     seed_fold = seed // 4
#     seed_run = seed % 4
#     predictions_ = run_k_fold(NFOLDS, seed_fold, seed_run, display=2)
#     predictions += predictions_

# # FINAL CV LOGLOSS
# for col in target_cols:
#     test_noctl[col] = 0.0
# test_noctl[target_cols] = predictions / len(SEED)

In [45]:
# submission = sample_submission.drop(columns=target_cols)\
# .merge(test_noctl[['sig_id']+target_cols], on='sig_id', how='left')\
# .fillna(0.0).reset_index(drop=True)
# # sub.to_csv('submission.csv', index=False)
# # name_sub = 'submission.csv'
# # submission.to_csv(name_sub, index=False)
# # print(name_sub)
# print("RESNET 28nov")
# submission_RESNET_28nov = submission.copy()
# submission.head()

# 3. TABNET

In [46]:
# Put the bundled TabNet sources at the front of the import search path so the
# `pytorch_tabnet` imports below resolve to this directory first.
sys.path.insert(0, "../input/tabnetdevelop/tabnet-develop")
# Reset all random generators before the TabNet section.
seed_everything(seed=42)

In [47]:
def process_data(data):
    """Return a copy of ``data`` with ``cp_time`` and ``cp_dose`` replaced by
    one-hot indicator columns (``cp_time_<value>``, ``cp_dose_<value>``).

    The input frame is not modified.
    """
    categorical_cols = ['cp_time', 'cp_dose']
    return pd.get_dummies(data, columns=categorical_cols)

def process_data_tabnet(data):
    """Label-encode ``cp_time`` and ``cp_dose`` in place and return ``data``.

    ``cp_time`` is mapped 24 -> 0, 48 -> 1, 72 -> 2 and ``cp_dose`` is mapped
    'D1' -> 0, 'D2' -> 1.  Any value outside these mappings becomes NaN, so
    calling the function a second time on the same frame turns the already
    encoded columns into NaN.

    The columns are replaced by plain column assignment rather than
    ``data.loc[:, col] = ...``: the ``.loc`` form writes into the existing
    column, which in recent pandas leaves ``cp_dose`` with ``object`` dtype
    even though it then holds integers.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``cp_time`` and ``cp_dose`` columns; it is modified.

    Returns
    -------
    pandas.DataFrame
        The same frame object that was passed in.
    """
    cp_time_codes = {24: 0, 48: 1, 72: 2}
    cp_dose_codes = {'D1': 0, 'D2': 1}
    data['cp_time'] = data['cp_time'].map(cp_time_codes)
    data['cp_dose'] = data['cp_dose'].map(cp_dose_codes)
    return data

In [48]:
# folds = process_data_tabnet(folds_novar_enc)
# test_noctl = process_data_tabnet(test_noctl_novar_enc)
# print(folds.shape, test_noctl.shape)

# del folds_novar_enc
# del test_noctl_novar_enc
# gc.collect()

# feature_cols = [c for c in folds.columns if c not in target_cols]
# if not CFG.original_feats: feature_cols = [c for c in feature_cols if c not in GENES+CELLS]
# feature_cols_ini = [c for c in feature_cols if c not in ['kfold','sig_id','cp_time','cp_dose']] #, 'cp_time_24', 'cp_time_48', 'cp_time_72', 'cp_dose_D1', 'cp_dose_D2']] #,'cp_dose','cp_time']]
# len(feature_cols_ini)
# categorical_feats = ['cp_time','cp_dose']
# numeric_feats = [c for c in feature_cols_ini if c not in ['cp_time','cp_dose']]

In [49]:
# # Standarize Numerical Features
# for colname in numeric_feats:
#     valor_fold = folds[colname].values
#     valor_tst = test_noctl[colname].values
#     mean_v = np.mean(valor_fold)
#     std_v = np.std(valor_fold)
#     if std_v==0:
#         std_v=1e-5
#     folds[colname] = (valor_fold-mean_v)/std_v
#     test_noctl[colname] = (valor_tst-mean_v)/std_v
#     print(colname, mean_v, std_v)

In [50]:
## TABNET https://www.kaggle.com/ludovick/introduction-to-tabnet-kfold-10-training
#from pytorch_tabnet.tab_model import TabModel
from scipy.sparse import csc_matrix
import time
from abc import abstractmethod
from pytorch_tabnet import tab_network
from pytorch_tabnet.multiclass_utils import unique_labels
from sklearn.metrics import roc_auc_score, mean_squared_error, accuracy_score
from torch.nn.utils import clip_grad_norm_
from pytorch_tabnet.utils import (PredictDataset,
                                  create_dataloaders,
                                  create_explain_matrix)
from sklearn.base import BaseEstimator
from torch.utils.data import DataLoader
from copy import deepcopy
import io
import json
import shutil
import zipfile

In [51]:
# def evals(model, X, y, verbose=True):
#     with torch.no_grad():
#         y_preds = model.predict(X)
#         y_preds = torch.clamp(y_preds, 0.0, 1.0).detach().numpy()
#     score = log_loss_multi(y, y_preds)
#     #print("Logloss = ", score)
#     return y_preds, score


# def inference_fn(model, X ,verbose=True):
#     with torch.no_grad():
#         y_preds = model.predict( X )
#         y_preds = torch.sigmoid(torch.as_tensor(y_preds)).numpy()
#     return y_preds


# def log_loss_score(actual, predicted,  eps=1e-15):

#         """
#         :param predicted:   The predicted probabilities as floats between 0-1
#         :param actual:      The binary labels. Either 0 or 1.
#         :param eps:         Log(0) is equal to infinity, so we need to offset our predicted values slightly by eps from 0 or 1
#         :return:            The logarithmic loss between between the predicted probability assigned to the possible outcomes for item i, and the actual outcome.
#         """

        
#         p1 = actual * np.log(predicted+eps)
#         p0 = (1-actual) * np.log(1-predicted+eps)
#         loss = p0 + p1

#         return -loss.mean()
    
# def log_loss_multi(y_true, y_pred):
#     M = y_true.shape[1]
#     results = np.zeros(M)
#     for i in range(M):
#         results[i] = log_loss_score(y_true[:,i], y_pred[:,i])
#     return results.mean()

def check_targets(targets):
    """Return True when every column of the 2-D array ``targets`` holds
    exactly two distinct values, and False as soon as one column does not.

    A column with a single value (for example all zeros) therefore fails the
    check, as does a column with three or more values.
    """
    n_columns = targets.shape[1]
    return all(
        len(np.unique(targets[:, col])) == 2
        for col in range(n_columns)
    )
            

In [52]:
class TabModel(BaseEstimator):
    def __init__(self, n_d=8, n_a=8, n_steps=3, gamma=1.3, cat_idxs=[], cat_dims=[], cat_emb_dim=1,
                 n_independent=2, n_shared=2, epsilon=1e-15,  momentum=0.02,
                 lambda_sparse=1e-3, seed=0,
                 clip_value=1, verbose=1,
                 optimizer_fn=torch.optim.Adam,
                 optimizer_params=dict(lr=2e-2),
                 scheduler_params=None, scheduler_fn=None,
                 mask_type="sparsemax",
                 input_dim=None, output_dim=None,
                 device_name='auto'):
        """Store the TabNet hyper-parameters, seed torch and pick the device.

        Parameters
        ----------
            n_d, n_a, n_steps, gamma, cat_idxs, cat_dims, cat_emb_dim,
            n_independent, n_shared, epsilon, momentum, mask_type,
            input_dim, output_dim:
                Kept on the instance and later forwarded to
                ``tab_network.TabNet`` through ``init_network``.
            optimizer_fn, optimizer_params:
                Optimizer factory and its keyword arguments.
            scheduler_fn, scheduler_params:
                Optional learning-rate scheduler factory and its keyword
                arguments.
            lambda_sparse, clip_value, verbose:
                Stored on the instance for the training code.
            seed: int
                Passed to ``torch.manual_seed``.
            device_name: str
                'cuda' if running on GPU, 'cpu' if not, 'auto' to autodetect

        NOTE: ``cat_idxs``, ``cat_dims`` and ``optimizer_params`` have mutable
        defaults that are stored as-is, so the default objects are shared by
        every instance built without these arguments; do not mutate them.
        """
        # --- network hyper-parameters -------------------------------------
        self.n_d, self.n_a = n_d, n_a
        self.n_steps = n_steps
        self.gamma = gamma
        self.cat_idxs, self.cat_dims = cat_idxs, cat_dims
        self.cat_emb_dim = cat_emb_dim
        self.n_independent, self.n_shared = n_independent, n_shared
        self.epsilon = epsilon
        self.momentum = momentum
        self.mask_type = mask_type
        self.input_dim, self.output_dim = input_dim, output_dim

        # --- training settings --------------------------------------------
        self.lambda_sparse = lambda_sparse
        self.clip_value = clip_value
        self.verbose = verbose
        self.optimizer_fn = optimizer_fn
        self.optimizer_params = optimizer_params
        self.scheduler_params = scheduler_params
        self.scheduler_fn = scheduler_fn
        self.batch_size = 1024

        # The requested name is stored unresolved ('auto' stays 'auto');
        # only self.device holds the concrete device.
        self.device_name = device_name

        self.seed = seed
        torch.manual_seed(self.seed)

        # Resolve 'auto' to a concrete device name.
        if device_name == 'auto':
            device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device_name)
        print(f"Device used : {self.device}")

    @abstractmethod
    def construct_loaders(self, X_train, y_train, X_valid, y_valid,
                          weights, batch_size, num_workers, drop_last):
        """Hook that subclasses override to build the dataloaders used by fit.

        The base implementation always raises ``NotImplementedError``.

        Returns
        -------
        train_dataloader, valid_dataloader : torch.DataLoader, torch.DataLoader
            Training and validation dataloaders
        """
        message = 'users must define construct_loaders to use this base class'
        raise NotImplementedError(message)

    def init_network(self, input_dim, output_dim, n_d, n_a, n_steps, gamma,
                     cat_idxs, cat_dims, cat_emb_dim, n_independent, n_shared,
                     epsilon, virtual_batch_size, momentum, device_name,
                     mask_type):
        """Create the TabNet network and its explain matrix.

        Builds a ``tab_network.TabNet`` from the given arguments, moves it to
        ``self.device`` and stores it as ``self.network``.  Then stores the
        result of ``create_explain_matrix`` (computed from the network's own
        ``input_dim``, ``cat_emb_dim``, ``cat_idxs`` and ``post_embed_dim``)
        as ``self.reducing_matrix``.
        """
        network = tab_network.TabNet(
            input_dim,
            output_dim,
            n_d=n_d,
            n_a=n_a,
            n_steps=n_steps,
            gamma=gamma,
            cat_idxs=cat_idxs,
            cat_dims=cat_dims,
            cat_emb_dim=cat_emb_dim,
            n_independent=n_independent,
            n_shared=n_shared,
            epsilon=epsilon,
            virtual_batch_size=virtual_batch_size,
            momentum=momentum,
            device_name=device_name,
            mask_type=mask_type,
        )
        self.network = network.to(self.device)

        net = self.network
        self.reducing_matrix = create_explain_matrix(
            net.input_dim,
            net.cat_emb_dim,
            net.cat_idxs,
            net.post_embed_dim,
        )

    def fit(self, X_train, y_train, X_valid=None, y_valid=None, loss_fn=None,
            weights=0, max_epochs=100, patience=10, batch_size=1024,
            virtual_batch_size=128, num_workers=0, drop_last=False):
        """Train a neural network stored in self.network
        Using train_dataloader for training data and
        valid_dataloader for validation.
        Parameters
        ----------
            X_train: np.ndarray
                Train set
            y_train : np.array
                Train targets
            X_train: np.ndarray
                Train set
            y_train : np.array
                Train targets
            weights : bool or dictionnary
                0 for no balancing
                1 for automated balancing
                dict for custom weights per class
            max_epochs : int
                Maximum number of epochs during training
            patience : int
                Number of consecutive non improving epoch before early stopping
            batch_size : int
                Training batch size
            virtual_batch_size : int
                Batch size for Ghost Batch Normalization (virtual_batch_size < batch_size)
            num_workers : int
                Number of workers used in torch.utils.data.DataLoader
            drop_last : bool
                Whether to drop last batch during training
        """
        # update model name

        self.update_fit_params(X_train, y_train, X_valid, y_valid, loss_fn,
                               weights, max_epochs, patience, batch_size,
                               virtual_batch_size, num_workers, drop_last)

        train_dataloader, valid_dataloader = self.construct_loaders(X_train,
                                                                    y_train,
                                                                    X_valid,
                                                                    y_valid,
                                                                    self.updated_weights,
                                                                    self.batch_size,
                                                                    self.num_workers,
                                                                    self.drop_last)

        self.init_network(
            input_dim=self.input_dim,
            output_dim=self.output_dim,
            n_d=self.n_d,
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            cat_idxs=self.cat_idxs,
            cat_dims=self.cat_dims,
            cat_emb_dim=self.cat_emb_dim,
            n_independent=self.n_independent,
            n_shared=self.n_shared,
            epsilon=self.epsilon,
            virtual_batch_size=self.virtual_batch_size,
            momentum=self.momentum,
            device_name=self.device_name,
            mask_type=self.mask_type
        )

        self.optimizer = self.optimizer_fn(self.network.parameters(),
                                           **self.optimizer_params)

        if self.scheduler_fn:
            self.scheduler = self.scheduler_fn(self.optimizer, **self.scheduler_params)
        else:
            self.scheduler = None

        self.losses_train = []
        self.losses_valid = []
        self.learning_rates = []
        self.metrics_train = []
        self.metrics_valid = []

        if self.verbose > 0:
            print("Will train until validation stopping metric",
                  f"hasn't improved in {self.patience} rounds.")
            msg_epoch = f'| EPOCH |  train  |   valid  | total time (s)'
            print('---------------------------------------')
            print(msg_epoch)

        total_time = 0
        while (self.epoch < self.max_epochs and
               self.patience_counter < self.patience):
            starting_time = time.time()
            # updates learning rate history
            self.learning_rates.append(self.optimizer.param_groups[-1]["lr"])

            fit_metrics = self.fit_epoch(train_dataloader, valid_dataloader)

            # leaving it here, may be used for callbacks later
            self.losses_train.append(fit_metrics['train']['loss_avg'])
            self.losses_valid.append(fit_metrics['valid']['total_loss'])
            self.metrics_train.append(fit_metrics['train']['stopping_loss'])
            self.metrics_valid.append(fit_metrics['valid']['stopping_loss'])

            stopping_loss = fit_metrics['valid']['stopping_loss']
            if stopping_loss < self.best_cost:
                self.best_cost = stopping_loss
                self.patience_counter = 0
                # Saving model
                self.best_network = deepcopy(self.network)
                has_improved = True
            else:
                self.patience_counter += 1
                has_improved=False
            self.epoch += 1
            total_time += time.time() - starting_time
            if self.verbose > 0:
                if self.epoch % self.verbose == 0:
                    separator = "|"
                    msg_epoch = f"| {self.epoch:<5} | "
                    msg_epoch += f" {fit_metrics['train']['stopping_loss']:.5f}"
                    msg_epoch += f' {separator:<2} '
                    msg_epoch += f" {fit_metrics['valid']['stopping_loss']:.5f}"
                    msg_epoch += f' {separator:<2} '
                    msg_epoch += f" {np.round(total_time, 1):<10}"
                    msg_epoch += f" {has_improved}"
                    print(msg_epoch)

        if self.verbose > 0:
            if self.patience_counter == self.patience:
                print(f"Early stopping occured at epoch {self.epoch}")
            print(f"Training done in {total_time:.3f} seconds.")
            print('---------------------------------------')

        self.history = {"train": {"loss": self.losses_train,
                                  "metric": self.metrics_train,
                                  "lr": self.learning_rates},
                        "valid": {"loss": self.losses_valid,
                                  "metric": self.metrics_valid}}
        # load best models post training
        self.load_best_model()

        # compute feature importance once the best model is defined
        self._compute_feature_importances(train_dataloader)

    def save_model(self, path):
        """
        Write the model to ``<path>.zip`` and return that archive path.

        The archive holds two files: ``model_params.json`` (the result of
        ``self.get_params()`` minus class-valued entries) and ``network.pt``
        (the ``state_dict`` of ``self.network``).

        Parameters
        ----------
            path : str
                Working folder to create; it is zipped and then removed.

        Returns
        -------
            str
                ``f"{path}.zip"``
        """
        # Values that are themselves classes are left out of the JSON dump
        # (the original code labels these as torch specific params).
        serializable = {name: value
                        for name, value in self.get_params().items()
                        if not isinstance(value, type)}

        target_dir = Path(path)
        target_dir.mkdir(parents=True, exist_ok=True)

        # Hyper-parameters
        with open(target_dir.joinpath("model_params.json"), "w", encoding="utf8") as handle:
            json.dump(serializable, handle)

        # Weights
        torch.save(self.network.state_dict(), target_dir.joinpath("network.pt"))

        # Pack the folder next to itself, then drop the unpacked copy.
        shutil.make_archive(path, 'zip', path)
        shutil.rmtree(path)
        print(f"Successfully saved model at {path}.zip")
        return f"{path}.zip"

    def load_model(self, filepath):
        """
        Restore hyper-parameters and weights from an unpacked model folder.

        The folder must contain ``model_params.json`` and ``network.pt``.
        The instance is re-initialised with the stored parameters
        (``self.__init__(**loaded_params)``), the network is rebuilt, the
        weights are loaded and the network is put in eval mode.

        Parameters
        ----------
            filepath : str or path-like
                Folder holding the two files.

        Raises
        ------
            FileNotFoundError
                If either file is missing from ``filepath``.
        """
        model_dir = Path(filepath)
        params_file = model_dir / 'model_params.json'
        weights_file = model_dir / 'network.pt'

        # Fail early with one message that names everything that is missing.
        missing = [p.name for p in (params_file, weights_file) if not p.is_file()]
        if missing:
            raise FileNotFoundError(
                f"Model folder {model_dir} is missing: {', '.join(missing)}")

        with open(params_file, encoding="utf8") as f:
            loaded_params = json.load(f)
        # Deserialize onto the CPU so checkpoints written on a GPU can still be
        # read on a CPU-only host; load_state_dict below copies the tensors
        # into the network's own parameters.
        saved_state_dict = torch.load(str(weights_file), map_location='cpu')

        self.__init__(**loaded_params)

        self.init_network(
            input_dim=self.input_dim,
            output_dim=self.output_dim,
            n_d=self.n_d,
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            cat_idxs=self.cat_idxs,
            cat_dims=self.cat_dims,
            cat_emb_dim=self.cat_emb_dim,
            n_independent=self.n_independent,
            n_shared=self.n_shared,
            epsilon=self.epsilon,
            # Hard-coded rather than read from self.
            # NOTE(review): confirm this is acceptable if the restored
            # network is ever trained further.
            virtual_batch_size=1024,
            momentum=self.momentum,
            device_name=self.device_name,
            mask_type=self.mask_type
        )
        self.network.load_state_dict(saved_state_dict)
        self.network.eval()

    def fit_epoch(self, train_dataloader, valid_dataloader):
        """
        Run one training pass followed by one validation pass.

        Parameters
        ----------
            train_dataloader: a :class: `torch.utils.data.Dataloader`
                DataLoader with train set
            valid_dataloader: a :class: `torch.utils.data.Dataloader`
                DataLoader with valid set

        Returns
        -------
            dict
                ``{'train': ..., 'valid': ...}`` holding the values returned by
                ``train_epoch`` and ``predict_epoch`` respectively.
        """
        # Dict entries are evaluated in order: training runs before validation.
        return {
            'train': self.train_epoch(train_dataloader),
            'valid': self.predict_epoch(valid_dataloader),
        }

    @abstractmethod
    def train_epoch(self, train_loader):
        """
        Hook for subclasses: train ``self.network`` for one epoch.

        Parameters
        ----------
            train_loader: a :class: `torch.utils.data.Dataloader`
                DataLoader with train set

        Raises
        ------
            NotImplementedError
                Always, in this base class.
        """
        message = 'users must define train_epoch to use this base class'
        raise NotImplementedError(message)

    @abstractmethod
    def train_batch(self, data, targets):
        """
        Hook for subclasses: run one optimisation step on a single batch.

        Parameters
        ----------
            data: a :tensor: `torch.tensor`
                Input data
            targets: a :tensor: `torch.tensor`
                Target data

        Raises
        ------
            NotImplementedError
                Always, in this base class.
        """
        message = 'users must define train_batch to use this base class'
        raise NotImplementedError(message)

    @abstractmethod
    def predict_epoch(self, loader):
        """
        Hook for subclasses: evaluate ``self.network`` over one full pass.

        Parameters
        ----------
            loader: a :class: `torch.utils.data.Dataloader`
                    DataLoader with validation set

        Raises
        ------
            NotImplementedError
                Always, in this base class.
        """
        message = 'users must define predict_epoch to use this base class'
        raise NotImplementedError(message)

    @abstractmethod
    def predict_batch(self, data, targets):
        """
        Hook for subclasses: score a single validation batch.

        Parameters
        ----------
            data: a :tensor: `torch.Tensor`
                Input data
            targets: a :tensor: `torch.Tensor`
                Target data

        Returns
        -------
            batch_outs: dict

        Raises
        ------
            NotImplementedError
                Always, in this base class.
        """
        message = 'users must define predict_batch to use this base class'
        raise NotImplementedError(message)

    def load_best_model(self):
        """Make ``self.best_network`` the active network, if one is set."""
        best = self.best_network
        if best is None:
            # Nothing recorded: keep the current network untouched.
            return
        self.network = best

    @abstractmethod
    def predict(self, X):
        """
        Hook for subclasses: produce predictions for ``X``.

        Parameters
        ----------
            X:
                Input data to predict on

        Returns
        -------
            predictions: np.array
                Predictions of the regression problem or the last class

        Raises
        ------
            NotImplementedError
                Always, in this base class.
        """
        message = 'users must define predict to use this base class'
        raise NotImplementedError(message)

    def explain(self, X):
        """
        Return local explanation

        ``X`` is fed through ``self.network.forward_masks`` in batches of
        ``self.batch_size``; every returned array is multiplied by
        ``self.reducing_matrix`` and the per-batch results are stacked row-wise.

        Parameters
        ----------
            X:
                Input data, wrapped in ``PredictDataset``

        Returns
        -------
            M_explain: matrix
                Importance per sample, per columns.
            masks: matrix
                Sparse matrix showing attention masks used by network.
                Returned as the mapping produced by ``forward_masks``, one
                stacked array per key.
        """
        self.network.eval()

        loader = DataLoader(PredictDataset(X),
                            batch_size=self.batch_size, shuffle=False)

        for batch_idx, batch in enumerate(loader):
            batch = batch.to(self.device).float()

            M_explain, masks = self.network.forward_masks(batch)

            # Project this batch's explanation and each mask through the
            # reducing matrix once, before deciding how to accumulate them.
            batch_explain = csc_matrix.dot(M_explain.cpu().detach().numpy(),
                                           self.reducing_matrix)
            for step in list(masks):
                masks[step] = csc_matrix.dot(masks[step].cpu().detach().numpy(),
                                             self.reducing_matrix)

            if batch_idx == 0:
                # First batch seeds the accumulators.
                res_explain, res_masks = batch_explain, masks
                continue

            res_explain = np.vstack([res_explain, batch_explain])
            for step, step_mask in masks.items():
                res_masks[step] = np.vstack([res_masks[step], step_mask])
        return res_explain, res_masks

    def _compute_feature_importances(self, loader):
        """
        Compute normalised feature importances and store them on the instance.

        The explanation matrix returned by ``self.network.forward_masks`` is
        summed over rows for every batch of ``loader``, multiplied by
        ``self.reducing_matrix`` and divided by its total, then saved as
        ``self.feature_importances_``.

        Parameters
        ----------
            loader:
                Iterable yielding ``(data, targets)`` pairs; targets are ignored.
        """
        self.network.eval()
        accumulated = np.zeros((self.network.post_embed_dim))
        for batch, _ in loader:
            batch = batch.to(self.device).float()
            batch_explain, _ = self.network.forward_masks(batch)
            accumulated += batch_explain.sum(dim=0).cpu().detach().numpy()

        reduced = csc_matrix.dot(accumulated, self.reducing_matrix)
        # Normalise so the importances sum to one.
        self.feature_importances_ = reduced / np.sum(reduced)
        
        
class TabNetRegressor(TabModel):
    """
    TabNet model for multi-output targets.

    As written here, training uses the module-level ``loss_tr`` and validation
    uses the module-level ``loss_val`` (both looked up when a batch is
    processed), and network outputs are passed through a sigmoid before being
    scored or returned by ``predict``.
    """

    def construct_loaders(self, X_train, y_train, X_valid, y_valid, weights,
                          batch_size, num_workers, drop_last):
        """
        Build the training and validation dataloaders via ``create_dataloaders``.

        Returns
        -------
        train_dataloader, valid_dataloader : torch.DataLoader, torch.DataLoader
            Training and validation dataloaders

        Raises
        ------
        ValueError
            If ``weights`` is the integer 1 or a dict; only a list of weights
            is accepted for this model.
        """
        if isinstance(weights, int):
            if weights == 1:
                raise ValueError("Please provide a list of weights for regression.")
        if isinstance(weights, dict):
            raise ValueError("Please provide a list of weights for regression.")

        train_dataloader, valid_dataloader = create_dataloaders(X_train,
                                                                y_train,
                                                                X_valid,
                                                                y_valid,
                                                                weights,
                                                                batch_size,
                                                                num_workers,
                                                                drop_last)
        return train_dataloader, valid_dataloader

    def update_fit_params(self, X_train, y_train, X_valid, y_valid, loss_fn,
                          weights, max_epochs, patience,
                          batch_size, virtual_batch_size, num_workers, drop_last):
        """
        Validate the fit inputs and store the fit settings on the instance.

        Sets ``loss_fn`` (MSE when ``None`` is given), ``input_dim`` and
        ``output_dim`` from the data shapes, copies the remaining arguments to
        attributes of the same name, and resets the early-stopping state
        (``patience_counter``, ``epoch``, ``best_cost``).

        Raises
        ------
        ValueError
            If ``y_train`` is one-dimensional.
        AssertionError
            If train and valid sets disagree on their second dimension.
        """
        if loss_fn is None:
            self.loss_fn = torch.nn.functional.mse_loss
        else:
            self.loss_fn = loss_fn

        assert X_train.shape[1] == X_valid.shape[1], "Dimension mismatch X_train X_valid"
        self.input_dim = X_train.shape[1]

        if len(y_train.shape) == 1:
            raise ValueError("""Please apply reshape(-1, 1) to your targets
                                if doing single regression.""")
        assert y_train.shape[1] == y_valid.shape[1], "Dimension mismatch y_train y_valid"
        self.output_dim = y_train.shape[1]

        self.updated_weights = weights

        self.max_epochs = max_epochs
        self.patience = patience
        self.batch_size = batch_size
        self.virtual_batch_size = virtual_batch_size
        # Initialize counters and histories.
        self.patience_counter = 0
        self.epoch = 0
        self.best_cost = np.inf
        self.num_workers = num_workers
        self.drop_last = drop_last

    def train_epoch(self, train_loader):
        """
        Trains one epoch of the network in self.network

        Parameters
        ----------
            train_loader: a :class: `torch.utils.data.Dataloader`
                DataLoader with train set

        Returns
        -------
            dict
                ``'loss_avg'`` and ``'stopping_loss'``, both holding the batch
                loss averaged over ``len(train_loader)``.
        """
        self.network.train()
        total_loss = 0
        for data, targets in train_loader:
            batch_outs = self.train_batch(data, targets)
            total_loss += batch_outs["loss"]

        # NOTE: 'stopping_loss' mirrors 'loss_avg' for the training split.
        # A per-epoch log_loss_multi over all training predictions used to be
        # computed here, but its result never reached epoch_metrics, so that
        # dead work (and the GPU->CPU copies feeding it) was dropped.
        total_loss = total_loss / len(train_loader)
        epoch_metrics = {'loss_avg': total_loss,
                         'stopping_loss': total_loss,
                         }

        if self.scheduler is not None:
            self.scheduler.step()
        return epoch_metrics

    def train_batch(self, data, targets):
        """
        Trains one batch of data

        Parameters
        ----------
            data: a :tensor: `torch.tensor`
                Input data
            targets: a :tensor: `torch.tensor`
                Target data

        Returns
        -------
            batch_outs: dict
                ``'loss'`` (float), ``'y_preds'`` (raw network output) and
                ``'y'`` (targets on the model device).
        """
        self.network.train()
        data = data.to(self.device).float()

        targets = targets.to(self.device).float()
        self.optimizer.zero_grad()

        output, M_loss = self.network(data)

        # Uses the module-level training criterion, not self.loss_fn.
        loss = loss_tr(output, targets) #self.loss_fn(output, targets)

        loss -= self.lambda_sparse*M_loss

        loss.backward()
        if self.clip_value:
            clip_grad_norm_(self.network.parameters(), self.clip_value)
        self.optimizer.step()

        loss_value = loss.item()
        batch_outs = {'loss': loss_value,
                      'y_preds': output,
                      'y': targets}
        return batch_outs

    def predict_epoch(self, loader):
        """
        Validates one epoch of the network in self.network

        Parameters
        ----------
            loader: a :class: `torch.utils.data.Dataloader`
                    DataLoader with validation set

        Returns
        -------
            dict
                ``'total_loss'`` (batch loss averaged over ``len(loader)``) and
                ``'stopping_loss'`` (``log_loss_multi`` of the targets against
                the sigmoid of the network outputs).
        """
        self.network.eval()
        total_loss = 0

        y_pred = []
        y_true = []
        for data, targets in loader:
            batch_outs = self.predict_batch(data, targets)
            total_loss += batch_outs["loss"]
            y_pred.append(batch_outs["y_preds"].sigmoid().cpu().detach().numpy())
            y_true.append(batch_outs["y"].cpu().detach().numpy())

        y_pred = np.concatenate(y_pred)
        y_true = np.concatenate(y_true)

        stopping_loss = log_loss_multi(y_true, y_pred)

        total_loss = total_loss / len(loader)
        epoch_metrics = {'total_loss': total_loss,
                         'stopping_loss': stopping_loss}

        return epoch_metrics

    def predict_batch(self, data, targets):
        """
        Make predictions on a batch (valid)

        Parameters
        ----------
            data: a :tensor: `torch.Tensor`
                Input data
            targets: a :tensor: `torch.Tensor`
                Target data

        Returns
        -------
            batch_outs: dict
                ``'loss'`` (float), ``'y_preds'`` (raw network output) and
                ``'y'`` (targets on the model device).
        """
        self.network.eval()
        data = data.to(self.device).float()
        targets = targets.to(self.device).float()

        # No parameter update happens here, so skip autograd bookkeeping.
        with torch.no_grad():
            output, M_loss = self.network(data)

            # Uses the module-level validation criterion, not self.loss_fn.
            loss = loss_val(output, targets) #self.loss_fn(output, targets)
            loss -= self.lambda_sparse*M_loss
        loss_value = loss.item()
        batch_outs = {'loss': loss_value,
                      'y_preds': output,
                      'y': targets}
        return batch_outs

    def predict(self, X):
        """
        Predict on ``X`` in batches of ``self.batch_size``.

        Parameters
        ----------
            X:
                Input data, wrapped in ``PredictDataset``

        Returns
        -------
            predictions: np.array
                Sigmoid of the network outputs, batches concatenated in order.
        """
        self.network.eval()
        dataloader = DataLoader(PredictDataset(X),
                                batch_size=self.batch_size, shuffle=False)

        y_pred = []
        # Inference only: do not record operations for backprop.
        with torch.no_grad():
            for data in dataloader:
                data = data.to(self.device).float()

                output, _ = self.network(data)
                y_pred.append(output.sigmoid().cpu().detach().numpy())
        return np.concatenate(y_pred)

In [53]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    """
    Binary cross-entropy on logits with label smoothing.

    Targets are pulled toward 0.5 before the loss is computed:
    ``targets * (1 - smoothing) + 0.5 * smoothing``.

    Parameters
    ----------
        weight : torch.Tensor or None
            Optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction : str
            ``'mean'`` (default), ``'sum'``, or any other value to get the
            unreduced element-wise loss.
        smoothing : float
            Smoothing factor, must satisfy ``0 <= smoothing < 1``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The parent constructor already stores `weight` and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        """Return smoothed targets; ``n_labels`` is accepted but unused."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE-with-logits loss of ``inputs`` vs ``targets``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        # Ask for the element-wise loss so the reduction requested by the
        # caller is the one actually applied; with the functional default
        # ('mean') the 'sum' and unreduced modes would collapse to the mean.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight,
                                                  reduction='none')

        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'mean':
            return loss.mean()
        return loss

In [54]:
# Criterion read by TabNetRegressor.train_batch: BCE on logits with lightly
# smoothed targets.
loss_tr = SmoothBCEwLogits(smoothing=0.001)
# Criterion read by TabNetRegressor.predict_batch: plain BCE on logits.
loss_val = F.binary_cross_entropy_with_logits

In [55]:
def run_inference_TABNET(fold, seed_fold, seed_run, display=2):
    """
    Load one saved TabNet checkpoint and predict on the test set.

    Reads the notebook globals ``tabnet_parameters``, ``directorio``,
    ``test_noctl`` and ``feature_cols``.

    Parameters
    ----------
        fold : int
            Fold index used in the checkpoint folder name.
        seed_fold : int
            Fold-split seed used in the checkpoint folder name.
        seed_run : int
            Run seed; passed to ``seed_everything`` and used in the folder name.
        display : int
            Unused; kept for signature compatibility.

    Returns
    -------
        np.ndarray
            Output of ``model.predict`` on ``test_noctl[feature_cols]``.
    """
    seed_everything(seed_run)

    # NOTE(review): load_model re-runs __init__ with the parameters stored in
    # the checkpoint, so the values passed here may be overridden — confirm.
    model = TabNetRegressor(n_d=tabnet_parameters['n_d'], n_a=tabnet_parameters['n_a'],
                        n_steps=tabnet_parameters['n_steps'], gamma=tabnet_parameters['gamma'],
                        lambda_sparse=tabnet_parameters['lambda_sparse'],mask_type=tabnet_parameters['mask_type'],
                        optimizer_fn=torch.optim.Adam,
                        optimizer_params=dict(lr=tabnet_parameters['lr'], weight_decay=tabnet_parameters['weight_decay']),
                        device_name='cuda', scheduler_params=dict(milestones=[ 100,150], gamma=0.9),
                        scheduler_fn=torch.optim.lr_scheduler.MultiStepLR,
                        seed=int(seed_run), verbose=1)
    model.load_model(f'{directorio}/tabnet_model_seedfold{seed_fold}_seedrun{seed_run}_fold__{fold}')

    # The test frame is only read, so select the feature columns directly
    # instead of deep-copying the whole frame on every call.
    x_test = torch.as_tensor(test_noctl[feature_cols].values)
    return model.predict(x_test)

In [56]:
def run_k_fold(NFOLDS, seed_fold, seed_run, display=2):
    """
    Average the test predictions of the ``NFOLDS`` checkpoints of one seed pair.

    Each fold's prediction from ``run_inference_TABNET`` is divided by
    ``NFOLDS`` and accumulated into an array of shape
    ``(len(test_noctl), len(target_cols))``, which is returned.
    ``display`` is forwarded unchanged.
    """
    averaged = np.zeros((len(test_noctl), len(target_cols)))
    for fold_idx in range(NFOLDS):
        fold_pred = run_inference_TABNET(fold_idx, seed_fold, seed_run, display)
        # In-place accumulation of this fold's share of the mean.
        np.add(averaged, fold_pred / NFOLDS, out=averaged)
    return averaged

In [57]:
# Display the number of entries in feature_cols_ini.
len(feature_cols_ini)

974

## TABNET del 30 de noviembre

In [58]:
# Folder holding the saved TabNet checkpoints used for this ensemble.
# Other folders tried previously: '../input/tabnetstratkmean7',
# '../input/tabnetrandomnosmooth', '../input/tabnetstratnosmooth'.
directorio = '../input/tabnetrandommean7'
tabnet_parameters = {
    'n_d': 35,
    'n_a': 30,
    'n_steps': 1,
    'gamma': 1.3,
    'lambda_sparse': 0,
    'mask_type': 'entmax',
    'max_epochs': 200,
    'batch_size': 1024,
    'lr': 2e-2,
    'weight_decay': 1e-5,
    'patience': 50,
}

folds_cp = folds.copy()
feature_cols = feature_cols_ini.copy()
NFOLDS = CFG.num_folds
num_features = len(feature_cols)
num_targets = len(target_cols)

# Average over 16 checkpoints sets, indexed by (seed_fold, seed_run) with both
# components running over 0..3.
SEED = np.arange(16)
oof_seed = np.zeros((len(folds), len(target_cols)))
predictions = np.zeros((len(test_noctl), len(target_cols)))
losses_list = []
for seed in tqdm(SEED):
    seed_fold, seed_run = divmod(seed, 4)

    predictions_ = run_k_fold(NFOLDS, seed_fold, seed_run, display=2)
    predictions += predictions_

# Write the seed-averaged test predictions into the target columns.
for col in target_cols:
    test_noctl[col] = 0.0
test_noctl[target_cols] = predictions / len(SEED)

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used 

In [59]:
# Attach the predictions to every sig_id of the sample submission; ids with no
# row in test_noctl come out of the left merge as NaN and are filled with 0.0.
submission = (
    sample_submission
    .drop(columns=target_cols)
    .merge(test_noctl[['sig_id'] + target_cols], on='sig_id', how='left')
    .fillna(0.0)
    .reset_index(drop=True)
)
name_sub = 'submission.csv'
submission.to_csv(name_sub, index=False)
print('TABNET 30nov')
# Keep a copy under a model-specific name.
submission_TABNET_30nov = submission.copy()
submission.head()

TABNET 30nov


Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_
antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gt
pase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.001706,0.001714,0.002338,0.011531,0.017788,0.004932,0.002702,0.006784,0.000621,0.008753,0.014869,0.001108,0.000942,0.000819,0.001456,0.001378,0.003852,0.008377,0.006118,0.002551,0.003672,0.00465,0.000876,0.002961,0.001027,0.001203,0.001143,0.001797,0.004038,0.002474,0.001517,0.004274,0.004625,0.000704,0.000741,0.001123,0.004346,0.000663,0.000707,0.000929,0.005232,0.008558,0.002252,0.008608,0.008052,0.006253,0.001028,0.003018,0.00091,0.003267,0.002182,0.003895,0.001006,0.000973,0.017827,0.002684,0.002784,0.002785,0.001913,0.002653,0.001361,0.005626,0.001922,0.000914,0.003252,0.000592,0.003955,0.003836,0.005447,0.000912,0.001558,0.042555,0.005787,0.001383,0.00272,0.000971,0.003433,0.019245,0.007351,0.014535,0.000881,0.001167,0.000691,0.022218,0.002678,0.004266,0.000882,0.002698,0.000983,0.00102,0.0008,0.001534,0.002032,0.012952,0.014834,0.001728,0.002896,0.001633,0.006145,0.02279,0.001717,0.001012,0.006313,0.001731,0.003955,0.010037,0.000863,0.001419,0.008027,0.001548,0.001412,0.000799,0.001691,0.002548,0.002534,0.003022,0.002586,0.002887,0.000806,0.001185,0.001112,0.000993,0.004539,0.001509,0.003328,0.001074,0.00086,0.000842,0.006121,0.00218,0.001787,0.00451,0.000991,0.001195,0.003514,0.001588,0.004501,0.001094,0.001842,0.001206,0.001583,0.000984,0.001227,0.002772,0.005161,0.002169,0.000906,0.001498,0.001998,0.001188,0.00112,0.017125,0.002093,0.002434,0.001154,0.003475,0.006979,0.005664,0.003264,0.022573,0.003234,0.003752,0.006964,0.00065,0.003388,0.000838,0.004942,0.001276,0.004086,0.001555,0.001144,0.004442,0.001015,0.00135,0.001646,0.0017,0.010093,0.009727,0.003255,0.002788,0.001775,0.001712,0.017888,0.002704,0.000885,0.001092,0.000915,0.002323,0.000564,0.00131,0.001828,0.002079,0.001236,0.00302,0.002147,0.001598,0.001091,0.001707,0.003963,0.001065,0.001072,0.001073,0.001696,0.00215,0.010352,0.002043
1,id_001897cda,0.000926,0.001344,0.002457,0.002232,0.001109,0.002004,0.002518,0.008517,0.004229,0.006849,0.004218,0.008136,0.001,0.009425,0.001217,0.001102,0.000916,0.002066,0.002089,0.003581,0.004545,0.002718,0.000886,0.001502,0.001014,0.001474,0.001078,0.000877,0.001616,0.001596,0.001333,0.004025,0.001727,0.001284,0.000978,0.000976,0.003356,0.003017,0.006667,0.001464,0.001923,0.001365,0.000806,0.002753,0.001334,0.00171,0.001256,0.002745,0.003927,0.004619,0.001777,0.024023,0.010079,0.000903,0.004375,0.003437,0.003075,0.001929,0.004005,0.001263,0.001131,0.003459,0.001222,0.009616,0.002234,0.002987,0.001168,0.004976,0.001439,0.001022,0.001427,0.009598,0.002514,0.000743,0.002134,0.001003,0.001543,0.002868,0.002224,0.001324,0.003678,0.001297,0.001082,0.003245,0.002164,0.007238,0.001521,0.003717,0.008738,0.010785,0.002721,0.001252,0.001336,0.002887,0.006597,0.005552,0.002038,0.001601,0.001701,0.00547,0.001486,0.004499,0.004839,0.00409,0.001033,0.001935,0.003433,0.003586,0.001928,0.001792,0.002815,0.007819,0.00438,0.001896,0.002157,0.004898,0.060602,0.006089,0.011179,0.005738,0.001176,0.00104,0.005489,0.001591,0.003766,0.001245,0.00203,0.001934,0.002311,0.001516,0.001351,0.001738,0.00137,0.005705,0.001176,0.001126,0.002477,0.00108,0.000888,0.002111,0.000967,0.001093,0.001668,0.001227,0.002824,0.001767,0.002159,0.00384,0.005718,0.009035,0.002368,0.016546,0.001066,0.016402,0.003888,0.002082,0.002462,0.086188,0.016163,0.001728,0.012698,0.001886,0.003181,0.001781,0.002755,0.001046,0.001399,0.002407,0.001182,0.001581,0.001528,0.004242,0.001788,0.025321,0.001177,0.003226,0.004458,0.001143,0.001161,0.001412,0.00115,0.00262,0.00586,0.001358,0.011716,0.001094,0.003244,0.00219,0.006242,0.000907,0.000574,0.001499,0.002104,0.003999,0.004119,0.001744,0.001219,0.001632,0.004489,0.001204,0.006695,0.001012,0.017373,0.001627,0.008225,0.003092
2,id_002429b5b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,id_00276f245,0.001329,0.001314,0.001712,0.011295,0.016534,0.004831,0.003385,0.005403,0.000603,0.009967,0.025317,0.001324,0.000908,0.002054,0.001346,0.001468,0.002865,0.004874,0.004623,0.00235,0.002109,0.002887,0.001153,0.00264,0.001351,0.00127,0.001586,0.001926,0.00505,0.002258,0.002079,0.002046,0.002636,0.001233,0.000803,0.000853,0.003539,0.000968,0.001072,0.001121,0.003248,0.004451,0.00245,0.011263,0.004364,0.005899,0.000845,0.001108,0.001155,0.003923,0.001774,0.001271,0.001598,0.001121,0.02315,0.002535,0.00398,0.002131,0.002354,0.001304,0.001263,0.006871,0.00145,0.001038,0.002968,0.000639,0.002696,0.002694,0.003052,0.001077,0.00115,0.013707,0.004542,0.001612,0.001564,0.000975,0.002299,0.010749,0.009946,0.035604,0.007875,0.00096,0.000827,0.005666,0.004029,0.001859,0.001002,0.001307,0.001174,0.000911,0.000715,0.001667,0.00184,0.00433,0.006776,0.001771,0.00137,0.001083,0.003878,0.01657,0.00167,0.001008,0.003642,0.001972,0.003992,0.020695,0.002071,0.002912,0.003776,0.002222,0.000833,0.00191,0.001029,0.002346,0.003168,0.001667,0.001046,0.002085,0.001024,0.000767,0.00095,0.001042,0.002653,0.00132,0.00241,0.001146,0.000617,0.001507,0.003696,0.002183,0.001032,0.005633,0.001414,0.001596,0.003154,0.004175,0.004672,0.000958,0.002209,0.000879,0.002316,0.001158,0.000861,0.005485,0.005248,0.002844,0.003117,0.001537,0.001838,0.000711,0.001127,0.00985,0.002338,0.002976,0.001532,0.003588,0.006406,0.001732,0.001416,0.002394,0.00096,0.002155,0.003427,0.000817,0.002628,0.000842,0.003541,0.001371,0.003083,0.000805,0.001588,0.000653,0.000976,0.000988,0.001481,0.00118,0.017397,0.036545,0.002791,0.002474,0.003792,0.002164,0.010258,0.001818,0.002267,0.001019,0.000841,0.004318,0.000969,0.002148,0.001936,0.00234,0.000925,0.001877,0.000654,0.001627,0.00096,0.001538,0.00244,0.010556,0.003511,0.00093,0.001645,0.002191,0.000675,0.002247
4,id_0027f1083,0.001748,0.001716,0.00197,0.012103,0.014137,0.004475,0.003584,0.003497,0.00087,0.009722,0.016013,0.001897,0.000904,0.001124,0.001499,0.001549,0.003101,0.004313,0.003543,0.002505,0.002975,0.004049,0.001079,0.003195,0.001382,0.001311,0.001312,0.001387,0.005118,0.002783,0.002105,0.003027,0.003206,0.000833,0.000845,0.000896,0.003002,0.0009,0.000709,0.001091,0.004009,0.005433,0.002454,0.009086,0.005322,0.007837,0.001022,0.002294,0.000925,0.004027,0.001893,0.002583,0.00138,0.001172,0.008188,0.002818,0.00262,0.002645,0.002397,0.001599,0.001342,0.004952,0.001835,0.002601,0.003942,0.000745,0.003009,0.002723,0.003632,0.001037,0.001346,0.018507,0.005936,0.002416,0.002071,0.001142,0.004085,0.03641,0.004874,0.008497,0.001823,0.001095,0.000827,0.00863,0.001878,0.002111,0.001125,0.00174,0.000956,0.002684,0.000897,0.001621,0.001316,0.00602,0.007135,0.001231,0.001791,0.001432,0.004433,0.019416,0.00186,0.001451,0.003854,0.004271,0.00355,0.009497,0.000783,0.001589,0.003428,0.001343,0.00206,0.001003,0.00187,0.002395,0.003455,0.002416,0.002149,0.002416,0.001022,0.00216,0.001124,0.001087,0.003624,0.001432,0.00372,0.000934,0.001052,0.000903,0.004909,0.001843,0.001232,0.005153,0.001297,0.00205,0.003246,0.001761,0.006137,0.001161,0.002304,0.001411,0.002134,0.00108,0.001456,0.003091,0.005493,0.002703,0.001181,0.001497,0.003599,0.002387,0.001411,0.011848,0.002007,0.00314,0.001885,0.004039,0.004165,0.004556,0.002092,0.004248,0.001331,0.003015,0.004119,0.001271,0.00225,0.001329,0.005443,0.001382,0.004201,0.001862,0.001081,0.001152,0.001193,0.001558,0.003432,0.002272,0.010038,0.010832,0.002553,0.002307,0.001772,0.001876,0.016798,0.001979,0.001237,0.001228,0.001362,0.003095,0.00074,0.001569,0.003964,0.00222,0.001179,0.002444,0.002367,0.001484,0.001136,0.001366,0.003077,0.002589,0.001882,0.001141,0.001426,0.002336,0.00082,0.002327


## TABNET del 28 de noviembre

In [60]:
# directorio = '../input/tabnetrandom28nov2'
# tabnet_parameters = {'n_d':35, 'n_a':30, 'n_steps':1, 'gamma':1.3, 'lambda_sparse':0, 'mask_type':'entmax',
#                     'max_epochs':200, 'batch_size':1024, 'lr':2e-2, 'weight_decay':1e-5, 'patience':50}

# folds_cp = folds.copy()
# feature_cols = feature_cols_ini_nocps.copy()
# NFOLDS = CFG.num_folds
# num_features=len(feature_cols)
# num_targets=len(target_cols)

# # Averaging on multiple SEEDS
# SEED = np.arange(16)
# oof_seed = np.zeros((len(folds), len(target_cols)))
# predictions = np.zeros((len(test_noctl), len(target_cols)))
# losses_list = []
# # SEED = [[0,3],[1,1],[2,1],[3,2]]
# # for seed_fold, seed_run in tqdm(SEED):
# SEED = np.arange(16)
# for seed in tqdm(SEED):
#     seed_fold = seed // 4
#     seed_run = seed % 4

#     predictions_ = run_k_fold(NFOLDS, seed_fold, seed_run, display=2)
#     predictions += predictions_

# # FINAL CV LOGLOSS
# for col in target_cols:
#     test_noctl[col] = 0.0
# test_noctl[target_cols] = predictions / len(SEED)

In [61]:
# submission = sample_submission.drop(columns=target_cols)\
# .merge(test_noctl[['sig_id']+target_cols], on='sig_id', how='left')\
# .fillna(0.0).reset_index(drop=True)
# # sub.to_csv('submission.csv', index=False)
# # name_sub = 'submission.csv'
# # submission.to_csv(name_sub, index=False)
# # print(name_sub)
# print('TABNET 28nov')
# submission_TABNET_28nov = submission.copy()
# submission.head()

# BLENDING FINAL

In [62]:
# assert all(submission_TABNET_28nov.sig_id==submission_TABNET_30nov.sig_id)
# assert all(submission_TABNET_28nov.sig_id==submission_RESNET_30nov.sig_id)
# assert all(submission_TABNET_28nov.sig_id==submission_RESNET_28nov.sig_id)
# assert all(submission_TABNET_28nov.sig_id==submission_ANN_28nov.sig_id)
# assert all(submission_TABNET_28nov.sig_id==submission_ANN_30nov.sig_id)

In [63]:
# 229_blending_26nov
# names_models = ['../results/_NEW_83005_20nov_difffolds_1E3_4por4_newcalcfold_metrics/',
#                 '../results/_NEW_73002_model_TabNet_21nov_4por4_newcalcfold_metrics/',
#                 '../results/_NEW_83005_20nov_difffolds_1E3_4por4_newcalcfold_metrics_resnet_poly/']

# Optimization terminated successfully.
#          Current function value: 0.016602
#          Iterations: 100
#          Function evaluations: 205
# 0.016602136014725388 [0.38940193 0.32797063 0.2861724 ]
# blending_models([0.389,0.328,0.286])
# 0.016602141087921556



# names_models = ['../results/_NEW_83005_20nov_difffolds_1E3_4por4_newcalcfold_metrics/',
#                 '../results/_NEW_73002_model_TabNet_21nov_4por4_newcalcfold_metrics/',
#                 '../results/_NEW_83005_20nov_difffolds_1E3_4por4_newcalcfold_metrics_resnet_poly/',
#                '../results/_FINAL_ANN_CPS_STRAT/',
#                '../results/_FINAL_TABNET_CPS_STRAT/']
#0.01659238709151143
#[0.1075, 0.2098, 0.2107, 0.3352, 0.1414]



# w = [0.1075, 0.2098, 0.2107, 0.3352, 0.1414]



# names_models = ['../results/_NEW_83005_20nov_difffolds_1E3_4por4_newcalcfold_metrics/',
#                 '../results/_NEW_73002_model_TabNet_21nov_4por4_newcalcfold_metrics/',
#                 '../results/_NEW_83005_20nov_difffolds_1E3_4por4_newcalcfold_metrics_resnet_poly/',
#                    '../results/_FINAL_ANN_CPS_STRAT/',
#                    '../results/_FINAL_TABNET_CPS_STRAT/',
#                    '../results/_FINAL_RESNET_CPS_STRAT_v2/']
# Optimization terminated successfully.
#          Current function value: 0.016591
#          Iterations: 373
#          Function evaluations: 620
# 0.016590586038811306 [ 0.11163874  0.2195801   0.35596777  0.34734814  0.1496238  -0.17911374]

# w = [ 0.11163874,  0.2195801,   0.35596777,  0.34734814,  0.1496238,  -0.17911374] #[0.112, 0.220, 0.356, 0.347, 0.150, -0.179]




# FINAL

# names_models = [
#                 '../results/_NEW_83005_20nov_difffolds_1E3_4por4_newcalcfold_metrics/',
#                 '../results/_NEW_73002_model_TabNet_21nov_4por4_newcalcfold_metrics/',
#                 '../results/_NEW_83005_20nov_difffolds_1E3_4por4_newcalcfold_metrics_resnet_poly/',
                
#                '../results/_FINAL_ANN_CPS_STRAT_KMEANS7/',
#                '../results/_FINAL_TABNET_CPS_STRAT_KMEANS7/',
#                '../results/_FINAL_RESNET_CPS_STRAT_KMEANS7/']

# STRATIFIED BLENDING!!!!
# Optimization terminated successfully.
#          Current function value: 0.016590
#          Iterations: 402
#          Function evaluations: 674
# 0.01659034997712169 [0.0914673  0.18184113 0.14183788 0.3465606  0.15979273 0.08306089]

# w = [0.0914673, 0.18184113, 0.14183788, 0.3465606, 0.15979273, 0.08306089]

# submission = submission_ANN_28nov.copy()
# blending = (w[0]*submission_ANN_28nov.values[:,1:]) + \
#             (w[1]*submission_TABNET_28nov.values[:,1:]) + \
#             (w[2]*submission_RESNET_28nov.values[:,1:]) + \
#             (w[3]*submission_ANN_30nov.values[:,1:]) + \
#             (w[4]*submission_TABNET_30nov.values[:,1:]) + \
#             (w[5]*submission_RESNET_30nov.values[:,1:])


# STRATIFIED BLENDING!!!!
# Optimization terminated successfully.
#          Current function value: 0.016476
#          Iterations: 113
#          Function evaluations: 228
# 0.016476294101737145 [0.37846299 0.37115626 0.25789995]

#                '../results/_FINAL_ANN_CPS_STRAT_KMEANS7_SMOOTH_NO/',
#                '../results/_FINAL_TABNET_CPS_STRAT_KMEANS7_SMOOTH_NO/',
#                '../results/_FINAL_RESNET_CPS_STRAT_KMEANS7_SMOOTH_NO/']
            
# w = [0.37846299, 0.37115626, 0.25789995]

# w = [1.0/3, 1.0/3, 1.0/3]
# w = [ 0.48450186, -0.01045436,  0.52783731]



# STRATIFIED BLENDING!!!!
# Optimization terminated successfully.
#          Current function value: 0.016596
#          Iterations: 91
#          Function evaluations: 183
# 0.01659632784193557 [0.45209143 0.31521697 0.23694659]

# Blend weights for the (ANN, TabNet, ResNet) 30nov submissions, in that order.
# They sum to ~1.004 rather than exactly 1, so the clip further down is what
# guarantees the blended values stay inside [0, 1].
w = [0.45209143, 0.31521697, 0.23694659]

blend_inputs = [submission_ANN_30nov, submission_TABNET_30nov, submission_RESNET_30nov]

# The blend below is purely positional (row i / column j of every frame are
# combined), so make sure the frames really are aligned before mixing them.
assert len(w) == len(blend_inputs), "need exactly one weight per blended model"
reference_ids = submission_ANN_30nov.iloc[:, 0].to_numpy()
assert all(
    list(model_sub.columns) == list(submission_ANN_30nov.columns)
    for model_sub in blend_inputs
), "blended submissions must share the same columns in the same order"
assert all(
    np.array_equal(model_sub.iloc[:, 0].to_numpy(), reference_ids)
    for model_sub in blend_inputs
), "blended submissions must list the same ids in the same row order"

# Start from a copy so the id column and column layout are carried over.
submission = submission_ANN_30nov.copy()

# Weighted sum over every column after the first (id) column. Converting to a
# float array avoids the object-dtype array that `.values` yields when the
# frame also holds the string id column.
blending = sum(
    weight * model_sub.iloc[:, 1:].to_numpy(dtype=float)
    for weight, model_sub in zip(w, blend_inputs)
)

# Keep the blended values inside [0, 1] before writing them back.
blending = blending.clip(0.0, 1.0)
submission.iloc[:, 1:] = blending
submission.to_csv('submission.csv', index=False)

In [64]:
# Preview the first rows of the blended submission as a quick sanity check.
submission.head()

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_
antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gt
pase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.001363,0.00174,0.002995,0.0109,0.017746,0.005145,0.002305,0.007631,0.000568,0.009014,0.01201,0.000769,0.001191,0.000678,0.001332,0.001286,0.002951,0.007533,0.007437,0.002735,0.002897,0.004447,0.000837,0.002688,0.000826,0.001195,0.001124,0.001729,0.003593,0.002225,0.001192,0.005218,0.005183,0.000573,0.000702,0.00137,0.005443,0.000604,0.000819,0.000766,0.005091,0.010059,0.001931,0.007161,0.008064,0.006672,0.001179,0.003512,0.000803,0.003224,0.002067,0.004094,0.000771,0.000943,0.017807,0.002107,0.0022,0.002449,0.001749,0.00282,0.001595,0.006084,0.002007,0.000581,0.004096,0.000666,0.002987,0.004083,0.006264,0.000866,0.00142,0.048529,0.005657,0.001224,0.003256,0.000918,0.004145,0.016388,0.007638,0.012645,0.000656,0.001305,0.000669,0.021728,0.002088,0.005727,0.000904,0.001882,0.000781,0.000611,0.001189,0.001679,0.002323,0.019139,0.019502,0.001772,0.002178,0.001561,0.007255,0.023766,0.001814,0.001011,0.007321,0.002003,0.00504,0.008422,0.000774,0.00143,0.00613,0.001048,0.001133,0.000806,0.001843,0.002851,0.002298,0.002828,0.001551,0.003642,0.000952,0.000677,0.001045,0.000975,0.005267,0.001564,0.003916,0.001213,0.000913,0.000735,0.006935,0.002503,0.001987,0.004584,0.000978,0.000939,0.004518,0.00099,0.005916,0.001168,0.001518,0.001249,0.001426,0.000867,0.001408,0.00242,0.005504,0.001782,0.000756,0.00127,0.00247,0.000629,0.001408,0.016983,0.002461,0.001875,0.001277,0.00293,0.009511,0.003242,0.003001,0.017534,0.00264,0.00498,0.008243,0.000773,0.004087,0.000736,0.003678,0.001073,0.0041,0.001155,0.00115,0.003252,0.000858,0.001062,0.001324,0.002012,0.010128,0.006374,0.003956,0.003465,0.001515,0.001575,0.021697,0.003108,0.000694,0.000955,0.000812,0.001729,0.000399,0.00104,0.001517,0.001798,0.001173,0.003992,0.001492,0.001432,0.001144,0.002809,0.00476,0.001233,0.000837,0.001044,0.001179,0.002011,0.016355,0.001657
1,id_001897cda,0.00066,0.00101,0.002284,0.002414,0.000958,0.001474,0.003088,0.007465,0.00659,0.004119,0.004709,0.004537,0.000669,0.008861,0.000979,0.000877,0.000792,0.001597,0.001471,0.003306,0.004484,0.002545,0.000691,0.001276,0.000806,0.001377,0.000929,0.000666,0.001696,0.001596,0.001088,0.002883,0.00129,0.001525,0.000775,0.0006,0.002764,0.003373,0.004592,0.001799,0.001382,0.00106,0.000513,0.00244,0.001001,0.001625,0.000859,0.001626,0.002516,0.004549,0.001614,0.011117,0.014283,0.000666,0.004058,0.003701,0.003389,0.001487,0.003811,0.000947,0.000932,0.003972,0.000983,0.006538,0.00183,0.003253,0.001032,0.004154,0.000922,0.000807,0.001034,0.006441,0.002031,0.000689,0.001619,0.000731,0.001142,0.001649,0.001437,0.000984,0.001856,0.001021,0.000874,0.003048,0.002421,0.006547,0.000824,0.003041,0.008265,0.006019,0.001392,0.00093,0.001012,0.001918,0.00451,0.003325,0.001475,0.001576,0.001209,0.004756,0.001328,0.0047,0.00388,0.002144,0.000749,0.001863,0.002133,0.003797,0.001422,0.001287,0.001521,0.007256,0.002602,0.001546,0.002115,0.004151,0.045096,0.007188,0.006881,0.003211,0.000862,0.000791,0.00437,0.001461,0.003191,0.000908,0.001226,0.001967,0.001842,0.00129,0.000882,0.001433,0.001119,0.003938,0.000894,0.001317,0.001465,0.000793,0.000734,0.001339,0.000707,0.000788,0.001061,0.001026,0.001963,0.001446,0.001305,0.004224,0.004987,0.005188,0.00201,0.017528,0.000783,0.014835,0.003752,0.002477,0.001929,0.065709,0.016018,0.001485,0.017228,0.00151,0.002627,0.001128,0.002496,0.000924,0.000895,0.001821,0.000856,0.000722,0.001394,0.002343,0.001868,0.029378,0.001283,0.002884,0.00612,0.00096,0.000723,0.001078,0.001032,0.002925,0.004674,0.000998,0.013017,0.000944,0.003377,0.002583,0.004184,0.000723,0.000444,0.001201,0.001747,0.002955,0.005852,0.001497,0.001118,0.001064,0.004335,0.000547,0.007064,0.000722,0.011016,0.001292,0.003983,0.003168
2,id_002429b5b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,id_00276f245,0.001307,0.001326,0.001984,0.012357,0.017698,0.004921,0.003847,0.004427,0.000579,0.015394,0.032218,0.001861,0.000832,0.002884,0.001581,0.002043,0.002979,0.004885,0.004542,0.002314,0.002619,0.002328,0.001222,0.003325,0.001787,0.001363,0.001846,0.002593,0.007094,0.002782,0.002401,0.002145,0.00287,0.001364,0.000883,0.000785,0.002759,0.000963,0.001045,0.001214,0.004056,0.006454,0.002991,0.013265,0.006367,0.006604,0.001007,0.001012,0.001138,0.003696,0.002115,0.000848,0.001426,0.001495,0.030979,0.002269,0.005029,0.002245,0.002615,0.001493,0.00168,0.00813,0.001479,0.001091,0.002908,0.000708,0.004596,0.003574,0.003421,0.001163,0.000973,0.014572,0.004592,0.001906,0.00147,0.000983,0.003028,0.014815,0.012356,0.046277,0.009026,0.001013,0.000911,0.005703,0.004115,0.001699,0.001164,0.001384,0.001897,0.00101,0.000736,0.001548,0.001643,0.004172,0.005656,0.001349,0.000934,0.001468,0.004416,0.014574,0.001978,0.001029,0.00381,0.001911,0.003526,0.029076,0.001883,0.002752,0.005198,0.00282,0.000732,0.001858,0.001036,0.002917,0.002843,0.001938,0.001179,0.001878,0.000799,0.001005,0.001086,0.001211,0.002251,0.001396,0.002423,0.001495,0.000674,0.001276,0.004629,0.002897,0.001123,0.006943,0.001305,0.001882,0.003403,0.004359,0.004487,0.001003,0.002645,0.000996,0.002671,0.001325,0.000775,0.006404,0.007006,0.003371,0.005211,0.001466,0.001629,0.000942,0.001112,0.007621,0.002447,0.004322,0.002053,0.004129,0.007369,0.00139,0.001561,0.002175,0.001094,0.002573,0.003714,0.000872,0.00264,0.00097,0.005509,0.001781,0.003476,0.001232,0.001655,0.000608,0.001106,0.001288,0.001659,0.001337,0.016395,0.042332,0.003465,0.002883,0.0044,0.002179,0.008949,0.002125,0.00216,0.001345,0.001172,0.004387,0.001059,0.002198,0.002583,0.002595,0.001031,0.001768,0.001014,0.001789,0.001015,0.001296,0.002656,0.008923,0.003779,0.000997,0.001647,0.002328,0.000927,0.002869
4,id_0027f1083,0.001941,0.00191,0.002313,0.011852,0.013364,0.00388,0.004587,0.002756,0.000919,0.009083,0.016433,0.002346,0.000764,0.001229,0.001636,0.001795,0.00356,0.004315,0.00272,0.00225,0.004175,0.004759,0.001084,0.003566,0.001373,0.001295,0.001318,0.001507,0.006499,0.003329,0.002266,0.003983,0.003289,0.000864,0.000821,0.000902,0.00244,0.00091,0.000744,0.001122,0.006659,0.007119,0.002696,0.010467,0.007131,0.009252,0.001011,0.002641,0.000921,0.003978,0.002129,0.001994,0.001334,0.001204,0.006945,0.002685,0.002787,0.002872,0.002863,0.00161,0.001484,0.005327,0.001977,0.002287,0.0042,0.0008,0.004797,0.002782,0.003147,0.001042,0.001319,0.016704,0.006005,0.002248,0.002174,0.001165,0.004799,0.035387,0.004459,0.006531,0.001462,0.00095,0.000798,0.011244,0.002132,0.001741,0.001043,0.002149,0.001445,0.001628,0.000671,0.001557,0.001085,0.005848,0.005856,0.00081,0.001367,0.002258,0.004137,0.018271,0.001935,0.001164,0.004369,0.002445,0.002949,0.008645,0.000693,0.001483,0.00391,0.000991,0.001617,0.00077,0.002692,0.002286,0.003222,0.002851,0.003466,0.002294,0.001065,0.00132,0.001191,0.001163,0.002952,0.00141,0.003256,0.000848,0.000897,0.000855,0.005754,0.001788,0.001211,0.00506,0.001065,0.00242,0.003316,0.001667,0.005945,0.001117,0.002725,0.001548,0.001985,0.001043,0.001397,0.003267,0.005092,0.003289,0.001454,0.001522,0.002907,0.001672,0.001395,0.011309,0.001679,0.002787,0.001736,0.005339,0.003553,0.007197,0.002079,0.004945,0.002043,0.003408,0.004117,0.00126,0.002238,0.001352,0.005363,0.001601,0.004731,0.002077,0.001009,0.001104,0.00144,0.002359,0.003691,0.002216,0.009359,0.009396,0.002345,0.00213,0.001592,0.002198,0.015851,0.002187,0.00106,0.00151,0.001643,0.003504,0.001078,0.001423,0.003557,0.002397,0.00118,0.001948,0.003254,0.001346,0.001177,0.001059,0.003724,0.001987,0.001718,0.001147,0.001882,0.002267,0.000635,0.002229
