<a href="https://colab.research.google.com/github/takapy0210/kaggle_MoA/blob/master/20201023_1_tf_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Settings

In [None]:
# Run configuration: every value a reader might want to tune lives here.
DEBUG = False  # quick functional-check mode
IS_KAGGLE = False  # set to True when running inside a Kaggle kernel
PRETRAIN = False  # set to True to pre-train on the non-scored targets and transfer the weights
LABEL_SMOOTHING = True
SEED = 42
N_GENE_COMP = 70  # number of components for the reduction methods (PCA, SVD, ...)
N_CELL_COMP = 10  # number of components for the reduction methods (PCA, SVD, ...)
N_GENE_CLUSTER = 30  # number of k-means clusters
C_CELL_CLUSTER = 5  # number of k-means clusters

# SMOOTHING is always bound so that switching LABEL_SMOOTHING off cannot leave
# the name undefined for later cells; 0.0 stands for "no smoothing".
SMOOTHING = 0.001 if LABEL_SMOOTHING else 0.0

In [None]:
# List the contents of the current working directory.
! ls

drive  sample_data


In [None]:
import os
import sys
# Pick the data directory and make the `iterstrat` package importable,
# depending on where the notebook runs. Both DATA_DIR values end with '/'.
if IS_KAGGLE:
    DATA_DIR = '../input/lish-moa/'
    # Add the attached copy of iterative-stratification to the import path.
    sys.path.append('../input/iterative-stratification/iterative-stratification-master')
else:
    # Outside Kaggle the package is installed with pip (no version is pinned).
    ! pip install iterative-stratification
    DATA_DIR = '/content/drive/My Drive/ML/kaggle/MoA/data/raw/'

# Must come after the branch above, which makes the package available.
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold



In [None]:
# Print the names of the entries found in DATA_DIR.
print(os.listdir(DATA_DIR))

['sample_submission.csv', 'test_features.csv', 'train_features.csv', 'train_targets_nonscored.csv', 'train_targets_scored.csv', '.DS_Store']


# Imports

In [None]:
import json
import time
import re
import random
import datetime
import pickle
import gc
import warnings

import numpy as np
import pandas as pd
import yaml
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm

# sklearn
from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, OneHotEncoder, LabelEncoder
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA, TruncatedSVD, FactorAnalysis
from sklearn.cluster import KMeans

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.modules.loss import _WeightedLoss

# tensorflow
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow.keras import layers, regularizers, Sequential, backend, callbacks, optimizers, metrics, losses

# Silence all warnings for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries above.
warnings.filterwarnings("ignore")
# Raise the pandas display limits (columns, rows, column width) to 300.
pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_colwidth', 300)
# Render floats with three decimal places.
pd.options.display.float_format = '{:.3f}'.format
# Draw matplotlib figures inline in the notebook.
%matplotlib inline

# Data load

In [None]:
# Read the competition CSV files from DATA_DIR.
# os.path.join is used instead of string concatenation so the paths stay valid
# whether or not DATA_DIR ends with a path separator.
train_features = pd.read_csv(os.path.join(DATA_DIR, 'train_features.csv'))
train_targets_scored = pd.read_csv(os.path.join(DATA_DIR, 'train_targets_scored.csv'))
train_targets_nonscored = pd.read_csv(os.path.join(DATA_DIR, 'train_targets_nonscored.csv'))
test_features = pd.read_csv(os.path.join(DATA_DIR, 'test_features.csv'))
sample_submission = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))

In [None]:
# Show the (rows, columns) shape of each loaded table.
display(train_features.shape, train_targets_scored.shape, train_targets_nonscored.shape, test_features.shape, sample_submission.shape)

(23814, 876)

(23814, 207)

(23814, 403)

(3982, 876)

(3982, 207)

# Feature engineering (特徴量生成)

In [None]:
def data_filter(train, test):
    """Remove the control rows and the ``cp_type`` column from both frames.

    Rows whose ``cp_type`` equals ``'ctl_vehicle'`` are excluded (the original
    author's note describes them as "unknown" data). The remaining rows are
    re-indexed from 0 and the ``cp_type`` column is dropped.

    Args:
        train: training features; must contain a ``cp_type`` column.
        test: test features; must contain a ``cp_type`` column.

    Returns:
        Tuple ``(train, test)`` of new, filtered DataFrames. The inputs are
        not modified.
    """
    def _without_controls(frame):
        is_treated = frame['cp_type'] != 'ctl_vehicle'
        return frame.loc[is_treated].reset_index(drop=True).drop(columns='cp_type')

    return _without_controls(train), _without_controls(test)


def one_hot_encoder(df, cols):
    """One-hot encode the given columns with scikit-learn's ``OneHotEncoder``.

    Each column in ``cols`` is replaced by indicator columns named
    ``<col>_ohe_<i>``, where ``i`` is the column position in the encoder
    output. A separate encoder is fitted for every column.

    Args:
        df: DataFrame containing the columns to encode.
        cols: iterable of column names to encode.

    Returns:
        A new DataFrame with the indicator columns appended and the original
        columns removed.
    """
    for col in cols:
        # Densify the encoder's default sparse output rather than passing
        # ``sparse=False``: that keyword was renamed to ``sparse_output`` in
        # scikit-learn 1.2 and later removed, so this form works on both.
        encoded = OneHotEncoder().fit_transform(df[[col]]).toarray()
        # Reuse df's index: pd.concat(axis=1) aligns on index labels, so a
        # fresh RangeIndex would misalign rows whenever df is not 0..n-1.
        ohe_df = pd.DataFrame(encoded, index=df.index).add_prefix(col + '_ohe_')
        # Append the indicator columns, then drop the encoded source column.
        df = pd.concat([df, ohe_df], axis=1).drop(col, axis=1)
    return df


def kmeans(df, n_cluster):
    """Cluster the rows of ``df`` with k-means (unsupervised).

    Args:
        df: feature matrix to cluster.
        n_cluster: number of clusters to form.

    Returns:
        The ``labels_`` attribute of the fitted ``KMeans`` model.
    """
    model = KMeans(n_clusters=n_cluster, init='k-means++', random_state=SEED)
    model.fit(df)
    return model.labels_


def feature_stats(df):
    """Add row-wise summary statistics of the gene and cell features.

    For each of three column groups -- ``GENES`` (prefix ``g``), ``CELLS``
    (prefix ``c``) and both together (prefix ``gc``) -- the row-wise sum,
    mean, standard deviation, kurtosis and skewness are stored in columns
    named ``<prefix>-sum``, ``-mean``, ``-std``, ``-kurt`` and ``-skew``.

    Reads the module-level lists ``GENES`` and ``CELLS``.

    Args:
        df: DataFrame containing every column listed in GENES and CELLS.

    Returns:
        The same DataFrame object, which is modified in place.
    """
    # (column-name suffix, DataFrame reduction method), in insertion order.
    reductions = (
        ('sum', 'sum'),
        ('mean', 'mean'),
        ('std', 'std'),
        ('kurt', 'kurtosis'),
        ('skew', 'skew'),
    )
    groups = (('g', GENES), ('c', CELLS), ('gc', GENES + CELLS))

    for prefix, columns in groups:
        for suffix, method in reductions:
            df.loc[:, f'{prefix}-{suffix}'] = getattr(df[columns], method)(axis=1)
    return df


def feature_pca(df, col_list, n_comp, col_type='g'):
    """Append PCA components of ``df[col_list]`` as new columns.

    Args:
        df: DataFrame holding the source columns.
        col_list: names of the columns the PCA is fitted on.
        n_comp: number of components to compute.
        col_type: prefix of the new column names, ``{col_type}-pca_{i}``.

    Returns:
        A new DataFrame: ``df`` with the ``n_comp`` component columns appended.
    """
    components = PCA(n_components=n_comp, random_state=SEED).fit_transform(df[col_list])
    # Build the component frame on df's own index: pd.concat(axis=1) aligns on
    # index labels, so a fresh RangeIndex would misalign the rows (and insert
    # NaNs) whenever df does not carry a default 0..n-1 index.
    pca_df = pd.DataFrame(
        components,
        index=df.index,
        columns=[f'{col_type}-pca_{i}' for i in range(n_comp)],
    )
    return pd.concat([df, pca_df], axis=1)


def feature_svd(df, col_list, n_comp, col_type='g'):
    """Append truncated-SVD components of ``df[col_list]`` as new columns.

    Args:
        df: DataFrame holding the source columns.
        col_list: names of the columns the SVD is fitted on.
        n_comp: number of components to compute.
        col_type: prefix of the new column names, ``{col_type}-svd_{i}``.

    Returns:
        A new DataFrame: ``df`` with the ``n_comp`` component columns appended.
    """
    components = TruncatedSVD(n_components=n_comp, random_state=SEED).fit_transform(df[col_list])
    # Build the component frame on df's own index: pd.concat(axis=1) aligns on
    # index labels, so a fresh RangeIndex would misalign the rows (and insert
    # NaNs) whenever df does not carry a default 0..n-1 index.
    svd_df = pd.DataFrame(
        components,
        index=df.index,
        columns=[f'{col_type}-svd_{i}' for i in range(n_comp)],
    )
    return pd.concat([df, svd_df], axis=1)


def feature_fa(df, col_list, n_comp, col_type='g'):
    """Append factor-analysis components of ``df[col_list]`` as new columns.

    Args:
        df: DataFrame holding the source columns.
        col_list: names of the columns the factor analysis is fitted on.
        n_comp: number of components to compute.
        col_type: prefix of the new column names, ``{col_type}-fa_{i}``.

    Returns:
        A new DataFrame: ``df`` with the ``n_comp`` component columns appended.
    """
    factors = FactorAnalysis(n_components=n_comp, random_state=SEED).fit_transform(df[col_list])
    # Build the factor frame on df's own index: pd.concat(axis=1) aligns on
    # index labels, so a fresh RangeIndex would misalign the rows (and insert
    # NaNs) whenever df does not carry a default 0..n-1 index.
    fa_df = pd.DataFrame(
        factors,
        index=df.index,
        columns=[f'{col_type}-fa_{i}' for i in range(n_comp)],
    )
    return pd.concat([df, fa_df], axis=1)


def feature_squared(df, cells):
    """Add the square of each listed column.

    For every name in ``cells`` the element-wise square of that column is
    stored in a column called ``<name>_squared``.

    Args:
        df: DataFrame containing the listed columns.
        cells: iterable of column names to square.

    Returns:
        The same DataFrame object, which is modified in place.
    """
    for name in cells:
        df.loc[:, f'{name}_squared'] = df[name].pow(2)
    return df


def variance_threshold(df, n=0.4):
    """Discard the features whose variance is at or below the threshold.

    Args:
        df: feature table to filter, normally a DataFrame.
        n: variance threshold passed to ``VarianceThreshold``.

    Returns:
        A DataFrame holding only the retained features. When ``df`` is a
        DataFrame, the retained columns keep their original names and the
        rows keep ``df``'s index.
    """
    var_thresh = VarianceThreshold(threshold=n)
    kept = var_thresh.fit_transform(df)
    if isinstance(df, pd.DataFrame):
        # fit_transform returns a bare array; without this the surviving
        # features would be relabelled 0..k-1 and the row index reset.
        return pd.DataFrame(
            kept,
            columns=df.columns[var_thresh.get_support()],
            index=df.index,
        )
    # Non-DataFrame input carries no labels to preserve.
    return pd.DataFrame(kept)


def feature_engineering(train_features, test_features):
    """Build the feature tables for train and test.

    Steps, in order: filter both frames with ``data_filter``; stack them into
    one frame so every transformation below is fitted on train and test
    together; add row statistics, squared cell features and PCA / SVD /
    factor-analysis components; scale the numeric columns with
    ``RobustScaler``; one-hot encode ``cp_time`` and ``cp_dose``; split the
    result back into train and test.

    Side effect: sets the module-level lists ``GENES`` and ``CELLS`` (the
    ``g-`` and ``c-`` column names of ``train_features``), which
    ``feature_stats`` reads.

    Args:
        train_features: raw training feature table.
        test_features: raw test feature table.

    Returns:
        Tuple ``(train, test)`` of DataFrames, each re-indexed from 0.
    """
    global GENES, CELLS

    # Keep the lists of gene and cell column names.
    raw_columns = train_features.columns
    GENES = [name for name in raw_columns if name.startswith('g-')]
    CELLS = [name for name in raw_columns if name.startswith('c-')]

    # Filter, and remember how many rows belong to train for the final split.
    train, test = data_filter(train_features, test_features)
    n_train = len(train)

    # Stack train on top of test with a fresh 0..n-1 index.
    df = pd.concat([train, test]).reset_index(drop=True)

    # k-means cluster features (disabled)
    # df.loc[:, 'g-cluster'] = kmeans(df[GENES], n_cluster=N_GENE_CLUSTER)
    # df.loc[:, 'c-cluster'] = kmeans(df[CELLS], n_cluster=C_CELL_CLUSTER)

    # Row statistics first, then the squared cell features.
    df = feature_squared(feature_stats(df), CELLS)

    # PCA, SVD and factor-analysis components; genes before cells for each method.
    for add_components in (feature_pca, feature_svd, feature_fa):
        df = add_components(df, GENES, n_comp=N_GENE_COMP, col_type='g')
        df = add_components(df, CELLS, n_comp=N_CELL_COMP, col_type='c')

    # Split into a categorical frame and a numeric frame (for scaling and encoding).
    # cat_columns = ['cp_time', 'cp_dose', 'g-cluster', 'c-cluster']
    cat_columns = ['cp_time', 'cp_dose']
    id_and_cat = ['sig_id'] + cat_columns
    cat_df = df[id_and_cat]
    num_df = df.drop(id_and_cat, axis=1)

    # VarianceThreshold (disabled)
    # num_df = variance_threshold(num_df, n=0.4)

    # Standardization (disabled)
    # sscaler = StandardScaler()
    # num_df.iloc[:, :] = sscaler.fit_transform(num_df)

    # Min-max scaling (disabled)
    # mmscaler = MinMaxScaler()
    # num_df.iloc[:, :] = mmscaler.fit_transform(num_df)

    # Robust scaling, written back into the existing frame.
    rscaler = RobustScaler()
    num_df.iloc[:, :] = rscaler.fit_transform(num_df)

    # One-hot encode the categorical variables, then rejoin with the numeric frame.
    cat_df = one_hot_encoder(cat_df, cat_columns)
    df = pd.concat([cat_df, num_df], axis=1)

    # Split back into train and test.
    train = df.iloc[:n_train].reset_index(drop=True)
    test = df.iloc[n_train:].reset_index(drop=True)

    return train, test

In [None]:
%%time
# Build the train/test feature tables; the %%time cell magic reports the cost.
train, test = feature_engineering(train_features, test_features)

CPU times: user 44.3 s, sys: 5.05 s, total: 49.3 s
Wall time: 32.4 s


In [None]:
# Collect the column-name lists.
target_cols = train_targets_scored.columns.drop('sig_id').tolist()  # target columns
target_cols_non_scored = train_targets_nonscored.columns.drop('sig_id').tolist()  # target columns used for pre-training
feature_cols = [col for col in train.columns if col != 'sig_id']  # columns used for training

# Build the training dataset.
# NOTE: this rebinds `train`, and feature_cols above is derived from it, so the
# cell is not idempotent -- re-run the feature-engineering cell before re-running it.
train = train.merge(train_targets_scored, on='sig_id')
target = train[train_targets_scored.columns]

# Build the pre-training dataset.
train_non_scored = train[['sig_id'] + feature_cols].merge(train_targets_nonscored, on='sig_id')
target_non_scored = train_non_scored[train_targets_nonscored.columns]

In [None]:
# Show the shape and first rows of the train, test and target tables.
display(train.shape, train.head(), test.shape, test.head(), target.shape, target.head())

(21948, 1429)

Unnamed: 0,sig_id,cp_time_ohe_0,cp_time_ohe_1,cp_time_ohe_2,cp_dose_ohe_0,cp_dose_ohe_1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,...,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,i
ntegrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gtpase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specifi
c_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_000644bb2,1.0,0.0,0.0,1.0,0.0,1.079,0.622,-0.307,-0.698,-0.2,-0.896,-0.996,-0.033,0.547,-0.233,1.112,0.124,0.504,-0.377,0.207,-0.837,-0.76,0.667,0.287,-0.396,-0.537,-1.156,-1.354,0.841,0.403,0.318,0.199,0.091,-0.346,0.548,0.294,-0.717,-0.043,-0.294,-0.176,0.225,0.269,-0.466,-0.479,-0.317,0.021,-0.22,-1.316,0.328,-0.965,0.244,-0.198,-0.018,0.054,0.518,-0.101,-0.443,0.576,0.417,-0.097,-0.161,-0.581,0.248,0.58,1.006,0.524,-0.089,0.595,-0.341,-0.192,0.026,0.165,-0.367,0.617,-0.663,-0.737,-0.515,0.015,-0.465,-0.34,-0.574,-1.011,-0.365,-0.882,0.664,-0.173,-0.207,-0.104,-1.949,1.542,0.058,-0.142,0.44,-1.657,-0.352,0.186,-0.982,-1.618,-1.726,0.768,-0.98,0.119,0.52,0.493,-0.074,-0.473,0.422,-0.732,-0.813,-1.175,0.146,-0.653,-1.529,0.749,-0.772,-0.069,1.295,0.478,-1.002,0.425,-0.111,-0.034,0.44,2.151,0.118,1.24,1.229,0.038,-0.9,-1.234,-0.14,-0.342,-0.566,-0.029,-0.242,-0.642,0.507,-0.818,0.324,0.31,-0.322,-0.419,-0.437,-0.452,0.537,1.181,-1.125,0.2,1.107,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,id_000779bfc,0.0,0.0,1.0,1.0,0.0,0.083,0.465,0.191,0.069,1.151,0.485,0.265,0.377,-0.36,0.66,-1.176,-0.409,-0.128,0.431,0.115,0.285,0.326,0.314,-0.591,0.349,0.953,-0.109,-0.303,-0.409,0.283,-0.418,0.482,0.134,-0.52,-0.155,-0.082,-0.35,1.074,-0.243,0.221,-0.191,0.143,0.509,-0.251,-0.11,0.179,-0.355,-1.288,0.476,1.904,0.993,0.458,-0.368,0.183,-0.835,1.226,-0.454,-0.293,-0.748,-0.494,-0.119,-0.543,0.499,0.332,-0.763,-0.594,1.906,-0.139,1.033,0.43,-0.635,-0.839,0.108,-0.439,0.002,0.106,0.11,-0.241,-0.059,0.376,-0.529,0.921,0.331,1.273,0.66,0.774,-1.57,-1.179,-0.034,-0.64,-0.359,0.612,0.175,0.268,0.216,0.037,-0.325,-0.664,-0.181,-0.056,-0.463,-1.222,0.495,-0.388,0.153,0.719,-0.132,-0.815,-0.393,-0.548,-0.134,-0.218,0.248,0.069,0.823,-0.005,-0.135,0.296,0.91,-0.301,-0.45,0.027,-1.251,1.918,0.824,0.03,-0.032,0.071,-0.844,1.166,-0.207,-0.216,-0.719,1.13,0.232,-0.041,0.13,-0.753,-0.31,-0.485,0.483,0.736,-0.025,0.934,-0.418,-1.327,0.21,-0.465,-0.537,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,id_000a6266a,0.0,1.0,0.0,1.0,0.0,0.641,0.648,1.335,-0.085,-0.02,1.132,0.202,0.242,0.029,1.02,-0.517,-0.542,-0.071,-1.717,0.606,-0.421,0.387,-0.281,-0.2,-0.042,-0.056,-0.813,-1.746,0.28,0.452,0.542,-1.366,4.432,-0.037,1.258,-2.057,0.731,0.492,0.174,-0.028,-1.129,0.104,-0.105,4.85,1.196,0.413,-0.512,0.058,-0.393,-0.415,-0.654,0.192,-0.784,-0.452,0.551,-0.078,-0.218,0.569,0.221,-0.687,-0.48,1.163,0.083,-0.158,-1.488,0.476,0.917,-0.353,-0.32,-0.154,-1.033,-0.038,0.107,0.554,0.437,0.656,0.733,0.872,0.444,-0.079,-1.261,0.256,-0.404,-0.128,0.254,-0.367,-0.804,0.697,-0.099,-0.101,-0.117,0.344,0.748,-0.213,-0.986,0.837,0.4,0.38,1.591,-0.752,0.239,0.651,0.13,-0.304,0.779,-0.691,-0.348,0.732,0.73,-0.409,-0.828,-0.241,-0.629,0.162,-0.086,-0.045,-0.894,-0.092,-0.382,-0.133,-0.698,-0.44,-0.279,-0.032,0.311,-0.913,1.063,0.175,0.225,0.95,-0.016,-0.511,-0.315,0.295,-0.187,-0.302,-1.011,-0.645,0.612,-0.05,-0.063,-0.002,-0.38,-0.72,-1.032,-0.775,0.406,1.203,-0.021,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,id_0015fd391,0.0,1.0,0.0,1.0,0.0,-0.511,-0.229,-0.324,0.597,4.54,-0.713,-1.937,0.202,-0.102,-1.151,-0.858,-0.351,-0.858,-0.548,-2.365,1.044,-0.151,-0.832,0.184,-2.64,2.109,-1.18,-0.82,-0.97,-1.416,0.026,-0.876,0.166,-0.298,-1.134,-0.857,0.086,-2.759,0.855,0.518,-1.607,-1.628,-1.184,2.742,-0.517,-0.685,0.029,0.168,-1.756,-0.689,-0.865,-0.263,0.086,-0.44,0.765,-2.67,-0.553,-1.46,1.913,-0.597,-1.468,-0.508,0.456,0.102,-1.578,0.324,1.572,-1.188,-1.279,-1.035,0.735,0.232,0.13,1.495,0.209,-0.455,1.038,-0.857,1.291,1.308,-3.127,-1.029,-0.714,1.116,-0.742,-0.892,0.115,0.156,0.464,0.777,1.067,0.014,-0.432,1.218,0.236,1.615,4.398,2.516,0.635,-1.727,1.347,0.185,-1.858,0.066,-0.837,1.03,-0.143,2.107,-0.805,1.593,-0.133,-0.884,-0.404,-0.252,-0.707,-0.441,-0.123,-1.787,0.295,-0.025,-0.503,-1.877,0.034,0.243,-0.63,-1.328,0.628,-0.281,1.424,-1.498,0.452,0.302,0.937,0.204,0.104,2.708,0.031,-0.373,-1.256,0.311,-0.482,-0.019,1.002,1.101,-0.328,-0.265,0.606,-0.498,0.429,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,0.0,0.0,1.0,0.0,1.0,-0.321,-0.39,0.803,0.781,1.595,-0.727,-0.251,-0.163,-0.808,0.672,-0.265,-0.494,-0.961,1.103,0.231,0.25,-0.006,-1.691,-0.599,-0.562,-0.305,-0.221,0.872,0.249,1.293,0.758,0.805,-0.433,-1.526,0.26,-0.105,0.77,0.736,-0.077,1.082,-0.675,-1.411,1.009,0.684,-0.157,0.19,0.149,1.085,0.222,0.634,1.041,1.925,-0.971,-0.338,-0.758,-0.062,-0.333,0.654,-0.039,0.118,1.291,-0.078,1.24,0.768,0.71,0.695,1.757,0.137,1.352,1.141,0.498,-0.008,0.136,-0.19,-0.511,0.728,0.631,0.846,-0.687,-1.088,-0.72,0.193,-0.294,0.72,-1.673,0.443,-1.158,-0.8,0.511,-1.158,-0.11,-2.112,0.715,1.549,0.878,-0.82,0.733,1.39,1.002,-0.208,-0.755,1.116,-0.23,0.202,-0.446,0.131,-1.123,-1.259,0.615,1.127,-0.734,-0.26,0.461,-0.529,-0.197,-2.695,-0.913,0.323,0.885,-2.322,-0.611,0.048,-1.718,1.29,-3.648,0.711,-1.162,-1.463,-0.663,1.228,-1.263,1.582,0.719,0.611,1.64,1.791,0.738,-1.317,-0.196,-1.976,0.871,0.375,0.903,0.501,0.115,-2.314,-0.808,-0.401,-1.734,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


(3624, 1223)

Unnamed: 0,sig_id,cp_time_ohe_0,cp_time_ohe_1,cp_time_ohe_2,cp_dose_ohe_0,cp_dose_ohe_1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,...,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,1104,1105,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,1117,1118,1119,1120,1121,1122,1123,1124,1125,1126,1127,1128,1129,1130,1131,1132,1133,1134,1135,1136,1137,1138,1139,1140,1141,1142,1143,1144,1145,1146,1147,1148,1149,1150,1151,1152,1153,1154,1155,1156,1157,1158,1159,1160,1161,1162,1163,1164,1165,1166,1167,1168,1169,1170,1171,1172,1173,1174,1175,1176,1177,1178,1179,1180,1181,1182,1183,1184,1185,1186,1187,1188,1189,1190,1191,1192,1193,1194,1195,1196,1197,1198,1199,1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211,1212,1213,1214,1215,1216
0,id_0004d9e33,1.0,0.0,0.0,1.0,0.0,-0.543,0.171,-0.55,0.498,1.742,-0.132,-0.185,0.25,-0.286,1.692,-0.273,-0.359,1.226,1.371,0.043,0.869,-0.296,-0.036,-0.347,-0.75,-0.591,0.208,-0.039,0.409,0.199,0.543,0.764,0.578,-0.298,-0.157,-0.558,-0.046,-0.791,-0.518,-1.457,-0.002,0.412,0.551,-0.289,-1.21,-0.091,-1.03,0.249,0.6,1.122,0.017,0.482,-1.056,-0.571,-0.607,-0.054,-1.467,0.886,0.626,-0.17,-0.108,0.439,5.563,0.334,0.152,-0.265,1.628,0.017,0.9,-0.301,1.413,0.149,-0.322,-0.055,-0.477,1.055,-0.335,0.72,0.348,0.564,0.408,0.38,-0.013,0.354,0.022,0.247,-0.591,-0.445,0.094,-0.849,0.231,0.601,-0.165,-0.033,-0.511,-0.088,-0.45,0.134,1.348,-0.279,0.753,-0.14,-0.29,-0.188,-0.264,-0.169,0.173,0.095,-0.616,-0.317,-0.485,-0.051,-0.533,0.309,0.697,-0.628,-0.1,-1.135,0.391,-0.036,-0.419,-0.666,-0.216,-0.097,0.139,-0.814,-0.219,-0.722,-0.588,0.528,0.062,0.355,-0.303,-0.205,0.123,-0.615,-0.196,1.032,-0.039,-0.213,0.64,0.109,-0.253,0.138,-0.021,-1.372,0.204,-0.577,0.138,...,-0.155,0.574,0.022,-0.194,-0.806,0.106,-0.662,-0.415,-0.104,0.61,0.44,-1.401,-0.793,0.714,-1.085,-0.519,0.922,-0.057,-0.575,0.795,0.047,-0.144,-0.528,0.616,-0.224,0.01,0.428,0.402,-1.081,0.204,0.274,-0.11,-1.188,0.268,-0.052,0.383,0.189,-0.386,0.645,-0.306,1.002,0.368,-0.7,0.303,-0.346,0.569,1.117,-1.114,0.145,0.348,0.359,0.479,0.444,-0.133,0.657,0.649,-0.205,-0.906,0.823,-0.103,0.206,-0.914,-1.013,-0.386,-0.329,0.494,-1.078,0.249,-0.823,-0.174,0.039,0.603,0.157,-0.304,-0.208,-0.23,0.301,-0.019,0.499,0.463,-0.257,0.482,-0.666,0.186,0.537,-0.233,-0.8,0.121,-0.402,0.279,1.775,1.168,-0.236,-0.632,-0.787,0.278,-0.207,0.569,0.465,-0.421,-0.273,-0.435,0.805,0.344,-0.321,0.01,0.492,0.009,-0.288,0.399,0.119,0.049,-0.817,0.852,0.873,0.131,0.439,1.217,-0.197,0.579,-0.13,-1.281,-0.25,-0.518,-0.147,-0.82,0.82,0.772,0.47,0.483,-1.102,0.318,-0.089,0.186,-0.441,0.069,-0.228,-0.564,-0.735,-0.407,0.197,-0.918,-1.086,-0.288,0.177,-0.906,0.61,-0.609,-0.557,0.089
1,id_001897cda,0.0,0.0,1.0,1.0,0.0,-0.177,0.278,1.02,-0.508,-0.39,-0.283,-1.86,0.449,-0.311,-0.025,0.864,0.276,-1.014,-0.302,0.096,-0.691,-0.765,-1.038,-0.172,-0.901,-0.618,0.803,0.598,0.0,0.464,-0.121,-0.13,1.967,-0.819,-0.643,-0.477,-0.096,-0.319,0.215,-0.702,1.311,-0.942,-1.112,0.313,-0.028,-0.127,-0.387,1.366,0.145,-1.025,0.716,0.0,2.472,-0.117,-1.031,-1.047,-0.074,0.012,-0.875,-1.088,0.624,1.21,0.309,-0.782,0.43,1.155,-0.627,0.48,-0.2,-0.014,0.655,0.874,0.931,0.752,-0.338,0.737,-1.287,-0.75,0.663,-1.198,-0.177,-1.281,-0.794,-0.735,-0.342,-0.811,1.014,0.376,0.534,0.869,1.557,-0.575,0.442,-0.664,-0.259,-0.041,-0.158,-0.141,0.361,0.821,-0.681,-1.534,0.681,-0.793,-0.1,0.743,0.792,-0.036,0.334,0.968,-0.748,-0.667,0.416,0.228,-0.811,-0.85,0.144,0.476,-0.939,0.287,0.645,1.323,1.29,1.502,0.319,0.182,-0.261,0.186,-0.488,0.586,-0.718,0.251,0.442,0.844,-0.317,0.359,-0.001,0.436,-0.227,0.477,-0.566,0.297,0.02,-0.793,0.81,0.106,-1.279,0.057,0.05,...,1.515,-0.2,-0.471,-0.401,-1.007,-0.1,-0.144,-0.018,-0.529,0.279,-0.34,1.637,-0.02,-0.678,-1.412,0.181,1.57,-0.255,0.295,-0.996,-0.522,-0.064,0.079,0.502,-1.323,0.031,-0.296,-0.153,-0.214,0.249,0.025,1.041,-0.313,0.496,0.14,-0.771,-0.454,0.091,0.221,0.92,0.274,0.778,0.006,-0.479,-0.238,-0.273,0.548,-0.288,0.608,0.287,-0.027,0.485,-0.037,-0.458,0.823,0.948,-0.33,-0.057,-0.799,1.346,1.076,0.333,1.239,0.286,-0.377,-0.303,-0.296,-1.647,-1.093,1.197,0.416,-1.376,-0.603,1.045,-0.399,-1.307,-2.011,0.677,-0.002,0.537,1.181,-0.587,-0.61,0.374,0.362,-0.147,0.344,-0.456,0.693,-0.354,-0.9,0.482,-0.704,-0.885,-1.657,-0.027,-0.514,-0.021,-0.974,1.275,-0.854,-0.261,0.848,-0.53,-0.083,0.08,0.555,0.814,-0.312,-1.142,0.566,0.009,-0.033,-0.593,0.58,0.498,0.16,0.819,-0.424,0.732,0.149,0.17,0.56,0.504,1.024,-0.282,0.233,-0.014,0.859,-0.189,0.196,1.115,-0.615,-1.086,0.192,0.016,0.099,-0.67,0.005,-0.479,1.065,0.413,1.132,0.253,-0.322,-0.398,0.388,1.086,0.898,1.898
2,id_00276f245,1.0,0.0,0.0,0.0,1.0,0.495,0.24,0.267,0.48,-0.636,-1.067,0.632,-0.196,0.907,0.133,1.324,-1.048,1.087,-0.763,0.109,-0.398,-0.923,1.749,2.023,-0.12,-0.557,-0.789,-0.771,0.0,-0.858,0.84,-0.076,0.44,1.243,-0.517,0.267,0.567,0.071,0.271,-0.706,0.697,0.369,-0.006,0.507,-0.211,-0.048,-0.017,-0.831,-0.432,0.279,-1.599,0.67,0.122,0.364,-0.237,0.63,0.095,-0.416,0.421,-0.41,-0.253,0.97,0.371,0.924,-0.379,-0.429,-0.482,0.853,-0.255,-0.597,-1.612,-0.765,1.017,-1.008,-0.405,-0.113,0.33,-0.332,0.072,0.087,0.624,-0.574,-0.626,-0.484,0.723,-0.627,-0.245,0.212,-0.008,0.825,0.801,-0.746,0.232,0.086,-1.186,-0.143,0.14,-0.27,-0.059,0.804,-0.768,0.113,-0.056,-0.096,0.296,-0.188,0.498,-0.359,-0.949,-0.827,0.704,-1.147,-0.023,0.46,1.241,0.379,1.633,0.188,0.444,0.519,0.056,-0.312,0.597,0.183,1.164,0.127,-0.306,-0.247,-0.331,-0.351,1.75,-1.223,-0.588,-0.248,4.502,-1.188,-1.266,0.267,0.99,0.388,-0.282,2.26,0.545,-1.078,1.041,0.931,0.028,-0.105,1.108,...,0.331,0.047,-0.785,0.347,0.053,-0.442,-0.765,-0.186,0.361,-1.551,-0.425,-1.275,0.673,0.339,-0.865,1.112,-1.008,-0.87,0.776,-1.315,-1.34,0.675,-1.017,-0.5,0.132,0.537,-0.004,0.457,-0.241,0.311,-1.13,-0.593,-0.634,0.567,0.395,-0.243,-0.616,1.22,-0.496,-0.461,-1.037,-0.877,-0.072,0.137,0.135,-0.691,0.74,1.315,0.014,-0.214,-1.453,-0.583,-0.574,1.479,2.606,2.55,-0.813,0.102,-0.527,-0.217,0.353,-0.018,-1.221,0.027,0.727,-0.146,0.209,0.748,1.069,0.588,0.533,-0.426,-0.931,0.942,-0.148,0.664,-0.202,-0.694,-0.976,-0.455,0.804,-0.205,-0.585,-0.506,-0.094,-0.384,-0.427,0.207,-1.353,-0.585,0.41,-0.269,0.645,0.022,0.392,-1.619,-0.024,0.021,-2.244,-0.527,-1.479,1.162,0.025,1.034,0.699,-0.216,0.08,-1.407,-0.144,-0.156,-0.047,0.093,-0.271,-0.497,-0.251,-0.328,-0.224,-0.775,0.594,-0.325,0.323,0.26,0.22,-1.066,0.092,0.659,0.71,0.375,0.417,0.431,-1.169,-0.001,-0.926,-0.798,-1.333,0.427,0.232,0.135,0.563,-0.474,0.326,-0.072,-1.187,0.042,0.476,0.656,-0.28,-0.943,0.619,-0.485
3,id_0027f1083,0.0,1.0,0.0,1.0,0.0,-0.394,-1.305,1.663,0.233,-0.637,0.001,0.545,0.709,0.269,-1.2,-0.219,-0.209,-0.036,0.024,0.206,-0.17,0.769,0.076,-0.135,0.168,0.161,0.888,-0.125,0.0,-1.061,-0.045,0.903,-0.36,0.574,-0.195,0.203,0.579,0.168,0.66,-0.892,-0.635,1.556,-0.076,-0.275,-0.162,1.859,1.983,-0.899,0.34,0.267,-1.145,-0.239,0.073,0.23,-0.508,0.197,-0.337,0.104,0.145,0.442,0.853,1.034,0.515,-0.009,0.223,-0.73,0.666,0.261,0.684,1.058,-0.758,-0.578,-0.431,0.078,1.153,0.213,0.024,1.555,-0.762,0.566,0.422,1.086,-0.318,0.505,-0.518,0.631,-0.395,-0.162,-0.696,-0.68,-1.381,-0.379,0.34,0.042,-0.266,-0.271,0.32,-0.404,0.144,-0.117,-0.219,0.458,0.378,0.57,0.339,-0.339,0.688,-0.462,-0.455,-0.225,0.37,0.656,0.527,0.413,1.073,0.407,-0.305,0.097,0.544,-0.334,-0.007,-0.231,-0.159,1.152,0.418,0.67,-0.27,0.098,-0.148,0.402,0.2,0.161,-0.806,-0.411,0.317,-0.435,-0.001,-0.137,0.938,-0.676,0.785,0.872,-0.833,0.539,-0.114,-0.423,0.984,0.887,-0.188,...,0.731,0.699,0.001,0.707,0.015,0.545,0.448,-0.276,0.3,0.091,-0.167,0.495,-0.086,-1.013,0.435,-0.394,-0.168,0.546,0.449,-0.137,-0.172,-0.474,-0.2,-0.077,-0.181,-0.114,0.334,0.656,-0.118,0.223,-0.262,-0.023,0.744,-0.791,0.259,-0.888,0.334,0.641,-0.122,0.849,0.034,0.009,0.608,0.499,-0.326,-1.139,0.351,1.036,-1.42,-0.383,-0.321,-1.18,0.713,-1.303,-0.11,0.632,0.561,0.795,0.793,-0.07,-0.95,1.423,-0.301,-0.647,-0.257,-1.371,0.516,-0.416,-0.357,0.604,-0.79,0.319,0.84,0.257,0.34,0.742,0.626,-0.942,-0.295,0.7,0.798,0.714,0.954,0.507,0.211,0.207,-0.159,0.274,0.258,0.027,-0.513,-0.761,-0.964,0.108,0.108,-0.084,-0.106,-0.575,-0.121,-0.009,-0.167,-0.179,-0.335,0.746,0.16,0.451,-0.198,0.806,0.681,-0.4,-0.196,0.218,-0.699,-0.276,0.031,0.951,-0.296,-0.568,1.384,0.519,0.653,-0.922,0.061,0.56,-1.18,1.021,-0.994,0.337,-0.218,0.071,-0.707,-0.42,-0.917,-0.438,0.304,0.601,-0.664,0.39,-0.281,0.351,-0.957,1.47,-0.486,-0.672,-0.837,0.847,0.157,0.601,0.447,0.478
4,id_006fc47b8,0.0,1.0,0.0,0.0,1.0,0.377,0.618,-0.71,-1.832,0.6,-0.329,-0.437,1.119,0.375,-0.069,-0.328,-0.686,-0.201,-0.588,-1.841,0.024,-0.865,0.71,-0.753,-1.38,-0.662,0.959,-0.481,0.513,0.534,-0.549,-0.318,-0.553,-0.398,-0.926,0.148,-0.18,0.286,-0.066,-0.952,0.809,-0.601,-0.933,-0.72,-1.282,0.005,-0.381,1.162,1.63,-0.702,0.346,-0.424,-0.73,0.031,-0.058,-0.236,0.176,-0.066,-0.794,-0.657,1.081,1.042,-0.092,0.282,1.151,-0.824,0.763,0.996,0.118,0.788,-0.233,-0.401,-0.888,-0.189,-0.897,-0.12,-1.966,-0.379,0.119,-1.026,0.224,-0.909,0.683,-1.047,0.499,-0.119,0.057,1.013,-1.071,-0.443,0.11,0.451,-0.142,-0.132,-1.186,1.046,-2.269,0.12,-1.237,-0.865,0.139,-0.83,0.155,1.158,-0.728,0.715,1.077,0.424,-0.738,-0.303,-0.202,-1.244,-0.019,1.237,0.419,0.252,0.754,-0.692,-0.254,-0.224,0.449,0.187,-0.254,0.6,0.632,1.025,-1.137,0.17,-0.976,0.337,-0.447,-0.135,-0.485,0.914,-1.293,0.64,-1.472,-1.315,-1.217,-0.853,-0.593,1.965,-0.665,-1.735,1.175,-0.561,-1.122,-0.417,-0.917,...,0.672,0.921,-0.866,-0.59,-0.558,-0.167,-0.863,-0.076,0.382,-0.77,0.689,-0.122,-0.421,0.369,0.864,-1.419,-0.247,-0.491,-0.252,-1.263,0.032,-1.593,0.494,-0.718,0.795,-1.012,-0.359,0.782,0.817,0.522,-0.842,0.478,0.657,-0.005,0.308,-0.488,0.33,-0.64,0.393,0.258,0.448,0.02,0.011,0.245,-0.483,-1.446,0.64,0.568,0.659,0.447,-0.418,-0.472,0.543,1.574,0.32,-0.274,-0.116,0.903,0.132,0.001,1.304,0.01,0.688,0.263,-0.318,0.276,1.157,0.345,1.469,-1.559,-0.067,0.057,0.453,1.867,-0.23,-1.623,-0.524,0.471,-0.836,0.034,1.019,0.624,-1.119,-0.313,-0.21,-0.675,-0.559,0.231,-0.492,0.612,-0.291,0.502,0.161,1.355,0.694,0.677,-0.051,0.441,-0.894,1.389,0.444,-0.828,-0.394,0.527,-0.2,0.253,-0.738,-0.138,-0.616,-0.418,-1.78,0.872,0.433,0.933,-0.484,0.664,1.111,-0.081,0.542,-0.26,1.005,-0.614,1.228,0.048,-1.031,0.754,-0.301,0.716,0.299,0.825,0.716,0.026,-0.557,-0.469,-0.64,0.675,-0.073,-0.941,-0.129,0.818,1.32,0.051,0.866,0.213,-0.59,0.301,-0.817,-0.083,-0.556,-1.948


(21948, 207)

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_
antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gt
pase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_000644bb2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,id_000779bfc,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,id_000a6266a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,id_0015fd391,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Show the shape and the first rows of the non-scored feature and target frames.
display(
    train_non_scored.shape,
    train_non_scored.head(),
    target_non_scored.shape,
    target_non_scored.head(),
)

(21948, 1625)

Unnamed: 0,sig_id,cp_time_ohe_0,cp_time_ohe_1,cp_time_ohe_2,cp_dose_ohe_0,cp_dose_ohe_1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,...,neurotransmitter,neurotrophic_agent,nfkb_activator,niemann-pick_c1-like_1_protein_antagonist,nitric_oxide_scavenger,nitric_oxide_stimulant,nociceptin_orphanin_fq_(nop)_receptor_antagonist,non-nucleoside_reverse_transcriptase_inhibitor,nootropic_agent,nop_receptor_agonist,noradrenaline_uptake_inhibitor,norepinephrine_inhibitor,notch_signaling_inhibitor,ntpdase_inhibitor,nucleoside_reverse_transcriptase_inhibitor,oct_activator,omega_3_fatty_acid_stimulant,osteoclast_inhibitor,oxidizing_agent,oxidosqualene_cyclase_inhibitor,oxytocin_receptor_agonist,oxytocin_receptor_antagonist,p21_activated_kinase_inhibitor,p53_activator,p53_inhibitor,paba_antagonist,pdk1_inhibitor,penicillin_binding_protein_inhibitor,peptidase_inhibitor,perk_inhibitor,phosphatase_inhibitor,phosphofructokinase_inhibitor,phospholipase_activator,pim_inhibitor,pka_activator,pka_inhibitor,plasminogen_activator_inhibitor,platelet_activating_factor_receptor_antagonist,platelet_aggregation_inhibitor,plk_inhibitor,porcupine_inhibitor,potassium_channel_agonist,potassium_channel_blocker,prmt_inhibitor,progestogen_hormone,prolactin_inhibitor,prostacyclin_analog,prostanoid_receptor_agonist,prostanoid_receptor_inhibitor,protease_inhibitor,protein_kinase_activator,protein_synthesis_stimulant,psychoactive_drug,purine_antagonist,purinergic_receptor_antagonist,pxr_ligand,pyruvate_dehydrogenase_inhibitor,pyruvate_kinase_isozyme_activator,quorum_sensing_signaling_modulator
,rad51_inhibitor,rage_receptor_antagonist,receptor_tyrosine_protein_kinase_inhibitor,reducing_agent,ret_inhibitor,ret_tyrosine_kinase_inhibitor,reverse_transcriptase_inhibitor,ribosomal_protein_inhibitor,ripk_inhibitor,rna_synthesis_inhibitor,ror_inverse_agonist,rsv_fusion_inhibitor,s100a9_inhibitor,sars_coronavirus_3c-like_protease_inhibitor,sedative,selective_estrogen_receptor_modulator_(serm),selective_serotonin_reuptake_inhibitor_(ssri),serine_protease_inhibitor,serine_threonine_kinase_inhibitor,serine_threonine_protein_phosphatase_activator,serotonin_release_inhibitor,sirt_activator,sirt_inhibitor,smoothened_receptor_agonist,sodium_calcium_exchange_inhibitor,sodium_channel_activator,sodium_channel_blocker,somatostatin_receptor_agonist,sphingosine_1_phosphate_receptor_agonist,sphingosine_kinase_inhibitor,src_activator,srebp_inhibitor,stat_inhibitor,stearoyl-coa_desaturase_inhibitor,steroid_sulfatase_inhibitor,steroidal_progestin,sterol_demethylase_inhibitor,sterol_regulatory_element_binding_protein_(srebp)_inhibitor,steryl_sulfatase_inhibitor,structural_glycoprotein_antagonist,succinimide_antiepileptic,sulfonylurea,synthetic_estrogen,t_cell_inhibitor,tankyrase_inhibitor,telomerase_inhibitor,testosterone_receptor_antagonist,thiazide_diuretic,thioredoxin_inhibitor,thrombopoietin_receptor_agonist,thromboxane_receptor_antagonist,thromboxane_synthase_inhibitor,thyroid_hormone_inhibitor,thyroid_hormone_stimulant,thyrotropin_releasing_hormone_receptor_agonist,tie_inhibitor,tissue_transglutaminase_inhibitor,topical_anesthetic,topical_sunscreen_agent,trace_amine_associated_receptor_agonist,trace_amine_associated_receptor_antagonist,trail_modulator,transient_receptor_potential_channel_agonist,triacylglycerol_lipase_inhibitor,tricyclic_antidepressant,tryptophan_hydroxylase_inhibitor,tyrosinase_inhibitor,tyrosine_hydroxylase_inhibitor,tyrosine_phosphatase_inhibitor,ubiquitin-conjugating_enzyme_inhibitor,ubiquitin_ligase_inhibitor,urease_inhibitor,uric_acid_diuretic,uricase_
inhibitor,uricosuric,urotensin_receptor_agonist,urotensin_receptor_antagonist,vasoconstrictor,vasodilator,vasopressin_receptor_agonist,vasopressin_receptor_antagonist,ve-cadherin_antagonist,vesicular_monoamine_transporter_inhibitor,vitamin_k_antagonist,voltage-gated_calcium_channel_ligand,voltage-gated_potassium_channel_activator,voltage-gated_sodium_channel_blocker,wdr5_mll_interaction_inhibitor,wnt_agonist,xanthine_oxidase_inhibitor,xiap_inhibitor
0,id_000644bb2,1.0,0.0,0.0,1.0,0.0,1.079,0.622,-0.307,-0.698,-0.2,-0.896,-0.996,-0.033,0.547,-0.233,1.112,0.124,0.504,-0.377,0.207,-0.837,-0.76,0.667,0.287,-0.396,-0.537,-1.156,-1.354,0.841,0.403,0.318,0.199,0.091,-0.346,0.548,0.294,-0.717,-0.043,-0.294,-0.176,0.225,0.269,-0.466,-0.479,-0.317,0.021,-0.22,-1.316,0.328,-0.965,0.244,-0.198,-0.018,0.054,0.518,-0.101,-0.443,0.576,0.417,-0.097,-0.161,-0.581,0.248,0.58,1.006,0.524,-0.089,0.595,-0.341,-0.192,0.026,0.165,-0.367,0.617,-0.663,-0.737,-0.515,0.015,-0.465,-0.34,-0.574,-1.011,-0.365,-0.882,0.664,-0.173,-0.207,-0.104,-1.949,1.542,0.058,-0.142,0.44,-1.657,-0.352,0.186,-0.982,-1.618,-1.726,0.768,-0.98,0.119,0.52,0.493,-0.074,-0.473,0.422,-0.732,-0.813,-1.175,0.146,-0.653,-1.529,0.749,-0.772,-0.069,1.295,0.478,-1.002,0.425,-0.111,-0.034,0.44,2.151,0.118,1.24,1.229,0.038,-0.9,-1.234,-0.14,-0.342,-0.566,-0.029,-0.242,-0.642,0.507,-0.818,0.324,0.31,-0.322,-0.419,-0.437,-0.452,0.537,1.181,-1.125,0.2,1.107,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,id_000779bfc,0.0,0.0,1.0,1.0,0.0,0.083,0.465,0.191,0.069,1.151,0.485,0.265,0.377,-0.36,0.66,-1.176,-0.409,-0.128,0.431,0.115,0.285,0.326,0.314,-0.591,0.349,0.953,-0.109,-0.303,-0.409,0.283,-0.418,0.482,0.134,-0.52,-0.155,-0.082,-0.35,1.074,-0.243,0.221,-0.191,0.143,0.509,-0.251,-0.11,0.179,-0.355,-1.288,0.476,1.904,0.993,0.458,-0.368,0.183,-0.835,1.226,-0.454,-0.293,-0.748,-0.494,-0.119,-0.543,0.499,0.332,-0.763,-0.594,1.906,-0.139,1.033,0.43,-0.635,-0.839,0.108,-0.439,0.002,0.106,0.11,-0.241,-0.059,0.376,-0.529,0.921,0.331,1.273,0.66,0.774,-1.57,-1.179,-0.034,-0.64,-0.359,0.612,0.175,0.268,0.216,0.037,-0.325,-0.664,-0.181,-0.056,-0.463,-1.222,0.495,-0.388,0.153,0.719,-0.132,-0.815,-0.393,-0.548,-0.134,-0.218,0.248,0.069,0.823,-0.005,-0.135,0.296,0.91,-0.301,-0.45,0.027,-1.251,1.918,0.824,0.03,-0.032,0.071,-0.844,1.166,-0.207,-0.216,-0.719,1.13,0.232,-0.041,0.13,-0.753,-0.31,-0.485,0.483,0.736,-0.025,0.934,-0.418,-1.327,0.21,-0.465,-0.537,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,id_000a6266a,0.0,1.0,0.0,1.0,0.0,0.641,0.648,1.335,-0.085,-0.02,1.132,0.202,0.242,0.029,1.02,-0.517,-0.542,-0.071,-1.717,0.606,-0.421,0.387,-0.281,-0.2,-0.042,-0.056,-0.813,-1.746,0.28,0.452,0.542,-1.366,4.432,-0.037,1.258,-2.057,0.731,0.492,0.174,-0.028,-1.129,0.104,-0.105,4.85,1.196,0.413,-0.512,0.058,-0.393,-0.415,-0.654,0.192,-0.784,-0.452,0.551,-0.078,-0.218,0.569,0.221,-0.687,-0.48,1.163,0.083,-0.158,-1.488,0.476,0.917,-0.353,-0.32,-0.154,-1.033,-0.038,0.107,0.554,0.437,0.656,0.733,0.872,0.444,-0.079,-1.261,0.256,-0.404,-0.128,0.254,-0.367,-0.804,0.697,-0.099,-0.101,-0.117,0.344,0.748,-0.213,-0.986,0.837,0.4,0.38,1.591,-0.752,0.239,0.651,0.13,-0.304,0.779,-0.691,-0.348,0.732,0.73,-0.409,-0.828,-0.241,-0.629,0.162,-0.086,-0.045,-0.894,-0.092,-0.382,-0.133,-0.698,-0.44,-0.279,-0.032,0.311,-0.913,1.063,0.175,0.225,0.95,-0.016,-0.511,-0.315,0.295,-0.187,-0.302,-1.011,-0.645,0.612,-0.05,-0.063,-0.002,-0.38,-0.72,-1.032,-0.775,0.406,1.203,-0.021,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,id_0015fd391,0.0,1.0,0.0,1.0,0.0,-0.511,-0.229,-0.324,0.597,4.54,-0.713,-1.937,0.202,-0.102,-1.151,-0.858,-0.351,-0.858,-0.548,-2.365,1.044,-0.151,-0.832,0.184,-2.64,2.109,-1.18,-0.82,-0.97,-1.416,0.026,-0.876,0.166,-0.298,-1.134,-0.857,0.086,-2.759,0.855,0.518,-1.607,-1.628,-1.184,2.742,-0.517,-0.685,0.029,0.168,-1.756,-0.689,-0.865,-0.263,0.086,-0.44,0.765,-2.67,-0.553,-1.46,1.913,-0.597,-1.468,-0.508,0.456,0.102,-1.578,0.324,1.572,-1.188,-1.279,-1.035,0.735,0.232,0.13,1.495,0.209,-0.455,1.038,-0.857,1.291,1.308,-3.127,-1.029,-0.714,1.116,-0.742,-0.892,0.115,0.156,0.464,0.777,1.067,0.014,-0.432,1.218,0.236,1.615,4.398,2.516,0.635,-1.727,1.347,0.185,-1.858,0.066,-0.837,1.03,-0.143,2.107,-0.805,1.593,-0.133,-0.884,-0.404,-0.252,-0.707,-0.441,-0.123,-1.787,0.295,-0.025,-0.503,-1.877,0.034,0.243,-0.63,-1.328,0.628,-0.281,1.424,-1.498,0.452,0.302,0.937,0.204,0.104,2.708,0.031,-0.373,-1.256,0.311,-0.482,-0.019,1.002,1.101,-0.328,-0.265,0.606,-0.498,0.429,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,0.0,0.0,1.0,0.0,1.0,-0.321,-0.39,0.803,0.781,1.595,-0.727,-0.251,-0.163,-0.808,0.672,-0.265,-0.494,-0.961,1.103,0.231,0.25,-0.006,-1.691,-0.599,-0.562,-0.305,-0.221,0.872,0.249,1.293,0.758,0.805,-0.433,-1.526,0.26,-0.105,0.77,0.736,-0.077,1.082,-0.675,-1.411,1.009,0.684,-0.157,0.19,0.149,1.085,0.222,0.634,1.041,1.925,-0.971,-0.338,-0.758,-0.062,-0.333,0.654,-0.039,0.118,1.291,-0.078,1.24,0.768,0.71,0.695,1.757,0.137,1.352,1.141,0.498,-0.008,0.136,-0.19,-0.511,0.728,0.631,0.846,-0.687,-1.088,-0.72,0.193,-0.294,0.72,-1.673,0.443,-1.158,-0.8,0.511,-1.158,-0.11,-2.112,0.715,1.549,0.878,-0.82,0.733,1.39,1.002,-0.208,-0.755,1.116,-0.23,0.202,-0.446,0.131,-1.123,-1.259,0.615,1.127,-0.734,-0.26,0.461,-0.529,-0.197,-2.695,-0.913,0.323,0.885,-2.322,-0.611,0.048,-1.718,1.29,-3.648,0.711,-1.162,-1.463,-0.663,1.228,-1.263,1.582,0.719,0.611,1.64,1.791,0.738,-1.317,-0.196,-1.976,0.871,0.375,0.903,0.501,0.115,-2.314,-0.808,-0.401,-1.734,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


(21948, 403)

Unnamed: 0,sig_id,abc_transporter_expression_enhancer,abl_inhibitor,ace_inhibitor,acetylcholine_release_enhancer,adenosine_deaminase_inhibitor,adenosine_kinase_inhibitor,adenylyl_cyclase_inhibitor,age_inhibitor,alcohol_dehydrogenase_inhibitor,aldehyde_dehydrogenase_activator,aldose_reductase_inhibitor,alpha_mannosidase_inhibitor,ampk_inhibitor,androgen_biosynthesis_inhibitor,angiotensin_receptor_agonist,antacid,anthelmintic,antihypertensive,antimetabolite,antipruritic,antirheumatic_drug,antiseptic,antispasmodic,antithyroid_agent,antitussive,anxiolytic,ap_inhibitor,apolipoprotein_expression_enhancer,apoptosis_inhibitor,arf_inhibitor,aryl_hydrocarbon_receptor_agonist,aryl_hydrocarbon_receptor_antagonist,aspartic_protease_inhibitor,atherogenesis_inhibitor,atherosclerosis_formation_inhibitor,atp-sensitive_potassium_channel_agonist,atp-sensitive_potassium_channel_inhibitor,atp_channel_activator,atp_channel_blocker,atp_citrase_lyase_inhibitor,autophagy_inducer,axl_kinase_inhibitor,bacterial_atpase_inhibitor,bacterial_permeability_inducer,bacterial_protein_synthesis_inhibitor,benzodiazepine_receptor_antagonist,beta_catenin_inhibitor,beta_lactamase_inhibitor,beta_secretase_inhibitor,big1_inhibitor,bile_acid,biliverdin_reductase_a_activator,bone_resorption_inhibitor,botulin_neurotoxin_inhibitor,bradykinin_receptor_antagonist,breast_cancer_resistance_protein_inhibitor,bronchodilator,calcitonin_antagonist,calcium_channel_activator,calmodulin_inhibitor,calpain_inhibitor,camp_stimulant,capillary_stabilizing_agent,car_agonist,car_antagonist,carboxylesterase_inhibitor,carcinogen,cardiac_glycoside,carnitine_palmitoyltransferase_inhibitor,caspase_inhibitor,cathepsin_inhibitor,cc_chemokine_receptor_agonist,cdc_inhibitor,cdk_expression_enhancer,cell_cycle_inhibitor,cell_proliferation_inhibitor,ceramidase_inhibitor,cftr_channel_agonist,cftr_channel_antagonist,chitin_inhibitor,chloride_channel_activator,choleretic_agent,cholinergic_receptor_agonist,cholinesterase_inhibitor,clk_inhibitor
,coenzyme_a_precursor,collagenase_inhibitor,collapsin_response_mediator_protein_stimulant,coloring_agent,complement_antagonist,complement_inhibitor,contraceptive_agent,contrast_agent,corticosteroid_antagonist,cyclin_d_inhibitor,cysteine_peptidase_inhibitor,cytidine_deaminase_inhibitor,cytokine_production_inhibitor,dehydrogenase_inhibitor,deubiquitinase_inhibitor,diacylglycerol_kinase_inhibitor,diacylglycerol_o_acyltransferase_inhibitor,differentiation_inducer,dihydroorotate_dehydrogenase_inhibitor,dihydropteroate_synthase_inhibitor,dihydropyrimidine_dehydrogenase_inhibitor,dna_dependent_protein_kinase_inhibitor,dna_methyltransferase_inhibitor,dna_polymerase_inhibitor,dna_repair_enzyme_inhibitor,dna_synthesis_inhibitor,dopamine_release_enhancer,dot1l_inhibitor,dynamin_inhibitor,dyrk_inhibitor,dystrophin_stimulant,endothelin_receptor_antagonist,enkephalinase_inhibitor,ephrin_inhibitor,epoxide_hydolase_inhibitor,etv1_inhibitor,eukaryotic_translation_initiation_factor_inhibitor,exportin_antagonist,fabi_inhibitor,farnesyl_pyrophosphate_synthase_inhibitor,fatty_acid_receptor_antagonist,fatty_acid_synthase_inhibitor,folate_receptor_ligand,free_fatty_acid_receptor_agonist,"fungal_1,3-beta-d-glucan_synthase_inhibitor",fungal_ergosterol_inhibitor,fungal_lanosterol_demethylase_inhibitor,fxr_agonist,fxr_antagonist,g_protein-coupled_receptor_agonist,g_protein-coupled_receptor_antagonist,g_protein_signaling_inhibitor,gaba_gated_chloride_channel_blocker,gaba_receptor_modulator,gaba_uptake_inhibitor,gap_junction_modulator,gastrin_inhibitor,gat_inhibitor,glcnac_phosphotransferase_inhibitor,gli_antagonist,glp_receptor_agonist,glucagon_receptor_antagonist,glucocorticoid_receptor_antagonist,glucokinase_activator,...,neurotransmitter,neurotrophic_agent,nfkb_activator,niemann-pick_c1-like_1_protein_antagonist,nitric_oxide_scavenger,nitric_oxide_stimulant,nociceptin_orphanin_fq_(nop)_receptor_antagonist,non-nucleoside_reverse_transcriptase_inhibitor,nootropic_agent,nop_receptor_agonist,no
radrenaline_uptake_inhibitor,norepinephrine_inhibitor,notch_signaling_inhibitor,ntpdase_inhibitor,nucleoside_reverse_transcriptase_inhibitor,oct_activator,omega_3_fatty_acid_stimulant,osteoclast_inhibitor,oxidizing_agent,oxidosqualene_cyclase_inhibitor,oxytocin_receptor_agonist,oxytocin_receptor_antagonist,p21_activated_kinase_inhibitor,p53_activator,p53_inhibitor,paba_antagonist,pdk1_inhibitor,penicillin_binding_protein_inhibitor,peptidase_inhibitor,perk_inhibitor,phosphatase_inhibitor,phosphofructokinase_inhibitor,phospholipase_activator,pim_inhibitor,pka_activator,pka_inhibitor,plasminogen_activator_inhibitor,platelet_activating_factor_receptor_antagonist,platelet_aggregation_inhibitor,plk_inhibitor,porcupine_inhibitor,potassium_channel_agonist,potassium_channel_blocker,prmt_inhibitor,progestogen_hormone,prolactin_inhibitor,prostacyclin_analog,prostanoid_receptor_agonist,prostanoid_receptor_inhibitor,protease_inhibitor,protein_kinase_activator,protein_synthesis_stimulant,psychoactive_drug,purine_antagonist,purinergic_receptor_antagonist,pxr_ligand,pyruvate_dehydrogenase_inhibitor,pyruvate_kinase_isozyme_activator,quorum_sensing_signaling_modulator,rad51_inhibitor,rage_receptor_antagonist,receptor_tyrosine_protein_kinase_inhibitor,reducing_agent,ret_inhibitor,ret_tyrosine_kinase_inhibitor,reverse_transcriptase_inhibitor,ribosomal_protein_inhibitor,ripk_inhibitor,rna_synthesis_inhibitor,ror_inverse_agonist,rsv_fusion_inhibitor,s100a9_inhibitor,sars_coronavirus_3c-like_protease_inhibitor,sedative,selective_estrogen_receptor_modulator_(serm),selective_serotonin_reuptake_inhibitor_(ssri),serine_protease_inhibitor,serine_threonine_kinase_inhibitor,serine_threonine_protein_phosphatase_activator,serotonin_release_inhibitor,sirt_activator,sirt_inhibitor,smoothened_receptor_agonist,sodium_calcium_exchange_inhibitor,sodium_channel_activator,sodium_channel_blocker,somatostatin_receptor_agonist,sphingosine_1_phosphate_receptor_agonist,sphingosine_kinase_inhibitor,src_activato
r,srebp_inhibitor,stat_inhibitor,stearoyl-coa_desaturase_inhibitor,steroid_sulfatase_inhibitor,steroidal_progestin,sterol_demethylase_inhibitor,sterol_regulatory_element_binding_protein_(srebp)_inhibitor,steryl_sulfatase_inhibitor,structural_glycoprotein_antagonist,succinimide_antiepileptic,sulfonylurea,synthetic_estrogen,t_cell_inhibitor,tankyrase_inhibitor,telomerase_inhibitor,testosterone_receptor_antagonist,thiazide_diuretic,thioredoxin_inhibitor,thrombopoietin_receptor_agonist,thromboxane_receptor_antagonist,thromboxane_synthase_inhibitor,thyroid_hormone_inhibitor,thyroid_hormone_stimulant,thyrotropin_releasing_hormone_receptor_agonist,tie_inhibitor,tissue_transglutaminase_inhibitor,topical_anesthetic,topical_sunscreen_agent,trace_amine_associated_receptor_agonist,trace_amine_associated_receptor_antagonist,trail_modulator,transient_receptor_potential_channel_agonist,triacylglycerol_lipase_inhibitor,tricyclic_antidepressant,tryptophan_hydroxylase_inhibitor,tyrosinase_inhibitor,tyrosine_hydroxylase_inhibitor,tyrosine_phosphatase_inhibitor,ubiquitin-conjugating_enzyme_inhibitor,ubiquitin_ligase_inhibitor,urease_inhibitor,uric_acid_diuretic,uricase_inhibitor,uricosuric,urotensin_receptor_agonist,urotensin_receptor_antagonist,vasoconstrictor,vasodilator,vasopressin_receptor_agonist,vasopressin_receptor_antagonist,ve-cadherin_antagonist,vesicular_monoamine_transporter_inhibitor,vitamin_k_antagonist,voltage-gated_calcium_channel_ligand,voltage-gated_potassium_channel_activator,voltage-gated_sodium_channel_blocker,wdr5_mll_interaction_inhibitor,wnt_agonist,xanthine_oxidase_inhibitor,xiap_inhibitor
0,id_000644bb2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,id_000779bfc,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,id_000a6266a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,id_0015fd391,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Show the length of each of the three column lists, one output per list.
display(*(len(cols) for cols in (target_cols, target_cols_non_scored, feature_cols)))

206

402

1222

# Definition

In [None]:
# Fix the random seeds
def seed_everything(seed=SEED):
    """Seed every random number generator this notebook touches.

    Args:
        seed: integer seed applied to Python's ``random``, NumPy, TensorFlow
            and PyTorch (CPU and CUDA). It is also written to the
            ``PYTHONHASHSEED`` environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Apply the same seed to each library, in a fixed order.
    seeders = (
        random.seed,
        np.random.seed,
        tf.random.set_seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Ask cuDNN to use deterministic algorithms.
    torch.backends.cudnn.deterministic = True


seed_everything(SEED)

# Model

In [None]:
# Function to calculate the mean log loss of the targets including clipping
def mean_log_loss(y_true, y_pred):
    """Return the binary log loss averaged over all target columns.

    Args:
        y_true: 2-D array-like of 0/1 labels, one column per target.
        y_pred: 2-D array-like of predicted probabilities with the same
            shape as ``y_true``.

    Returns:
        The mean of the per-column ``sklearn.metrics.log_loss`` values.
    """
    y_true = np.asarray(y_true)
    # Clip so that predictions of exactly 0 or 1 never produce log(0).
    y_pred = np.clip(np.asarray(y_pred), 1e-15, 1 - 1e-15)

    # Take the number of targets from the data instead of hard-coding 206.
    n_targets = y_true.shape[1]

    # labels=[0, 1] keeps log_loss from raising when a column happens to
    # contain only one class (e.g. on a small or debug subset).
    per_target = [
        log_loss(y_true[:, idx], y_pred[:, idx], labels=[0, 1])
        for idx in range(n_targets)
    ]
    return np.mean(per_target)


def create_model_3l(shape, n_targets=206):
    """Build and compile the dense network with three hidden layers.

    The network is a stack of BatchNormalization -> Dropout ->
    weight-normalised Dense blocks, ending in a sigmoid output layer.

    Args:
        shape: number of input features (an int), or an input-shape
            sequence that excludes the batch dimension.
        n_targets: number of sigmoid output units. Defaults to 206, the
            value that was previously hard-coded.

    Returns:
        A compiled ``tf.keras`` model using Lookahead-wrapped Adam and
        binary cross-entropy with label smoothing.
    """
    # `(shape)` is just `shape`, not a tuple, so build the tuple explicitly.
    input_shape = (int(shape),) if np.ndim(shape) == 0 else tuple(shape)

    # (dropout rate applied before the layer, units of the dense layer)
    hidden_layers = (
        (0.4914099166744246, 1159),
        (0.18817607797795838, 960),
        (0.12542057776853896, 1811),
    )
    output_dropout = 0.20175242230280122

    inp = tf.keras.layers.Input(shape = input_shape)
    x = inp
    for rate, units in hidden_layers:
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Dropout(rate)(x)
        x = tfa.layers.WeightNormalization(tf.keras.layers.Dense(units, activation = 'relu'))(x)

    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(output_dropout)(x)
    out = tfa.layers.WeightNormalization(tf.keras.layers.Dense(n_targets, activation = 'sigmoid'))(x)

    model = tf.keras.models.Model(inputs = inp, outputs = out)

    # LEARNING_RATE is read from the notebook globals when the model is built.
    opt = tf.optimizers.Adam(learning_rate = LEARNING_RATE)
    opt = tfa.optimizers.Lookahead(opt, sync_period = 10)

    # NOTE(review): label_smoothing is fixed at 0.0015 here and does not read
    # the LABEL_SMOOTHING / SMOOTHING settings from the configuration cell --
    # confirm this is intended.
    # The metric is the un-smoothed binary cross-entropy; the training
    # callbacks monitor it as 'val_binary_crossentropy'.
    model.compile(optimizer = opt,
                  loss = tf.keras.losses.BinaryCrossentropy(label_smoothing = 0.0015),
                  metrics = [tf.keras.metrics.BinaryCrossentropy()])
    return model

# HyperParameters

In [None]:
# Training hyperparameters.
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
BATCH_SIZE = 128

# Run-size settings: the full configuration, unless DEBUG asks for a short run.
if not DEBUG:
    EPOCHS = 80
    SEED_AVG = [0, 101, 202, 303, 404]
    NFOLDS = 5
    VERBOSE = 0
else:
    EPOCHS = 3
    SEED_AVG = [0, 101]
    NFOLDS = 3
    VERBOSE = 1

In [None]:
# Echo the DEBUG flag so the saved output records which run configuration was used.
print(DEBUG)

False


# Training

In [None]:
def run_training(tr_idx, va_idx, fold, seed):
    """Train one fold and return its out-of-fold and test predictions.

    Args:
        tr_idx: positional row indices of ``train`` used to fit the model.
        va_idx: positional row indices of ``train`` used for validation
            (early stopping / LR scheduling) and for the OOF predictions.
        fold: fold number, used only in the progress message.
        seed: seed passed to ``seed_everything`` before the model is built.

    Returns:
        A tuple ``(oof, predictions)``. ``oof`` has one row per row of
        ``train`` and is zero everywhere except the ``va_idx`` rows, which
        hold the model's predictions. ``predictions`` is the model output
        for every row of ``test``.
    """
    # Set the seed
    seed_everything(seed)

    # Only the validation rows are filled in; the caller sums the folds.
    oof = np.zeros((len(train), len(target_cols)))

    # Build the train / validation arrays for this fold
    train_df = train.iloc[tr_idx]
    valid_df = train.iloc[va_idx]
    x_train, y_train = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid = valid_df[feature_cols].values, valid_df[target_cols].values

    # Clear the Keras session state left over from earlier folds.
    K.clear_session()
    model = create_model_3l(len(feature_cols))
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_binary_crossentropy',
                                                        mode = 'min',
                                                        patience = 10,
                                                        restore_best_weights = True,
                                                        verbose = VERBOSE)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_binary_crossentropy',
                                                        mode = 'min',
                                                        factor = 0.3,
                                                        patience = 3,
                                                        verbose = VERBOSE)

    model.fit(x_train, y_train,
                validation_data = (x_valid, y_valid),
                epochs = EPOCHS,
                batch_size = BATCH_SIZE,
                callbacks = [early_stopping, reduce_lr],
                verbose = VERBOSE
            )

    oof[va_idx] = model.predict(x_valid)
    # No placeholder array is needed: predict() returns the full test matrix.
    predictions = model.predict(test[feature_cols].values)

    print(f'FOLD:{fold}\t Done.')

    return oof, predictions

### train

In [None]:
%%time
# Label matrix without the sig_id identifier column; used both for
# stratification and for scoring.
target_values = target.drop('sig_id', axis=1).values

# Seed-averaged out-of-fold and test predictions.
oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))

for seed in SEED_AVG:
    print(f'============== Training SEED is {seed} ==============')
    start = time.time()

    oof_fold = np.zeros((len(train), len(target_cols)))
    predictions_fold = np.zeros((len(test), len(target_cols)))

    # CV
    mskf = MultilabelStratifiedKFold(n_splits=NFOLDS, random_state=seed, shuffle=True)

    # Stratify on the label columns only; sig_id is an identifier, not a label.
    for i_fold, (tr_idx, va_idx) in enumerate(mskf.split(X=train, y=target_values)):
        # Train this fold
        oof_, pred_ = run_training(tr_idx, va_idx, i_fold, seed)

        # Accumulate across folds: each fold fills only its own validation
        # rows of oof_, so the OOF matrices are summed, while the test
        # predictions are averaged.
        oof_fold += oof_
        predictions_fold += pred_ / NFOLDS

    oof_score = mean_log_loss(target_values, oof_fold)
    print(f'fold mean log loss score is {oof_score}')

    # Accumulate across seeds
    oof += oof_fold / len(SEED_AVG)
    predictions += predictions_fold / len(SEED_AVG)

    elapsed_time = time.time() - start
    print(f'SEED: {seed} Elapsed_time:{elapsed_time:.4f} sec')

seed_log_loss = mean_log_loss(target_values, oof)
print(f'Our out of folds log loss for our seed blend model is {seed_log_loss}')

FOLD:0	 Done.
FOLD:1	 Done.
FOLD:2	 Done.
FOLD:3	 Done.
FOLD:4	 Done.
fold mean log loss score is 0.016344476072546264
SEED: 0 Elapsed_time:574.5840 sec
FOLD:0	 Done.
FOLD:1	 Done.
FOLD:2	 Done.
FOLD:3	 Done.
FOLD:4	 Done.
fold mean log loss score is 0.016303291097579383
SEED: 101 Elapsed_time:568.3477 sec
FOLD:0	 Done.
FOLD:1	 Done.
FOLD:2	 Done.
FOLD:3	 Done.
FOLD:4	 Done.
fold mean log loss score is 0.0163255925168121
SEED: 202 Elapsed_time:636.9056 sec
FOLD:0	 Done.
FOLD:1	 Done.
FOLD:2	 Done.
FOLD:3	 Done.
FOLD:4	 Done.
fold mean log loss score is 0.016275077514560664
SEED: 303 Elapsed_time:568.6055 sec
FOLD:0	 Done.
FOLD:1	 Done.
FOLD:2	 Done.
FOLD:3	 Done.
FOLD:4	 Done.
fold mean log loss score is 0.016348484453462257
SEED: 404 Elapsed_time:597.8755 sec
Our out of folds log loss for our seed blend model is 0.01601140679038031
CPU times: user 42min 36s, sys: 4min 4s, total: 46min 41s
Wall time: 49min 7s


# Submission

In [None]:
# Store the predictions in the target columns of train / test.
# NOTE(review): this overwrites train[target_cols] in place, so re-running the
# training cell after this one would fit on predictions instead of labels.
train[target_cols] = oof
test[target_cols] = predictions
print(len(target_cols))

# Re-attach the OOF predictions to every sig_id of the original target file;
# ids that have no row in train are filled with 0.
oof_by_id = train[['sig_id'] + target_cols]
all_ids = train_targets_scored.drop(columns=target_cols)
valid_results = all_ids.merge(oof_by_id, on='sig_id', how='left').fillna(0)
print(train_targets_scored.shape, valid_results.shape)

206
(23814, 207) (23814, 207)


In [None]:
y_true = train_targets_scored[target_cols].values
y_pred = valid_results[target_cols].values

# Average the per-target log loss over the scored targets.
# The divisor must be the number of target columns being looped over.
# `target` still carries the sig_id column (it is dropped before scoring in
# the training cell), so dividing by target.shape[1] under-reported the mean.
n_targets = len(target_cols)

score = 0
for i in range(n_targets):
    score_ = log_loss(y_true[:, i], y_pred[:, i])
    score += score_ / n_targets

print("CV log_loss: ", score)

CV log_loss:  0.014847399190757107


ベストのCV log_loss: 0.014564886043488417

---
PCA: 0.014782093830112145  
PCA + SVD: 0.014742253743297256  
PCA + SVD + kmeans: 0.014748313722749552  
PCA + SVD + FA: 0.014717234352890492  
PCA + SVD + FA + g**2特徴量: 0.014847399190757107

In [None]:
# Attach the test predictions to every sig_id of the sample submission;
# ids that have no row in test are filled with 0.
test_preds_by_id = test[['sig_id'] + target_cols]
submission_ids = sample_submission.drop(columns=target_cols)
sub = submission_ids.merge(test_preds_by_id, on='sig_id', how='left').fillna(0)
sub.to_csv('submission.csv', index=False)