<a href="https://colab.research.google.com/github/takapy0210/kaggle_MoA/blob/master/20201023_2_tf_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Config

In [52]:
class Config():
    # setting
    is_debug = False
    is_kaggle = False
    is_pretrain = False
    is_sub_clipping = True

    # features
    do_variancethreshold = False
    do_kmeans = False
    do_filter = True
    do_feature_squared = True
    do_feature_stats = True
    do_feature_pca = True
    do_feature_svd = True
    do_feature_fa = True

    # constant
    seed = 42
    n_gene_comp = 70
    n_cell_comp = 10
    n_gene_kmeans_cluster = 30
    n_cell_kmeans_cluster = 5
    n_variance_threshold = 0.7
    p_min = 0.001
    p_max = 0.999
    scaler = 'Rankgauss' # Standard, Robust, MinMax

    # HyperParameters
    epochs = 80
    seed_avg = [0, 101, 202, 303 ,404]
    nfold = 5
    verbose = 0
    lr = 1e-3
    weight_decay = 1e-5
    batch_size = 128

config = Config()

In [2]:
! ls

drive  sample_data


In [3]:
import os
import sys
if config.is_kaggle:
    DATA_DIR = '../input/lish-moa/'
    sys.path.append('../input/iterative-stratification/iterative-stratification-master')
else:
    ! pip install iterative-stratification
    DATA_DIR = '/content/drive/My Drive/ML/kaggle/MoA/data/raw/'

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold



In [4]:
print(os.listdir(DATA_DIR))

['sample_submission.csv', 'test_features.csv', 'train_features.csv', 'train_targets_nonscored.csv', 'train_targets_scored.csv', '.DS_Store', 'config.txt']


# import

In [5]:
import json
import time
import re
import random
import datetime
import pickle
import gc
import warnings

import numpy as np
import pandas as pd
import yaml
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm

# sklearn
from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, OneHotEncoder, LabelEncoder, QuantileTransformer
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA, TruncatedSVD, FactorAnalysis
from sklearn.cluster import KMeans

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.modules.loss import _WeightedLoss

# tensorflow
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow.keras import layers, regularizers, Sequential, backend, callbacks, optimizers, metrics, losses

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_colwidth', 300)
pd.options.display.float_format = '{:.3f}'.format
%matplotlib inline

# Data load

In [6]:
train_features = pd.read_csv(DATA_DIR + 'train_features.csv')
train_targets_scored = pd.read_csv(DATA_DIR + 'train_targets_scored.csv')
train_targets_nonscored = pd.read_csv(DATA_DIR + 'train_targets_nonscored.csv')
test_features = pd.read_csv(DATA_DIR + 'test_features.csv')
sample_submission = pd.read_csv(DATA_DIR + 'sample_submission.csv')

In [7]:
display(train_features.shape, train_targets_scored.shape, train_targets_nonscored.shape, test_features.shape, sample_submission.shape)

(23814, 876)

(23814, 207)

(23814, 403)

(3982, 876)

(3982, 207)

# 特徴量生成

In [8]:
def data_filter(train, test):
    """cp_type = ctl_vehicleのデータは除外（unknownデータなので）
    """
    train = train[train['cp_type']!='ctl_vehicle'].reset_index(drop=True)
    test = test[test['cp_type']!='ctl_vehicle'].reset_index(drop=True)
    train = train.drop('cp_type', axis=1)
    test = test.drop('cp_type', axis=1)
    return train, test


def one_hot_encoder(df, cols):
    """sklearnのOneHotEncoderでEncodingを行う
    """
    for col in cols:
        ohe = OneHotEncoder(sparse=False)
        ohe_df = pd.DataFrame(ohe.fit_transform(df[[col]])).add_prefix(col + '_ohe_')
        # 元のDFに結合
        df = pd.concat([df, ohe_df], axis=1)
        # oheしたカラムを除外
        df = df.drop(col, axis=1)
    return df


def kmeans(df, n_cluster):
    """k-meansで教師なし学習（クラスタ分類）
    """
    km = KMeans(
            n_clusters=n_cluster,
            init='k-means++',
            random_state=SEED
        )
    y_km = km.fit(df)

    return y_km.labels_


def feature_stats(df):
    """基礎統計量の追加
    """
    df.loc[:, 'g-sum'] = df[GENES].sum(axis=1)
    df.loc[:, 'g-mean'] = df[GENES].mean(axis=1)
    df.loc[:, 'g-std'] = df[GENES].std(axis=1)
    df.loc[:, 'g-kurt'] = df[GENES].kurtosis(axis=1)
    df.loc[:, 'g-skew'] = df[GENES].skew(axis=1)

    df.loc[:, 'c-sum'] = df[CELLS].sum(axis=1)
    df.loc[:, 'c-mean'] = df[CELLS].mean(axis=1)
    df.loc[:, 'c-std'] = df[CELLS].std(axis=1)
    df.loc[:, 'c-kurt'] = df[CELLS].kurtosis(axis=1)
    df.loc[:, 'c-skew'] = df[CELLS].skew(axis=1)

    df.loc[:, 'gc-sum'] = df[GENES + CELLS].sum(axis=1)
    df.loc[:, 'gc-mean'] = df[GENES + CELLS].mean(axis=1)
    df.loc[:, 'gc-std'] = df[GENES + CELLS].std(axis=1)
    df.loc[:, 'gc-kurt'] = df[GENES + CELLS].kurtosis(axis=1)
    df.loc[:, 'gc-skew'] = df[GENES + CELLS].skew(axis=1)
    return df


def feature_pca(df, col_list, n_comp, col_type='g', seed=config.seed):
    """PCAの特徴量を生成
    """
    pca = (PCA(n_components=n_comp, random_state=seed).fit_transform(df[col_list]))
    pca_df = pd.DataFrame(pca, columns=[f'{col_type}-pca_{i}' for i in range(n_comp)])
    df = pd.concat([df, pca_df], axis=1)
    return df


def feature_svd(df, col_list, n_comp, col_type='g', seed=config.seed):
    """SVDの特徴量を生成
    """
    svd = (TruncatedSVD(n_components=n_comp, random_state=seed).fit_transform(df[col_list]))
    svd_df = pd.DataFrame(svd, columns=[f'{col_type}-svd_{i}' for i in range(n_comp)])
    df = pd.concat([df, svd_df], axis=1)
    return df


def feature_fa(df, col_list, n_comp, col_type='g', seed=config.seed):
    """FAの特徴量を生成
    """
    svd = (FactorAnalysis(n_components=n_comp, random_state=seed).fit_transform(df[col_list]))
    svd_df = pd.DataFrame(svd, columns=[f'{col_type}-fa_{i}' for i in range(n_comp)])
    df = pd.concat([df, svd_df], axis=1)
    return df


def feature_squared(df, cols_list):
    """二乗を計算
    """
    for feature in cols_list:
        df.loc[:, f'{feature}_squared'] = df[feature] ** 2
    return df


def variance_threshold(df, n):
    """分散がしきい値以下の特徴量を捨てる
    """
    var_thresh = VarianceThreshold(threshold=n)
    df = pd.DataFrame(var_thresh.fit_transform(df))
    return df


def rankgauss(df, cols, seed=config.seed):
    """RankGauss
    """
    for col in cols:
        transformer = QuantileTransformer(n_quantiles=100, random_state=seed, output_distribution="normal")
        vec_len = len(df[col].values)
        raw_vec = df[col].values.reshape(vec_len, 1)
        transformer.fit(raw_vec)
        df[col] = transformer.transform(raw_vec).reshape(1, vec_len)[0]
        
    return df


def feature_engineering(train_features, test_features):

    global GENES, CELLS

    # カラムのリストを保持
    GENES = [col for col in train_features.columns if col.startswith('g-')]
    CELLS = [col for col in train_features.columns if col.startswith('c-')]

    # カテゴリカラム
    cat_columns = ['cp_time', 'cp_dose']

    # filter
    if config.do_filter:
        print('do filter')
        train, test = data_filter(train_features, test_features)

    df = pd.concat([train, test])
    df = df.reset_index(drop=True)

    # k-means cluster
    if config.do_kmeans:
        print('do k-means')
        df.loc[:, 'g-cluster'] = kmeans(df[GENES], n_cluster=config.n_gene_kmeans_cluster)
        df.loc[:, 'c-cluster'] = kmeans(df[CELLS], n_cluster=config.n_cell_kmeans_cluster)
        cat_columns = cat_columns + ['g-cluster', 'c-cluster']

    # Stats feature
    if config.do_feature_stats:
        print('do feature_stats')
        df = feature_stats(df)

    # squared
    if config.do_feature_squared:
        print('do feature_squared')
        df = feature_squared(df, CELLS)

    # PCA feature
    if config.do_feature_pca:
        print('do feature_pca')
        df = feature_pca(df, GENES, n_comp=config.n_gene_comp, col_type='g')
        df = feature_pca(df, CELLS, n_comp=config.n_cell_comp, col_type='c')

    # SVD feature
    if config.do_feature_svd:
        print('do feature_svd')
        df = feature_svd(df, GENES, n_comp=config.n_gene_comp, col_type='g')
        df = feature_svd(df, CELLS, n_comp=config.n_cell_comp, col_type='c')

    # FA feature
    if config.do_feature_fa:
        print('do feature_fa')
        df = feature_fa(df, GENES, n_comp=config.n_gene_comp, col_type='g')
        df = feature_fa(df, CELLS, n_comp=config.n_cell_comp, col_type='c')

    # カテゴリのDFとnotカテゴリのDFに分割（標準化&エンコードのため）
    cat_df = df[['sig_id'] + cat_columns]
    num_df = df.drop(['sig_id'] + cat_columns, axis=1)

    # VarianceThreshold
    if config.do_variancethreshold:
        print('do variancethreshold')
        num_df = variance_threshold(num_df, n=config.n_variance_threshold)

    # 正規化
    if config.scaler == 'Rankgauss':
        print('do Rankgauss')
        df = rankgauss(df, num_df.columns.tolist())

    elif config.scaler == 'Standard':
        print('do Standard')
        sscaler = StandardScaler()
        num_df.iloc[:, :] = sscaler.fit_transform(num_df)

    elif config.scaler == 'Robust':
        print('do Robust')
        rscaler = RobustScaler()
        num_df.iloc[:, :] = rscaler.fit_transform(num_df)

    elif config.scaler == 'MinMax':
        print('do MinMax')
        mmscaler = MinMaxScaler()
        num_df.iloc[:, :] = mmscaler.fit_transform(num_df)

    # カテゴリ変数をone-hot-encode
    cat_df = one_hot_encoder(cat_df, cat_columns)

    # カテゴリDFとnotカテゴリDFを結合
    df = pd.concat([cat_df, num_df], axis=1)

    # trainとtestに再分割
    train = df.iloc[:len(train), :]
    test = df.iloc[len(train):, :]
    train = train.reset_index(drop=True)
    test = test.reset_index(drop=True)

    return train, test

In [9]:
%%time
train, test = feature_engineering(train_features, test_features)

do filter
do feature_stats
do feature_squared
do feature_pca
do feature_svd
do feature_fa
do Rankgauss
CPU times: user 48.3 s, sys: 4.74 s, total: 53 s
Wall time: 38.5 s


In [10]:
# カラムのリストを取得
target_cols = train_targets_scored.drop('sig_id', axis=1).columns.values.tolist()  # 目的変数のカラムリスト
target_cols_non_scored = train_targets_nonscored.drop('sig_id', axis=1).columns.values.tolist()  # pretrain用の目的変数カラムリスト
feature_cols = [c for c in train.columns if c not in ['sig_id']]  # 学習に使用するカラムリスト

# train用のデータセット生成
train = train.merge(train_targets_scored, on='sig_id')
target = train[train_targets_scored.columns]

# pretrain用のデータセット生成
train_non_scored = train[['sig_id'] + feature_cols].merge(train_targets_nonscored, on='sig_id')
target_non_scored = train_non_scored[train_targets_nonscored.columns]

In [11]:
display(train.shape, train.head(), test.shape, test.head(), target.shape, target.head())

(21948, 1439)

Unnamed: 0,sig_id,cp_time_ohe_0,cp_time_ohe_1,cp_time_ohe_2,cp_dose_ohe_0,cp_dose_ohe_1,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,g-8,g-9,g-10,g-11,g-12,g-13,g-14,g-15,g-16,g-17,g-18,g-19,g-20,g-21,g-22,g-23,g-24,g-25,g-26,g-27,g-28,g-29,g-30,g-31,g-32,g-33,g-34,g-35,g-36,g-37,g-38,g-39,g-40,g-41,g-42,g-43,g-44,g-45,g-46,g-47,g-48,g-49,g-50,g-51,g-52,g-53,g-54,g-55,g-56,g-57,g-58,g-59,g-60,g-61,g-62,g-63,g-64,g-65,g-66,g-67,g-68,g-69,g-70,g-71,g-72,g-73,g-74,g-75,g-76,g-77,g-78,g-79,g-80,g-81,g-82,g-83,g-84,g-85,g-86,g-87,g-88,g-89,g-90,g-91,g-92,g-93,g-94,g-95,g-96,g-97,g-98,g-99,g-100,g-101,g-102,g-103,g-104,g-105,g-106,g-107,g-108,g-109,g-110,g-111,g-112,g-113,g-114,g-115,g-116,g-117,g-118,g-119,g-120,g-121,g-122,g-123,g-124,g-125,g-126,g-127,g-128,g-129,g-130,g-131,g-132,g-133,g-134,g-135,g-136,g-137,g-138,g-139,g-140,g-141,g-142,g-143,...,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gtpase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_000644bb2,1.0,0.0,0.0,1.0,0.0,1.062,0.558,-0.248,-0.621,-0.194,-1.012,-1.022,-0.033,0.555,-0.092,1.183,0.153,0.557,-0.402,0.179,-0.653,-0.797,0.634,0.178,-0.369,-0.569,-1.136,-1.188,0.694,0.439,0.266,0.191,0.163,-0.285,0.582,0.293,-0.558,-0.092,-0.301,-0.154,0.22,0.296,-0.505,-0.512,-0.216,-0.035,-0.257,-1.198,0.312,-1.033,0.221,-0.114,-0.025,0.035,0.423,-0.122,-0.493,0.612,0.471,-0.035,-0.161,-0.675,0.226,0.579,0.959,0.606,-0.13,0.661,-0.314,-0.202,0.025,0.167,-0.419,0.655,-0.74,-0.705,-0.537,0.031,-0.524,-0.341,-0.633,-1.033,-0.376,-0.855,0.735,-0.223,-0.196,-0.001,-2.144,1.732,0.019,-0.119,0.405,-1.584,-0.301,0.205,-1.123,-1.559,-1.798,0.702,-1.039,0.096,0.453,0.545,-0.083,-0.536,0.552,-0.777,-0.804,-0.118,-1.256,0.159,-0.7,-1.747,0.712,-0.763,-0.029,1.267,0.473,-0.99,0.439,-0.119,-0.035,0.468,1.975,0.054,1.288,1.226,0.049,-0.862,-1.277,-0.181,-0.26,-0.513,-0.023,-0.226,-0.735,0.312,-0.877,0.354,0.283,-0.35,-0.437,-0.465,-0.494,0.53,1.242,-1.187,0.438,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,id_000779bfc,0.0,0.0,1.0,1.0,0.0,0.074,0.409,0.299,0.06,1.019,0.521,0.234,0.337,-0.405,0.851,-1.152,-0.42,-0.096,0.459,0.08,0.225,0.529,0.284,-0.349,0.288,0.945,-0.165,-0.266,-0.337,0.314,-0.432,0.477,0.207,-0.422,-0.116,-0.05,-0.263,0.996,-0.248,0.266,-0.21,0.166,0.53,-0.257,-0.045,0.119,-0.396,-1.173,0.451,1.925,1.101,0.265,-0.361,0.171,-0.89,1.273,-0.504,-0.228,-0.734,-0.428,-0.123,-0.633,0.476,0.325,-0.741,-0.581,1.784,-0.109,1.035,0.409,-0.695,-0.803,0.095,-0.405,0.12,0.196,0.072,-0.246,-0.108,0.401,-0.586,0.893,0.284,1.187,0.731,0.773,-1.592,-1.35,0.005,-0.651,-0.398,0.698,0.138,0.256,0.256,0.056,-0.385,-0.653,-0.174,-0.019,-0.491,-1.382,0.43,-0.397,0.135,0.713,-0.072,-0.86,-0.386,-0.095,-0.593,-0.163,-0.236,0.232,0.049,0.73,0.027,-0.148,0.27,0.884,-0.294,-0.481,0.027,-1.214,1.763,0.786,0.023,0.015,0.08,-0.807,1.134,-0.254,-0.164,-0.653,1.148,0.217,-0.049,0.081,-0.806,-0.306,-0.408,0.504,0.747,-0.042,0.955,-0.478,-1.391,0.19,-0.469,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,id_000a6266a,0.0,1.0,0.0,1.0,0.0,0.628,0.582,1.554,-0.076,-0.032,1.239,0.172,0.215,0.006,1.23,-0.48,-0.563,-0.037,-1.83,0.606,-0.328,0.604,-0.307,-0.115,-0.057,-0.08,-0.818,-1.532,0.231,0.49,0.478,-1.397,4.624,-0.044,1.287,-1.853,0.607,0.429,0.178,0.002,-1.18,0.126,-0.122,5.447,1.031,0.348,-0.556,0.036,-0.364,-0.465,-0.835,0.111,-0.761,-0.498,0.455,-0.098,-0.266,0.606,0.269,-0.619,-0.444,1.254,0.062,-0.176,-1.437,0.555,0.835,-0.333,-0.293,-0.164,-1.128,-0.029,0.094,0.592,0.681,0.782,0.68,0.963,0.406,-0.071,-1.357,0.231,-0.413,-0.141,0.308,-0.427,-0.807,1.005,-0.068,-0.062,-0.156,0.408,0.716,-0.204,-0.925,0.854,0.429,0.34,1.687,-0.628,0.255,0.682,0.096,-0.306,0.734,-0.765,-0.315,0.685,0.733,0.0,-0.446,-0.96,-0.26,-0.744,0.139,-0.12,-0.008,-0.899,-0.163,-0.383,-0.124,-0.745,-0.445,-0.247,-0.018,0.255,-0.963,1.066,0.181,0.241,0.918,-0.049,-0.389,-0.284,0.305,-0.175,-0.346,-0.619,-0.689,0.653,-0.03,-0.075,-0.009,-0.406,-0.774,-1.126,-0.812,0.393,1.806,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,id_0015fd391,0.0,1.0,0.0,1.0,0.0,-0.514,-0.249,-0.266,0.529,4.062,-0.809,-1.959,0.179,-0.132,-1.06,-0.827,-0.358,-0.851,-0.584,-2.569,0.818,-0.053,-0.855,0.116,-2.352,2.12,-1.158,-0.719,-0.8,-1.467,-0.011,-0.899,0.241,-0.248,-1.089,-0.757,0.088,-2.737,0.875,0.579,-1.674,-1.672,-1.269,3.09,-0.381,-0.723,-0.001,0.135,-1.64,-0.748,-1.082,-0.152,0.075,-0.485,0.664,-2.823,-0.604,-1.355,2.02,-0.53,-1.324,-0.594,0.433,0.09,-1.524,0.394,1.463,-1.21,-1.234,-1.03,0.798,0.231,0.119,1.537,0.387,-0.404,0.977,-0.916,1.272,1.367,-3.321,-1.051,-0.707,1.038,-0.73,-0.98,0.134,0.326,0.564,0.896,1.028,0.051,-0.474,1.164,0.276,1.631,4.916,2.369,0.683,-1.481,1.431,0.169,-1.722,0.089,-0.812,1.039,-0.085,2.057,-0.797,0.0,1.672,-0.162,-0.947,-0.494,-0.265,-0.703,-0.358,-0.136,-2.053,0.281,-0.016,-0.537,-1.897,0.064,0.233,-0.721,-1.396,0.649,-0.258,1.416,-1.542,0.455,0.23,0.858,0.213,0.097,3.085,0.02,-0.392,-1.291,0.284,-0.52,-0.028,1.014,1.13,-0.383,-0.277,0.599,-0.515,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,0.0,0.0,1.0,0.0,1.0,-0.325,-0.401,0.97,0.692,1.418,-0.824,-0.28,-0.15,-0.879,0.863,-0.222,-0.512,-0.958,1.175,0.204,0.197,0.124,-1.709,-0.354,-0.516,-0.333,-0.269,0.765,0.206,1.372,0.683,0.806,-0.375,-1.209,0.296,-0.071,0.639,0.667,-0.078,1.174,-0.711,-1.447,1.062,0.789,-0.085,0.13,0.122,0.958,0.213,0.616,1.157,1.114,-0.942,-0.378,-0.815,-0.08,-0.382,0.688,0.0,0.178,1.132,-0.118,1.213,0.771,0.674,0.787,1.641,0.181,1.348,1.107,0.539,0.0,0.126,-0.156,-0.544,0.859,0.58,0.935,-0.751,-1.116,-0.787,0.168,-0.309,0.663,-1.7,0.425,-1.17,-0.874,0.617,-1.216,-0.149,-2.252,0.682,1.48,0.907,-0.798,0.802,1.299,1.069,-0.152,-0.8,1.194,-0.233,0.234,-0.438,0.097,-1.188,-1.303,0.619,0.0,1.179,-0.852,-0.28,0.469,-0.536,-0.225,-2.351,-0.918,0.3,0.86,-2.335,-0.652,0.048,-1.678,1.189,-3.848,0.735,-1.069,-1.395,-0.629,1.197,-1.388,1.203,0.66,0.624,1.533,2.039,0.453,-1.421,-0.187,-1.704,0.915,0.377,0.913,0.502,0.085,-2.428,-0.86,-0.382,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


(3624, 1233)

Unnamed: 0,sig_id,cp_time_ohe_0,cp_time_ohe_1,cp_time_ohe_2,cp_dose_ohe_0,cp_dose_ohe_1,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,g-8,g-9,g-10,g-11,g-12,g-13,g-14,g-15,g-16,g-17,g-18,g-19,g-20,g-21,g-22,g-23,g-24,g-25,g-26,g-27,g-28,g-29,g-30,g-31,g-32,g-33,g-34,g-35,g-36,g-37,g-38,g-39,g-40,g-41,g-42,g-43,g-44,g-45,g-46,g-47,g-48,g-49,g-50,g-51,g-52,g-53,g-54,g-55,g-56,g-57,g-58,g-59,g-60,g-61,g-62,g-63,g-64,g-65,g-66,g-67,g-68,g-69,g-70,g-71,g-72,g-73,g-74,g-75,g-76,g-77,g-78,g-79,g-80,g-81,g-82,g-83,g-84,g-85,g-86,g-87,g-88,g-89,g-90,g-91,g-92,g-93,g-94,g-95,g-96,g-97,g-98,g-99,g-100,g-101,g-102,g-103,g-104,g-105,g-106,g-107,g-108,g-109,g-110,g-111,g-112,g-113,g-114,g-115,g-116,g-117,g-118,g-119,g-120,g-121,g-122,g-123,g-124,g-125,g-126,g-127,g-128,g-129,g-130,g-131,g-132,g-133,g-134,g-135,g-136,g-137,g-138,g-139,g-140,g-141,g-142,g-143,...,g-svd_10,g-svd_11,g-svd_12,g-svd_13,g-svd_14,g-svd_15,g-svd_16,g-svd_17,g-svd_18,g-svd_19,g-svd_20,g-svd_21,g-svd_22,g-svd_23,g-svd_24,g-svd_25,g-svd_26,g-svd_27,g-svd_28,g-svd_29,g-svd_30,g-svd_31,g-svd_32,g-svd_33,g-svd_34,g-svd_35,g-svd_36,g-svd_37,g-svd_38,g-svd_39,g-svd_40,g-svd_41,g-svd_42,g-svd_43,g-svd_44,g-svd_45,g-svd_46,g-svd_47,g-svd_48,g-svd_49,g-svd_50,g-svd_51,g-svd_52,g-svd_53,g-svd_54,g-svd_55,g-svd_56,g-svd_57,g-svd_58,g-svd_59,g-svd_60,g-svd_61,g-svd_62,g-svd_63,g-svd_64,g-svd_65,g-svd_66,g-svd_67,g-svd_68,g-svd_69,c-svd_0,c-svd_1,c-svd_2,c-svd_3,c-svd_4,c-svd_5,c-svd_6,c-svd_7,c-svd_8,c-svd_9,g-fa_0,g-fa_1,g-fa_2,g-fa_3,g-fa_4,g-fa_5,g-fa_6,g-fa_7,g-fa_8,g-fa_9,g-fa_10,g-fa_11,g-fa_12,g-fa_13,g-fa_14,g-fa_15,g-fa_16,g-fa_17,g-fa_18,g-fa_19,g-fa_20,g-fa_21,g-fa_22,g-fa_23,g-fa_24,g-fa_25,g-fa_26,g-fa_27,g-fa_28,g-fa_29,g-fa_30,g-fa_31,g-fa_32,g-fa_33,g-fa_34,g-fa_35,g-fa_36,g-fa_37,g-fa_38,g-fa_39,g-fa_40,g-fa_41,g-fa_42,g-fa_43,g-fa_44,g-fa_45,g-fa_46,g-fa_47,g-fa_48,g-fa_49,g-fa_50,g-fa_51,g-fa_52,g-fa_53,g-fa_54,g-fa_55,g-fa_56,g-fa_57,g-fa_58,g-fa_59,g-fa_60,g-fa_61,g-fa_62,g-fa_63,g-fa_64,g-fa_65,g-fa_66,g-fa_67,g-fa_68,g-fa_69,c-fa_0,c-fa_1,c-fa_2,c-fa_3,c-fa_4,c-fa_5,c-fa_6,c-fa_7,c-fa_8,c-fa_9
0,id_0004d9e33,1.0,0.0,0.0,1.0,0.0,-0.546,0.131,-0.513,0.441,1.55,-0.164,-0.214,0.222,-0.326,1.939,-0.231,-0.367,1.304,1.461,0.004,0.682,-0.23,-0.064,-0.203,-0.682,-0.624,0.13,-0.034,0.337,0.225,0.479,0.764,0.664,-0.248,-0.118,-0.485,-0.018,-0.82,-0.53,-1.507,-0.014,0.445,0.575,-0.299,-0.953,-0.144,-1.088,0.208,0.567,1.119,-0.045,0.279,-1.023,-0.623,-0.668,-0.072,-1.524,0.912,0.688,-0.107,-0.113,0.454,5.514,0.327,0.139,-0.232,1.517,0.055,0.904,-0.309,1.536,0.151,-0.371,-0.019,-0.499,1.208,-0.361,0.798,0.308,0.596,0.401,0.355,-0.042,0.316,0.066,0.22,-0.59,-0.428,0.148,-0.879,0.193,0.686,-0.205,-0.032,-0.458,-0.068,-0.526,0.105,1.432,-0.214,0.801,-0.189,-0.288,-0.182,-0.264,-0.218,0.271,0.049,-0.609,0.199,-0.349,-0.566,-0.057,-0.637,0.282,0.613,-0.523,-0.113,-1.326,0.375,-0.026,-0.448,-0.673,-0.185,-0.078,0.076,-0.859,-0.164,-0.682,-0.556,0.493,0.036,0.27,-0.273,-0.2,0.115,-0.704,-0.12,1.137,-0.024,-0.172,0.67,0.104,-0.276,0.123,-0.058,-1.439,0.184,-0.622,...,0.03,2.401,1.681,-1.312,-1.979,-0.511,-2.021,-1.442,-0.35,1.093,1.116,-2.876,-1.578,1.407,-2.171,-1.384,2.015,-0.087,-1.092,1.617,0.077,-0.278,-0.707,1.278,-0.595,0.174,0.808,0.6,-1.913,0.371,0.505,-0.109,-2.118,0.4,-0.102,0.675,0.338,-0.734,1.01,-0.527,1.811,0.562,-1.114,0.455,-0.552,0.903,1.788,-1.714,0.187,0.579,0.535,0.748,0.656,-0.184,0.981,1.027,-0.358,-1.43,1.224,-0.147,1.108,-0.96,-0.913,-0.409,-0.281,0.558,-1.223,0.17,-0.739,-0.198,-0.299,0.36,0.182,-0.384,-0.247,-0.283,0.187,0.038,0.51,0.444,-0.266,0.376,-0.504,0.149,0.352,-0.158,-0.646,0.064,-0.402,0.254,1.588,1.044,-0.176,-0.569,-0.745,0.23,-0.181,0.516,0.438,-0.291,-0.236,-0.415,0.755,0.301,-0.331,0.016,0.393,0.013,-0.264,0.444,0.193,0.043,-0.785,0.83,0.857,0.121,0.401,1.069,-0.181,0.593,-0.102,-1.296,-0.234,-0.453,-0.137,-0.719,0.779,0.692,0.405,0.494,-0.985,0.268,-0.082,0.185,-0.399,0.062,-0.192,-0.513,-0.665,-0.355,-0.21,-0.537,-0.605,-0.235,0.081,-0.852,0.402,-0.414,-0.422,0.041
1,id_001897cda,0.0,0.0,1.0,1.0,0.0,-0.183,0.232,1.208,-0.452,-0.365,-0.332,-1.882,0.402,-0.353,0.127,0.93,0.317,-1.012,-0.321,0.061,-0.539,-0.803,-1.06,-0.098,-0.816,-0.651,0.681,0.525,0.0,0.503,-0.15,-0.143,2.091,-0.656,-0.601,-0.41,-0.058,-0.361,0.22,-0.71,1.343,-0.961,-1.192,0.373,0.022,-0.179,-0.428,1.211,0.14,-1.094,0.775,0.0,2.369,-0.145,-1.08,-1.116,-0.121,0.067,-0.865,-1.015,0.538,1.306,0.287,-0.814,0.405,1.276,-0.646,0.54,-0.176,-0.027,0.71,0.851,0.987,0.791,-0.32,0.869,-1.29,-0.8,0.629,-1.23,-0.215,-1.302,-0.783,-0.716,-0.313,-0.894,1.054,0.603,0.643,0.997,1.518,-0.587,0.408,-0.635,-0.21,-0.022,-0.197,-0.156,0.395,0.748,-0.722,-1.725,0.6,-0.829,-0.107,0.738,0.968,-0.082,0.338,0.0,1.01,-0.867,-0.715,0.419,0.204,-0.8,-0.72,0.128,0.471,-0.929,0.3,0.688,1.337,1.312,1.383,0.263,0.182,-0.204,0.191,-0.458,0.552,-0.803,0.191,0.406,0.859,-0.296,0.406,0.0,0.488,-0.22,0.428,-0.609,0.297,0.004,-0.851,0.818,0.114,-1.346,0.243,...,4.751,0.476,0.22,-1.68,-2.409,-1.035,-0.737,-0.591,-1.464,0.265,-0.852,3.071,-0.073,-1.754,-2.791,0.164,3.392,-0.513,0.558,-1.761,-1.001,-0.12,0.245,1.049,-2.936,0.212,-0.538,-0.438,-0.404,0.454,0.032,2.077,-0.559,0.8,0.232,-1.395,-0.729,0.076,0.339,1.596,0.572,1.251,0.037,-0.78,-0.375,-0.416,0.868,-0.415,0.949,0.481,-0.076,0.758,-0.092,-0.681,1.231,1.487,-0.552,-0.137,-1.207,2.025,5.763,0.295,1.21,0.234,-0.327,-0.316,-0.377,-1.853,-0.984,1.155,-0.222,-0.478,-0.584,1.136,-0.384,-1.38,-1.532,0.63,0.044,0.52,1.049,-0.435,-0.464,0.289,0.246,-0.09,0.23,-0.49,0.72,-0.338,-0.824,0.429,-0.52,-0.785,-1.573,-0.041,-0.441,0.013,-0.836,1.045,-0.79,-0.262,0.797,-0.51,-0.102,0.084,0.443,0.792,-0.287,-1.19,0.6,0.003,-0.038,-0.588,0.569,0.486,0.134,0.722,-0.394,0.748,0.15,0.159,0.51,0.431,0.974,-0.24,0.228,-0.013,0.753,-0.163,0.176,0.963,-0.559,-0.971,0.174,0.017,0.098,-0.608,-0.005,-0.418,0.049,0.072,0.508,0.081,-0.257,-0.387,0.246,0.765,0.603,1.322
2,id_00276f245,1.0,0.0,0.0,0.0,1.0,0.483,0.196,0.383,0.424,-0.586,-1.202,0.6,-0.18,0.936,0.294,1.399,-1.108,1.161,-0.813,0.074,-0.31,-0.996,1.71,1.22,-0.126,-0.59,-0.796,-0.676,0.0,-0.882,0.761,-0.088,0.522,0.959,-0.476,0.269,0.475,0.02,0.278,-0.714,0.708,0.401,-0.018,0.59,-0.13,-0.102,-0.048,-0.763,-0.4,0.25,-1.945,0.388,0.11,0.362,-0.309,0.647,0.049,-0.347,0.476,-0.344,-0.243,1.041,0.349,0.93,-0.371,-0.406,-0.507,0.932,-0.23,-0.6,-1.758,-0.731,1.081,-0.977,-0.406,-0.039,0.286,-0.345,0.025,0.102,0.628,-0.597,-0.624,-0.477,0.797,-0.701,-0.234,0.397,0.034,0.949,0.762,-0.773,0.196,0.082,-1.121,-0.123,0.137,-0.279,-0.047,0.733,-0.814,0.089,-0.074,-0.084,0.272,-0.238,0.638,-0.404,-0.94,0.251,-0.889,0.8,-1.228,-0.069,0.43,1.122,0.366,1.601,0.15,0.428,0.534,0.059,-0.316,0.623,0.178,1.139,0.124,-0.248,-0.225,-0.304,-0.39,1.849,-0.93,-0.533,-0.243,4.207,-1.357,-0.775,0.304,1.047,0.351,-0.307,2.31,0.545,-1.149,1.062,0.979,0.002,0.022,...,1.402,1.091,-0.711,-0.35,-0.135,-1.903,-2.277,-0.951,0.869,-4.322,-1.066,-2.63,1.275,0.555,-1.753,2.222,-2.087,-1.834,1.471,-2.364,-2.549,1.342,-1.474,-0.952,0.164,1.108,0.006,0.703,-0.451,0.57,-2.159,-1.026,-1.131,0.925,0.675,-0.447,-0.998,1.994,-0.794,-0.795,-1.658,-1.53,-0.092,0.192,0.234,-1.073,1.179,2.107,-0.03,-0.325,-2.338,-0.917,-0.928,2.283,3.92,3.951,-1.303,0.106,-0.799,-0.317,1.89,-0.058,-1.108,-0.014,0.71,-0.143,0.171,0.701,0.976,0.554,-0.198,-0.075,-0.913,1.02,-0.205,0.626,-0.187,-0.537,-0.863,-0.496,0.705,-0.145,-0.446,-0.364,-0.032,-0.279,-0.361,0.147,-1.377,-0.554,0.358,-0.243,0.473,-0.013,0.377,-1.456,-0.026,0.049,-1.961,-0.375,-1.385,0.984,-0.003,0.941,0.649,-0.204,0.063,-1.357,-0.123,-0.144,0.041,0.088,-0.265,-0.493,-0.247,-0.336,-0.234,-0.667,0.563,-0.325,0.307,0.249,0.198,-0.925,0.09,0.596,0.675,0.336,0.358,0.443,-1.046,-0.01,-0.842,-0.71,-1.205,0.371,0.216,0.114,0.493,-0.413,-0.171,-0.15,-0.656,-0.042,0.284,0.58,-0.223,-0.646,0.406,-0.365
3,id_0027f1083,0.0,1.0,0.0,1.0,0.0,-0.398,-1.268,1.913,0.206,-0.586,-0.017,0.513,0.636,0.261,-1.112,-0.175,-0.205,-0.001,0.025,0.178,-0.131,1.07,0.048,-0.076,0.129,0.14,0.761,-0.11,0.0,-1.095,-0.078,0.905,-0.301,0.435,-0.156,0.21,0.485,0.114,0.674,-0.91,-0.669,1.632,-0.091,-0.284,-0.088,1.757,2.005,-0.824,0.323,0.238,-1.411,-0.138,0.063,0.22,-0.572,0.192,-0.386,0.156,0.19,0.498,0.742,1.111,0.492,-0.023,0.206,-0.725,0.595,0.311,0.692,1.025,-0.828,-0.551,-0.489,0.113,1.605,0.31,-0.011,1.706,-0.827,0.598,0.415,1.058,-0.332,0.459,-0.496,0.623,-0.388,-0.073,-0.738,-0.694,-1.42,-0.376,0.304,0.04,-0.217,-0.251,0.339,-0.405,0.167,-0.073,-0.232,0.469,0.322,0.628,0.313,-0.396,0.851,-0.507,-0.448,0.159,-0.251,0.415,0.699,0.543,0.384,0.965,0.391,-0.316,0.049,0.525,-0.327,-0.008,-0.234,-0.128,1.063,0.366,0.692,-0.213,0.107,-0.124,0.367,0.184,0.122,-0.733,-0.408,0.296,-0.498,0.0,-0.136,0.993,-0.574,0.824,0.886,-0.872,0.542,-0.157,-0.442,0.989,1.375,...,2.536,2.711,1.62,0.289,-0.216,0.602,0.733,-1.143,0.71,-0.207,-0.415,0.836,-0.201,-2.517,0.713,-1.106,-0.302,1.21,0.85,-0.14,-0.338,-0.932,-0.192,-0.107,-0.503,-0.046,0.634,1.076,-0.236,0.406,-0.512,0.056,1.326,-1.458,0.438,-1.603,0.579,1.011,-0.203,1.474,0.164,-0.041,1.016,0.764,-0.52,-1.775,0.55,1.668,-2.392,-0.597,-0.543,-1.853,1.074,-1.974,-0.176,1.002,0.833,1.161,1.179,-0.098,-5.079,1.393,-0.242,-0.659,-0.214,-1.488,0.503,-0.54,-0.317,0.57,-0.467,0.24,0.869,0.248,0.145,0.706,0.428,-0.748,-0.229,0.686,0.699,0.553,0.656,0.387,0.154,0.191,-0.156,0.211,0.275,0.018,-0.474,-0.684,-0.711,0.06,0.107,-0.092,-0.096,-0.458,-0.081,0.034,-0.135,-0.191,-0.352,0.674,0.132,0.446,-0.159,0.783,0.684,-0.403,-0.095,0.212,-0.673,-0.277,0.03,0.937,-0.304,-0.486,1.306,0.532,0.605,-0.935,0.052,0.479,-1.118,0.918,-0.926,0.302,-0.21,0.091,-0.632,-0.376,-0.834,-0.383,0.275,0.522,-0.579,0.343,-0.26,0.308,-0.553,0.555,-0.304,-0.459,-0.607,0.755,0.084,0.427,0.285,0.317
4,id_006fc47b8,0.0,1.0,0.0,0.0,1.0,0.366,0.554,-0.69,-1.627,0.524,-0.383,-0.465,1.007,0.373,0.081,-0.286,-0.718,-0.171,-0.626,-2.01,0.02,-0.924,0.677,-0.447,-1.239,-0.697,0.826,-0.422,0.423,0.577,-0.555,-0.333,-0.499,-0.326,-0.882,0.16,-0.126,0.229,-0.067,-0.973,0.824,-0.606,-1.002,-0.781,-1.012,-0.051,-0.422,1.028,1.531,-0.761,0.341,-0.245,-0.71,0.01,-0.135,-0.263,0.131,-0.008,-0.782,-0.59,0.945,1.12,-0.113,0.274,1.098,-0.825,0.687,1.083,0.137,0.76,-0.257,-0.379,-0.985,-0.154,-1.041,-0.046,-1.952,-0.396,0.074,-1.052,0.207,-0.93,0.618,-1.011,0.563,-0.167,0.074,1.402,-1.158,-0.435,0.071,0.524,-0.182,-0.126,-1.121,1.063,-2.567,0.092,-1.284,-0.727,0.149,-0.95,0.119,1.257,-0.708,0.709,1.289,0.377,-0.73,0.333,-0.334,-0.241,-1.331,-0.065,1.189,0.352,0.254,0.732,-0.831,-0.257,-0.216,0.479,0.189,-0.223,0.559,0.588,1.063,-1.045,0.176,-0.937,0.301,-0.511,-0.103,-0.439,0.929,-1.208,0.727,-0.902,-1.419,-1.25,-0.728,-0.638,2.008,-0.7,-1.836,1.203,-0.587,-1.184,-0.404,...,2.367,3.264,-0.951,-2.015,-1.445,-1.204,-2.52,-0.716,0.924,-2.365,1.747,-0.372,-0.854,0.624,1.525,-3.371,-0.468,-1.021,-0.48,-2.266,0.049,-3.145,0.895,-1.387,1.578,-1.638,-0.654,1.311,1.394,0.964,-1.612,1.007,1.173,-0.078,0.525,-0.886,0.572,-1.167,0.612,0.451,0.869,-0.022,0.043,0.363,-0.777,-2.256,1.018,0.932,1.034,0.738,-0.697,-0.742,0.81,2.428,0.473,-0.392,-0.219,1.326,0.188,0.008,6.983,-0.03,0.69,0.212,-0.271,0.32,1.197,0.272,1.338,-1.563,-0.32,0.129,0.479,2.06,-0.263,-1.701,-0.426,0.455,-0.732,0.004,0.9,0.484,-0.829,-0.221,-0.103,-0.511,-0.461,0.17,-0.494,0.566,-0.274,0.448,0.117,1.119,0.665,0.584,-0.049,0.406,-0.765,1.135,0.447,-0.759,-0.409,0.471,-0.214,0.253,-0.591,-0.129,-0.584,-0.422,-1.539,0.868,0.406,0.909,-0.476,0.652,1.046,-0.062,0.514,-0.259,0.923,-0.627,1.124,0.037,-0.976,0.68,-0.275,0.642,0.252,0.828,0.641,0.014,-0.507,-0.411,-0.578,0.584,-0.055,-0.851,-0.124,0.717,0.125,-0.094,0.375,0.058,-0.44,0.254,-0.599,-0.048,-0.421,-1.402


(21948, 207)

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gtpase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_000644bb2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,id_000779bfc,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,id_000a6266a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,id_0015fd391,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [12]:
display(train_non_scored.shape, train_non_scored.head(), target_non_scored.shape, target_non_scored.head())

(21948, 1635)

Unnamed: 0,sig_id,cp_time_ohe_0,cp_time_ohe_1,cp_time_ohe_2,cp_dose_ohe_0,cp_dose_ohe_1,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,g-8,g-9,g-10,g-11,g-12,g-13,g-14,g-15,g-16,g-17,g-18,g-19,g-20,g-21,g-22,g-23,g-24,g-25,g-26,g-27,g-28,g-29,g-30,g-31,g-32,g-33,g-34,g-35,g-36,g-37,g-38,g-39,g-40,g-41,g-42,g-43,g-44,g-45,g-46,g-47,g-48,g-49,g-50,g-51,g-52,g-53,g-54,g-55,g-56,g-57,g-58,g-59,g-60,g-61,g-62,g-63,g-64,g-65,g-66,g-67,g-68,g-69,g-70,g-71,g-72,g-73,g-74,g-75,g-76,g-77,g-78,g-79,g-80,g-81,g-82,g-83,g-84,g-85,g-86,g-87,g-88,g-89,g-90,g-91,g-92,g-93,g-94,g-95,g-96,g-97,g-98,g-99,g-100,g-101,g-102,g-103,g-104,g-105,g-106,g-107,g-108,g-109,g-110,g-111,g-112,g-113,g-114,g-115,g-116,g-117,g-118,g-119,g-120,g-121,g-122,g-123,g-124,g-125,g-126,g-127,g-128,g-129,g-130,g-131,g-132,g-133,g-134,g-135,g-136,g-137,g-138,g-139,g-140,g-141,g-142,g-143,...,neurotransmitter,neurotrophic_agent,nfkb_activator,niemann-pick_c1-like_1_protein_antagonist,nitric_oxide_scavenger,nitric_oxide_stimulant,nociceptin_orphanin_fq_(nop)_receptor_antagonist,non-nucleoside_reverse_transcriptase_inhibitor,nootropic_agent,nop_receptor_agonist,noradrenaline_uptake_inhibitor,norepinephrine_inhibitor,notch_signaling_inhibitor,ntpdase_inhibitor,nucleoside_reverse_transcriptase_inhibitor,oct_activator,omega_3_fatty_acid_stimulant,osteoclast_inhibitor,oxidizing_agent,oxidosqualene_cyclase_inhibitor,oxytocin_receptor_agonist,oxytocin_receptor_antagonist,p21_activated_kinase_inhibitor,p53_activator,p53_inhibitor,paba_antagonist,pdk1_inhibitor,penicillin_binding_protein_inhibitor,peptidase_inhibitor,perk_inhibitor,phosphatase_inhibitor,phosphofructokinase_inhibitor,phospholipase_activator,pim_inhibitor,pka_activator,pka_inhibitor,plasminogen_activator_inhibitor,platelet_activating_factor_receptor_antagonist,platelet_aggregation_inhibitor,plk_inhibitor,porcupine_inhibitor,potassium_channel_agonist,potassium_channel_blocker,prmt_inhibitor,progestogen_hormone,prolactin_inhibitor,prostacyclin_analog,prostanoid_receptor_agonist,prostanoid_receptor_inhibitor,protease_inhibitor,protein_kinase_activator,protein_synthesis_stimulant,psychoactive_drug,purine_antagonist,purinergic_receptor_antagonist,pxr_ligand,pyruvate_dehydrogenase_inhibitor,pyruvate_kinase_isozyme_activator,quorum_sensing_signaling_modulator,rad51_inhibitor,rage_receptor_antagonist,receptor_tyrosine_protein_kinase_inhibitor,reducing_agent,ret_inhibitor,ret_tyrosine_kinase_inhibitor,reverse_transcriptase_inhibitor,ribosomal_protein_inhibitor,ripk_inhibitor,rna_synthesis_inhibitor,ror_inverse_agonist,rsv_fusion_inhibitor,s100a9_inhibitor,sars_coronavirus_3c-like_protease_inhibitor,sedative,selective_estrogen_receptor_modulator_(serm),selective_serotonin_reuptake_inhibitor_(ssri),serine_protease_inhibitor,serine_threonine_kinase_inhibitor,serine_threonine_protein_phosphatase_activator,serotonin_release_inhibitor,sirt_activator,sirt_inhibitor,smoothened_receptor_agonist,sodium_calcium_exchange_inhibitor,sodium_channel_activator,sodium_channel_blocker,somatostatin_receptor_agonist,sphingosine_1_phosphate_receptor_agonist,sphingosine_kinase_inhibitor,src_activator,srebp_inhibitor,stat_inhibitor,stearoyl-coa_desaturase_inhibitor,steroid_sulfatase_inhibitor,steroidal_progestin,sterol_demethylase_inhibitor,sterol_regulatory_element_binding_protein_(srebp)_inhibitor,steryl_sulfatase_inhibitor,structural_glycoprotein_antagonist,succinimide_antiepileptic,sulfonylurea,synthetic_estrogen,t_cell_inhibitor,tankyrase_inhibitor,telomerase_inhibitor,testosterone_receptor_antagonist,thiazide_diuretic,thioredoxin_inhibitor,thrombopoietin_receptor_agonist,thromboxane_receptor_antagonist,thromboxane_synthase_inhibitor,thyroid_hormone_inhibitor,thyroid_hormone_stimulant,thyrotropin_releasing_hormone_receptor_agonist,tie_inhibitor,tissue_transglutaminase_inhibitor,topical_anesthetic,topical_sunscreen_agent,trace_amine_associated_receptor_agonist,trace_amine_associated_receptor_antagonist,trail_modulator,transient_receptor_potential_channel_agonist,triacylglycerol_lipase_inhibitor,tricyclic_antidepressant,tryptophan_hydroxylase_inhibitor,tyrosinase_inhibitor,tyrosine_hydroxylase_inhibitor,tyrosine_phosphatase_inhibitor,ubiquitin-conjugating_enzyme_inhibitor,ubiquitin_ligase_inhibitor,urease_inhibitor,uric_acid_diuretic,uricase_inhibitor,uricosuric,urotensin_receptor_agonist,urotensin_receptor_antagonist,vasoconstrictor,vasodilator,vasopressin_receptor_agonist,vasopressin_receptor_antagonist,ve-cadherin_antagonist,vesicular_monoamine_transporter_inhibitor,vitamin_k_antagonist,voltage-gated_calcium_channel_ligand,voltage-gated_potassium_channel_activator,voltage-gated_sodium_channel_blocker,wdr5_mll_interaction_inhibitor,wnt_agonist,xanthine_oxidase_inhibitor,xiap_inhibitor
0,id_000644bb2,1.0,0.0,0.0,1.0,0.0,1.062,0.558,-0.248,-0.621,-0.194,-1.012,-1.022,-0.033,0.555,-0.092,1.183,0.153,0.557,-0.402,0.179,-0.653,-0.797,0.634,0.178,-0.369,-0.569,-1.136,-1.188,0.694,0.439,0.266,0.191,0.163,-0.285,0.582,0.293,-0.558,-0.092,-0.301,-0.154,0.22,0.296,-0.505,-0.512,-0.216,-0.035,-0.257,-1.198,0.312,-1.033,0.221,-0.114,-0.025,0.035,0.423,-0.122,-0.493,0.612,0.471,-0.035,-0.161,-0.675,0.226,0.579,0.959,0.606,-0.13,0.661,-0.314,-0.202,0.025,0.167,-0.419,0.655,-0.74,-0.705,-0.537,0.031,-0.524,-0.341,-0.633,-1.033,-0.376,-0.855,0.735,-0.223,-0.196,-0.001,-2.144,1.732,0.019,-0.119,0.405,-1.584,-0.301,0.205,-1.123,-1.559,-1.798,0.702,-1.039,0.096,0.453,0.545,-0.083,-0.536,0.552,-0.777,-0.804,-0.118,-1.256,0.159,-0.7,-1.747,0.712,-0.763,-0.029,1.267,0.473,-0.99,0.439,-0.119,-0.035,0.468,1.975,0.054,1.288,1.226,0.049,-0.862,-1.277,-0.181,-0.26,-0.513,-0.023,-0.226,-0.735,0.312,-0.877,0.354,0.283,-0.35,-0.437,-0.465,-0.494,0.53,1.242,-1.187,0.438,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,id_000779bfc,0.0,0.0,1.0,1.0,0.0,0.074,0.409,0.299,0.06,1.019,0.521,0.234,0.337,-0.405,0.851,-1.152,-0.42,-0.096,0.459,0.08,0.225,0.529,0.284,-0.349,0.288,0.945,-0.165,-0.266,-0.337,0.314,-0.432,0.477,0.207,-0.422,-0.116,-0.05,-0.263,0.996,-0.248,0.266,-0.21,0.166,0.53,-0.257,-0.045,0.119,-0.396,-1.173,0.451,1.925,1.101,0.265,-0.361,0.171,-0.89,1.273,-0.504,-0.228,-0.734,-0.428,-0.123,-0.633,0.476,0.325,-0.741,-0.581,1.784,-0.109,1.035,0.409,-0.695,-0.803,0.095,-0.405,0.12,0.196,0.072,-0.246,-0.108,0.401,-0.586,0.893,0.284,1.187,0.731,0.773,-1.592,-1.35,0.005,-0.651,-0.398,0.698,0.138,0.256,0.256,0.056,-0.385,-0.653,-0.174,-0.019,-0.491,-1.382,0.43,-0.397,0.135,0.713,-0.072,-0.86,-0.386,-0.095,-0.593,-0.163,-0.236,0.232,0.049,0.73,0.027,-0.148,0.27,0.884,-0.294,-0.481,0.027,-1.214,1.763,0.786,0.023,0.015,0.08,-0.807,1.134,-0.254,-0.164,-0.653,1.148,0.217,-0.049,0.081,-0.806,-0.306,-0.408,0.504,0.747,-0.042,0.955,-0.478,-1.391,0.19,-0.469,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,id_000a6266a,0.0,1.0,0.0,1.0,0.0,0.628,0.582,1.554,-0.076,-0.032,1.239,0.172,0.215,0.006,1.23,-0.48,-0.563,-0.037,-1.83,0.606,-0.328,0.604,-0.307,-0.115,-0.057,-0.08,-0.818,-1.532,0.231,0.49,0.478,-1.397,4.624,-0.044,1.287,-1.853,0.607,0.429,0.178,0.002,-1.18,0.126,-0.122,5.447,1.031,0.348,-0.556,0.036,-0.364,-0.465,-0.835,0.111,-0.761,-0.498,0.455,-0.098,-0.266,0.606,0.269,-0.619,-0.444,1.254,0.062,-0.176,-1.437,0.555,0.835,-0.333,-0.293,-0.164,-1.128,-0.029,0.094,0.592,0.681,0.782,0.68,0.963,0.406,-0.071,-1.357,0.231,-0.413,-0.141,0.308,-0.427,-0.807,1.005,-0.068,-0.062,-0.156,0.408,0.716,-0.204,-0.925,0.854,0.429,0.34,1.687,-0.628,0.255,0.682,0.096,-0.306,0.734,-0.765,-0.315,0.685,0.733,0.0,-0.446,-0.96,-0.26,-0.744,0.139,-0.12,-0.008,-0.899,-0.163,-0.383,-0.124,-0.745,-0.445,-0.247,-0.018,0.255,-0.963,1.066,0.181,0.241,0.918,-0.049,-0.389,-0.284,0.305,-0.175,-0.346,-0.619,-0.689,0.653,-0.03,-0.075,-0.009,-0.406,-0.774,-1.126,-0.812,0.393,1.806,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,id_0015fd391,0.0,1.0,0.0,1.0,0.0,-0.514,-0.249,-0.266,0.529,4.062,-0.809,-1.959,0.179,-0.132,-1.06,-0.827,-0.358,-0.851,-0.584,-2.569,0.818,-0.053,-0.855,0.116,-2.352,2.12,-1.158,-0.719,-0.8,-1.467,-0.011,-0.899,0.241,-0.248,-1.089,-0.757,0.088,-2.737,0.875,0.579,-1.674,-1.672,-1.269,3.09,-0.381,-0.723,-0.001,0.135,-1.64,-0.748,-1.082,-0.152,0.075,-0.485,0.664,-2.823,-0.604,-1.355,2.02,-0.53,-1.324,-0.594,0.433,0.09,-1.524,0.394,1.463,-1.21,-1.234,-1.03,0.798,0.231,0.119,1.537,0.387,-0.404,0.977,-0.916,1.272,1.367,-3.321,-1.051,-0.707,1.038,-0.73,-0.98,0.134,0.326,0.564,0.896,1.028,0.051,-0.474,1.164,0.276,1.631,4.916,2.369,0.683,-1.481,1.431,0.169,-1.722,0.089,-0.812,1.039,-0.085,2.057,-0.797,0.0,1.672,-0.162,-0.947,-0.494,-0.265,-0.703,-0.358,-0.136,-2.053,0.281,-0.016,-0.537,-1.897,0.064,0.233,-0.721,-1.396,0.649,-0.258,1.416,-1.542,0.455,0.23,0.858,0.213,0.097,3.085,0.02,-0.392,-1.291,0.284,-0.52,-0.028,1.014,1.13,-0.383,-0.277,0.599,-0.515,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,0.0,0.0,1.0,0.0,1.0,-0.325,-0.401,0.97,0.692,1.418,-0.824,-0.28,-0.15,-0.879,0.863,-0.222,-0.512,-0.958,1.175,0.204,0.197,0.124,-1.709,-0.354,-0.516,-0.333,-0.269,0.765,0.206,1.372,0.683,0.806,-0.375,-1.209,0.296,-0.071,0.639,0.667,-0.078,1.174,-0.711,-1.447,1.062,0.789,-0.085,0.13,0.122,0.958,0.213,0.616,1.157,1.114,-0.942,-0.378,-0.815,-0.08,-0.382,0.688,0.0,0.178,1.132,-0.118,1.213,0.771,0.674,0.787,1.641,0.181,1.348,1.107,0.539,0.0,0.126,-0.156,-0.544,0.859,0.58,0.935,-0.751,-1.116,-0.787,0.168,-0.309,0.663,-1.7,0.425,-1.17,-0.874,0.617,-1.216,-0.149,-2.252,0.682,1.48,0.907,-0.798,0.802,1.299,1.069,-0.152,-0.8,1.194,-0.233,0.234,-0.438,0.097,-1.188,-1.303,0.619,0.0,1.179,-0.852,-0.28,0.469,-0.536,-0.225,-2.351,-0.918,0.3,0.86,-2.335,-0.652,0.048,-1.678,1.189,-3.848,0.735,-1.069,-1.395,-0.629,1.197,-1.388,1.203,0.66,0.624,1.533,2.039,0.453,-1.421,-0.187,-1.704,0.915,0.377,0.913,0.502,0.085,-2.428,-0.86,-0.382,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


(21948, 403)

Unnamed: 0,sig_id,abc_transporter_expression_enhancer,abl_inhibitor,ace_inhibitor,acetylcholine_release_enhancer,adenosine_deaminase_inhibitor,adenosine_kinase_inhibitor,adenylyl_cyclase_inhibitor,age_inhibitor,alcohol_dehydrogenase_inhibitor,aldehyde_dehydrogenase_activator,aldose_reductase_inhibitor,alpha_mannosidase_inhibitor,ampk_inhibitor,androgen_biosynthesis_inhibitor,angiotensin_receptor_agonist,antacid,anthelmintic,antihypertensive,antimetabolite,antipruritic,antirheumatic_drug,antiseptic,antispasmodic,antithyroid_agent,antitussive,anxiolytic,ap_inhibitor,apolipoprotein_expression_enhancer,apoptosis_inhibitor,arf_inhibitor,aryl_hydrocarbon_receptor_agonist,aryl_hydrocarbon_receptor_antagonist,aspartic_protease_inhibitor,atherogenesis_inhibitor,atherosclerosis_formation_inhibitor,atp-sensitive_potassium_channel_agonist,atp-sensitive_potassium_channel_inhibitor,atp_channel_activator,atp_channel_blocker,atp_citrase_lyase_inhibitor,autophagy_inducer,axl_kinase_inhibitor,bacterial_atpase_inhibitor,bacterial_permeability_inducer,bacterial_protein_synthesis_inhibitor,benzodiazepine_receptor_antagonist,beta_catenin_inhibitor,beta_lactamase_inhibitor,beta_secretase_inhibitor,big1_inhibitor,bile_acid,biliverdin_reductase_a_activator,bone_resorption_inhibitor,botulin_neurotoxin_inhibitor,bradykinin_receptor_antagonist,breast_cancer_resistance_protein_inhibitor,bronchodilator,calcitonin_antagonist,calcium_channel_activator,calmodulin_inhibitor,calpain_inhibitor,camp_stimulant,capillary_stabilizing_agent,car_agonist,car_antagonist,carboxylesterase_inhibitor,carcinogen,cardiac_glycoside,carnitine_palmitoyltransferase_inhibitor,caspase_inhibitor,cathepsin_inhibitor,cc_chemokine_receptor_agonist,cdc_inhibitor,cdk_expression_enhancer,cell_cycle_inhibitor,cell_proliferation_inhibitor,ceramidase_inhibitor,cftr_channel_agonist,cftr_channel_antagonist,chitin_inhibitor,chloride_channel_activator,choleretic_agent,cholinergic_receptor_agonist,cholinesterase_inhibitor,clk_inhibitor,coenzyme_a_precursor,collagenase_inhibitor,collapsin_response_mediator_protein_stimulant,coloring_agent,complement_antagonist,complement_inhibitor,contraceptive_agent,contrast_agent,corticosteroid_antagonist,cyclin_d_inhibitor,cysteine_peptidase_inhibitor,cytidine_deaminase_inhibitor,cytokine_production_inhibitor,dehydrogenase_inhibitor,deubiquitinase_inhibitor,diacylglycerol_kinase_inhibitor,diacylglycerol_o_acyltransferase_inhibitor,differentiation_inducer,dihydroorotate_dehydrogenase_inhibitor,dihydropteroate_synthase_inhibitor,dihydropyrimidine_dehydrogenase_inhibitor,dna_dependent_protein_kinase_inhibitor,dna_methyltransferase_inhibitor,dna_polymerase_inhibitor,dna_repair_enzyme_inhibitor,dna_synthesis_inhibitor,dopamine_release_enhancer,dot1l_inhibitor,dynamin_inhibitor,dyrk_inhibitor,dystrophin_stimulant,endothelin_receptor_antagonist,enkephalinase_inhibitor,ephrin_inhibitor,epoxide_hydolase_inhibitor,etv1_inhibitor,eukaryotic_translation_initiation_factor_inhibitor,exportin_antagonist,fabi_inhibitor,farnesyl_pyrophosphate_synthase_inhibitor,fatty_acid_receptor_antagonist,fatty_acid_synthase_inhibitor,folate_receptor_ligand,free_fatty_acid_receptor_agonist,"fungal_1,3-beta-d-glucan_synthase_inhibitor",fungal_ergosterol_inhibitor,fungal_lanosterol_demethylase_inhibitor,fxr_agonist,fxr_antagonist,g_protein-coupled_receptor_agonist,g_protein-coupled_receptor_antagonist,g_protein_signaling_inhibitor,gaba_gated_chloride_channel_blocker,gaba_receptor_modulator,gaba_uptake_inhibitor,gap_junction_modulator,gastrin_inhibitor,gat_inhibitor,glcnac_phosphotransferase_inhibitor,gli_antagonist,glp_receptor_agonist,glucagon_receptor_antagonist,glucocorticoid_receptor_antagonist,glucokinase_activator,...,neurotransmitter,neurotrophic_agent,nfkb_activator,niemann-pick_c1-like_1_protein_antagonist,nitric_oxide_scavenger,nitric_oxide_stimulant,nociceptin_orphanin_fq_(nop)_receptor_antagonist,non-nucleoside_reverse_transcriptase_inhibitor,nootropic_agent,nop_receptor_agonist,noradrenaline_uptake_inhibitor,norepinephrine_inhibitor,notch_signaling_inhibitor,ntpdase_inhibitor,nucleoside_reverse_transcriptase_inhibitor,oct_activator,omega_3_fatty_acid_stimulant,osteoclast_inhibitor,oxidizing_agent,oxidosqualene_cyclase_inhibitor,oxytocin_receptor_agonist,oxytocin_receptor_antagonist,p21_activated_kinase_inhibitor,p53_activator,p53_inhibitor,paba_antagonist,pdk1_inhibitor,penicillin_binding_protein_inhibitor,peptidase_inhibitor,perk_inhibitor,phosphatase_inhibitor,phosphofructokinase_inhibitor,phospholipase_activator,pim_inhibitor,pka_activator,pka_inhibitor,plasminogen_activator_inhibitor,platelet_activating_factor_receptor_antagonist,platelet_aggregation_inhibitor,plk_inhibitor,porcupine_inhibitor,potassium_channel_agonist,potassium_channel_blocker,prmt_inhibitor,progestogen_hormone,prolactin_inhibitor,prostacyclin_analog,prostanoid_receptor_agonist,prostanoid_receptor_inhibitor,protease_inhibitor,protein_kinase_activator,protein_synthesis_stimulant,psychoactive_drug,purine_antagonist,purinergic_receptor_antagonist,pxr_ligand,pyruvate_dehydrogenase_inhibitor,pyruvate_kinase_isozyme_activator,quorum_sensing_signaling_modulator,rad51_inhibitor,rage_receptor_antagonist,receptor_tyrosine_protein_kinase_inhibitor,reducing_agent,ret_inhibitor,ret_tyrosine_kinase_inhibitor,reverse_transcriptase_inhibitor,ribosomal_protein_inhibitor,ripk_inhibitor,rna_synthesis_inhibitor,ror_inverse_agonist,rsv_fusion_inhibitor,s100a9_inhibitor,sars_coronavirus_3c-like_protease_inhibitor,sedative,selective_estrogen_receptor_modulator_(serm),selective_serotonin_reuptake_inhibitor_(ssri),serine_protease_inhibitor,serine_threonine_kinase_inhibitor,serine_threonine_protein_phosphatase_activator,serotonin_release_inhibitor,sirt_activator,sirt_inhibitor,smoothened_receptor_agonist,sodium_calcium_exchange_inhibitor,sodium_channel_activator,sodium_channel_blocker,somatostatin_receptor_agonist,sphingosine_1_phosphate_receptor_agonist,sphingosine_kinase_inhibitor,src_activator,srebp_inhibitor,stat_inhibitor,stearoyl-coa_desaturase_inhibitor,steroid_sulfatase_inhibitor,steroidal_progestin,sterol_demethylase_inhibitor,sterol_regulatory_element_binding_protein_(srebp)_inhibitor,steryl_sulfatase_inhibitor,structural_glycoprotein_antagonist,succinimide_antiepileptic,sulfonylurea,synthetic_estrogen,t_cell_inhibitor,tankyrase_inhibitor,telomerase_inhibitor,testosterone_receptor_antagonist,thiazide_diuretic,thioredoxin_inhibitor,thrombopoietin_receptor_agonist,thromboxane_receptor_antagonist,thromboxane_synthase_inhibitor,thyroid_hormone_inhibitor,thyroid_hormone_stimulant,thyrotropin_releasing_hormone_receptor_agonist,tie_inhibitor,tissue_transglutaminase_inhibitor,topical_anesthetic,topical_sunscreen_agent,trace_amine_associated_receptor_agonist,trace_amine_associated_receptor_antagonist,trail_modulator,transient_receptor_potential_channel_agonist,triacylglycerol_lipase_inhibitor,tricyclic_antidepressant,tryptophan_hydroxylase_inhibitor,tyrosinase_inhibitor,tyrosine_hydroxylase_inhibitor,tyrosine_phosphatase_inhibitor,ubiquitin-conjugating_enzyme_inhibitor,ubiquitin_ligase_inhibitor,urease_inhibitor,uric_acid_diuretic,uricase_inhibitor,uricosuric,urotensin_receptor_agonist,urotensin_receptor_antagonist,vasoconstrictor,vasodilator,vasopressin_receptor_agonist,vasopressin_receptor_antagonist,ve-cadherin_antagonist,vesicular_monoamine_transporter_inhibitor,vitamin_k_antagonist,voltage-gated_calcium_channel_ligand,voltage-gated_potassium_channel_activator,voltage-gated_sodium_channel_blocker,wdr5_mll_interaction_inhibitor,wnt_agonist,xanthine_oxidase_inhibitor,xiap_inhibitor
0,id_000644bb2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,id_000779bfc,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,id_000a6266a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,id_0015fd391,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [13]:
display(len(target_cols), len(target_cols_non_scored), len(feature_cols))

206

402

1232

# Definition

In [14]:
# seedの固定
def seed_everything(seed=config.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything()

# Model

In [20]:
# Function to calculate the mean log loss of the targets including clipping
def mean_log_loss(y_true, y_pred):
    y_pred = np.clip(y_pred, config.p_min, config.p_max)
    metrics = []
    for target in range(len(target_cols)):
        metrics.append(log_loss(y_true[:, target], y_pred[:, target]))
    return np.mean(metrics)


# カスタムloss
def logloss(y_true, y_pred):
    y_pred = tf.clip_by_value(y_pred, config.p_min, config.p_max)
    return -backend.mean(y_true*backend.log(y_pred) + (1-y_true)*backend.log(1-y_pred))


def create_model_3l(feature_len, target_len):
    inp = tf.keras.layers.Input(shape=(feature_len))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dropout(0.4914099166744246)(x)
    x = tfa.layers.WeightNormalization(tf.keras.layers.Dense(1159, activation = 'relu'))(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.18817607797795838)(x)
    x = tfa.layers.WeightNormalization(tf.keras.layers.Dense(960, activation = 'relu'))(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.12542057776853896)(x)
    x = tfa.layers.WeightNormalization(tf.keras.layers.Dense(1811, activation = 'relu'))(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.20175242230280122)(x)
    out = tfa.layers.WeightNormalization(tf.keras.layers.Dense(target_len, activation = 'sigmoid'))(x)
    model = tf.keras.models.Model(inputs=inp, outputs=out)
    opt = tf.optimizers.Adam(learning_rate=LEARNING_RATE)
    opt = tfa.optimizers.Lookahead(opt, sync_period=10)
    # model.compile(optimizer = opt, 
    #               loss = tf.keras.losses.BinaryCrossentropy(label_smoothing = 0.0015),
    #               metrics = tf.keras.metrics.BinaryCrossentropy())
    model.compile(
        optimizer=opt, 
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=config.p_min),
        metrics=tf.keras.metrics.BinaryCrossentropy()
    )
    return model

# HyperParameters

In [21]:
LEARNING_RATE = config.lr
WEIGHT_DECAY = config.weight_decay
BATCH_SIZE = config.batch_size
EPOCHS = config.epochs
SEED_AVG = config.seed_avg
NFOLDS = config.nfold
VERBOSE = config.verbose

if config.is_debug:
    EPOCHS = 3
    SEED_AVG = [0, 101]
    NFOLDS = 3
    VERBOSE = 1

In [22]:
print(f'debug: {config.is_debug}')
print(f'lr: {LEARNING_RATE}')
print(f'weight_decay: {WEIGHT_DECAY}')
print(f'batch_size: {BATCH_SIZE}')
print(f'epochs: {EPOCHS}')
print(f'seed_svg: {SEED_AVG}')
print(f'nfold: {NFOLDS}')
print(f'verbose: {VERBOSE}')

debug: False
lr: 0.001
weight_decay: 1e-05
batch_size: 128
epochs: 80
seed_svg: [0, 101, 202, 303, 404]
nfold: 5
verbose: 0


# Training

In [28]:
def run_training(tr_idx, va_idx, fold, seed):

    # SEEDの設定    
    seed_everything(seed)

    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))
    predictions = np.zeros((len(test), target.iloc[:, 1:].shape[1]))

    # trainとvalidationのDFを生成
    train_df = train.iloc[tr_idx]
    valid_df = train.iloc[va_idx]
    x_train, y_train  = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid =  valid_df[feature_cols].values, valid_df[target_cols].values

    K.clear_session()
    model = create_model_3l(len(feature_cols), len(target_cols))
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor = 'val_binary_crossentropy',
        mode = 'min',
        patience = 10,
        restore_best_weights = True,
        verbose = VERBOSE
    )
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor = 'val_binary_crossentropy',
        mode = 'min',
        factor = 0.3,
        patience = 3,
        verbose = VERBOSE
    )
    # checkpoint_path = f'repeat:{seed}_Fold:{fold}.hdf5'
    # cb_checkpt = ModelCheckpoint(
    #     checkpoint_path,
    #     monitor = 'val_loss',
    #     verbose = VERBOSE,
    #     save_best_only = True,
    #     save_weights_only = True,
    #     mode = 'min'
    # )
    
    model.fit(
        x_train, y_train,
        validation_data = (x_valid, y_valid),
        epochs = EPOCHS, 
        batch_size = BATCH_SIZE,
        callbacks = [early_stopping, reduce_lr],
        verbose = VERBOSE
    )

    # model.load_weights(checkpoint_path)
    oof[va_idx] = model.predict(x_valid)
    predictions = model.predict(test[feature_cols].values)
    
    return oof, predictions

### train

In [29]:
%%time

oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))

for seed in SEED_AVG:
    print(f'============== Training SEED is {seed} ==============')
    start = time.time()

    oof_fold = np.zeros((len(train), len(target_cols)))
    predictions_fold = np.zeros((len(test), len(target_cols)))

    # CV
    mskf = MultilabelStratifiedKFold(n_splits=NFOLDS, random_state=seed, shuffle=True)

    for i_fold, (tr_idx, va_idx) in enumerate(tqdm(mskf.split(X=train, y=target))):
        # 学習
        oof_, pred_ = run_training(tr_idx, va_idx, i_fold, seed)

        # fold間のスコア
        oof_fold += oof_
        predictions_fold += pred_ / NFOLDS

    oof_score = mean_log_loss(target.drop('sig_id', axis=1).values, oof_fold)
    print(f'seed: {seed} fold mean log loss score is {oof_score}')

    # seed間のスコア
    oof += oof_fold / len(SEED_AVG)
    predictions += predictions_fold / len(SEED_AVG)
    
    elapsed_time = time.time() - start
    print(f'SEED: {seed} Elapsed_time:{elapsed_time:.4f} sec \n')

seed_log_loss = mean_log_loss(target.drop('sig_id', axis=1).values, oof)
print(f'Our out of folds log loss for our seed blend model is {seed_log_loss}')

print(f'==================== Training END ====================')



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


seed: 0 fold mean log loss score is 0.016439219424393536
SEED: 0 Elapsed_time:315.8719 sec


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


seed: 101 fold mean log loss score is 0.01643500737978605
SEED: 101 Elapsed_time:295.2210 sec


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


seed: 202 fold mean log loss score is 0.016459931271354068
SEED: 202 Elapsed_time:314.4969 sec


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


seed: 303 fold mean log loss score is 0.016437664951808133
SEED: 303 Elapsed_time:326.4092 sec


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


seed: 404 fold mean log loss score is 0.016427193065563134
SEED: 404 Elapsed_time:330.4052 sec
Our out of folds log loss for our seed blend model is 0.01610288709831343
CPU times: user 30min 58s, sys: 2min 14s, total: 33min 12s
Wall time: 26min 23s


# Submission

In [77]:
# 予測値の設定
train[target_cols] = oof
test[target_cols] = predictions
print(len(target_cols))

valid_results = train_targets_scored.drop(columns=target_cols).merge(train[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)
print(train_targets_scored.shape, valid_results.shape)

206
(23814, 207) (23814, 207)


In [78]:
def log_loss_metric(y_true, y_pred):
    """numpy用
    """
    metrics = 0
    for i in range(len(target_cols)):
        score_ = log_loss(y_true[:, i], y_pred[:, i])
        metrics += score_ / target.shape[1]
    return metrics


def log_loss_metric_df(y_true, y_pred):
    """df用
    """
    metrics = []
    for _target in target_cols:
        metrics.append(log_loss(y_true.loc[:, _target], y_pred.loc[:, _target].astype(float), labels = [0,1]))
    return np.mean(metrics)

In [79]:
# numpy
# y_true = train_targets_scored[target_cols].values
# y_pred = valid_results[target_cols].values

# df
y_true = train_targets_scored[target_cols]
y_pred = valid_results[target_cols]

# score = log_loss_metric(y_true, y_pred)
score = log_loss_metric_df(y_true, y_pred)
org_score = score
print("CV log_loss: ", score)

CV log_loss:  0.014691349446479957


In [80]:
# clip
y_pred.loc[:, target_cols] = np.clip(y_pred[target_cols].values, config.p_min, config.p_max)
score = log_loss_metric_df(y_true, y_pred)
clip_score = score
print("CV after clip log_loss: ", score)

CV after clip log_loss:  0.014919505318562608


In [81]:
sub = sample_submission.drop(columns=target_cols).merge(test[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)
if config.is_sub_clipping:
    print('clipping...')
    sub.loc[:, target_cols] = np.clip(sub[target_cols].values, config.p_min, config.p_max)

clipping...


In [82]:
# kaggle suibmit
if config.is_kaggle:
    sub.to_csv('submission.csv', index=False)

# configファイルの保存

In [84]:
config_str = ''
for key, value in Config.__dict__.items():
    if not key.startswith('__'):
        config_str += f'{key}: {value}\n'

config_str += 'CV log_loss:' + str(org_score) + '\n'
config_str += 'CV clip log_loss:' + str(clip_score)
# 日付を取得
data_str = datetime.datetime.now().strftime("%m%d%H%M")

if config.is_kaggle:
    config_dir = './'
else:
    config_dir = '/content/drive/My Drive/ML/kaggle/MoA/score/'

with open(config_dir + str(round(org_score, 7)) + '_' + data_str + '_config_score.txt', mode='w') as f:
    f.write(config_str)

ベストのCV log_loss: 0.014564886043488417

---
PCA： 0.014782093830112145  
PCA + SVD: 0.014742253743297256  
PCA + SVD + kmeans: 0.014748313722749552  
PCA + SVD + FA: 0.014717234352890492  
PCA + SVD + FA + g**2特徴量: 0.014847399190757107