- stop using separate (per-target) selected feature subsets for the difficult targets
- change early stopping rounds to 25
- add a 2nd-layer LightGBM model trained on the 1st-layer predictions

In [1]:
import scipy
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn import preprocessing
from sklearn.metrics import log_loss, mutual_info_score
from sklearn.decomposition import PCA
# `tqdm._tqdm_notebook` is a deprecated private module (importing it emits a
# deprecation warning that asks for `tqdm.notebook` instead).
from tqdm.notebook import tqdm_notebook
from sklearn.model_selection import StratifiedKFold, KFold

# Register tqdm's pandas integration with a default bar description.
tqdm_notebook.pandas(desc="progress")
# Use the fully-qualified option name: the bare "max_columns" pattern is
# resolved by regex matching and becomes ambiguous once pandas ships more than
# one option ending in "max_columns".
pd.set_option("display.max_columns", 1000)

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
  
  from pandas import Panel


In [2]:
# Directory that holds the competition CSV files.
DIR = "/kaggle/input/lish-moa/"

# Read the five input tables in a single pass; the order of the file names
# matches the order of the names on the left-hand side.
train_feat, test_feat, train_nonscore, train_score, sub = (
    pd.read_csv(DIR + csv_name)
    for csv_name in (
        "train_features.csv",
        "test_features.csv",
        "train_targets_nonscored.csv",
        "train_targets_scored.csv",
        "sample_submission.csv",
    )
)

In [3]:
# Target names kept from an earlier RFECV feature-selection run
# (per the original author's note "features after rfecv").
diff_feats = [
    "acetylcholine_receptor_agonist",
    "acetylcholine_receptor_antagonist",
    "adrenergic_receptor_agonist",
    "adrenergic_receptor_antagonist",
    "bacterial_cell_wall_synthesis_inhibitor",
    "calcium_channel_blocker",
    "cyclooxygenase_inhibitor",
    "dna_inhibitor",
    "dopamine_receptor_antagonist",
    "estrogen_receptor_agonist",
    "glutamate_receptor_antagonist",
    "histamine_receptor_antagonist",
    "phosphodiesterase_inhibitor",
    "serotonin_receptor_agonist",
    "serotonin_receptor_antagonist",
    "sodium_channel_inhibitor",
    "tubulin_inhibitor",
]

# feats.csv is read one raw line per row (newline used as the separator) and
# each line is then split on commas into as many columns as the longest line.
df = (
    pd.read_csv("/kaggle/input/moagcvariables/feats.csv", header=None, sep='\n')[0]
    .str.split(',', expand=True)
)
# Column 0 is an integer key; rows are ordered by it and it becomes the index.
df[0] = df[0].astype(int)
df = df.sort_values(by=0, ascending=True).reset_index(drop=True).set_index(0)
decreased_diff_vars = df.index.values
print(df.shape[0], df.index)

18 Int64Index([3, 4, 5, 9, 10, 43, 54, 71, 77, 79, 83, 99, 105, 151, 176, 177,
            182, 199],
           dtype='int64', name=0)


In [4]:
# Every scored-target column (everything in train_score except the id).
target_feats = list(filter(lambda name: name != "sig_id", train_score.columns))
# Feature columns whose name contains "g-" and "c-" respectively.
g_feats = list(filter(lambda name: "g-" in name, train_feat.columns))
c_feats = list(filter(lambda name: "c-" in name, train_feat.columns))

In [5]:
# Row labels of control-vehicle samples ("noncons") and of all other samples
# ("cons"), computed separately for the train and test feature tables.
noncons_train_index = train_feat.loc[train_feat["cp_type"].eq("ctl_vehicle")].index
cons_train_index = train_feat.loc[train_feat["cp_type"].ne("ctl_vehicle")].index
noncons_test_index = test_feat.loc[test_feat["cp_type"].eq("ctl_vehicle")].index
cons_test_index = test_feat.loc[test_feat["cp_type"].ne("ctl_vehicle")].index

# preprocess

In [6]:
# Normalisation by control group: every non-control sample is expressed
# relative to the mean control-vehicle profile of its (cp_dose, cp_time) group.

# Control rows of train and test are pooled to estimate the group means.
train_ctl = train_feat[train_feat.index.isin(noncons_train_index)].copy().reset_index(drop=True)
test_ctl = test_feat[test_feat.index.isin(noncons_test_index)].copy().reset_index(drop=True)
ctl_df = pd.concat([train_ctl, test_ctl])

feature_cols = g_feats + c_feats
mean_g_feats = ["mean-" + i for i in g_feats]
mean_c_feats = ["mean-" + i for i in c_feats]
mean_cols = mean_g_feats + mean_c_feats
columns = ["cp_dose", "cp_time"] + mean_cols

# Aggregate ONLY the g-/c- columns, in a fixed order. Aggregating the whole
# frame depended on pandas silently discarding the string columns (sig_id,
# cp_type) and on the surviving columns happening to line up with the names
# assigned below.
ctl_group_data = (
    ctl_df.groupby(["cp_dose", "cp_time"])[feature_cols]
    .mean()
    .reset_index()
)
ctl_group_data.columns = columns

train_cons = train_feat[train_feat.index.isin(cons_train_index)].copy().reset_index(drop=True)
test_cons = test_feat[test_feat.index.isin(cons_test_index)].copy().reset_index(drop=True)
n_train_score = train_score[train_score.index.isin(cons_train_index)].copy().reset_index(drop=True)
#n_train_nonscore = train_nonscore[train_nonscore.index.isin(cons_train_index)].copy().reset_index(drop=True)

# Left merges keep the row order of the left frame, so train_cons stays
# row-aligned with n_train_score.
train_cons = pd.merge(train_cons, ctl_group_data, on=["cp_time", "cp_dose"], how="left")
test_cons = pd.merge(test_cons, ctl_group_data, on=["cp_time", "cp_dose"], how="left")

# Build all "diff-<feature>" columns in one block instead of inserting them
# one by one (which fragments the frame) and instead of assuming the features
# are named g-0..g-(n-1) / c-0..c-(n-1). Subtraction is done on raw arrays so
# it is positional: feature_cols[k] pairs with mean_cols[k].
diff_cols = ["diff-" + name for name in feature_cols]

train_diffs = pd.DataFrame(
    train_cons[feature_cols].to_numpy() - train_cons[mean_cols].to_numpy(),
    columns=diff_cols,
    index=train_cons.index,
)
train_cons = pd.concat([train_cons, train_diffs], axis=1)

test_diffs = pd.DataFrame(
    test_cons[feature_cols].to_numpy() - test_cons[mean_cols].to_numpy(),
    columns=diff_cols,
    index=test_cons.index,
)
test_cons = pd.concat([test_cons, test_diffs], axis=1)

In [7]:
# Columns that are label-encoded before modelling.
categoricals = ["cp_dose"]

def encoding(tr, te):
    """Label-encode every column listed in `categoricals`, in place.

    The encoder for each column is fitted on `tr` only and then applied to
    both `tr` and `te`.

    Returns:
        (tr, te) -- the same two frames that were passed in, now modified.
    """
    for col in categoricals:
        encoder = preprocessing.LabelEncoder().fit(list(tr[col]))
        tr[col] = encoder.transform(list(tr[col]))
        te[col] = encoder.transform(list(te[col]))

    return tr, te

n_train_feat, n_test_feat = encoding(train_cons, test_cons)

# feature engineering

In [8]:
def fe(df, remove_features):
    """Drop the `remove_features` columns from `df` in place and return `df`."""
    df.drop(columns=remove_features, inplace=True)
    return df

# Always removed: the treatment-type flag, the control-group means and the raw
# g-/c- columns.
remove_features = ["cp_type"] + mean_g_feats + mean_c_feats + g_feats + c_feats
# Additionally removed: any remaining non-id column with zero standard
# deviation in the training frame.
remove_features += [
    col
    for col in n_train_feat.columns
    if col != "sig_id"
    and col not in remove_features
    and n_train_feat[col].std() == 0
]

n_train_feat = fe(n_train_feat, remove_features)
n_test_feat = fe(n_test_feat, remove_features)

print(n_train_feat.shape, n_test_feat.shape)

(21948, 875) (3624, 875)


# Feature selection

In [9]:
from sklearn.feature_selection import RFECV
# LightGBM settings read (as a module-level name) by check().
params = dict(
    objective='binary',
    boosting_type='gbdt',
    tree_learner='serial',
    learning_rate=0.01,
    num_leaves=10,
    random_seed=44,
    max_depth=5,
)

def check(new_train, target_train, target, selected_features):
    """Return the out-of-fold log loss for one target on a feature subset.

    Args:
        new_train: feature frame; its 'sig_id' column is dropped before use.
        target_train: frame holding the label column named `target`.
        target: name of the label column to model.
        selected_features: names of the feature columns to keep; every other
            column of `new_train` is discarded.

    Returns:
        log_loss between the labels and the 4-fold out-of-fold predictions.

    Uses the module-level `params` dict as the LightGBM configuration.
    """
    features = new_train.drop(['sig_id'], axis=1).copy()
    labels = target_train[target].copy()

    # Discard every column that is not explicitly selected.
    unwanted = [col for col in features.columns if col not in selected_features]
    features.drop(unwanted, axis=1, inplace=True)

    n_folds = 4
    # Two targets are split with plain KFold instead of StratifiedKFold
    # (presumably too few positives to stratify -- TODO confirm).
    plain_kfold_targets = ["erbb2_inhibitor", "atp-sensitive_potassium_channel_antagonist"]
    splitter_cls = KFold if target in plain_kfold_targets else StratifiedKFold
    skf = splitter_cls(n_splits=n_folds, shuffle=True, random_state=0)

    oof = np.zeros([features.shape[0]])
    for fold_no, (fit_idx, val_idx) in enumerate(skf.split(features, labels), start=1):
        print("Fold "+str(fold_no))
        X_fit, y_fit = features.iloc[fit_idx, :], labels.iloc[fit_idx]
        X_val, y_val = features.iloc[val_idx, :], labels.iloc[val_idx]

        dtrain = lgb.Dataset(X_fit, y_fit)
        dvalid = lgb.Dataset(X_val, y_val, reference=dtrain)

        booster = lgb.train(params, dtrain, valid_sets=[dtrain, dvalid],
                            num_boost_round=10000, early_stopping_rounds=50, verbose_eval=1000)

        # Predict the held-out rows at the early-stopped iteration.
        oof[val_idx] = booster.predict(X_val, num_iteration=booster.best_iteration)

    return log_loss(labels, oof)

#for ind, target in enumerate(target_feats):
#    if target not in diff_feats:
#        feature_selector = RFECV(lgb.LGBMClassifier(**params),
#                         step=10, min_features_to_select=200, scoring='neg_log_loss',
#                         cv=4, verbose=1, n_jobs=-1)

#        X_train = n_train_feat.drop(['sig_id'],axis=1).copy()
#        y_train = n_train_score[target].copy()

#        feature_selector.fit(X_train, y_train)
        #print('Features selected:', feature_selector.n_features_)
#        selected_features = [f for f in X_train.columns[feature_selector.ranking_ == 1]]

#        print(target, ind, selected_features)

#        score = check(n_train_feat, n_train_score, target, selected_features)
#        print(target, score)

# parameter tuning

In [10]:
import optuna

# Target columns that objective() evaluates for every trial; the trial's score
# is the mean log loss over these columns.
columns_to_try = [
    'glutamate_receptor_antagonist',
    'dna_inhibitor',
    'serotonin_receptor_antagonist',
    'dopamine_receptor_antagonist',
    'cyclooxygenase_inhibitor'
]

def optuna_check(new_train, target_train, target, params):
    """Return the 4-fold out-of-fold log loss for `target` under `params`.

    Args:
        new_train: feature frame; its 'sig_id' column is dropped before use.
        target_train: frame holding one label column per target (plus
            'sig_id').
        target: name of the label column to model.
        params: LightGBM parameter dict passed to `lgb.train`.

    Returns:
        log_loss between the labels and the out-of-fold predictions.

    Reads the module-level `decreased_diff_vars` and `df` (the per-target
    selected-feature table keyed by target position).
    """
    X_train = new_train.drop(['sig_id'],axis=1).copy()
    y_train = target_train[target].copy()

    # Resolve the target's position locally. Previously this function read a
    # free variable `ind` that is not one of its parameters, so it was either
    # undefined or a stale value left behind by an unrelated loop.
    target_columns = [col for col in target_train.columns if col != "sig_id"]
    ind = target_columns.index(target)

    if ind in decreased_diff_vars:
        feature_row = df.loc[ind,:]
        # Stored names carry one wrapping character on each side, hence [1:-1].
        selected_features = [name[1:-1] for name in feature_row[feature_row.notna()]]
        X_train = X_train[selected_features]

    n_folds=4
    # Two targets are split with plain KFold instead of StratifiedKFold
    # (presumably too few positives to stratify -- TODO confirm).
    if target not in ["erbb2_inhibitor", "atp-sensitive_potassium_channel_antagonist"]:
        skf=StratifiedKFold(n_splits = n_folds, shuffle=True, random_state=0)
    else:
        skf=KFold(n_splits = n_folds, shuffle=True, random_state=0)

    valid = np.zeros([X_train.shape[0]])
    for i , (train_index, test_index) in enumerate(skf.split(X_train, y_train)):
        print("Fold "+str(i+1))
        X_train2 = X_train.iloc[train_index,:]
        y_train2 = y_train.iloc[train_index]

        X_valid2 = X_train.iloc[test_index,:]
        y_valid2 = y_train.iloc[test_index]

        lgb_train = lgb.Dataset(X_train2, y_train2)
        lgb_eval = lgb.Dataset(X_valid2, y_valid2, reference=lgb_train)

        clf = lgb.train(params, lgb_train,valid_sets=[lgb_train, lgb_eval],
               num_boost_round=10000,early_stopping_rounds=50,verbose_eval = 1000)

        # Predict the held-out rows at the early-stopped iteration.
        valid_predict = clf.predict(X_valid2, num_iteration = clf.best_iteration)
        valid[test_index] = valid_predict

    score = log_loss(y_train, valid)

    return score


def objective(trial):
    """Optuna objective: mean out-of-fold log loss over `columns_to_try`.

    Args:
        trial: optuna trial used to sample the LightGBM hyper-parameters.

    Returns:
        The mean of the scores returned by `optuna_check` for each column in
        `columns_to_try` (lower is better).
    """
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'boosting_type': 'gbdt',
        'boost_from_average': True,
        'num_threads': 4,
        'random_state': 42,

        'num_leaves': trial.suggest_int('num_leaves', 10, 1000),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 200),
        'min_child_weight': trial.suggest_loguniform('min_child_weight', 0.001, 0.1),
        'max_depth': trial.suggest_int('max_depth', 1, 100),
        # LightGBM only applies bagging_fraction when bagging_freq is non-zero;
        # with the default bagging_freq=0 the tuned fraction had no effect.
        'bagging_freq': 1,
        'bagging_fraction': trial.suggest_loguniform('bagging_fraction', .5, .99),
        'feature_fraction': trial.suggest_loguniform('feature_fraction', .5, .99),
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 0.1, 2),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 0.1, 2)
    }

    scores = [
        optuna_check(n_train_feat, n_train_score, column, params)
        for column in columns_to_try
    ]

    return np.mean(scores)

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=100)

# 1st model

In [11]:
def modelling_lgb(new_train, target_train, new_test, target, ind):
    """Train one LightGBM model per CV fold for `target` and predict.

    Args:
        new_train: training feature frame; its 'sig_id' column is dropped.
        target_train: frame holding the label column named `target`.
        new_test: test feature frame (with 'sig_id'), or None to skip test
            prediction.
        target: name of the label column to model.
        ind: accepted for call compatibility; not used by this function.

    Returns:
        (valid, pred_value, score) where `valid` is the out-of-fold prediction
        array for the training rows, `pred_value` is the fold-averaged test
        prediction (None when `new_test` is None), and `score` is the log loss
        of `valid` against the labels.
    """
    X_train = new_train.drop(['sig_id'],axis=1).copy()
    y_train = target_train[target].copy()

    params = {'objective': 'binary', 'boosting_type': 'gbdt', 'tree_learner': 'serial', 'learning_rate': 0.01,
               "num_leaves": 10, 'random_seed':44, 'max_depth': 5}

    n_folds=4
    # Two targets are split with plain KFold instead of StratifiedKFold
    # (presumably too few positives to stratify -- TODO confirm).
    if target not in ["erbb2_inhibitor", "atp-sensitive_potassium_channel_antagonist"]:
        skf=StratifiedKFold(n_splits = n_folds, shuffle=True, random_state=0)
    else:
        skf=KFold(n_splits = n_folds, shuffle=True, random_state=0)

    models = []

    valid = np.zeros([X_train.shape[0]])
    for train_index, test_index in skf.split(X_train, y_train):
        X_train2 = X_train.iloc[train_index,:]
        y_train2 = y_train.iloc[train_index]

        X_valid2 = X_train.iloc[test_index,:]
        y_valid2 = y_train.iloc[test_index]

        lgb_train = lgb.Dataset(X_train2, y_train2)
        lgb_eval = lgb.Dataset(X_valid2, y_valid2, reference=lgb_train)

        clf = lgb.train(params, lgb_train,valid_sets=[lgb_train, lgb_eval],
               num_boost_round=10000,early_stopping_rounds=25,verbose_eval = 0)

        # Out-of-fold prediction at this fold's early-stopped iteration.
        valid[test_index] = clf.predict(X_valid2, num_iteration = clf.best_iteration)
        models.append(clf)

    if new_test is not None:
        X_test = new_test.drop(['sig_id'],axis=1).copy()
        pred_value = np.zeros(X_test.shape[0])
        for model in models:
            # Each fold model must be truncated at its OWN best iteration.
            # Using the loop-leftover `clf.best_iteration` applied the last
            # fold's stopping point to every model.
            pred_value += model.predict(X_test, num_iteration = model.best_iteration) / n_folds
    else:
        pred_value = None

    score = log_loss(y_train, valid)

    return valid, pred_value, score


# First layer: one model per scored target. Out-of-fold predictions are
# collected in train_checkscore and test predictions in sub.
train_checkscore = train_score.copy()
# Cast the target columns to float up front so that probabilities can be
# stored without relying on pandas implicitly upcasting a label column on
# assignment (assumes the labels load as integers -- the cast is a no-op
# otherwise).
train_checkscore[target_feats] = train_checkscore[target_feats].astype(float)
target_list = []
log_loss_list = []

for ind, target in enumerate(target_feats):
    print(ind, target)
    valid, pred_value, score = modelling_lgb(n_train_feat, n_train_score, n_test_feat, target, ind)
    # Non-control rows receive the model output; control rows are fixed to 0.
    train_checkscore.loc[cons_train_index, target] = valid
    train_checkscore.loc[noncons_train_index, target] = 0
    print("oof log_loss= {}, all log_loss= {}".format(score, log_loss(train_score[target], train_checkscore[target])))
    target_list.append(target)
    log_loss_list.append(score)
    sub.loc[cons_test_index, target] = pred_value
    sub.loc[noncons_test_index, target] = 0

0 5-alpha_reductase_inhibitor
oof log_loss= 0.006085844595936416, all log_loss= 0.005608974434854047
1 11-beta-hsd1_inhibitor
oof log_loss= 0.006635563403911071, all log_loss= 0.006115618778409426
2 acat_inhibitor
oof log_loss= 0.008440581650489961, all log_loss= 0.007779200724991836
3 acetylcholine_receptor_agonist
oof log_loss= 0.0487250530315697, all log_loss= 0.04490709095225052
4 acetylcholine_receptor_antagonist
oof log_loss= 0.07035821516483956, all log_loss= 0.06484513758452592
5 acetylcholinesterase_inhibitor
oof log_loss= 0.022193763665167048, all log_loss= 0.020454720959229372
6 adenosine_receptor_agonist
oof log_loss= 0.017225227949289594, all log_loss= 0.015875506132149574
7 adenosine_receptor_antagonist
oof log_loss= 0.02783326416937985, all log_loss= 0.025652325606347146
8 adenylyl_cyclase_activator
oof log_loss= 0.0038365206998609135, all log_loss= 0.0035359014159968595
9 adrenergic_receptor_agonist
oof log_loss= 0.06169559949541353, all log_loss= 0.05686130081991007
10

In [12]:
# Local score: mean over all targets of the log loss between the true labels
# and the first-layer predictions stored in train_checkscore.
scores = [
    log_loss(train_score[target_col], train_checkscore[target_col])
    for target_col in target_feats
]
print(np.mean(scores))

0.01591548705279586


# 2nd model

In [13]:
# First-layer predictions restricted to the non-control rows; these frames are
# the inputs of the second-layer models.
first_layer_train_feat = train_checkscore.loc[train_checkscore.index.isin(cons_train_index)].copy()
first_layer_test_feat = sub.loc[sub.index.isin(cons_test_index)].copy()

# Original features joined with the first-layer predictions on sig_id.
# NOTE(review): neither merged frame is referenced by any later visible cell.
n_train_feat2 = n_train_feat.merge(first_layer_train_feat, on="sig_id", how="outer")
n_test_feat2 = n_test_feat.merge(first_layer_test_feat, on="sig_id", how="outer")

In [14]:
# Fresh copy of the submission template; the second-layer predictions are
# written into it.
final = pd.read_csv(f"{DIR}sample_submission.csv")

In [15]:
# Second layer: one model per scored target, trained on the first-layer
# predictions. Out-of-fold predictions go to train_checkscore2 and test
# predictions to final.
train_checkscore2 = train_score.copy()
# Cast the target columns to float up front so that probabilities can be
# stored without relying on pandas implicitly upcasting a label column on
# assignment (assumes the labels load as integers -- the cast is a no-op
# otherwise).
train_checkscore2[target_feats] = train_checkscore2[target_feats].astype(float)
target_list2 = []
log_loss_list2 = []

for ind, target in enumerate(target_feats):
    print(ind, target)
    valid, pred_value, score = modelling_lgb(first_layer_train_feat, n_train_score, first_layer_test_feat, target, ind)
    # Non-control rows receive the model output; control rows are fixed to 0.
    train_checkscore2.loc[cons_train_index, target] = valid
    train_checkscore2.loc[noncons_train_index, target] = 0
    print("oof log_loss= {}, all log_loss= {}".format(score, log_loss(train_score[target], train_checkscore2[target])))
    target_list2.append(target)
    log_loss_list2.append(score)
    final.loc[cons_test_index, target] = pred_value
    final.loc[noncons_test_index, target] = 0

0 5-alpha_reductase_inhibitor
oof log_loss= 0.005991335915884285, all log_loss= 0.005521871196851856
1 11-beta-hsd1_inhibitor
oof log_loss= 0.006630872061836549, all log_loss= 0.006111295037087025
2 acat_inhibitor
oof log_loss= 0.008465524776272106, all log_loss= 0.00780218937556152
3 acetylcholine_receptor_agonist
oof log_loss= 0.04877178932560412, all log_loss= 0.044950165117929
4 acetylcholine_receptor_antagonist
oof log_loss= 0.06987890240516746, all log_loss= 0.06440338246361876
5 acetylcholinesterase_inhibitor
oof log_loss= 0.021942797116354676, all log_loss= 0.020223419463750496
6 adenosine_receptor_agonist
oof log_loss= 0.01720874923713231, all log_loss= 0.015860318646870823
7 adenosine_receptor_antagonist
oof log_loss= 0.027350879345680524, all log_loss= 0.025207739139959606
8 adenylyl_cyclase_activator
oof log_loss= 0.0034315283038794634, all log_loss= 0.003162643118062833
9 adrenergic_receptor_agonist
oof log_loss= 0.06079566143912271, all log_loss= 0.056031879451829476
10 a

In [16]:
# Local score: mean over all targets of the log loss between the true labels
# and the second-layer predictions stored in train_checkscore2.
scores = [
    log_loss(train_score[target_col], train_checkscore2[target_col])
    for target_col in target_feats
]
print(np.mean(scores))

0.015832927455056798


In [17]:
# Per-target second-layer scores, displayed transposed (one column per target).
difficult_list2 = pd.DataFrame({"Target": target_list2, "score": log_loss_list2})
difficult_list2.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205
Target,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_antagonist,
faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gtpase_inhibi
tor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
score,0.00599134,0.00663087,0.00846552,0.0487718,0.0698789,0.0219428,0.0172087,0.0273509,0.00343153,0.0607957,0.080812,0.0147078,0.00241168,0.0119948,0.00459611,0.00443099,0.0154134,0.0260931,0.0239006,0.0121104,0.0123802,0.0209307,0.00251869,0.0132462,0.00465861,0.00495214,0.00457752,0.00665112,0.0220696,0.011925,0.00813402,0.0154269,0.0149949,0.00252809,0.00157375,0.00371464,0.0241914,0.0060229,0.0153888,0.00252766,0.0188281,0.0239612,0.0118826,0.0494656,0.025834,0.0318794,0.00252267,0.00952405,0.0108509,0.0205802,0.0085536,0.0154557,0.00934004,0.00253173,0.0677029,0.0138126,0.0173054,0.0121655,0.0118009,0.00652946,0.00465841,0.0292999,0.00665845,0.0214039,0.0171534,0.00574333,0.0138967,0.015566,0.0169444,0.00253508,0.00930024,0.0949644,0.0295924,0.0108756,0.00872943,0.00253099,0.0155983,0.0862285,0.0334798,0.0888136,0.0229524,0.00248605,0.00157375,0.0421288,0.0154594,0.0117917,0.00558527,0.00877152,0.0119983,0.0277865,0.00505486,0.00664696,0.00814597,0.0302301,0.0433548,0.0127852,0.0157065,0.0050052,0.0224943,0.0834845,0.00658942,0.0138443,0.0220428,0.0126854,0.0185617,0.0588996,0.00716066,0.0100782,0.0219148,0.0132464,0.00895661,0.0114476,0.00768505,0.0105501,0.0203074,0.0101574,0.0133532,0.0137119,0.0135893,0.0207281,0.00252639,0.00253512,0.0193678,0.00465741,0.0190648,0.00238535,0.00724087,0.00993099,0.022499,0.00880974,0.00458869,0.0250825,0.00651792,0.0188118,0.01534,0.0122719,0.0332457,0.00252425,0.00910762,0.00460802,0.00917561,0.00283277,0.00519763,0.0190868,0.0277956,0.0123658,0.0116326,0.00854065,0.0180863,0.0223184,0.00653531,0.0637112,0.00880631,0.0308584,0.0104315,0.0173876,0.0284781,0.0260971,0.00985753,0.0317095,0.0060528,0.0120366,0.0249221,0.00134474,0.0150672,0.00249954,0.025703,0.00664239,0.0173705,0.00942605,0.00465998,0.0135835,0.00253466,0.010887,0.0103784,0.00843567,0.0587461,0.0893395,0.0143304,0.0121253,0.0120695,0.00876746,0.0652947,0.0088328,0.0178291,0.00253208,0.00632847,0.0187961,0.00432158,0.00689839,0.0114807,0.0103736,0.00289186,0
.0119354,0.016501,0.00665627,0.00253151,0.00781081,0.0154714,0.0243286,0.0215731,0.00252689,0.0286259,0.00917359,0.00806247,0.0103572


# submission

In [18]:
# Write the second-layer predictions held in `final` to the submission file,
# without the row index.
final.to_csv("submission.csv", index=False)