- Refactor the code to select features with RFECV
- Modify my dataset

In [1]:
import scipy
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn import preprocessing
from sklearn.metrics import log_loss, mutual_info_score
from sklearn.decomposition import PCA
from tqdm._tqdm_notebook import tqdm_notebook
from sklearn.model_selection import StratifiedKFold, KFold
# Hook tqdm into pandas so the progress-bar variants of apply are available.
tqdm_notebook.pandas(desc="progress")
# Use the fully-qualified option name. `set_option` treats the key as a regex
# over all registered options, so the bare "max_columns" is ambiguous (and
# raises OptionError) on pandas versions that also define
# "styler.render.max_columns".
pd.set_option("display.max_columns", 1000)

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
  
  from pandas import Panel


In [2]:
# Root folder of the competition data; every input table is read from here.
DIR = "/kaggle/input/lish-moa/"

# Feature tables of the train and test splits.
train_feat = pd.read_csv(f"{DIR}train_features.csv")
test_feat = pd.read_csv(f"{DIR}test_features.csv")

# Scored targets of the training rows (the non-scored targets are not loaded).
#train_nonscore = pd.read_csv(DIR+"train_targets_nonscored.csv")
train_score = pd.read_csv(f"{DIR}train_targets_scored.csv")

# Submission template; its target columns are overwritten with predictions later.
sub = pd.read_csv(f"{DIR}sample_submission.csv")

In [3]:
# Feature subsets kept after RFECV.
# Every line of feats.csv is read as one string and split on commas. The first
# field is cast to int and becomes the row key; the remaining fields stay as
# strings (rows with fewer fields end up with missing values on the right).
df = (
    pd.read_csv("/kaggle/input/moagcvariables/feats.csv", header=None, sep='\n')[0]
    .str.split(',', expand=True)
)
df[0] = df[0].astype(int)
df = df.sort_values(by=0).reset_index(drop=True)

# Integer keys that have a stored feature subset, in ascending order.
decreased_vars = df[0].values
# Key the table by that integer so a subset can be fetched with `df.loc[key]`.
df = df.set_index(0)

In [4]:
# Target names: every column of the scored-targets table except the row id.
target_feats = train_score.columns[train_score.columns != "sig_id"].tolist()
# Feature names, grouped by the "g-" / "c-" marker they contain.
g_feats = train_feat.columns[train_feat.columns.str.contains("g-", regex=False)].tolist()
c_feats = train_feat.columns[train_feat.columns.str.contains("c-", regex=False)].tolist()

In [5]:
# Row masks marking the "ctl_vehicle" rows of each split.
train_is_ctl = train_feat["cp_type"] == "ctl_vehicle"
test_is_ctl = test_feat["cp_type"] == "ctl_vehicle"

# "noncons_*" holds the ctl_vehicle row labels, "cons_*" all remaining rows.
noncons_train_index = train_feat.loc[train_is_ctl].index
cons_train_index = train_feat.loc[~train_is_ctl].index
noncons_test_index = test_feat.loc[test_is_ctl].index
cons_test_index = test_feat.loc[~test_is_ctl].index

# preprocess

In [6]:
# normalization by ctl group
#
# The ctl_vehicle rows of both splits are pooled and averaged per
# (cp_dose, cp_time) group. Every remaining row then gets, for each g-/c-
# feature, a "diff-<feature>" column holding its value minus the control mean
# of its own (cp_dose, cp_time) group.


def _with_ctl_diffs(frame, feats):
    """Return `frame` with one extra "diff-<feat>" column per name in `feats`.

    `frame` must already contain both `<feat>` and `mean-<feat>` for every
    entry. All differences are computed in a single array operation and
    appended at once, in the order of `feats`.
    """
    diffs = pd.DataFrame(
        frame[feats].to_numpy() - frame[["mean-" + f for f in feats]].to_numpy(),
        columns=["diff-" + f for f in feats],
        index=frame.index,
    )
    return pd.concat([frame, diffs], axis=1)


train_ctl = train_feat[train_feat.index.isin(noncons_train_index)].copy().reset_index(drop=True)
test_ctl = test_feat[test_feat.index.isin(noncons_test_index)].copy().reset_index(drop=True)
ctl_df = pd.concat([train_ctl, test_ctl])

signal_feats = g_feats + c_feats
mean_g_feats = ["mean-" + i for i in g_feats]
mean_c_feats = ["mean-" + i for i in c_feats]
columns = ["cp_dose", "cp_time"] + mean_g_feats + mean_c_feats

# Aggregate the g-/c- columns by name instead of aggregating the whole frame
# and renaming the result by position: the string columns (sig_id, cp_type)
# make a whole-frame mean fail on recent pandas, and positional renaming
# silently mislabels columns if the input column order ever changes.
ctl_group_data = (
    ctl_df.groupby(["cp_dose", "cp_time"])[signal_feats]
    .mean()
    .add_prefix("mean-")
    .reset_index()
)
assert list(ctl_group_data.columns) == columns, "unexpected control-mean column layout"

train_cons = train_feat[train_feat.index.isin(cons_train_index)].copy().reset_index(drop=True)
test_cons = test_feat[test_feat.index.isin(cons_test_index)].copy().reset_index(drop=True)
n_train_score = train_score[train_score.index.isin(cons_train_index)].copy().reset_index(drop=True)
#n_train_nonscore = train_nonscore[train_nonscore.index.isin(cons_train_index)].copy().reset_index(drop=True)

# Left join keeps the row order of the treated frames, so `n_train_score`
# stays positionally aligned with `train_cons`.
train_cons = pd.merge(train_cons, ctl_group_data, on=["cp_time", "cp_dose"], how="left")
test_cons = pd.merge(test_cons, ctl_group_data, on=["cp_time", "cp_dose"], how="left")

# diff-g-* columns first, then diff-c-*, following the order of the feature lists.
train_cons = _with_ctl_diffs(train_cons, signal_feats)
test_cons = _with_ctl_diffs(test_cons, signal_feats)

In [7]:
# Columns that are label-encoded before modelling.
categoricals = ["cp_dose"]


def encoding(tr, te):
    """Label-encode the `categoricals` columns of both frames in place.

    For each column a fresh encoder is fitted on the values of `tr` only and
    then used to transform that column in `tr` and in `te`.

    Parameters
    ----------
    tr, te : DataFrame
        Frames containing every column listed in `categoricals`; both are
        modified.

    Returns
    -------
    tuple
        The same two objects, `(tr, te)`, now holding the encoded columns.
    """
    for col in categoricals:
        encoder = preprocessing.LabelEncoder()
        encoder.fit(list(tr[col]))
        for frame in (tr, te):
            frame[col] = encoder.transform(list(frame[col]))

    return tr, te


n_train_feat, n_test_feat = encoding(train_cons, test_cons)

# feature engineering

In [8]:
def fe(df, remove_features):
    """Return `df` without the columns listed in `remove_features`.

    A new frame is returned and the input is left untouched, so a frame that
    is still referenced under another name is not changed behind the caller's
    back.

    Parameters
    ----------
    df : DataFrame
        Frame to take the columns from.
    remove_features : list
        Column labels to drop; a label missing from `df` raises `KeyError`.

    Returns
    -------
    DataFrame
        Copy of `df` containing only the remaining columns, in their original
        order.
    """
    return df.drop(columns=remove_features)

# Columns that never reach the model: the treatment-type flag, the control
# means and the raw g-/c- readings (only their "diff-" versions are kept).
remove_features = ["cp_type"] + mean_g_feats + mean_c_feats + g_feats + c_feats

# Additionally drop every remaining column (row id aside) that is constant in
# the training frame.
for col in n_train_feat.columns:
    if col == "sig_id" or col in remove_features:
        continue
    if n_train_feat[col].std() == 0:
        remove_features.append(col)

n_train_feat = fe(n_train_feat, remove_features)
n_test_feat = fe(n_test_feat, remove_features)

print(n_train_feat.shape, n_test_feat.shape)

(21948, 875) (3624, 875)


# Feature selection

In [9]:
from sklearn.feature_selection import RFECV
# LightGBM settings for the estimator in the (commented-out) RFECV search.
params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    'tree_learner': 'serial',
    'learning_rate': 0.01,
    "num_leaves": 10,
    'random_seed': 44,
    'max_depth': 5,
}

# Same settings as a separate dict object; this is the one `check` hands to `lgb.train`.
lgbm_params = dict(params)

#diff_feats = ["acetylcholine_receptor_agonist","acetylcholine_receptor_antagonist","adrenergic_receptor_agonist","adrenergic_receptor_antagonist",
# "bacterial_cell_wall_synthesis_inhibitor","calcium_channel_blocker","cyclooxygenase_inhibitor","dna_inhibitor","dopamine_receptor_antagonist",
# "estrogen_receptor_agonist","glutamate_receptor_antagonist","histamine_receptor_antagonist","phosphodiesterase_inhibitor",
# "serotonin_receptor_agonist","serotonin_receptor_antagonist","sodium_channel_inhibitor","tubulin_inhibitor"]

# Targets the commented-out RFECV loop below iterates over.
diff_var = [
    "histamine_receptor_antagonist",
    "phosphodiesterase_inhibitor",
    "serotonin_receptor_agonist",
    "serotonin_receptor_antagonist",
    "sodium_channel_inhibitor",
    "tubulin_inhibitor",
]

def check(new_train, target_train, target, selected_features):
    """Cross-validate a LightGBM model for one target on a feature subset.

    Parameters
    ----------
    new_train : DataFrame
        Training features including a 'sig_id' column, which is dropped.
    target_train : DataFrame
        Frame holding the label column `target`; its rows are addressed with
        the same positional indices as `new_train`.
    target : str
        Name of the label column to model.
    selected_features : collection of str
        Feature names to keep; every other column is discarded.

    Returns
    -------
    float
        Log loss of the out-of-fold predictions over all training rows.
    """
    features = new_train.drop(['sig_id'], axis=1).copy()
    labels = target_train[target].copy()

    # Restrict to the requested columns, preserving the frame's column order.
    kept = [col for col in features.columns if col in selected_features]
    features = features[kept]

    n_folds = 4
    # These two targets are split with a plain KFold instead of a stratified one.
    unstratified = ("erbb2_inhibitor", "atp-sensitive_potassium_channel_antagonist")
    splitter_cls = KFold if target in unstratified else StratifiedKFold
    skf = splitter_cls(n_splits=n_folds, shuffle=True, random_state=0)

    valid = np.zeros([features.shape[0]])
    for fold_no, (train_index, test_index) in enumerate(skf.split(features, labels), start=1):
        print("Fold "+str(fold_no))
        X_valid2 = features.iloc[test_index, :]

        lgb_train = lgb.Dataset(features.iloc[train_index, :], labels.iloc[train_index])
        lgb_eval = lgb.Dataset(X_valid2, labels.iloc[test_index], reference=lgb_train)

        # Train with early stopping monitored on the held-out fold.
        clf = lgb.train(lgbm_params, lgb_train, valid_sets=[lgb_train, lgb_eval],
                        num_boost_round=10000, early_stopping_rounds=50, verbose_eval=1000)

        # Each row is predicted exactly once, by the model that did not train on it.
        valid[test_index] = clf.predict(X_valid2, num_iteration=clf.best_iteration)

    return log_loss(labels, valid)

#for target in diff_var:
#    feature_selector = RFECV(lgb.LGBMClassifier(**params),
#                         step=10, min_features_to_select=200, scoring='neg_log_loss',
#                         cv=4, verbose=1, n_jobs=-1)

    #X_train = n_train_feat.drop(['sig_id'],axis=1).copy()
    #y_train = n_train_score[target].copy()

    #feature_selector.fit(X_train, y_train)
    #print('Features selected:', feature_selector.n_features_)
    #selected_features = [f for f in X_train.columns[feature_selector.ranking_ == 1]]

    #print(target, selected_features)

#    score = check(n_train_feat, n_train_score, target, selected_features)
#    print(target, score)

# parameter tuning

In [10]:
#import optuna.integration.lightgbm as lgb
#import json

def modelling_optuna(new_train, target_train, target):
    """Run a 4-fold stratified CV for one target and collect per-fold `best_params`.

    NOTE(review): the `best_params` / `tuning_history` keyword arguments passed
    to `lgb.train` look like they target the Optuna LightGBM wrapper whose
    import is commented out above this function - confirm which module `lgb`
    is bound to before calling.

    Parameters
    ----------
    new_train : DataFrame
        Training features including a 'sig_id' column, which is dropped.
    target_train : DataFrame
        Frame holding the label column `target`, addressed with the same
        positional indices as `new_train`.
    target : str
        Name of the label column to model.

    Returns
    -------
    list of dict
        The `best_params` dict handed to the trainer in each fold, in fold order.
    """
    features = new_train.drop(['sig_id'], axis=1).copy()
    labels = target_train[target].copy()

    n_folds = 4
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=0)

    valid = np.zeros([features.shape[0]])
    best_params_list = []
    for fold_no, (train_index, test_index) in enumerate(skf.split(features, labels), start=1):
        print("Fold "+str(fold_no))
        X_valid2 = features.iloc[test_index, :]

        lgb_train = lgb.Dataset(features.iloc[train_index, :], labels.iloc[train_index])
        lgb_eval = lgb.Dataset(X_valid2, labels.iloc[test_index], reference=lgb_train)

        # Fresh containers for every fold; both are passed to the trainer.
        best_params = {}
        tuning_history = []
        lgbm_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'tree_learner': 'serial'}

        clf = lgb.train(lgbm_params, lgb_train, valid_sets=[lgb_train, lgb_eval],
                        num_boost_round=10000, early_stopping_rounds=100, verbose_eval=1000,
                        best_params=best_params, tuning_history=tuning_history)

        valid[test_index] = clf.predict(X_valid2, num_iteration=clf.best_iteration)

        #pd.DataFrame(tuning_history).to_csv('./tuning_history.csv')
        best_params_list.append(best_params)

    #for j in range(n_folds):
    #    print('Fold: ' + str(j+1) + ' Best parameters: ' + json.dumps(best_params_list[j], indent=4))

    #print('Best parameters: ' + json.dumps(best_params, indent=4))

    # Out-of-fold log loss over all training rows.
    score = log_loss(labels, valid)
    print("score = {}".format(score))
    return best_params_list

#best_params_list = modelling_optuna(new_train,n_train_score, target)
#best_params_list

# modelling

In [11]:
# Fixed LightGBM settings used by `modelling_lgb` for every per-target model.
lgbm_params = dict(
    objective='binary',
    boosting_type='gbdt',
    tree_learner='serial',
    learning_rate=0.01,
    num_leaves=10,
    random_seed=44,
    max_depth=5,
)

def modelling_lgb(new_train, target_train, new_test, target, ind):
    """Train a 4-fold LightGBM model for one target and predict the test rows.

    Parameters
    ----------
    new_train : DataFrame
        Training features including a 'sig_id' column, which is dropped.
    target_train : DataFrame
        Frame holding the label column `target`, addressed with the same
        positional indices as `new_train`.
    new_test : DataFrame
        Test features including a 'sig_id' column, which is dropped.
    target : str
        Name of the label column to model.
    ind : int
        Key of the target in the module-level RFECV table `df`. When it
        appears in `decreased_vars`, only the features stored for that key
        are used.

    Returns
    -------
    valid : ndarray
        Out-of-fold predictions, one per training row.
    pred_value : ndarray
        Test predictions averaged over the fold models.
    score : float
        Log loss of `valid` against the training labels.
    """
    X_train = new_train.drop(['sig_id'], axis=1).copy()
    y_train = target_train[target].copy()
    X_test = new_test.drop(['sig_id'], axis=1).copy()

    if ind in decreased_vars:
        # Look the row up once. Each stored entry loses its first and last
        # character (presumably surrounding quotes - TODO confirm against
        # feats.csv); padding cells of shorter rows are skipped.
        stored = df.loc[ind, :]
        selected_features = [name[1:-1] for name in stored[stored.notna()]]
        X_train = X_train[selected_features]
        X_test = X_test[selected_features]

    n_folds = 4
    # These two targets are split with a plain KFold instead of a stratified one.
    if target not in ["erbb2_inhibitor", "atp-sensitive_potassium_channel_antagonist"]:
        skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=0)
    else:
        skf = KFold(n_splits=n_folds, shuffle=True, random_state=0)

    valid = np.zeros([X_train.shape[0]])
    pred_value = np.zeros(X_test.shape[0])
    for i, (train_index, test_index) in enumerate(skf.split(X_train, y_train)):
        print("Fold "+str(i+1))
        X_train2 = X_train.iloc[train_index, :]
        y_train2 = y_train.iloc[train_index]

        X_valid2 = X_train.iloc[test_index, :]
        y_valid2 = y_train.iloc[test_index]

        lgb_train = lgb.Dataset(X_train2, y_train2)
        lgb_eval = lgb.Dataset(X_valid2, y_valid2, reference=lgb_train)

        # Train with early stopping monitored on the held-out fold.
        clf = lgb.train(lgbm_params, lgb_train, valid_sets=[lgb_train, lgb_eval],
                        num_boost_round=10000, early_stopping_rounds=50, verbose_eval=1000)

        valid[test_index] = clf.predict(X_valid2, num_iteration=clf.best_iteration)
        # Each fold model contributes an equal share of the test prediction.
        pred_value += clf.predict(X_test, num_iteration=clf.best_iteration) / n_folds

    score = log_loss(y_train, valid)

    return valid, pred_value, score

# Frame receiving the out-of-fold predictions, shaped like the label table.
# The target columns are cast to float up front: predictions are
# probabilities, and writing them into columns that still carry the dtype of
# the labels relies on implicit upcasting during `.loc` assignment, which
# recent pandas versions deprecate.
train_checkscore = train_score.astype({col: "float64" for col in target_feats})
target_list = []
log_loss_list = []

for ind, target in enumerate(target_feats):
    print(ind, target)
    valid, pred_value, score = modelling_lgb(n_train_feat, n_train_score, n_test_feat, target, ind)

    # Treated rows get their out-of-fold prediction; ctl_vehicle rows were not
    # modelled and are set to 0.
    train_checkscore.loc[cons_train_index, target] = valid
    train_checkscore.loc[noncons_train_index, target] = 0
    print("oof log_loss= {} ".format(score))
    print("all log_loss= {} ".format(log_loss(train_score[target], train_checkscore[target])))

    # Kept as two parallel lists; the per-target score table is built from them.
    target_list.append(target)
    log_loss_list.append(score)

    # Same convention for the submission: model output for treated rows, 0 for ctl_vehicle rows.
    sub.loc[cons_test_index, target] = pred_value
    sub.loc[noncons_test_index, target] = 0

0 5-alpha_reductase_inhibitor
Fold 1
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's binary_logloss: 0.00595763	valid_1's binary_logloss: 0.0058469
Fold 2
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[56]	training's binary_logloss: 0.00127755	valid_1's binary_logloss: 0.00596482
Fold 3
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[80]	training's binary_logloss: 0.00100471	valid_1's binary_logloss: 0.00537319
Fold 4
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[109]	training's binary_logloss: 0.000664927	valid_1's binary_logloss: 0.0071177
oof log_loss= 0.006075651377849975 
all log_loss= 0.005599579929497485 
1 11-beta-hsd1_inhibitor
Fold 1
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[41]	training's binary_logloss: 0.00196

In [12]:
# local score
scores = []
for target_col in target_feats:
    scores.append(log_loss(train_score[target_col], train_checkscore[target_col]))
print(np.mean(scores))

0.015773848452461218


In [13]:
# One row per target with its out-of-fold log loss; shown transposed so the
# targets run across the columns.
difficult_list = pd.DataFrame({"Target": target_list, "score": log_loss_list})
difficult_list.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205
Target,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_antagonist,
faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gtpase_inhibi
tor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
score,0.00607565,0.00662189,0.00843873,0.0471292,0.0690841,0.021901,0.0171832,0.0277751,0.00383652,0.0593518,0.0785539,0.0155387,0.00233972,0.0118439,0.00458633,0.00452143,0.0154159,0.0263142,0.0239117,0.0120969,0.0124162,0.0215702,0.00252711,0.0133475,0.00466088,0.00487605,0.00466274,0.00648111,0.0220385,0.0120665,0.00822767,0.0154739,0.0152612,0.00252552,0.00157376,0.00331201,0.0231797,0.00569297,0.01612,0.00254499,0.0187594,0.0238711,0.0120284,0.0482364,0.0261593,0.0318622,0.0027503,0.0096134,0.0113172,0.0204287,0.00855626,0.0149623,0.00960127,0.0025256,0.0650959,0.0138538,0.0171171,0.0121433,0.0117716,0.00659744,0.00466106,0.0292509,0.0066586,0.021628,0.0172165,0.00539583,0.0137462,0.0155199,0.0171519,0.0025279,0.00963845,0.0924955,0.0295469,0.0103305,0.00865342,0.00244157,0.0152817,0.0834787,0.0335795,0.0863835,0.0263916,0.0025343,0.00157447,0.0402608,0.0151534,0.0118792,0.0060714,0.00881815,0.0121519,0.029373,0.00484212,0.00666289,0.00822956,0.0302241,0.0428157,0.012967,0.0152137,0.0050084,0.0222849,0.0814973,0.00665608,0.0134728,0.0219699,0.0134376,0.0183271,0.0581465,0.00729916,0.0105764,0.0217308,0.0133441,0.0091596,0.0111506,0.00785888,0.0105988,0.0204655,0.0102146,0.0141429,0.0137524,0.0163844,0.0221358,0.00253825,0.00245805,0.0193661,0.00465539,0.0185549,0.00249238,0.00771044,0.0104427,0.0223667,0.00882491,0.00465777,0.0251955,0.00666037,0.0187806,0.0152795,0.0122886,0.0341459,0.00253177,0.00916406,0.00446567,0.00914871,0.00289535,0.00507978,0.0190125,0.0279389,0.0124596,0.0119417,0.00847539,0.0179032,0.0231249,0.00661428,0.061525,0.00870337,0.0300564,0.0103121,0.0173665,0.0285049,0.0269569,0.0100552,0.0309887,0.00642209,0.0120556,0.025116,0.00102597,0.0151104,0.00250456,0.0254648,0.00692607,0.0175606,0.00953687,0.00462885,0.0136882,0.0025267,0.0110156,0.0103823,0.00871404,0.0574008,0.0866207,0.0143908,0.01214,0.0119454,0.00862727,0.063591,0.00876065,0.0177705,0.00252825,0.00626659,0.018641,0.0047292,0.00698235,0.0117621,0.0103922,0.00289182,0.0118475,0.
0174347,0.00663386,0.00252918,0.00774644,0.0153965,0.0243475,0.0220131,0.0025291,0.0293075,0.00911536,0.00792857,0.0103894


In [14]:
# Write the filled-in submission frame to the working directory, without the row index.
sub.to_csv("submission.csv", index=False)