In [41]:
import os
import numpy as np
import pandas as pd

from bayes_opt import BayesianOptimization
import xgboost as xgb

# Scikit-learn
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KNeighborsClassifier

pd.options.display.max_columns = 999
np.random.seed(123)
random_state = 2017
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

In [2]:
def load_data(ravel=True):
    x_train = np.loadtxt("x_train.csv", delimiter = ",", skiprows = 1)
    x_test  = np.loadtxt("x_test.csv", delimiter = ",", skiprows = 1)    
    y_train = np.loadtxt("y_train.csv", delimiter = ",", skiprows = 1)
    
    
    # remove the first column(Id)
    x_train = x_train[:,1:]
    x_test  = x_test[:,1:]
    y_train = y_train[:,1:]

    num_genes_train = x_train.shape[0] / 100
    num_genes_test  = x_test.shape[0] / 100

    print("Train / test data has %d / %d genes." % \
          (num_genes_train, num_genes_test))

    x_train = np.split(x_train, num_genes_train)
    x_test  = np.split(x_test, num_genes_test)

    if ravel:
        # Reshape by raveling each 100x5 array into a 500-length vector
        x_train = [g.ravel() for g in x_train]
        x_test  = [g.ravel() for g in x_test]
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    x_test  = np.array(x_test)
    y_train = np.ravel(y_train)
    
    
    print("x_train shape is %s" % str(x_train.shape))    
    print("y_train shape is %s" % str(y_train.shape))
    print("x_test shape is %s" % str(x_test.shape))
    
    return(x_train, y_train, x_test)

def minmax_scale(array, minimum=0, maximum=1):
    array_std = (array - array.min(axis=0)) / (array.max(axis=0) - array.min(axis=0))
    return array_std * (maximum - minimum) + minimum



In [3]:
x_train, y_train, x_test = load_data()
x_train_df = pd.DataFrame(x_train)
x_test_df = pd.DataFrame(x_test)

Loading data...
All files loaded. Preprocessing...
Train / test data has 15485 / 3871 genes.
x_train shape is (15485, 500)
y_train shape is (15485,)
x_test shape is (3871, 500)
Data preprocessing done...


In [4]:
x_train_100_5, y_train, x_test_100_5 = load_data(ravel=False)
x_train_100_5 = minmax_scale(x_train_100_5)
x_test_100_5 = minmax_scale(x_test_100_5)

Loading data...
All files loaded. Preprocessing...
Train / test data has 15485 / 3871 genes.
x_train shape is (15485, 100, 5)
y_train shape is (15485,)
x_test shape is (3871, 100, 5)
Data preprocessing done...


In [5]:
def get_stats(row, index=0, stat='mean'):
    if stat == 'mean':
        return row[index:500:5].mean()
    elif stat == 'perc0': 
        return np.mean(row[index:500:5] == 0)
    elif stat == 'std':
        return np.std(row[index:500:5])
    else:
        raise ValueError(stat + ' currently not supported.')

In [6]:
markers = ['H3K4me3', 'H3K4me1', 'H3K36me3', 'H3K9me3', 'H3K27me3']
dataframes = [x_train_df, x_test_df]
stats = ['mean', 'perc0', 'std']

for df in dataframes:
    for stat in stats:
        for i, j in enumerate(markers):
            df[j + '_' + stat] = df.apply(get_stats, args=(i, stat), axis=1)

In [7]:
xg_train = xgb.DMatrix(x_train_df, label=y_train)

In [9]:
def xgb_evaluate(min_child_weight,
                 colsample_bytree,
                 max_depth,
                 subsample,
                 gamma,
                 alpha):

    params['min_child_weight'] = int(min_child_weight)
    params['colsample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['max_depth'] = int(max_depth)
    params['subsample'] = max(min(subsample, 1), 0)
    params['gamma'] = max(gamma, 0)
    params['alpha'] = max(alpha, 0)

    cv_result = xgb.cv(params, xg_train, num_boost_round=num_rounds, 
                       nfold=5, seed=random_state, stratified=True, 
                       metrics='auc', callbacks=[xgb.callback.early_stop(100)])

    return cv_result['test-auc-mean'].values[-1]


num_rounds = 3000
num_iter = 2
init_points = 5
params = {
        'objective': 'binary:logistic',
        'eta': .01,
        'silent': 1,
        'verbose_eval': True,
        'seed': random_state
    }

xgbBO = BayesianOptimization(xgb_evaluate, {'min_child_weight': (1, 20),
                                            'colsample_bytree': (.1, 1),
                                            'max_depth': (1, 40),
                                            'subsample': (.6, 1),
                                            'gamma': (0, 8),
                                            'alpha': (0, 8),
                                            })

xgbBO.maximize(init_points=init_points, n_iter=num_iter)

[31mInitialization[0m
[94m---------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   colsample_bytree |     gamma |   max_depth |   min_child_weight |   subsample | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[1844]	train-auc:0.951736+0.00100339	test-auc:0.918979+0.00698068

    1 | 03m01s | [35m   0.91898[0m | [32m   4.8936[0m | [32m            0.5854[0m | [32m   0.3563[0m | [32m     3.4288[0m | [32m            4.5143[0m | [32m     0.6743[0m | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[876]	train-auc:0.999539+3.55562e-05	test-auc:0.919992+0.00618546

    2 | 04m19s | [35m   0.91999[0m | [32m   2.4851[0m | 



[31mBayesian Optimization[0m
[94m---------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   colsample_bytree |     gamma |   max_depth |   min_child_weight |   subsample | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[1304]	train-auc:0.998937+4.51442e-05	test-auc:0.917774+0.00703591

    6 | 13m29s |    0.91777 |    7.9754 |             0.8993 |    0.0578 |     39.3562 |             3.6012 |      0.9260 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
    7 | 07m46s |    0.91884 |    7.9573 |             0.1652 |    7.9872 |     19.5067 |             2.1825 |      0.9468 | 


In [10]:
bayes_params3 = {
    'objective': 'binary:logistic',
    'eta': .01,
    'alpha': 0.9939,
    'colsample_bytree': .1066,
    'gamma': 1.8422,
    'max_depth': 39,
    'min_child_weight': 7,
    'subsample': .8101,
    'seed': random_state,
    'silent': 1
}

In [12]:
xgb_cv = xgb.cv(bayes_params3, xg_train, num_boost_round=10000, early_stopping_rounds=200, nfold=5,
                stratified=True, verbose_eval=True, metrics='auc', seed=random_state)

[0]	train-auc:0.906633+0.00581156	test-auc:0.886849+0.0117642
[1]	train-auc:0.920163+0.00250473	test-auc:0.898137+0.00515209
[2]	train-auc:0.925403+0.00227319	test-auc:0.90277+0.00477192
[3]	train-auc:0.927815+0.00202068	test-auc:0.906217+0.00400852
[4]	train-auc:0.929821+0.0019787	test-auc:0.907839+0.00465316
[5]	train-auc:0.931135+0.0023938	test-auc:0.909536+0.00521998
[6]	train-auc:0.932222+0.00259713	test-auc:0.909966+0.00494245
[7]	train-auc:0.93317+0.00254182	test-auc:0.9106+0.00500454
[8]	train-auc:0.933745+0.00239626	test-auc:0.911318+0.00439832
[9]	train-auc:0.934476+0.00196349	test-auc:0.91147+0.0043765
[10]	train-auc:0.934874+0.00171577	test-auc:0.911797+0.0039877
[11]	train-auc:0.935287+0.00167982	test-auc:0.912434+0.00408773
[12]	train-auc:0.93578+0.00161783	test-auc:0.912655+0.00418142
[13]	train-auc:0.936024+0.0015447	test-auc:0.9128+0.00388814
[14]	train-auc:0.936242+0.00151727	test-auc:0.912872+0.00381564
[15]	train-auc:0.936635+0.00157077	test-auc:0.913161+0.00409302


[129]	train-auc:0.950795+0.000854917	test-auc:0.917896+0.0054143
[130]	train-auc:0.950892+0.000854655	test-auc:0.917912+0.00541602
[131]	train-auc:0.951003+0.000850877	test-auc:0.917932+0.00542791
[132]	train-auc:0.951093+0.000854805	test-auc:0.917959+0.00544311
[133]	train-auc:0.951193+0.000860324	test-auc:0.917971+0.00546522
[134]	train-auc:0.951298+0.000844889	test-auc:0.917974+0.00548146
[135]	train-auc:0.951406+0.00085463	test-auc:0.918023+0.00550234
[136]	train-auc:0.951514+0.000855068	test-auc:0.918016+0.00548969
[137]	train-auc:0.951612+0.000862156	test-auc:0.918022+0.00553448
[138]	train-auc:0.951727+0.000869399	test-auc:0.918018+0.005549
[139]	train-auc:0.951835+0.000861503	test-auc:0.918065+0.00552501
[140]	train-auc:0.951942+0.000868815	test-auc:0.918071+0.00551986
[141]	train-auc:0.952044+0.000851723	test-auc:0.918057+0.00549599
[142]	train-auc:0.952141+0.00085892	test-auc:0.918026+0.00551311
[143]	train-auc:0.952245+0.000841752	test-auc:0.918049+0.00551631
[144]	train-auc

[254]	train-auc:0.962379+0.000560677	test-auc:0.919401+0.00556288
[255]	train-auc:0.962465+0.000563277	test-auc:0.91943+0.00554441
[256]	train-auc:0.962545+0.000567815	test-auc:0.919429+0.00557524
[257]	train-auc:0.962633+0.000555065	test-auc:0.91942+0.00558308
[258]	train-auc:0.962706+0.000552212	test-auc:0.91943+0.00556146
[259]	train-auc:0.962786+0.000554656	test-auc:0.919439+0.00555019
[260]	train-auc:0.962873+0.00054962	test-auc:0.919451+0.00554684
[261]	train-auc:0.962957+0.000546157	test-auc:0.919456+0.00556039
[262]	train-auc:0.963034+0.000546678	test-auc:0.919445+0.00555902
[263]	train-auc:0.963117+0.000550226	test-auc:0.919435+0.00558106
[264]	train-auc:0.963195+0.000551163	test-auc:0.919449+0.0055745
[265]	train-auc:0.963282+0.000546471	test-auc:0.919461+0.00558696
[266]	train-auc:0.963365+0.000552255	test-auc:0.919492+0.00560126
[267]	train-auc:0.963445+0.000549405	test-auc:0.919499+0.00558808
[268]	train-auc:0.963527+0.000548407	test-auc:0.919509+0.00559381
[269]	train-auc

[380]	train-auc:0.971882+0.000446778	test-auc:0.92025+0.00555242
[381]	train-auc:0.971945+0.0004468	test-auc:0.920247+0.0055491
[382]	train-auc:0.972012+0.000439512	test-auc:0.920264+0.00555603
[383]	train-auc:0.972078+0.000439117	test-auc:0.92026+0.00553883
[384]	train-auc:0.97214+0.000434953	test-auc:0.920255+0.00553669
[385]	train-auc:0.972206+0.000424675	test-auc:0.920271+0.00552971
[386]	train-auc:0.972276+0.000424547	test-auc:0.920283+0.00554048
[387]	train-auc:0.972344+0.000421929	test-auc:0.920285+0.00554501
[388]	train-auc:0.972407+0.000425314	test-auc:0.92029+0.00554711
[389]	train-auc:0.972471+0.000425887	test-auc:0.920277+0.00555621
[390]	train-auc:0.972532+0.000418545	test-auc:0.920271+0.00554091
[391]	train-auc:0.972593+0.000416937	test-auc:0.920286+0.00554092
[392]	train-auc:0.972656+0.000415232	test-auc:0.920283+0.00553862
[393]	train-auc:0.972723+0.000410344	test-auc:0.920309+0.00552557
[394]	train-auc:0.972793+0.000411907	test-auc:0.920307+0.00554183
[395]	train-auc:0

[505]	train-auc:0.979433+0.000322864	test-auc:0.920762+0.00568679
[506]	train-auc:0.979485+0.000322234	test-auc:0.92077+0.00568264
[507]	train-auc:0.979538+0.000323544	test-auc:0.920773+0.00567486
[508]	train-auc:0.97959+0.000324053	test-auc:0.920777+0.0056744
[509]	train-auc:0.979641+0.000324113	test-auc:0.920789+0.00567464
[510]	train-auc:0.979693+0.00032308	test-auc:0.920807+0.00566845
[511]	train-auc:0.979745+0.00032313	test-auc:0.920792+0.00567144
[512]	train-auc:0.979795+0.000324268	test-auc:0.920797+0.00567263
[513]	train-auc:0.979848+0.000324713	test-auc:0.920806+0.00566789
[514]	train-auc:0.979899+0.000323063	test-auc:0.920806+0.0056775
[515]	train-auc:0.97995+0.00032073	test-auc:0.920818+0.00567062
[516]	train-auc:0.980005+0.000317473	test-auc:0.920815+0.00566107
[517]	train-auc:0.980058+0.00031644	test-auc:0.920807+0.00565853
[518]	train-auc:0.980105+0.000311976	test-auc:0.920821+0.0056609
[519]	train-auc:0.980153+0.000308407	test-auc:0.92082+0.00566877
[520]	train-auc:0.980

[630]	train-auc:0.985122+0.000184511	test-auc:0.920981+0.00567349
[631]	train-auc:0.985164+0.000185444	test-auc:0.92099+0.00567721
[632]	train-auc:0.985205+0.000187782	test-auc:0.92098+0.00567609
[633]	train-auc:0.985241+0.000190132	test-auc:0.920975+0.00567549
[634]	train-auc:0.985281+0.000190161	test-auc:0.920975+0.00566819
[635]	train-auc:0.985319+0.000188284	test-auc:0.920973+0.00567292
[636]	train-auc:0.985362+0.000186057	test-auc:0.920976+0.00568364
[637]	train-auc:0.985396+0.000187169	test-auc:0.920985+0.00568582
[638]	train-auc:0.985434+0.000186356	test-auc:0.920994+0.00567831
[639]	train-auc:0.985473+0.000184563	test-auc:0.920995+0.00568506
[640]	train-auc:0.985511+0.000179461	test-auc:0.921005+0.00569306
[641]	train-auc:0.985548+0.000180485	test-auc:0.921002+0.00568052
[642]	train-auc:0.985587+0.000177177	test-auc:0.921008+0.00568552
[643]	train-auc:0.985624+0.000179635	test-auc:0.921002+0.00567158
[644]	train-auc:0.98566+0.00017957	test-auc:0.921001+0.00567299
[645]	train-au

[755]	train-auc:0.989297+0.000131175	test-auc:0.921016+0.00574288
[756]	train-auc:0.989327+0.000131203	test-auc:0.921001+0.00574251
[757]	train-auc:0.989353+0.000132587	test-auc:0.921009+0.00573092
[758]	train-auc:0.989385+0.000130205	test-auc:0.921009+0.00573397
[759]	train-auc:0.989413+0.000130303	test-auc:0.921006+0.00572679
[760]	train-auc:0.989442+0.00012933	test-auc:0.921003+0.00573041
[761]	train-auc:0.989471+0.000129423	test-auc:0.920994+0.00574082
[762]	train-auc:0.9895+0.000129396	test-auc:0.921004+0.00573168
[763]	train-auc:0.989528+0.000128752	test-auc:0.921012+0.00573684
[764]	train-auc:0.989555+0.00012999	test-auc:0.921014+0.00573126
[765]	train-auc:0.989582+0.000130228	test-auc:0.921026+0.00571745
[766]	train-auc:0.989611+0.000128857	test-auc:0.921016+0.00572023
[767]	train-auc:0.989642+0.000130843	test-auc:0.921013+0.00572565
[768]	train-auc:0.989671+0.000129764	test-auc:0.921013+0.00571843
[769]	train-auc:0.989699+0.000129552	test-auc:0.921009+0.00570233
[770]	train-au

[881]	train-auc:0.992297+7.32759e-05	test-auc:0.921073+0.0057871
[882]	train-auc:0.992318+7.12388e-05	test-auc:0.921072+0.00577681
[883]	train-auc:0.992337+7.05833e-05	test-auc:0.921087+0.00578779
[884]	train-auc:0.992358+6.79618e-05	test-auc:0.921088+0.00578374
[885]	train-auc:0.992378+6.998e-05	test-auc:0.921089+0.00578761
[886]	train-auc:0.992398+7.11129e-05	test-auc:0.921093+0.0057794
[887]	train-auc:0.99242+6.90478e-05	test-auc:0.921099+0.00579002
[888]	train-auc:0.992439+6.979e-05	test-auc:0.921101+0.0057989
[889]	train-auc:0.992458+7.1051e-05	test-auc:0.921101+0.00579993
[890]	train-auc:0.992479+7.2522e-05	test-auc:0.921104+0.00580541
[891]	train-auc:0.992497+7.07152e-05	test-auc:0.921104+0.0057972
[892]	train-auc:0.992517+7.22617e-05	test-auc:0.921105+0.00580435
[893]	train-auc:0.992538+7.01524e-05	test-auc:0.921109+0.00580468
[894]	train-auc:0.992556+6.9681e-05	test-auc:0.921106+0.00580556
[895]	train-auc:0.992576+7.18704e-05	test-auc:0.921103+0.00581199
[896]	train-auc:0.9925

[1006]	train-auc:0.994401+4.18426e-05	test-auc:0.921089+0.00591703
[1007]	train-auc:0.994413+4.15375e-05	test-auc:0.92109+0.00591265
[1008]	train-auc:0.994427+4.08039e-05	test-auc:0.9211+0.00591129
[1009]	train-auc:0.994441+4.09458e-05	test-auc:0.921088+0.00591403
[1010]	train-auc:0.994455+3.98196e-05	test-auc:0.921096+0.00590471
[1011]	train-auc:0.994467+4.08245e-05	test-auc:0.921083+0.0059017
[1012]	train-auc:0.99448+4.13057e-05	test-auc:0.921088+0.00591626
[1013]	train-auc:0.994494+4.0802e-05	test-auc:0.921098+0.00591417
[1014]	train-auc:0.994507+4.01766e-05	test-auc:0.92109+0.00592358
[1015]	train-auc:0.99452+3.82905e-05	test-auc:0.921103+0.00592176
[1016]	train-auc:0.994534+3.79051e-05	test-auc:0.921098+0.0059205
[1017]	train-auc:0.994547+3.76117e-05	test-auc:0.921096+0.00593133
[1018]	train-auc:0.994561+3.79231e-05	test-auc:0.921094+0.00592795
[1019]	train-auc:0.994574+3.70546e-05	test-auc:0.921096+0.00592162
[1020]	train-auc:0.994587+3.77921e-05	test-auc:0.921099+0.00592978
[102

In [42]:
x_train_df_meta = x_train_df.copy()
x_train_df_meta['XGB'] = np.nan
x_train_df_meta['LR'] = np.nan
x_train_df_meta['ET'] = np.nan
x_train_df_meta['RF'] = np.nan

x_test_df_meta = x_test_df.copy()
x_test_df_meta['XGB'] = np.nan
x_test_df_meta['LR'] = np.nan
x_test_df_meta['ET'] = np.nan
x_test_df_meta['RF'] = np.nan

In [43]:
clfs = [LogisticRegression(C=.01, penalty='l1'),
       ExtraTreesClassifier(n_estimators=800, n_jobs=2),
       RandomForestClassifier(n_estimators=800, n_jobs=2, criterion='entropy')]

clf_names = ['LR', 'ET', 'RF']

In [44]:
num_fold = 0
n_epochs = 2

CNN_test_preds = []
RNN_test_preds = []

neural_nets = ['CNN', 'RNN']

for train_index, test_index in cv.split(x_train_df, y_train):
    
    # Valitaan data sekä neuroverkoille että muille käytettäville malleille
    X_cvtrain = x_train_df.iloc[train_index]
    X_cvtest = x_train_df.iloc[test_index]
    X_cvtrain_100_5 = x_train_100_5[train_index]
    X_cvtest_100_5 = x_train_100_5[test_index]
    y_cvtrain = y_train[train_index]
    y_cvtest = y_train[test_index]
    
    num_fold += 1
    
    # Valmiiksi oltiin luotu viisi kansiota hakemistoon, yksi kutakin kierrosta varten
    directory = ('/Users/shrey/Desktop/summer/gene prediction/Kaggle-gene-expression-master/Fold'+ str(num_fold)) 
    if not os.path.exists(directory):
        os.makedirs(directory)
    os.chdir(directory)
    
    # Koulutetaan molemmat neuroverkot loopissa, CNN ja CNN+RNN
    
    # Rullataan scikit-learn mallit läpi ja ennustetaan jokaisella
    for i, clf in enumerate(clfs):
        clf.fit(X_cvtrain, y_cvtrain)
        clf_pred = clf.predict_proba(X_cvtest)[:, 1]
        x_train_df_meta.loc[test_index, clf_names[i]] = clf_pred
        
        print('Fold', num_fold, clf_names[i], 'CV AUC:', roc_auc_score(y_cvtest, clf_pred))
    
    # Koulutetaan XGBoost-malli aiemmin löydetyillä parametreilla
    xg_cvtrain = xgb.DMatrix(X_cvtrain, label=y_cvtrain)
    xg_cvtest = xgb.DMatrix(X_cvtest)
    
    xgb_model = xgb.train(bayes_params3, xg_cvtrain, num_boost_round=1184)
    xgb_cvpred = xgb_model.predict(xg_cvtest)
    x_train_df_meta.loc[test_index, 'XGB'] = xgb_cvpred
    print('Fold', num_fold, 'XGB CV AUC:', roc_auc_score(y_cvtest, xgb_cvpred))
    
    print('Fold', num_fold, 'completed.')
    print(50 * '-')

print('Train meta filled. Thank you.')
os.chdir('/Users/shrey/Desktop/summer/gene prediction/Kaggle-gene-expression-master')

Fold 1 LR CV AUC: 0.917492639802
Fold 1 ET CV AUC: 0.921853119457
Fold 1 RF CV AUC: 0.925319414252
Fold 1 XGB CV AUC: 0.927216561529
Fold 1 completed.
--------------------------------------------------
Fold 2 LR CV AUC: 0.914304957256
Fold 2 ET CV AUC: 0.918060491991
Fold 2 RF CV AUC: 0.921160860751
Fold 2 XGB CV AUC: 0.923885926101
Fold 2 completed.
--------------------------------------------------
Fold 3 LR CV AUC: 0.909510452298
Fold 3 ET CV AUC: 0.917681535845
Fold 3 RF CV AUC: 0.920647531159
Fold 3 XGB CV AUC: 0.924156096764
Fold 3 completed.
--------------------------------------------------
Fold 4 LR CV AUC: 0.913788577784
Fold 4 ET CV AUC: 0.916791546951
Fold 4 RF CV AUC: 0.918991609965
Fold 4 XGB CV AUC: 0.92067212092
Fold 4 completed.
--------------------------------------------------
Fold 5 LR CV AUC: 0.895832107483
Fold 5 ET CV AUC: 0.904875860443
Fold 5 RF CV AUC: 0.909656991314
Fold 5 XGB CV AUC: 0.910069085818
Fold 5 completed.
------------------------------------------

In [45]:
epoch_auc

0.90648814462215588

In [46]:
x_train_df_meta.isnull().sum()

0                 0
1                 0
2                 0
3                 0
4                 0
5                 0
6                 0
7                 0
8                 0
9                 0
10                0
11                0
12                0
13                0
14                0
15                0
16                0
17                0
18                0
19                0
20                0
21                0
22                0
23                0
24                0
25                0
26                0
27                0
28                0
29                0
                 ..
489               0
490               0
491               0
492               0
493               0
494               0
495               0
496               0
497               0
498               0
499               0
H3K4me3_mean      0
H3K4me1_mean      0
H3K36me3_mean     0
H3K9me3_mean      0
H3K27me3_mean     0
H3K4me3_perc0     0
H3K4me1_perc0     0
H3K36me3_perc0    0


In [47]:
x_train_df_meta.to_csv('train_meta.csv', index=False)

In [48]:
LR = LogisticRegression()
used_cols = ['XGB', 'LR', 'ET', 'RF']

cross_val_score(LR, x_train_df_meta[used_cols], y_train, scoring='roc_auc', cv=cv).mean()

0.92099685379871321

In [49]:
stat_cols = x_train_df_meta.columns[500:-7].values
used_cols = ['XGB','LR', 'ET','H3K27me3_std', 
             'H3K27me3_mean', 'H3K4me1_perc0', 'H3K9me3_std']

print('Baseline:', cross_val_score(LR, x_train_df_meta[used_cols], 
                      y_train, scoring='roc_auc', cv=cv).mean())

# Printataan uudet CV-pisteet featureita yksi kerrallaan lisäillen
for i in range(len(stat_cols)):
    print(stat_cols[i], cross_val_score(LR, x_train_df_meta[used_cols + [stat_cols[i]]],
                                        y_train, scoring='roc_auc', cv=cv).mean())

Baseline: 0.920723712877
H3K4me3_mean 0.919912882641
H3K4me1_mean 0.920257822472
H3K36me3_mean 0.92059741298
H3K9me3_mean 0.920386984768
H3K27me3_mean 0.920729969464
H3K4me3_perc0 0.919964759025
H3K4me1_perc0 0.920730470505
H3K36me3_perc0 0.92045173902
H3K9me3_perc0 0.920225272155
H3K27me3_perc0 0.920466890476
H3K4me3_std 0.91989614191
H3K4me1_std 0.920249123134


In [50]:
xgbs = []
xg_train = xgb.DMatrix(x_train_df, label=y_train)
xg_test = xgb.DMatrix(x_test_df)

for i in range(1, 8):
    complete_xgb_model = xgb.train(bayes_params3, xg_train, num_boost_round=int(1184/.8))
    xgb_pred = complete_xgb_model.predict(xg_test)
    xgbs.append(xgb_pred)
    
x_test_df_meta['XGB'] = np.mean(xgbs, axis=0)

In [51]:
final_clfs = [LogisticRegression(C=.01, penalty='l1'),
              ExtraTreesClassifier(n_estimators=800, n_jobs=2)]
final_clf_names = ['LR', 'ET']

x_test_df_meta.drop('RF', axis=1, inplace=True) # Random forest huomattiin haitalliseksi ensemblessä


for i, clf in enumerate(final_clfs):
    clf.fit(x_train_df, y_train)
    x_test_df_meta[final_clf_names[i]] = clf.predict_proba(x_test_df)[:, 1]

In [52]:
xg_train_meta = xgb.DMatrix(x_train_df_meta[used_cols], label=y_train)
xg_test_meta = xgb.DMatrix(x_test_df_meta[used_cols])

In [53]:
def xgb_evaluate(min_child_weight,
                 colsample_bytree,
                 max_depth,
                 subsample,
                 gamma,
                 alpha):

    params['min_child_weight'] = int(min_child_weight)
    params['colsample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['max_depth'] = int(max_depth)
    params['subsample'] = max(min(subsample, 1), 0)
    params['gamma'] = max(gamma, 0)
    params['alpha'] = max(alpha, 0)

    cv_result = xgb.cv(params, xg_train_meta, num_boost_round=num_rounds, 
                       nfold=5, seed=random_state, stratified=True, 
                       metrics='auc', callbacks=[xgb.callback.early_stop(100)])

    return cv_result['test-auc-mean'].values[-1]


num_rounds = 3000
num_iter = 120
init_points = 5
params = {
        'objective': 'binary:logistic',
        'eta': .01,
        'silent': 1,
        'verbose_eval': True,
        'seed': random_state
    }

xgbBO = BayesianOptimization(xgb_evaluate, {'min_child_weight': (1, 20),
                                            'colsample_bytree': (.1, 1),
                                            'max_depth': (1, 40),
                                            'subsample': (.6, 1),
                                            'gamma': (0, 8),
                                            'alpha': (0, 8),
                                            })

xgbBO.maximize(init_points=init_points, n_iter=num_iter)

[31mInitialization[0m
[94m---------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   colsample_bytree |     gamma |   max_depth |   min_child_weight |   subsample | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[1241]	train-auc:0.927614+0.0014028	test-auc:0.919835+0.00600617

    1 | 00m29s | [35m   0.91983[0m | [32m   5.7595[0m | [32m            0.3749[0m | [32m   2.5662[0m | [32m     5.6199[0m | [32m            3.1571[0m | [32m     0.6286[0m | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[296]	train-auc:0.924138+0.00143912	test-auc:0.920761+0.00611824

    2 | 00m12s | [35m   0.92076[0m | [32m   6.1588[0m | [

  " state: %s" % convergence_dict)


[31mBayesian Optimization[0m
[94m---------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   colsample_bytree |     gamma |   max_depth |   min_child_weight |   subsample | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[115]	train-auc:0.946955+0.00176823	test-auc:0.917526+0.00572783

    6 | 00m20s |    0.91753 |    0.4389 |             0.3289 |    0.1813 |     39.7229 |             1.2710 |      0.6329 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[575]	train-auc:0.922843+0.00150042	test-auc:0.920631+0.00622723

    7 | 00m24s |    0.92063 |    7.5140 |             0.9312 |    7.9824 |      2.0596 |            14.6366 |      0.9

  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[402]	train-auc:0.923552+0.00144994	test-auc:0.92062+0.00621686

   10 | 00m36s |    0.92062 |    7.5552 |             0.9311 |    7.6344 |     17.3634 |             1.4539 |      0.9942 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[285]	train-auc:0.922505+0.00145138	test-auc:0.920762+0.00611941

   11 | 00m20s | [35m   0.92076[0m | [32m   4.9506[0m | [32m            0.9177[0m | [32m   7.9255[0m | [32m     3.3033[0m | [32m            1.9153[0m | [32m     0.6024[0m | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[310]	train-auc:0.922644+0.0014575	test-auc:0.920787+0.00614625

   12 | 00m24s | [35m   0.92079[0m | [32m   7.6898[0m | [32m            0.9594[0m | [32m   7.2503[0m | [32m    20.5796[0m | [32m           14.3377[0m | [32m     0.6317[0m | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[597]	train-auc:0.923065+0.00148916	test-auc:0.920776+0.00625179

   13 | 00m24s |    0.92078 |    0.1259 |             0.8123 |    7.7127 |      2.1315 |            19.5155 |      0.6972 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[476]	train-auc:0.923131+0.00152521	test-auc:0.920858+0.00627978

   14 | 00m32s | [35m   0.92086[0m | [32m   6.2443[0m | [32m            0.9980[0m | [32m   7.9779[0m | [32m    38.5873[0m | [32m            8.4753[0m | [32m     0.6291[0m | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[94]	train-auc:0.926708+0.00123717	test-auc:0.920428+0.00613308

   15 | 00m20s |    0.92043 |    0.9281 |             0.9857 |    1.0648 |     38.1483 |            19.5022 |      0.6127 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[864]	train-auc:0.922742+0.00147934	test-auc:0.920944+0.00620553

   16 | 00m28s | [35m   0.92094[0m | [32m   7.7027[0m | [32m            0.9728[0m | [32m   1.7325[0m | [32m     1.7462[0m | [32m            4.5124[0m | [32m     0.6214[0m | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[434]	train-auc:0.923673+0.00139092	test-auc:0.920787+0.00623952

   17 | 00m26s |    0.92079 |    7.0255 |             0.9468 |    5.7126 |      7.7004 |             4.8956 |      0.6203 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[639]	train-auc:0.922609+0.00154495	test-auc:0.920974+0.00627357

   18 | 00m24s | [35m   0.92097[0m | [32m   2.5107[0m | [32m            0.9805[0m | [32m   0.6505[0m | [32m     1.5347[0m | [32m           18.1815[0m | [32m     0.7061[0m | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[629]	train-auc:0.922512+0.00155129	test-auc:0.920932+0.00624291

   19 | 00m24s |    0.92093 |    2.9230 |             0.9429 |    6.5914 |      1.3307 |             9.2493 |      0.6369 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[298]	train-auc:0.92504+0.00136304	test-auc:0.920724+0.00615457

   20 | 00m26s |    0.92072 |    3.7779 |             0.9954 |    5.2317 |     25.3607 |             9.1728 |      0.6326 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[370]	train-auc:0.92294+0.00146004	test-auc:0.920954+0.00619949

   21 | 00m21s |    0.92095 |    0.8271 |             0.9600 |    3.7251 |      2.1790 |            17.0263 |      0.6213 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[382]	train-auc:0.924329+0.00147568	test-auc:0.920862+0.00620457

   22 | 00m24s |    0.92086 |    1.0765 |             0.9984 |    6.9740 |      4.6033 |            15.7260 |      0.7134 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[676]	train-auc:0.922722+0.00153606	test-auc:0.920968+0.00635123

   23 | 00m26s |    0.92097 |    0.0270 |             0.9926 |    1.1650 |      1.5344 |            19.2001 |      0.7790 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[220]	train-auc:0.925124+0.00144796	test-auc:0.920552+0.00627329

   24 | 00m20s |    0.92055 |    2.7685 |             0.9891 |    5.9184 |     31.6518 |            16.8643 |      0.9451 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[282]	train-auc:0.923975+0.00148148	test-auc:0.920773+0.00618501

   25 | 00m25s |    0.92077 |    5.7254 |             0.9899 |    5.1603 |     11.2135 |             3.6324 |      0.6033 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[736]	train-auc:0.9226+0.00152534	test-auc:0.920741+0.00635342

   26 | 00m24s |    0.92074 |    3.8001 |             0.7847 |    3.7576 |      1.1687 |            19.2853 |      0.6774 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[306]	train-auc:0.923181+0.00151962	test-auc:0.920769+0.00609204

   27 | 00m26s |    0.92077 |    7.3069 |             0.9119 |    7.2954 |     26.0005 |             5.6169 |      0.8440 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[841]	train-auc:0.92295+0.00152664	test-auc:0.920977+0.00628756

   28 | 00m24s | [35m   0.92098[0m | [32m   3.5960[0m | [32m            0.9510[0m | [32m   2.7757[0m | [32m     1.8280[0m | [32m            9.8457[0m | [32m     0.9051[0m | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[132]	train-auc:0.928075+0.00133371	test-auc:0.920213+0.00598358

   29 | 00m20s |    0.92021 |    1.7306 |             0.9848 |    1.9571 |     10.0557 |            16.8835 |      0.8898 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[562]	train-auc:0.923165+0.00144367	test-auc:0.920863+0.00622037

   30 | 00m24s |    0.92086 |    3.8381 |             0.9166 |    5.7628 |      2.0013 |            18.3350 |      0.7986 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[314]	train-auc:0.925291+0.00138961	test-auc:0.920751+0.0062374

   31 | 00m23s |    0.92075 |    0.9655 |             0.9988 |    4.5653 |      4.2217 |            15.8979 |      0.6291 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.924439+0.00136859	test-auc:0.920645+0.00611215

   32 | 00m26s |    0.92064 |    5.8875 |             0.9857 |    5.2012 |     30.9188 |             7.0318 |      0.8380 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[728]	train-auc:0.923802+0.00150763	test-auc:0.920571+0.00623942

   33 | 00m37s |    0.92057 |    7.5069 |             0.8279 |    6.8366 |     20.9828 |             9.8139 |      0.8486 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.927365+0.00134675	test-auc:0.920203+0.00605755

   34 | 00m26s |    0.92020 |    7.6356 |             0.9632 |    0.9420 |     13.6871 |             5.4695 |      0.6035 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[300]	train-auc:0.923716+0.0014288	test-auc:0.920696+0.00619292

   35 | 00m26s |    0.92070 |    6.3582 |             0.9142 |    4.7849 |     23.5016 |            12.7012 |      0.6124 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.92345+0.00152623	test-auc:0.920848+0.00616905

   36 | 00m27s |    0.92085 |    3.2932 |             0.9026 |    7.9425 |     26.0307 |            18.2062 |      0.7338 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.923586+0.00147219	test-auc:0.920873+0.0062281

   37 | 00m27s |    0.92087 |    3.0298 |             0.8968 |    7.4919 |     29.6745 |            19.1575 |      0.6750 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[287]	train-auc:0.924384+0.0014205	test-auc:0.920748+0.00614178

   38 | 00m26s |    0.92075 |    4.7146 |             0.9919 |    4.9816 |     36.2258 |            18.8615 |      0.7124 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[652]	train-auc:0.922601+0.00151196	test-auc:0.920859+0.00626375

   39 | 00m27s |    0.92086 |    5.8798 |             0.9182 |    5.5875 |      1.0741 |            14.1124 |      0.9407 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[296]	train-auc:0.923375+0.00145491	test-auc:0.920482+0.00610193

   40 | 00m27s |    0.92048 |    5.8873 |             0.8528 |    7.1089 |     18.8099 |            10.5537 |      0.8844 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.925457+0.00130951	test-auc:0.920752+0.00619774

   41 | 00m28s |    0.92075 |    1.1189 |             0.9789 |    6.8263 |     18.9333 |            19.5115 |      0.7371 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[287]	train-auc:0.923053+0.00148331	test-auc:0.920816+0.00609776

   42 | 00m22s |    0.92082 |    6.6905 |             0.9173 |    7.5609 |     13.2355 |            14.9217 |      0.8492 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[245]	train-auc:0.924363+0.00155754	test-auc:0.920891+0.00640883

   43 | 00m27s |    0.92089 |    0.4876 |             0.9978 |    7.5826 |     38.7798 |            19.7687 |      0.6071 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[453]	train-auc:0.923361+0.00144832	test-auc:0.92077+0.00615189

   44 | 00m26s |    0.92077 |    4.6072 |             0.9053 |    0.1974 |      2.4698 |             2.1273 |      0.8578 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.923229+0.0014366	test-auc:0.920768+0.00613638

   45 | 00m25s |    0.92077 |    5.9350 |             0.8640 |    7.5373 |     25.6929 |             6.9418 |      0.7723 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[284]	train-auc:0.923104+0.00154478	test-auc:0.92086+0.00609066

   46 | 00m28s |    0.92086 |    5.3919 |             0.9390 |    6.8003 |     22.4700 |            10.6699 |      0.6017 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[741]	train-auc:0.92275+0.00156468	test-auc:0.92074+0.00633533

   47 | 00m29s |    0.92074 |    0.5944 |             0.7851 |    1.3427 |      1.0701 |            13.5869 |      0.8501 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[606]	train-auc:0.923211+0.00149223	test-auc:0.920654+0.00629682

   48 | 00m42s |    0.92065 |    7.0348 |             0.7207 |    7.9063 |     24.5285 |             4.2675 |      0.7490 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[200]	train-auc:0.926288+0.00145007	test-auc:0.92033+0.00604798

   49 | 00m25s |    0.92033 |    1.6759 |             0.7277 |    5.0634 |     23.8517 |            17.8422 |      0.9356 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[287]	train-auc:0.925321+0.00139147	test-auc:0.920599+0.00614403

   50 | 00m28s |    0.92060 |    6.4292 |             0.9404 |    3.4601 |     24.0466 |             5.0797 |      0.6861 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[238]	train-auc:0.924939+0.00142806	test-auc:0.920639+0.00620859

   51 | 00m26s |    0.92064 |    0.1696 |             0.9242 |    5.2574 |      4.2493 |            15.0879 |      0.8791 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[707]	train-auc:0.922664+0.00152627	test-auc:0.920983+0.00625958

   52 | 00m28s | [35m   0.92098[0m | [32m   7.8545[0m | [32m            0.9540[0m | [32m   0.7247[0m | [32m     1.4981[0m | [32m            5.4769[0m | [32m     0.7820[0m | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[318]	train-auc:0.923368+0.00146759	test-auc:0.920773+0.00614741

   53 | 00m31s |    0.92077 |    6.1821 |             0.9511 |    7.1466 |     18.0061 |            16.9160 |      0.8397 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[486]	train-auc:0.9233+0.00140712	test-auc:0.920743+0.00615009

   54 | 00m24s |    0.92074 |    7.4356 |             0.9306 |    1.5633 |      2.1445 |             7.6332 |      0.8155 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[526]	train-auc:0.92365+0.00144032	test-auc:0.920761+0.00613962

   55 | 00m30s |    0.92076 |    3.5338 |             0.9990 |    2.3803 |      2.0495 |            11.5753 |      0.9374 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[749]	train-auc:0.924007+0.00145953	test-auc:0.920625+0.00623245

   56 | 00m50s |    0.92063 |    7.9548 |             0.8365 |    6.2027 |     39.6689 |             1.4269 |      0.7668 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[605]	train-auc:0.92408+0.00142818	test-auc:0.920745+0.00622231

   57 | 00m36s |    0.92075 |    2.3422 |             0.9543 |    6.7558 |      3.6204 |             4.2237 |      0.9416 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[782]	train-auc:0.922877+0.00151141	test-auc:0.920931+0.00626313

   58 | 00m35s |    0.92093 |    2.9389 |             0.9162 |    0.6876 |      1.5563 |             5.5760 |      0.8921 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[369]	train-auc:0.923486+0.00153212	test-auc:0.920871+0.00627007

   59 | 00m42s |    0.92087 |    5.0922 |             0.9979 |    7.9745 |     38.6193 |             1.1649 |      0.6957 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[628]	train-auc:0.922539+0.00155431	test-auc:0.920846+0.00625813

   60 | 00m33s |    0.92085 |    7.3494 |             0.9788 |    5.7602 |      1.0319 |             1.7972 |      0.8128 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[611]	train-auc:0.923312+0.00147438	test-auc:0.920887+0.00630157

   61 | 00m50s |    0.92089 |    6.7089 |             0.9552 |    7.8941 |     22.7998 |             4.8774 |      0.6494 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.922963+0.00153342	test-auc:0.920625+0.006019

   62 | 00m38s |    0.92062 |    4.1701 |             0.7995 |    7.0459 |     12.4357 |            19.1539 |      0.6112 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[430]	train-auc:0.923496+0.00143711	test-auc:0.92082+0.00623812

   63 | 00m51s |    0.92082 |    6.3047 |             0.8749 |    6.7893 |     34.8811 |             1.4301 |      0.6016 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[422]	train-auc:0.924621+0.0014691	test-auc:0.920892+0.00624957

   64 | 00m54s |    0.92089 |    2.7275 |             0.9046 |    7.5375 |     22.0447 |            15.3271 |      0.6689 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[639]	train-auc:0.923624+0.00145298	test-auc:0.92066+0.00630102

   65 | 00m59s |    0.92066 |    4.5911 |             0.8080 |    7.6283 |      6.3441 |            18.7201 |      0.6605 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[856]	train-auc:0.922931+0.00152215	test-auc:0.920914+0.00628934

   66 | 00m55s |    0.92091 |    2.4043 |             0.9245 |    4.5604 |      1.1349 |             8.7694 |      0.8320 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[232]	train-auc:0.92671+0.0012783	test-auc:0.920532+0.00620588

   67 | 00m53s |    0.92053 |    1.0156 |             0.7859 |    5.5513 |     35.0430 |            14.0083 |      0.7372 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[737]	train-auc:0.922731+0.00155195	test-auc:0.920965+0.00627049

   68 | 00m53s |    0.92096 |    3.8131 |             0.9551 |    4.7718 |      1.0480 |            18.7533 |      0.7946 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.92386+0.00144963	test-auc:0.920759+0.00609082

   69 | 00m54s |    0.92076 |    5.2304 |             0.9759 |    6.2870 |     29.4789 |             8.4637 |      0.7658 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[1041]	train-auc:0.923272+0.00131926	test-auc:0.919582+0.00600435

   70 | 01m06s |    0.91958 |    4.6339 |             0.5600 |    7.4563 |      3.1082 |             6.4200 |      0.9977 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
   71 | 01m51s |    0.91733 |    7.4801 |             0.2074 |    0.5341 |     39.7759 |            19.7680 |      0.9843 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[388]	train-auc:0.923248+0.00149008	test-auc:0.920926+0.0062736

   72 | 00m49s |    0.92093 |    0.3097 |             0.9698 |    6.5533 |      2.0038 |             1.1019 |      0.9425 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[315]	train-auc:0.922773+0.0015136	test-auc:0.920669+0.00622478

   73 | 00m54s |    0.92067 |    6.6287 |             0.9366 |    7.0119 |     12.4390 |            19.4366 |      0.6136 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[96]	train-auc:0.930117+0.00104578	test-auc:0.920335+0.00609516

   74 | 00m41s |    0.92034 |    0.1630 |             0.9439 |    7.3261 |     17.6371 |             1.1555 |      0.6983 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[629]	train-auc:0.922592+0.00155958	test-auc:0.920986+0.00626512

   75 | 00m57s | [35m   0.92099[0m | [32m   1.5395[0m | [32m            0.9855[0m | [32m   6.3995[0m | [32m     1.8155[0m | [32m           13.7314[0m | [32m     0.7561[0m | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[236]	train-auc:0.923847+0.00146046	test-auc:0.920725+0.0061308

   76 | 00m56s |    0.92073 |    3.0582 |             0.9201 |    7.4903 |     17.9654 |            15.5667 |      0.8277 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[727]	train-auc:0.92275+0.00156496	test-auc:0.920986+0.00629392

   77 | 00m58s |    0.92099 |    4.1364 |             0.9637 |    0.4937 |      1.1979 |            11.4591 |      0.7955 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[477]	train-auc:0.924789+0.00141522	test-auc:0.920493+0.00612802

   78 | 01m05s |    0.92049 |    2.8123 |             0.6232 |    6.4237 |     32.3801 |            19.7821 |      0.6509 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[719]	train-auc:0.922785+0.00156887	test-auc:0.920923+0.00633653

   79 | 01m02s |    0.92092 |    1.5765 |             0.9494 |    1.1241 |      1.3823 |             9.2610 |      0.8071 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[273]	train-auc:0.924248+0.00143575	test-auc:0.920839+0.00616206

   80 | 01m03s |    0.92084 |    4.6833 |             0.9006 |    5.3958 |     25.0566 |             8.9117 |      0.6776 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.923094+0.00152976	test-auc:0.920939+0.00613587

   81 | 01m02s |    0.92094 |    4.1937 |             0.9860 |    7.0243 |      5.9642 |            18.7650 |      0.6204 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[650]	train-auc:0.922612+0.0015314	test-auc:0.920918+0.00624264

   82 | 01m03s |    0.92092 |    5.4936 |             0.9712 |    0.0596 |      1.0734 |             7.4983 |      0.7188 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[299]	train-auc:0.92582+0.00150713	test-auc:0.920776+0.0061815

   83 | 01m04s |    0.92078 |    1.2371 |             0.9490 |    7.3955 |      6.5828 |             2.1623 |      0.9803 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[607]	train-auc:0.922994+0.00149555	test-auc:0.920753+0.00622761

   84 | 01m12s |    0.92075 |    7.5955 |             0.8982 |    7.8056 |     30.9021 |            16.5098 |      0.6344 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[767]	train-auc:0.922761+0.00148737	test-auc:0.920825+0.00622747

   85 | 01m05s |    0.92082 |    6.7959 |             0.9305 |    1.2727 |      1.0564 |            15.9609 |      0.8495 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[905]	train-auc:0.922721+0.00150933	test-auc:0.920741+0.00629328

   86 | 01m06s |    0.92074 |    7.2854 |             0.8107 |    3.4332 |      1.0178 |             8.1078 |      0.8641 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[275]	train-auc:0.922859+0.00156281	test-auc:0.920835+0.00628719

   87 | 00m59s |    0.92083 |    5.0834 |             0.9037 |    7.7191 |     38.7488 |            14.1874 |      0.6471 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[759]	train-auc:0.922489+0.00150321	test-auc:0.920883+0.00628041

   88 | 01m06s |    0.92088 |    5.6779 |             0.9704 |    7.7692 |      1.4423 |            15.9691 |      0.6085 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[571]	train-auc:0.924188+0.00145187	test-auc:0.920471+0.00616866

   89 | 01m11s |    0.92047 |    4.9930 |             0.6112 |    6.4246 |     31.0103 |            13.7885 |      0.6900 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[423]	train-auc:0.922766+0.00146083	test-auc:0.920741+0.00619751

   90 | 01m07s |    0.92074 |    7.9745 |             0.8824 |    7.3166 |     21.3112 |            16.6724 |      0.6010 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[282]	train-auc:0.928293+0.0013111	test-auc:0.92+0.00608705

   91 | 01m05s |    0.92000 |    2.4547 |             0.6975 |    3.5417 |     39.7503 |            13.1999 |      0.7861 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[678]	train-auc:0.924966+0.00141666	test-auc:0.920066+0.00607024

   92 | 01m11s |    0.92007 |    6.8149 |             0.4803 |    4.1903 |     23.9915 |             9.8162 |      0.6929 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[288]	train-auc:0.925237+0.00155172	test-auc:0.92046+0.00611656

   93 | 01m09s |    0.92046 |    6.8984 |             0.8118 |    3.5118 |     25.4969 |             1.0784 |      0.7774 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[729]	train-auc:0.922762+0.00154043	test-auc:0.921001+0.00634223

   94 | 01m14s | [35m   0.92100[0m | [32m   2.9347[0m | [32m            0.9026[0m | [32m   3.4787[0m | [32m     1.6080[0m | [32m            4.3264[0m | [32m     0.7854[0m | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[312]	train-auc:0.924965+0.00144613	test-auc:0.920674+0.00618678

   95 | 01m13s |    0.92067 |    0.4431 |             0.8142 |    7.6762 |      9.3601 |            19.5767 |      0.7212 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[253]	train-auc:0.927116+0.00136109	test-auc:0.920772+0.00632527

   96 | 00m54s |    0.92077 |    0.4065 |             0.9993 |    7.9853 |     39.4390 |             6.2824 |      0.6269 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[727]	train-auc:0.922661+0.00154354	test-auc:0.920754+0.00632837

   97 | 01m18s |    0.92075 |    1.6641 |             0.8387 |    2.5653 |      1.4279 |             5.2389 |      0.7450 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[405]	train-auc:0.924478+0.00136194	test-auc:0.920641+0.00605983

   98 | 01m10s |    0.92064 |    7.8839 |             1.0000 |    5.1012 |      5.3729 |             1.0412 |      0.9945 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[828]	train-auc:0.923007+0.00153451	test-auc:0.920929+0.0063125

   99 | 01m24s |    0.92093 |    2.3555 |             0.9007 |    3.8133 |      1.0156 |             9.5445 |      0.9760 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[462]	train-auc:0.923179+0.00147584	test-auc:0.920775+0.00618103

  100 | 01m28s |    0.92078 |    6.8349 |             0.9776 |    7.5372 |     25.4858 |            10.0340 |      0.6814 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[294]	train-auc:0.925387+0.00140571	test-auc:0.920501+0.00592231

  101 | 01m12s |    0.92050 |    6.2898 |             0.9553 |    3.6759 |     12.9508 |            15.5595 |      0.9558 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[404]	train-auc:0.926319+0.00141708	test-auc:0.920458+0.00615362

  102 | 01m22s |    0.92046 |    5.0592 |             0.7547 |    4.9776 |     29.3266 |             1.0048 |      0.8626 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[760]	train-auc:0.926566+0.00139186	test-auc:0.920035+0.00608206

  103 | 01m48s |    0.92004 |    4.7519 |             0.5462 |    5.4208 |     12.5299 |             3.5241 |      0.9054 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[300]	train-auc:0.923267+0.00148651	test-auc:0.920769+0.00612346

  104 | 01m27s |    0.92077 |    6.1730 |             0.9690 |    6.2321 |      8.6354 |            13.5161 |      0.6844 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[307]	train-auc:0.923463+0.00153528	test-auc:0.920456+0.00610556

  105 | 01m29s |    0.92046 |    6.2047 |             0.8341 |    6.3746 |     11.4516 |            17.2369 |      0.8700 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[628]	train-auc:0.925132+0.00146515	test-auc:0.920697+0.00629325

  106 | 01m46s |    0.92070 |    6.1309 |             0.9897 |    6.4129 |     22.3695 |             7.7409 |      0.9667 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[782]	train-auc:0.923875+0.00143897	test-auc:0.920665+0.00616225

  107 | 01m49s |    0.92067 |    7.9197 |             0.8047 |    6.0128 |     20.9105 |             6.2224 |      0.6857 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[281]	train-auc:0.924158+0.00139604	test-auc:0.920891+0.00616112

  108 | 01m32s |    0.92089 |    3.5812 |             0.9949 |    6.6170 |     23.4322 |             4.8063 |      0.6222 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[290]	train-auc:0.923625+0.00144102	test-auc:0.920691+0.00608128

  109 | 01m36s |    0.92069 |    5.2700 |             0.9930 |    6.2195 |     13.6900 |            17.1975 |      0.7470 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[182]	train-auc:0.925807+0.00132811	test-auc:0.920769+0.00628276

  110 | 01m31s |    0.92077 |    0.2325 |             0.8862 |    6.6087 |     15.2937 |            16.1186 |      0.7881 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[629]	train-auc:0.923682+0.00147553	test-auc:0.920797+0.00622058

  111 | 01m47s |    0.92080 |    7.6322 |             0.9816 |    6.8474 |     13.1505 |             9.3536 |      0.7619 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[977]	train-auc:0.922952+0.00154088	test-auc:0.92087+0.0062909

  112 | 01m41s |    0.92087 |    1.9680 |             0.9891 |    5.5140 |      1.5439 |             3.8858 |      0.9745 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[633]	train-auc:0.923369+0.00152638	test-auc:0.92082+0.00623641

  113 | 01m46s |    0.92082 |    6.5878 |             0.9429 |    7.8879 |     26.4545 |            18.1465 |      0.7699 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[419]	train-auc:0.923278+0.00142338	test-auc:0.920756+0.00627987

  114 | 01m50s |    0.92076 |    6.8722 |             0.9972 |    7.7917 |     21.0043 |             1.2566 |      0.7698 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[122]	train-auc:0.926718+0.00150248	test-auc:0.920391+0.00623404

  115 | 00m45s |    0.92039 |    0.4306 |             0.8148 |    4.6342 |      9.1399 |            15.4448 |      0.8103 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[318]	train-auc:0.923971+0.00138077	test-auc:0.920785+0.00611982

  116 | 01m50s |    0.92078 |    6.0983 |             0.8825 |    6.3956 |     27.2667 |             3.7612 |      0.8496 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[806]	train-auc:0.922763+0.00147791	test-auc:0.920883+0.00625281

  117 | 01m42s |    0.92088 |    7.7847 |             0.9302 |    3.0011 |      1.1834 |             2.4441 |      0.7388 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[287]	train-auc:0.925592+0.001406	test-auc:0.920756+0.00617854

  118 | 01m50s |    0.92076 |    3.8702 |             0.9520 |    5.2875 |      9.5178 |             3.4835 |      0.7284 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[1295]	train-auc:0.922646+0.00153142	test-auc:0.92049+0.00626916

  119 | 01m55s |    0.92049 |    3.4914 |             0.7043 |    5.4772 |      1.7069 |             9.5198 |      0.7435 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[527]	train-auc:0.923267+0.00143017	test-auc:0.920847+0.0061729

  120 | 01m53s |    0.92085 |    5.1532 |             0.9882 |    4.0855 |      2.2744 |            10.6310 |      0.8121 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[507]	train-auc:0.923496+0.00147117	test-auc:0.920602+0.00622852

  121 | 01m45s |    0.92060 |    7.4780 |             0.7152 |    6.5258 |     26.0406 |             5.3675 |      0.7205 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[207]	train-auc:0.929664+0.00112973	test-auc:0.920494+0.00626824

  122 | 02m01s |    0.92049 |    0.3411 |             0.9884 |    6.2525 |     12.2895 |             6.7554 |      0.6058 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[254]	train-auc:0.924117+0.00140843	test-auc:0.920645+0.00623184

  123 | 01m55s |    0.92065 |    4.0905 |             0.9220 |    6.4845 |     21.9938 |            18.3592 |      0.9153 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[275]	train-auc:0.923249+0.00141839	test-auc:0.920832+0.00611038

  124 | 01m48s |    0.92083 |    5.5531 |             0.9610 |    7.0624 |     30.8178 |             8.3332 |      0.7133 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
Stopping. Best iteration:
[430]	train-auc:0.923284+0.00148099	test-auc:0.920744+0.00627542

  125 | 01m38s |    0.92074 |    6.1349 |             0.9474 |    7.2599 |     10.6732 |            17.2274 |      0.6627 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


In [54]:
bayes_params_final = {
    'objective': 'binary:logistic',
    'eta': .01,
    'alpha': .0748,
    'colsample_bytree': .8819,
    'gamma': .8596,
    'max_depth': 1,
    'min_child_weight': 2,
    'subsample': .6542,
    'seed': random_state,
    'silent': 1
}

In [57]:
final_xgb_ = xgb.train(bayes_params_final, xg_train_meta, num_boost_round=int(912/.8))
xgb_pred = final_xgb_.predict(xg_test_meta)

In [58]:
pred_df = pd.DataFrame({'GeneId': np.arange(1, x_test.shape[0] + 1), 
                        'Prediction': xgb_pred})
pred_df.to_csv('submission.csv', index=False)