In [1]:
import pandas as pd
import numpy as np
import gc
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import lightgbm
from bayes_opt import BayesianOptimization

In [2]:
data = pd.read_csv('cleandata/train.csv')
data.fillna(np.nan, inplace=True)
data.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in data.columns]
data.replace([np.inf, -np.inf], np.nan, inplace=True)

In [3]:
Y = data['TARGET']
X = data[list(data.columns[2:])]
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=0)
del data
gc.collect()

27

In [4]:
def bayes_optimization_gbm(X, Y, init_round=5, opt_round=15, n_folds=5, random_seed=0):
    train_data = lightgbm.Dataset(data=X, label=Y)
    # parameters
    def lgb_eval(n_estimators, learning_rate, num_leaves, min_data_in_leaf, max_depth, reg_alpha, reg_lambda, min_split_gain, min_child_weight,colsample_bytree,subsample):
        params = {'application':'binary', 'metric':'auc'}
        params['n_estimators'] = max(int(round(n_estimators)),500)
        params['learning_rate'] = min(learning_rate,0.3)
        params['num_leaves'] = int(round(num_leaves))
        params['min_data_in_leaf'] = int(round(min_data_in_leaf))
        params['max_depth'] = int(round(max_depth))
        params['reg_alpha'] = max(reg_alpha, 0)
        params['reg_lambda'] = max(reg_lambda, 0)
        params['min_split_gain'] = min_split_gain
        params['min_child_weight'] = min_child_weight 
        params['colsample_bytree'] = max(min(colsample_bytree, 1), 0)
        params['subsample'] = max(min(subsample, 1), 0)
        
        cv=lightgbm.cv(params,train_data,nfold=n_folds,seed=random_seed,early_stopping_rounds=1000,stratified=True,verbose_eval=200,metrics=['auc'])
        return max(cv['auc-mean'])

    gbmbo = BayesianOptimization(lgb_eval, {'n_estimators': (2000, 10000),
                                            'learning_rate': (0.001, 0.1),
                                            'num_leaves': (25, 45),
                                            'min_data_in_leaf': (100, 1000),
                                            'max_depth': (5, 10),
                                            'reg_alpha': (0, 3),
                                            'reg_lambda': (0, 3),
                                            'min_split_gain': (0.001, 0.1),
                                            'min_child_weight': (5, 50),
                                            'colsample_bytree': (0.1, 0.9),
                                            'subsample': (0.5, 1)},random_state=0)
    # optimize
    gbmbo.maximize(init_points=init_round, n_iter=opt_round)
    
    # return best parameters
    return gbmbo.max

best_params = bayes_optimization_gbm(x_train, y_train)

|   iter    |  target   | colsam... | learni... | max_depth | min_ch... | min_da... | min_sp... | n_esti... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------------------------------




[200]	cv_agg's auc: 0.786219 + 0.00343571
[400]	cv_agg's auc: 0.786828 + 0.00321705
[600]	cv_agg's auc: 0.78558 + 0.00332422
[800]	cv_agg's auc: 0.783813 + 0.00375874
[1000]	cv_agg's auc: 0.782268 + 0.00387711
[1200]	cv_agg's auc: 0.780584 + 0.00392295
| [0m 1       [0m | [0m 0.787   [0m | [0m 0.5391  [0m | [0m 0.0718  [0m | [0m 8.014   [0m | [0m 29.52   [0m | [0m 481.3   [0m | [0m 0.06494 [0m | [0m 5.501e+0[0m | [0m 42.84   [0m | [0m 2.891   [0m | [0m 1.15    [0m | [0m 0.8959  [0m |




[200]	cv_agg's auc: 0.785941 + 0.00320549
[400]	cv_agg's auc: 0.787363 + 0.00310549
[600]	cv_agg's auc: 0.786947 + 0.00313445
[800]	cv_agg's auc: 0.786155 + 0.00321708
[1000]	cv_agg's auc: 0.785062 + 0.00319653
[1200]	cv_agg's auc: 0.783924 + 0.00323221
| [95m 2       [0m | [95m 0.7875  [0m | [95m 0.5231  [0m | [95m 0.05724 [0m | [95m 9.628   [0m | [95m 8.197   [0m | [95m 178.4   [0m | [95m 0.003002[0m | [95m 8.661e+0[0m | [95m 40.56   [0m | [95m 2.61    [0m | [95m 2.936   [0m | [95m 0.8996  [0m |




[200]	cv_agg's auc: 0.784327 + 0.0034967
[400]	cv_agg's auc: 0.785617 + 0.00360471
[600]	cv_agg's auc: 0.784882 + 0.00374952
[800]	cv_agg's auc: 0.783954 + 0.003728
[1000]	cv_agg's auc: 0.782512 + 0.00386147
[1200]	cv_agg's auc: 0.780974 + 0.00426796
| [0m 3       [0m | [0m 0.7857  [0m | [0m 0.4692  [0m | [0m 0.07827 [0m | [0m 5.591   [0m | [0m 33.8    [0m | [0m 229.0   [0m | [0m 0.09452 [0m | [0m 6.175e+0[0m | [0m 33.29   [0m | [0m 0.7937  [0m | [0m 2.323   [0m | [0m 0.7281  [0m |




[200]	cv_agg's auc: 0.748909 + 0.00315549
[400]	cv_agg's auc: 0.755393 + 0.00327523
[600]	cv_agg's auc: 0.7606 + 0.00339503
[800]	cv_agg's auc: 0.764977 + 0.00367598
[1000]	cv_agg's auc: 0.768621 + 0.00385639
[1200]	cv_agg's auc: 0.771882 + 0.00403955
[1400]	cv_agg's auc: 0.77465 + 0.00402691
[1600]	cv_agg's auc: 0.776908 + 0.00397327
[1800]	cv_agg's auc: 0.778738 + 0.00396106
[2000]	cv_agg's auc: 0.780265 + 0.00397127
[2200]	cv_agg's auc: 0.781503 + 0.00393769
[2400]	cv_agg's auc: 0.782518 + 0.00388783
[2600]	cv_agg's auc: 0.783398 + 0.00381039
[2800]	cv_agg's auc: 0.78414 + 0.00377953
[3000]	cv_agg's auc: 0.784773 + 0.00373817
[3200]	cv_agg's auc: 0.785325 + 0.00368575
[3400]	cv_agg's auc: 0.78583 + 0.0036312
[3600]	cv_agg's auc: 0.786253 + 0.00357092
[3800]	cv_agg's auc: 0.786631 + 0.00350344
[4000]	cv_agg's auc: 0.786952 + 0.00345124
[4200]	cv_agg's auc: 0.787251 + 0.00338117
[4400]	cv_agg's auc: 0.787526 + 0.00335168
[4600]	cv_agg's auc: 0.78777 + 0.00333357
[4800]	cv_agg's auc: 0



[200]	cv_agg's auc: 0.784285 + 0.00362413
[400]	cv_agg's auc: 0.786766 + 0.0035119
[600]	cv_agg's auc: 0.786631 + 0.0035399
[800]	cv_agg's auc: 0.785679 + 0.00361125
[1000]	cv_agg's auc: 0.78431 + 0.00345171
[1200]	cv_agg's auc: 0.782872 + 0.00370474
[1400]	cv_agg's auc: 0.781525 + 0.00406788
| [0m 5       [0m | [0m 0.787   [0m | [0m 0.6334  [0m | [0m 0.06739 [0m | [0m 6.052   [0m | [0m 10.8    [0m | [0m 383.9   [0m | [0m 0.03701 [0m | [0m 6.562e+0[0m | [0m 33.77   [0m | [0m 2.965   [0m | [0m 0.3061  [0m | [0m 0.6044  [0m |




[200]	cv_agg's auc: 0.782486 + 0.00379529
[400]	cv_agg's auc: 0.787243 + 0.00348125
[600]	cv_agg's auc: 0.788402 + 0.00344243
[800]	cv_agg's auc: 0.788428 + 0.00378179
[1000]	cv_agg's auc: 0.788139 + 0.0040706
[1200]	cv_agg's auc: 0.787625 + 0.00419447
[1400]	cv_agg's auc: 0.786964 + 0.00419664
[1600]	cv_agg's auc: 0.786365 + 0.004106
| [0m 6       [0m | [0m 0.7885  [0m | [0m 0.7331  [0m | [0m 0.03487 [0m | [0m 8.656   [0m | [0m 36.84   [0m | [0m 998.5   [0m | [0m 0.01208 [0m | [0m 2.017e+0[0m | [0m 39.96   [0m | [0m 1.706   [0m | [0m 2.073   [0m | [0m 0.8768  [0m |




[200]	cv_agg's auc: 0.7862 + 0.00400954
[400]	cv_agg's auc: 0.786357 + 0.00388533
[600]	cv_agg's auc: 0.784907 + 0.00367342
[800]	cv_agg's auc: 0.783039 + 0.00413348
[1000]	cv_agg's auc: 0.781352 + 0.00396594
[1200]	cv_agg's auc: 0.779736 + 0.00386054
| [0m 7       [0m | [0m 0.7867  [0m | [0m 0.2938  [0m | [0m 0.08932 [0m | [0m 9.973   [0m | [0m 11.51   [0m | [0m 978.6   [0m | [0m 0.04894 [0m | [0m 9.985e+0[0m | [0m 29.54   [0m | [0m 2.841   [0m | [0m 2.649   [0m | [0m 0.5812  [0m |




[200]	cv_agg's auc: 0.785617 + 0.00364534
[400]	cv_agg's auc: 0.787344 + 0.0034488
[600]	cv_agg's auc: 0.786945 + 0.00363609
[800]	cv_agg's auc: 0.785851 + 0.00402169
[1000]	cv_agg's auc: 0.784663 + 0.00400572
[1200]	cv_agg's auc: 0.783158 + 0.00412393
| [0m 8       [0m | [0m 0.7874  [0m | [0m 0.5856  [0m | [0m 0.0681  [0m | [0m 6.557   [0m | [0m 6.477   [0m | [0m 998.0   [0m | [0m 0.05784 [0m | [0m 6.18e+03[0m | [0m 39.37   [0m | [0m 1.347   [0m | [0m 1.552   [0m | [0m 0.6073  [0m |




[200]	cv_agg's auc: 0.785124 + 0.00361702
[400]	cv_agg's auc: 0.787065 + 0.00357216
[600]	cv_agg's auc: 0.786432 + 0.0033976
[800]	cv_agg's auc: 0.785501 + 0.00344241
[1000]	cv_agg's auc: 0.784012 + 0.00362046
[1200]	cv_agg's auc: 0.782756 + 0.00353949
| [0m 9       [0m | [0m 0.7871  [0m | [0m 0.2621  [0m | [0m 0.07979 [0m | [0m 6.2     [0m | [0m 49.24   [0m | [0m 998.5   [0m | [0m 0.04616 [0m | [0m 2.01e+03[0m | [0m 42.5    [0m | [0m 1.99    [0m | [0m 1.251   [0m | [0m 0.8973  [0m |




[200]	cv_agg's auc: 0.78385 + 0.00392264
[400]	cv_agg's auc: 0.787302 + 0.00330309
[600]	cv_agg's auc: 0.787553 + 0.0032865
[800]	cv_agg's auc: 0.787006 + 0.00351091
[1000]	cv_agg's auc: 0.786441 + 0.00335408
[1200]	cv_agg's auc: 0.785629 + 0.00320735
[1400]	cv_agg's auc: 0.784616 + 0.00334118
| [0m 10      [0m | [0m 0.7876  [0m | [0m 0.5991  [0m | [0m 0.04569 [0m | [0m 7.046   [0m | [0m 8.285   [0m | [0m 127.3   [0m | [0m 0.02775 [0m | [0m 2.034e+0[0m | [0m 35.22   [0m | [0m 1.891   [0m | [0m 0.362   [0m | [0m 0.6344  [0m |




[200]	cv_agg's auc: 0.783974 + 0.00367095
[400]	cv_agg's auc: 0.786998 + 0.00383079
[600]	cv_agg's auc: 0.787132 + 0.00371931
[800]	cv_agg's auc: 0.786595 + 0.00382663
[1000]	cv_agg's auc: 0.785432 + 0.003701
[1200]	cv_agg's auc: 0.784846 + 0.00366055
[1400]	cv_agg's auc: 0.783998 + 0.00346267
| [0m 11      [0m | [0m 0.7874  [0m | [0m 0.1021  [0m | [0m 0.061   [0m | [0m 9.414   [0m | [0m 45.7    [0m | [0m 102.8   [0m | [0m 0.007992[0m | [0m 9.952e+0[0m | [0m 28.59   [0m | [0m 0.5413  [0m | [0m 0.4948  [0m | [0m 0.606   [0m |




[200]	cv_agg's auc: 0.773986 + 0.00410416
[400]	cv_agg's auc: 0.782735 + 0.00405521
[600]	cv_agg's auc: 0.785742 + 0.00376751
[800]	cv_agg's auc: 0.787128 + 0.00357242
[1000]	cv_agg's auc: 0.788045 + 0.00360842
[1200]	cv_agg's auc: 0.788397 + 0.00368834
[1400]	cv_agg's auc: 0.788564 + 0.00371436
[1600]	cv_agg's auc: 0.788702 + 0.00373889
[1800]	cv_agg's auc: 0.788782 + 0.00370436
[2000]	cv_agg's auc: 0.788579 + 0.00381412
[2200]	cv_agg's auc: 0.788438 + 0.00375308
[2400]	cv_agg's auc: 0.788178 + 0.00377768
[2600]	cv_agg's auc: 0.787902 + 0.00380034
| [0m 12      [0m | [0m 0.7888  [0m | [0m 0.5953  [0m | [0m 0.02276 [0m | [0m 5.606   [0m | [0m 46.25   [0m | [0m 988.2   [0m | [0m 0.01498 [0m | [0m 9.901e+0[0m | [0m 27.45   [0m | [0m 0.19    [0m | [0m 1.148   [0m | [0m 0.6711  [0m |




[200]	cv_agg's auc: 0.786317 + 0.00328312
[400]	cv_agg's auc: 0.784607 + 0.00393123
[600]	cv_agg's auc: 0.782317 + 0.00377971
[800]	cv_agg's auc: 0.779869 + 0.00366628
[1000]	cv_agg's auc: 0.777155 + 0.00353864
[1200]	cv_agg's auc: 0.774894 + 0.00371928
| [0m 13      [0m | [0m 0.7864  [0m | [0m 0.6389  [0m | [0m 0.09276 [0m | [0m 9.212   [0m | [0m 47.43   [0m | [0m 981.6   [0m | [0m 0.07378 [0m | [0m 9.852e+0[0m | [0m 41.66   [0m | [0m 0.6787  [0m | [0m 0.4303  [0m | [0m 0.9683  [0m |




[200]	cv_agg's auc: 0.783043 + 0.00394669
[400]	cv_agg's auc: 0.787148 + 0.00366016
[600]	cv_agg's auc: 0.788207 + 0.00353121
[800]	cv_agg's auc: 0.78831 + 0.00384531
[1000]	cv_agg's auc: 0.788149 + 0.00388888
[1200]	cv_agg's auc: 0.787843 + 0.00396433
[1400]	cv_agg's auc: 0.7872 + 0.00414235
[1600]	cv_agg's auc: 0.786704 + 0.00416515
| [0m 14      [0m | [0m 0.7884  [0m | [0m 0.6587  [0m | [0m 0.03925 [0m | [0m 9.422   [0m | [0m 12.83   [0m | [0m 991.2   [0m | [0m 0.01891 [0m | [0m 2.034e+0[0m | [0m 28.28   [0m | [0m 0.9919  [0m | [0m 1.912   [0m | [0m 0.9757  [0m |




[200]	cv_agg's auc: 0.782788 + 0.00421735
[400]	cv_agg's auc: 0.787534 + 0.0037891
[600]	cv_agg's auc: 0.788486 + 0.00383881
[800]	cv_agg's auc: 0.788557 + 0.0037876
[1000]	cv_agg's auc: 0.788459 + 0.00399198
[1200]	cv_agg's auc: 0.788097 + 0.00410584
[1400]	cv_agg's auc: 0.787696 + 0.00395859
[1600]	cv_agg's auc: 0.787128 + 0.00385311
[1800]	cv_agg's auc: 0.786554 + 0.00396172
| [0m 15      [0m | [0m 0.7887  [0m | [0m 0.3887  [0m | [0m 0.04159 [0m | [0m 7.756   [0m | [0m 48.31   [0m | [0m 123.8   [0m | [0m 0.09984 [0m | [0m 2.139e+0[0m | [0m 25.52   [0m | [0m 1.579   [0m | [0m 0.3315  [0m | [0m 0.9572  [0m |




[200]	cv_agg's auc: 0.784059 + 0.0034781
[400]	cv_agg's auc: 0.786577 + 0.00341023
[600]	cv_agg's auc: 0.78651 + 0.00369081
[800]	cv_agg's auc: 0.785629 + 0.0036405
[1000]	cv_agg's auc: 0.784609 + 0.0035832
[1200]	cv_agg's auc: 0.783724 + 0.00348734
[1400]	cv_agg's auc: 0.782613 + 0.00345004
| [0m 16      [0m | [0m 0.7868  [0m | [0m 0.5008  [0m | [0m 0.08164 [0m | [0m 5.349   [0m | [0m 48.75   [0m | [0m 134.7   [0m | [0m 0.04643 [0m | [0m 2.006e+0[0m | [0m 36.72   [0m | [0m 0.5188  [0m | [0m 0.2833  [0m | [0m 0.942   [0m |




[200]	cv_agg's auc: 0.785189 + 0.00366745
[400]	cv_agg's auc: 0.786665 + 0.00386939
[600]	cv_agg's auc: 0.78625 + 0.00372069
[800]	cv_agg's auc: 0.78531 + 0.00390507
[1000]	cv_agg's auc: 0.783841 + 0.00383632
[1200]	cv_agg's auc: 0.782607 + 0.0038148
| [0m 17      [0m | [0m 0.7868  [0m | [0m 0.7164  [0m | [0m 0.0679  [0m | [0m 7.439   [0m | [0m 5.637   [0m | [0m 978.6   [0m | [0m 0.05338 [0m | [0m 2.176e+0[0m | [0m 29.69   [0m | [0m 1.176   [0m | [0m 1.309   [0m | [0m 0.836   [0m |




[200]	cv_agg's auc: 0.763655 + 0.00402959
[400]	cv_agg's auc: 0.777259 + 0.00411762
[600]	cv_agg's auc: 0.782758 + 0.00391994
[800]	cv_agg's auc: 0.785316 + 0.00369768
[1000]	cv_agg's auc: 0.786742 + 0.0035821
[1200]	cv_agg's auc: 0.787742 + 0.00349977
[1400]	cv_agg's auc: 0.788337 + 0.00348145
[1600]	cv_agg's auc: 0.788825 + 0.00349264
[1800]	cv_agg's auc: 0.789036 + 0.00343641
[2000]	cv_agg's auc: 0.789221 + 0.00340508
[2200]	cv_agg's auc: 0.789337 + 0.00344146
[2400]	cv_agg's auc: 0.789416 + 0.00352225
[2600]	cv_agg's auc: 0.789447 + 0.00361265
[2800]	cv_agg's auc: 0.789458 + 0.00360202
[3000]	cv_agg's auc: 0.789386 + 0.00358724
[3200]	cv_agg's auc: 0.789336 + 0.00353563
[3400]	cv_agg's auc: 0.789277 + 0.00352567
[3600]	cv_agg's auc: 0.789201 + 0.00351779
| [0m 18      [0m | [0m 0.7895  [0m | [0m 0.6786  [0m | [0m 0.0123  [0m | [0m 8.85    [0m | [0m 47.33   [0m | [0m 993.3   [0m | [0m 0.08713 [0m | [0m 4.655e+0[0m | [0m 28.04   [0m | [0m 0.822   [0m | [0m 1.43



[200]	cv_agg's auc: 0.775337 + 0.0040584
[400]	cv_agg's auc: 0.784083 + 0.0038569
[600]	cv_agg's auc: 0.786822 + 0.00351738
[800]	cv_agg's auc: 0.788128 + 0.00354719
[1000]	cv_agg's auc: 0.788745 + 0.00358885
[1200]	cv_agg's auc: 0.789068 + 0.00364356
[1400]	cv_agg's auc: 0.789126 + 0.00364058
[1600]	cv_agg's auc: 0.789148 + 0.00370437
[1800]	cv_agg's auc: 0.78901 + 0.00364752
[2000]	cv_agg's auc: 0.788909 + 0.00374783
[2200]	cv_agg's auc: 0.788693 + 0.0038189
[2400]	cv_agg's auc: 0.788458 + 0.00396473
| [0m 19      [0m | [0m 0.7892  [0m | [0m 0.6164  [0m | [0m 0.02309 [0m | [0m 7.995   [0m | [0m 49.85   [0m | [0m 980.9   [0m | [0m 0.06352 [0m | [0m 5.552e+0[0m | [0m 25.87   [0m | [0m 2.335   [0m | [0m 2.635   [0m | [0m 0.5393  [0m |




[200]	cv_agg's auc: 0.770235 + 0.00390553
[400]	cv_agg's auc: 0.779318 + 0.00401118
[600]	cv_agg's auc: 0.783306 + 0.00393738
[800]	cv_agg's auc: 0.785299 + 0.00382482
[1000]	cv_agg's auc: 0.786546 + 0.0037114
[1200]	cv_agg's auc: 0.787467 + 0.00369455
[1400]	cv_agg's auc: 0.788058 + 0.00366581
[1600]	cv_agg's auc: 0.788538 + 0.00354745
[1800]	cv_agg's auc: 0.788714 + 0.00354246
[2000]	cv_agg's auc: 0.788934 + 0.0035717
[2200]	cv_agg's auc: 0.789114 + 0.00359319
[2400]	cv_agg's auc: 0.789154 + 0.003617
[2600]	cv_agg's auc: 0.789162 + 0.00367435
[2800]	cv_agg's auc: 0.789129 + 0.00371337
[3000]	cv_agg's auc: 0.78909 + 0.00365137
[3200]	cv_agg's auc: 0.789088 + 0.00367409
[3400]	cv_agg's auc: 0.789003 + 0.00365174
[3600]	cv_agg's auc: 0.788918 + 0.00362479
| [0m 20      [0m | [0m 0.7892  [0m | [0m 0.3499  [0m | [0m 0.01867 [0m | [0m 5.133   [0m | [0m 49.73   [0m | [0m 977.4   [0m | [0m 0.01299 [0m | [0m 7.175e+0[0m | [0m 25.98   [0m | [0m 0.9443  [0m | [0m 1.841   