In [1]:
import pandas as pd
import numpy as np
import random

from lightgbm import LGBMClassifier
import lightgbm as lgbm
from tqdm import tqdm_notebook as tqdm

from sklearn.metrics import roc_auc_score, precision_recall_curve, roc_curve
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import seaborn as sns
import gc
from utils.utils import *

pd.options.display.max_columns = None
%matplotlib inline

In [2]:
all_data = pd.read_pickle('edit/tmp_data/all_data.pkl')
buro = pd.read_pickle('edit/tmp_data/buro.pkl')
cc_bal = pd.read_pickle('edit/tmp_data/cc_bal.pkl')
inst = pd.read_pickle('edit/tmp_data/inst.pkl')
pos = pd.read_pickle('edit/tmp_data/pos.pkl')
prev = pd.read_pickle('edit/tmp_data/prev.pkl')

In [5]:
def run(model=None, submit=None):
    merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
    FoldSubmit(merged, index_cols, model, submit)

In [4]:
# base model
clf = LGBMClassifier(
            nthread=4,
            n_estimators=10000,
            learning_rate=0.02,
            num_leaves=32,
            colsample_bytree=0.9497036,
            subsample=0.8715623,
            max_depth=8,
            reg_alpha=0.04,
            reg_lambda=0.073,
            min_split_gain=0.0222415,
            min_child_weight=40,
            random_state=seed,
            silent=-1,
            verbose=-1, )

In [7]:
run(model = clf, submit='180705.csv')

100%|██████████| 6/6 [00:30<00:00,  6.21s/it]


Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.756855	valid_1's auc: 0.757352
[200]	training's auc: 0.781026	valid_1's auc: 0.773324
[300]	training's auc: 0.799594	valid_1's auc: 0.784216
[400]	training's auc: 0.811838	valid_1's auc: 0.789644
[500]	training's auc: 0.821885	valid_1's auc: 0.792641
[600]	training's auc: 0.830397	valid_1's auc: 0.794498
[700]	training's auc: 0.838269	valid_1's auc: 0.795691
[800]	training's auc: 0.845358	valid_1's auc: 0.796608
[900]	training's auc: 0.851725	valid_1's auc: 0.797389
[1000]	training's auc: 0.857851	valid_1's auc: 0.79801
[1100]	training's auc: 0.863841	valid_1's auc: 0.798184
[1200]	training's auc: 0.869315	valid_1's auc: 0.798538
[1300]	training's auc: 0.874317	valid_1's auc: 0.798686
[1400]	training's auc: 0.879343	valid_1's auc: 0.798714
[1500]	training's auc: 0.88397	valid_1's auc: 0.79882
[1600]	training's auc: 0.888281	valid_1's auc: 0.799112
[1700]	training's auc: 0.892471	valid_1's auc: 0.7992

In [21]:
#merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
merged.columns[400:500]

Index(['POS_NAME_CONTRACT_STATUS_nan_MEAN', 'POS_COUNT',
       'PREV_AMT_ANNUITY_MAX', 'PREV_AMT_ANNUITY_MEAN',
       'PREV_AMT_APPLICATION_MEAN', 'PREV_AMT_CREDIT_MAX',
       'PREV_AMT_CREDIT_MEAN', 'PREV_APP_CREDIT_PERC_MAX',
       'PREV_APP_CREDIT_PERC_MEAN', 'PREV_AMT_DOWN_PAYMENT_MAX',
       'PREV_AMT_DOWN_PAYMENT_MEAN', 'PREV_AMT_GOODS_PRICE_MAX',
       'PREV_AMT_GOODS_PRICE_MEAN', 'PREV_HOUR_APPR_PROCESS_START_MAX',
       'PREV_HOUR_APPR_PROCESS_START_MEAN', 'PREV_RATE_DOWN_PAYMENT_MIN',
       'PREV_RATE_DOWN_PAYMENT_MAX', 'PREV_RATE_DOWN_PAYMENT_MEAN',
       'PREV_DAYS_DECISION_MIN', 'PREV_DAYS_DECISION_MAX',
       'PREV_DAYS_DECISION_MEAN', 'PREV_CNT_PAYMENT_MEAN',
       'PREV_CNT_PAYMENT_SUM', 'PREV_NAME_CONTRACT_TYPE_Cash loans_MEAN',
       'PREV_NAME_CONTRACT_TYPE_Consumer loans_MEAN',
       'PREV_NAME_CONTRACT_TYPE_Revolving loans_MEAN',
       'PREV_NAME_CONTRACT_TYPE_XNA_MEAN', 'PREV_NAME_CONTRACT_TYPE_nan_MEAN',
       'PREV_WEEKDAY_APPR_PROCESS_START_FRIDA

In [18]:
pd.set_option('display.max_columns', 1000)

In [26]:
# 0.789586
merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
merged['AMT_ANNUITY_RATE_MEAN'] = merged['AMT_ANNUITY'] /merged['PREV_AMT_ANNUITY_MEAN'] 
#merged['AMT_CREDIT_rate'] = merged['AMT_CREDIT'] /merged['PREV_AMT_CREDIT'] 
#merged['AMT_GOODS_PRICE_rate'] = merged['AMT_GOODS_PRICE'] /merged['PREV_AMT_GOODS_PRICE'] 

FoldSubmit(merged, index_cols, model = clf,submit='180705-2.csv')

100%|██████████| 6/6 [00:30<00:00,  6.00s/it]


Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.757491	valid_1's auc: 0.757965
[200]	training's auc: 0.781399	valid_1's auc: 0.773663
[300]	training's auc: 0.799791	valid_1's auc: 0.784517
[400]	training's auc: 0.812248	valid_1's auc: 0.78971
[500]	training's auc: 0.822112	valid_1's auc: 0.792777
[600]	training's auc: 0.830738	valid_1's auc: 0.794307
[700]	training's auc: 0.838495	valid_1's auc: 0.795656
[800]	training's auc: 0.84547	valid_1's auc: 0.796459
[900]	training's auc: 0.851893	valid_1's auc: 0.797162
[1000]	training's auc: 0.857895	valid_1's auc: 0.797628
[1100]	training's auc: 0.863702	valid_1's auc: 0.797964
[1200]	training's auc: 0.86912	valid_1's auc: 0.798254
[1300]	training's auc: 0.874235	valid_1's auc: 0.798509
[1400]	training's auc: 0.879228	valid_1's auc: 0.798604
[1500]	training's auc: 0.883778	valid_1's auc: 0.79856
Early stopping, best iteration is:
[1389]	training's auc: 0.878664	valid_1's auc: 0.798623
Fold  1 AUC : 0.798

In [30]:
# 0.789586
merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
merged['AMT_ANNUITY_RATE_MAX'] = merged['AMT_ANNUITY'] /merged['PREV_AMT_ANNUITY_MAX'] 
merged['AMT_ANNUITY_RATE_MEAN'] = merged['AMT_ANNUITY'] /merged['PREV_AMT_ANNUITY_MEAN'] 
merged['AMT_CREDIT_RATE_MEAN'] = merged['AMT_CREDIT'] /merged['PREV_AMT_CREDIT_MEAN'] 
merged['AMT_CREDIT_RATE_MAX'] = merged['AMT_CREDIT'] /merged['PREV_AMT_CREDIT_MAX'] 
merged['AMT_GOODS_RATE_MEAN'] = merged['AMT_GOODS_PRICE'] /merged['PREV_AMT_GOODS_PRICE_MEAN'] 
merged['AMT_GOODS_RATE_MAX'] = merged['AMT_GOODS_PRICE'] /merged['PREV_AMT_GOODS_PRICE_MAX'] 

FoldSubmit(merged, index_cols, model = clf, submit='180705-3.csv')

100%|██████████| 6/6 [00:27<00:00,  5.43s/it]


Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.757152	valid_1's auc: 0.757207
[200]	training's auc: 0.781416	valid_1's auc: 0.773754
[300]	training's auc: 0.7999	valid_1's auc: 0.78441
[400]	training's auc: 0.812248	valid_1's auc: 0.789698
[500]	training's auc: 0.822216	valid_1's auc: 0.792817
[600]	training's auc: 0.830718	valid_1's auc: 0.794903
[700]	training's auc: 0.838486	valid_1's auc: 0.796482
[800]	training's auc: 0.84541	valid_1's auc: 0.797277
[900]	training's auc: 0.851854	valid_1's auc: 0.797949
[1000]	training's auc: 0.858145	valid_1's auc: 0.798422
[1100]	training's auc: 0.86413	valid_1's auc: 0.798786
[1200]	training's auc: 0.869358	valid_1's auc: 0.798897
[1300]	training's auc: 0.874427	valid_1's auc: 0.799014
[1400]	training's auc: 0.879536	valid_1's auc: 0.799122
[1500]	training's auc: 0.884173	valid_1's auc: 0.799102
[1600]	training's auc: 0.888599	valid_1's auc: 0.799301
[1700]	training's auc: 0.892788	valid_1's auc: 0.799584

In [31]:
prev = pd.read_pickle('edit/tmp_data/prev2.pkl')
merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
FoldSubmit(merged, index_cols, model = clf, submit='180705-4.csv')

100%|██████████| 6/6 [00:29<00:00,  4.31s/it]


Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.757197	valid_1's auc: 0.757438
[200]	training's auc: 0.780769	valid_1's auc: 0.772804
[300]	training's auc: 0.798928	valid_1's auc: 0.783692
[400]	training's auc: 0.811131	valid_1's auc: 0.788723
[500]	training's auc: 0.821087	valid_1's auc: 0.791773
[600]	training's auc: 0.829432	valid_1's auc: 0.793818
[700]	training's auc: 0.836796	valid_1's auc: 0.795145
[800]	training's auc: 0.843559	valid_1's auc: 0.795994
[900]	training's auc: 0.849939	valid_1's auc: 0.79683
[1000]	training's auc: 0.855949	valid_1's auc: 0.797132
[1100]	training's auc: 0.861521	valid_1's auc: 0.797651
[1200]	training's auc: 0.8669	valid_1's auc: 0.798003
[1300]	training's auc: 0.871726	valid_1's auc: 0.798226
[1400]	training's auc: 0.876587	valid_1's auc: 0.798306
[1500]	training's auc: 0.881077	valid_1's auc: 0.798281
Early stopping, best iteration is:
[1361]	training's auc: 0.874663	valid_1's auc: 0.798417
Fold  1 AUC : 0.79

In [None]:
# 0.789586
merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
merged['AMT_ANNUITY_RATE_MAX'] = merged['AMT_ANNUITY'] /merged['PREV_AMT_ANNUITY_MAX'] 
#merged['AMT_ANNUITY_RATE_MEAN'] = merged['AMT_ANNUITY'] /merged['PREV_AMT_ANNUITY_MEAN'] 

FoldSubmit(merged, index_cols, model = clf)

100%|██████████| 6/6 [00:21<00:00,  3.07s/it]


Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.756957	valid_1's auc: 0.756891
[200]	training's auc: 0.780862	valid_1's auc: 0.772729
[300]	training's auc: 0.799066	valid_1's auc: 0.783649
[400]	training's auc: 0.811558	valid_1's auc: 0.788937
[500]	training's auc: 0.821336	valid_1's auc: 0.792092
[600]	training's auc: 0.8296	valid_1's auc: 0.794144
[700]	training's auc: 0.837105	valid_1's auc: 0.795297
[800]	training's auc: 0.843982	valid_1's auc: 0.796121
[900]	training's auc: 0.850252	valid_1's auc: 0.796821
[1000]	training's auc: 0.856238	valid_1's auc: 0.797495
[1100]	training's auc: 0.861854	valid_1's auc: 0.797817
[1200]	training's auc: 0.867038	valid_1's auc: 0.798043
[1300]	training's auc: 0.871992	valid_1's auc: 0.798343
[1400]	training's auc: 0.876621	valid_1's auc: 0.7985
[1500]	training's auc: 0.881303	valid_1's auc: 0.798573
[1600]	training's auc: 0.885856	valid_1's auc: 0.798694
[1700]	training's auc: 0.890012	valid_1's auc: 0.79894

In [None]:
# 0.789586
merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
#merged['AMT_CREDIT_RATE_MEAN'] = merged['AMT_CREDIT'] /merged['PREV_AMT_CREDIT_MEAN'] 
merged['AMT_CREDIT_RATE_MAX'] = merged['AMT_CREDIT'] /merged['PREV_AMT_CREDIT_MAX'] 

FoldSubmit(merged, index_cols, model = clf)

100%|██████████| 6/6 [00:20<00:00,  2.99s/it]


Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.75664	valid_1's auc: 0.756782
[200]	training's auc: 0.780676	valid_1's auc: 0.772554
[300]	training's auc: 0.79881	valid_1's auc: 0.783433
[400]	training's auc: 0.811337	valid_1's auc: 0.788605
[500]	training's auc: 0.821168	valid_1's auc: 0.791696
[600]	training's auc: 0.829399	valid_1's auc: 0.79378
[700]	training's auc: 0.836833	valid_1's auc: 0.795124
[800]	training's auc: 0.843668	valid_1's auc: 0.796164
[900]	training's auc: 0.850002	valid_1's auc: 0.796759
[1000]	training's auc: 0.855899	valid_1's auc: 0.797201
[1100]	training's auc: 0.861648	valid_1's auc: 0.797734
[1200]	training's auc: 0.866949	valid_1's auc: 0.797969
[1300]	training's auc: 0.87179	valid_1's auc: 0.79814
[1400]	training's auc: 0.876614	valid_1's auc: 0.798221
[1500]	training's auc: 0.881332	valid_1's auc: 0.798375
[1600]	training's auc: 0.885771	valid_1's auc: 0.79842
[1700]	training's auc: 0.88993	valid_1's auc: 0.798444
[

In [None]:
# 0.789586
merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
#merged['AMT_GOODS_RATE_MEAN'] = merged['AMT_GOODS_PRICE'] /merged['PREV_AMT_GOODS_PRICE_MEAN'] 
merged['AMT_GOODS_RATE_MAX'] = merged['AMT_GOODS_PRICE'] /merged['PREV_AMT_GOODS_PRICE_MAX'] 

FoldSubmit(merged, index_cols, model = clf)

In [None]:
# 0.789586
merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
#merged['AMT_ANNUITY_RATE_MEAN'] = merged['AMT_ANNUITY'] /merged['PREV_AMT_ANNUITY_MAX'] 
#merged['AMT_CREDIT_rate'] = merged['AMT_CREDIT'] /merged['PREV_AMT_CREDIT'] 
merged['AMT_GOODS_PRICE_rate'] = merged['AMT_GOODS_PRICE'] /merged['PREV_AMT_GOODS_PRICE_MAX'] 

FoldSubmit(merged, index_cols)

In [18]:
# 0.789586
index_cols = ['SK_ID_CURR','TEST','TARGET']
buro = pd.read_pickle('edit/tmp_data/buro.pkl')

merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
merged['AMT_ANNUITY_rate'] = merged['AMT_ANNUITY_x'] /merged['AMT_ANNUITY_y'] 
merged['AMT_CREDIT_rate'] = merged['AMT_CREDIT_x'] /merged['AMT_CREDIT_y'] 
merged['AMT_GOODS_PRICE_rate'] = merged['AMT_GOODS_PRICE_x'] /merged['AMT_GOODS_PRICE_y'] 

clf_set = FoldSubmit(merged, index_cols,return_clf=True)

100%|██████████| 6/6 [00:08<00:00,  1.44s/it]


Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.764246	valid_1's auc: 0.762301
[200]	training's auc: 0.795982	valid_1's auc: 0.783074
[300]	training's auc: 0.815063	valid_1's auc: 0.790463
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.815063	valid_1's auc: 0.790463
Fold  1 AUC : 0.790463
Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.767546	valid_1's auc: 0.745799
[200]	training's auc: 0.798402	valid_1's auc: 0.768883
[300]	training's auc: 0.817315	valid_1's auc: 0.777389
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.817315	valid_1's auc: 0.777389
Fold  2 AUC : 0.777389
Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.765021	valid_1's auc: 0.745166
[200]	training's auc: 0.797105	valid_1's auc: 0.766359
[300]	training's auc: 0.815552	valid_1's auc: 0.773943
Did not meet early stopping. Best iteration is:
[300]	training's

In [12]:
# 0.789586
index_cols = ['SK_ID_CURR','TEST','TARGET']
cc_bal = pd.read_pickle('edit/tmp_data/cc_bal.pkl')
pos = pd.read_pickle('edit/tmp_data/pos.pkl')
inst = pd.read_pickle('edit/tmp_data/inst.pkl')
all_data = pd.read_pickle('edit/tmp_data/all_data.pkl')

merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
merged['AMT_ANNUITY_rate'] = merged['AMT_ANNUITY_x'] /merged['AMT_ANNUITY_y'] 
merged['AMT_CREDIT_rate'] = merged['AMT_CREDIT_x'] /merged['AMT_CREDIT_y'] 
merged['AMT_GOODS_PRICE_rate'] = merged['AMT_GOODS_PRICE_x'] /merged['AMT_GOODS_PRICE_y'] 

clf_set = FoldSubmit(merged, index_cols, model=clf, submit='180618.csv',return_clf=True)

100%|██████████| 6/6 [00:12<00:00,  1.95s/it]


Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.75954	valid_1's auc: 0.74731
[200]	training's auc: 0.786311	valid_1's auc: 0.76243
[300]	training's auc: 0.806563	valid_1's auc: 0.77286
[400]	training's auc: 0.821499	valid_1's auc: 0.777849
[500]	training's auc: 0.83449	valid_1's auc: 0.78013
[600]	training's auc: 0.845498	valid_1's auc: 0.781316
[700]	training's auc: 0.85544	valid_1's auc: 0.78239
[800]	training's auc: 0.864609	valid_1's auc: 0.78329
[900]	training's auc: 0.873177	valid_1's auc: 0.783865
[1000]	training's auc: 0.880971	valid_1's auc: 0.784062
[1100]	training's auc: 0.88831	valid_1's auc: 0.784061
[1200]	training's auc: 0.894978	valid_1's auc: 0.784203
[1300]	training's auc: 0.901189	valid_1's auc: 0.784264
Early stopping, best iteration is:
[1242]	training's auc: 0.897831	valid_1's auc: 0.784364
Fold  1 AUC : 0.784364
Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.761212	valid_1's auc: 0.744

In [8]:
# 0.789586
index_cols = ['SK_ID_CURR','TEST','TARGET']
cc_bal = pd.read_pickle('edit/tmp_data/cc_bal.pkl')
pos = pd.read_pickle('edit/tmp_data/pos.pkl')
inst = pd.read_pickle('edit/tmp_data/inst.pkl')
all_data = pd.read_pickle('edit/tmp_data/all_data.pkl')

merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
merged['AMT_ANNUITY_rate'] = merged['AMT_ANNUITY_x'] /merged['AMT_ANNUITY_y'] 
merged['AMT_CREDIT_rate'] = merged['AMT_CREDIT_x'] /merged['AMT_CREDIT_y'] 
merged['AMT_GOODS_PRICE_rate'] = merged['AMT_GOODS_PRICE_x'] /merged['AMT_GOODS_PRICE_y'] 

clf_set = FoldSubmit(merged, index_cols, model=clf, submit='180618.csv',return_clf=True)

100%|██████████| 6/6 [00:12<00:00,  1.93s/it]


Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.757972	valid_1's auc: 0.748203
[200]	training's auc: 0.782353	valid_1's auc: 0.762715
[300]	training's auc: 0.801223	valid_1's auc: 0.773366
[400]	training's auc: 0.81374	valid_1's auc: 0.778345
[500]	training's auc: 0.823949	valid_1's auc: 0.781248
[600]	training's auc: 0.8326	valid_1's auc: 0.783197
[700]	training's auc: 0.840151	valid_1's auc: 0.784301
[800]	training's auc: 0.846925	valid_1's auc: 0.784888
[900]	training's auc: 0.853499	valid_1's auc: 0.78562
[1000]	training's auc: 0.859743	valid_1's auc: 0.785689
[1100]	training's auc: 0.865634	valid_1's auc: 0.785988
[1200]	training's auc: 0.871095	valid_1's auc: 0.786509
[1300]	training's auc: 0.87647	valid_1's auc: 0.786764
[1400]	training's auc: 0.881555	valid_1's auc: 0.786882
[1500]	training's auc: 0.886324	valid_1's auc: 0.786888
[1600]	training's auc: 0.891015	valid_1's auc: 0.78689
Early stopping, best iteration is:
[1528]	training's auc

In [None]:
# 0.789586
index_cols = ['SK_ID_CURR','TEST','TARGET']
cc_bal = pd.read_pickle('edit/tmp_data/cc_bal.pkl')
pos = pd.read_pickle('edit/tmp_data/pos.pkl')
inst = pd.read_pickle('edit/tmp_data/inst.pkl')
bburo = pd.read_pickle('edit/tmp_data/buroX.pkl')
all_data = pd.read_pickle('edit/tmp_data/all_data.pkl')

merged = DataMerger([all_data, buro, cc_bal, inst, pos, prev], 'SK_ID_CURR')
merged['AMT_ANNUITY_rate'] = merged['AMT_ANNUITY_x'] /merged['AMT_ANNUITY_y'] 
merged['AMT_CREDIT_rate'] = merged['AMT_CREDIT_x'] /merged['AMT_CREDIT_y'] 
merged['AMT_GOODS_PRICE_rate'] = merged['AMT_GOODS_PRICE_x'] /merged['AMT_GOODS_PRICE_y'] 

clf_set = FoldSubmit(merged, index_cols, model=clf, submit='180614.csv',return_clf=True)

In [20]:
fdf = pd.DataFrame(np.c_[np.array(merged.drop(index_cols,axis=1).columns),clf_set[2].feature_importances_.tolist()], columns=['feature','score']).sort_values('score',ascending=False)

In [22]:
fdf.head()

Unnamed: 0,feature,score
37,EXT_SOURCE_2,1136
38,EXT_SOURCE_3,1066
110,ANNUITY LENGTH,1009
22,DAYS_BIRTH,777
374,CNT_INSTALMENT_FUTURE,759


In [27]:
fdf[fdf.score>200]['feature'].values

array(['EXT_SOURCE_2', 'EXT_SOURCE_3', 'ANNUITY LENGTH', 'DAYS_BIRTH',
       'CNT_INSTALMENT_FUTURE', 'EXT_SOURCE_1p2', 'EXT_SOURCE_1',
       'max_buro_DAYS_CREDIT', 'AMT_ANNUITY_x', 'SK_DPD_DEF_y',
       'AMT_PAYMENT', 'DAYS_ID_PUBLISH', 'AMT_ANNUITY_rate',
       'SK_ID_CURR_CNT_POS_CASH', 'CNT_PAYMENT', 'AMT_GOODS_PRICE_x',
       'max_buro_DAYS_CREDIT_ENDDATE', 'AMT_CREDIT_x',
       'DAYS_LAST_DUE_1ST_VERSION', 'ANNUITY_INCOME_RATIO',
       'WORKING_LIFE_RATIO', 'max_buro_DAYS_ENDDATE_FACT',
       'DAYS_REGISTRATION', 'AMT_DOWN_PAYMENT', 'DAYS_EMPLOYED',
       'med_buro_AMT_CREDIT_SUM', 'med_buro_DAYS_CREDIT_ENDDATE',
       'NAME_CONTRACT_STATUS_Refused', 'OWN_CAR_AGE', 'EXT_SOURCE_12',
       'CODE_GENDER', 'CNT_DRAWINGS_CURRENT', 'DAYS_LAST_PHONE_CHANGE',
       'CNT_INSTALMENT', 'avg_buro_AMT_CREDIT_SUM_DEBT',
       'ORGANIZATION_TYPE', 'SK_ID_PREV_y', 'REGION_POPULATION_RELATIVE',
       'HOUR_APPR_PROCESS_START_y', 'min_buro_AMT_CREDIT_SUM',
       'RATE_DOWN_PAYMENT'

In [33]:
# base model
clf = LGBMClassifier(
            nthread=4,
            n_estimators=10000,
            learning_rate=0.03,
            num_leaves=34,
            colsample_bytree=0.9497036,
            subsample=0.8715623,
            max_depth=8,
            reg_alpha=0.041545473,
            reg_lambda=0.0735294,
            min_split_gain=0.0222415,
            min_child_weight=39.3259775,
            silent=-1,
            verbose=-1, )

In [34]:
FoldSubmit(merged, index_cols, model=clf, submit='1806142.csv',return_clf=True)

Training until validation scores don't improve for 150 rounds.
[100]	training's auc: 0.744824	valid_1's auc: 0.747261
[200]	training's auc: 0.75573	valid_1's auc: 0.756175
[300]	training's auc: 0.768898	valid_1's auc: 0.76486
[400]	training's auc: 0.781248	valid_1's auc: 0.773629
[500]	training's auc: 0.794105	valid_1's auc: 0.781848
[600]	training's auc: 0.800675	valid_1's auc: 0.785305
[700]	training's auc: 0.806689	valid_1's auc: 0.787553
[800]	training's auc: 0.811965	valid_1's auc: 0.789465
[900]	training's auc: 0.817324	valid_1's auc: 0.791181
[1000]	training's auc: 0.821766	valid_1's auc: 0.792205
[1100]	training's auc: 0.826491	valid_1's auc: 0.793373
[1200]	training's auc: 0.829627	valid_1's auc: 0.794012
[1300]	training's auc: 0.833501	valid_1's auc: 0.794861
[1400]	training's auc: 0.837567	valid_1's auc: 0.795459
[1500]	training's auc: 0.840797	valid_1's auc: 0.795942
[1600]	training's auc: 0.844071	valid_1's auc: 0.796286
[1700]	training's auc: 0.847801	valid_1's auc: 0.796

[LGBMClassifier(boosting_type='dart', class_weight=None,
         colsample_bytree=0.9497036, learning_rate=0.03, max_depth=8,
         min_child_samples=20, min_child_weight=39.3259775,
         min_split_gain=0.0222415, n_estimators=10000, n_jobs=-1, nthread=4,
         num_leaves=34, objective=None, random_state=None,
         reg_alpha=0.041545473, reg_lambda=0.0735294, silent=-1,
         subsample=0.8715623, subsample_for_bin=200000, subsample_freq=1,
         verbose=-1), LGBMClassifier(boosting_type='dart', class_weight=None,
         colsample_bytree=0.9497036, learning_rate=0.03, max_depth=8,
         min_child_samples=20, min_child_weight=39.3259775,
         min_split_gain=0.0222415, n_estimators=10000, n_jobs=-1, nthread=4,
         num_leaves=34, objective=None, random_state=None,
         reg_alpha=0.041545473, reg_lambda=0.0735294, silent=-1,
         subsample=0.8715623, subsample_for_bin=200000, subsample_freq=1,
         verbose=-1), LGBMClassifier(boosting_type='dar