####  Bayesian Optimization을 이용하여 application과 previous로 만들어진 집합의 하이퍼 파라미터 튜닝

In [1]:
import numpy as np
import pandas as pd
import gc
import time
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 200)

In [2]:
def get_dataset():
    """Load the application and previous-application CSV files.

    Reads ``application_train.csv`` and ``application_test.csv`` from the
    current working directory and stacks them row-wise (each part keeps its
    own row labels, so labels repeat in the result), then reads
    ``previous_application.csv``.

    Returns:
        tuple: ``(apps, prev)`` -- the stacked train+test application frame
        and the previous-application frame.
    """
    application_parts = [
        pd.read_csv(csv_name)
        for csv_name in ('application_train.csv', 'application_test.csv')
    ]
    previous = pd.read_csv('previous_application.csv')

    return pd.concat(application_parts), previous

# Load the raw frames once at module level; a later cell passes both to
# get_apps_all_with_prev_agg.
apps, prev = get_dataset()

In [3]:
def get_apps_processed(apps):
    """Add engineered ``APPS_*`` features to the application frame.

    The frame is modified in place (new columns are appended in a fixed
    order) and the same object is returned.

    Args:
        apps: application DataFrame holding the EXT_SOURCE_*, AMT_*, DAYS_*,
            CNT_FAM_MEMBERS and OWN_CAR_AGE columns used below.

    Returns:
        The same DataFrame with the additional feature columns.
    """
    # Row-wise mean / standard deviation over the three external scores.
    ext_sources = apps[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']]
    apps['APPS_EXT_SOURCE_MEAN'] = ext_sources.mean(axis=1)
    ext_std = ext_sources.std(axis=1)
    # Rows where the std is undefined get the column-wide mean std instead.
    apps['APPS_EXT_SOURCE_STD'] = ext_std.fillna(ext_std.mean())

    # (new feature, numerator column, denominator column), in creation order.
    ratio_specs = (
        # Ratios against AMT_CREDIT.
        ('APPS_ANNUITY_CREDIT_RATIO', 'AMT_ANNUITY', 'AMT_CREDIT'),
        ('APPS_GOODS_CREDIT_RATIO', 'AMT_GOODS_PRICE', 'AMT_CREDIT'),
        # Ratios involving AMT_INCOME_TOTAL.
        ('APPS_ANNUITY_INCOME_RATIO', 'AMT_ANNUITY', 'AMT_INCOME_TOTAL'),
        ('APPS_CREDIT_INCOME_RATIO', 'AMT_CREDIT', 'AMT_INCOME_TOTAL'),
        ('APPS_GOODS_INCOME_RATIO', 'AMT_GOODS_PRICE', 'AMT_INCOME_TOTAL'),
        ('APPS_CNT_FAM_INCOME_RATIO', 'AMT_INCOME_TOTAL', 'CNT_FAM_MEMBERS'),
        # Ratios involving DAYS_BIRTH and DAYS_EMPLOYED.
        ('APPS_EMPLOYED_BIRTH_RATIO', 'DAYS_EMPLOYED', 'DAYS_BIRTH'),
        ('APPS_INCOME_EMPLOYED_RATIO', 'AMT_INCOME_TOTAL', 'DAYS_EMPLOYED'),
        ('APPS_INCOME_BIRTH_RATIO', 'AMT_INCOME_TOTAL', 'DAYS_BIRTH'),
        ('APPS_CAR_BIRTH_RATIO', 'OWN_CAR_AGE', 'DAYS_BIRTH'),
        ('APPS_CAR_EMPLOYED_RATIO', 'OWN_CAR_AGE', 'DAYS_EMPLOYED'),
    )
    for feature, numerator, denominator in ratio_specs:
        apps[feature] = apps[numerator] / apps[denominator]

    return apps

In [4]:
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier

def get_prev_processed(prev):
    """Add engineered ``PREV_*`` features to the previous-application frame.

    The frame is modified in place and the same object is returned.

    Args:
        prev: previous-application DataFrame with the AMT_*, DAYS_* and
            CNT_PAYMENT columns used below.

    Returns:
        The same DataFrame with the additional feature columns and with the
        value 365243 in the DAYS_* columns listed below replaced by NaN.
    """
    # Difference / ratio between the amount applied for and the credit
    # actually granted / the goods price.
    prev['PREV_CREDIT_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_CREDIT']
    prev['PREV_GOODS_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_GOODS_PRICE']
    prev['PREV_CREDIT_APPL_RATIO'] = prev['AMT_CREDIT'] / prev['AMT_APPLICATION']
    prev['PREV_GOODS_APPL_RATIO'] = prev['AMT_GOODS_PRICE'] / prev['AMT_APPLICATION']

    # Replace the 365243 placeholder with NaN. The result is assigned back
    # instead of calling ``prev[col].replace(..., inplace=True)``: that
    # chained in-place form acts on a temporary Series and does not update
    # ``prev`` when pandas Copy-on-Write is active.
    placeholder_columns = [
        'DAYS_FIRST_DRAWING',
        'DAYS_FIRST_DUE',
        'DAYS_LAST_DUE_1ST_VERSION',
        'DAYS_LAST_DUE',
        'DAYS_TERMINATION',
    ]
    prev[placeholder_columns] = prev[placeholder_columns].replace(365243, np.nan)

    # Gap between the first-version last due date and the actual last due date.
    prev['PREV_DAYS_LAST_DUE_DIFF'] = prev['DAYS_LAST_DUE_1ST_VERSION'] - prev['DAYS_LAST_DUE']
    # Total amount paid = periodic annuity * number of payments.
    all_pay = prev['AMT_ANNUITY'] * prev['CNT_PAYMENT']
    # Total paid relative to the credit, minus the principal, spread over the
    # number of payments, gives a per-payment interest rate.
    prev['PREV_INTERESTS_RATE'] = (all_pay / prev['AMT_CREDIT'] - 1) / prev['CNT_PAYMENT']

    return prev
    
    
def get_prev_amt_agg(prev):
    """Aggregate previous-application amounts per ``SK_ID_CURR``.

    Args:
        prev: previous-application DataFrame that already contains the
            engineered ``PREV_*`` columns listed in ``agg_dict``.

    Returns:
        DataFrame indexed by ``SK_ID_CURR`` with one flat column per
        (source column, statistic) pair, named
        ``PREV_<SOURCE COLUMN>_<STATISTIC>`` in upper case.
    """
    # Statistics computed for each source column.
    agg_dict = {
        # Original columns.
        'SK_ID_CURR': ['count'],
        'AMT_CREDIT': ['mean', 'max', 'sum'],
        'AMT_ANNUITY': ['mean', 'max', 'sum'],
        'AMT_APPLICATION': ['mean', 'max', 'sum'],
        'AMT_DOWN_PAYMENT': ['mean', 'max', 'sum'],
        'AMT_GOODS_PRICE': ['mean', 'max', 'sum'],
        'RATE_DOWN_PAYMENT': ['min', 'max', 'mean'],
        'DAYS_DECISION': ['min', 'max', 'mean'],
        'CNT_PAYMENT': ['mean', 'sum'],
        # Engineered columns.
        'PREV_CREDIT_DIFF': ['mean', 'max', 'sum'],
        'PREV_CREDIT_APPL_RATIO': ['mean', 'max'],
        'PREV_GOODS_DIFF': ['mean', 'max', 'sum'],
        'PREV_GOODS_APPL_RATIO': ['mean', 'max'],
        'PREV_DAYS_LAST_DUE_DIFF': ['mean', 'max', 'sum'],
        'PREV_INTERESTS_RATE': ['mean', 'max'],
    }

    prev_group = prev.groupby('SK_ID_CURR')
    prev_amt_agg = prev_group.agg(agg_dict)

    # Flatten the (column, statistic) MultiIndex into single '_'-joined names.
    # to_flat_index() yields the tuples directly, avoiding the deprecated
    # MultiIndex.ravel() call.
    prev_amt_agg.columns = [
        "PREV_" + "_".join(pair).upper()
        for pair in prev_amt_agg.columns.to_flat_index()
    ]

    return prev_amt_agg

def get_prev_refused_appr_agg(prev):
    """Count approved and refused previous applications per ``SK_ID_CURR``.

    Only rows whose ``NAME_CONTRACT_STATUS`` is ``'Approved'`` or
    ``'Refused'`` are counted; customers with neither status do not appear
    in the result.

    Args:
        prev: previous-application DataFrame with ``SK_ID_CURR`` and
            ``NAME_CONTRACT_STATUS`` columns.

    Returns:
        DataFrame indexed by ``SK_ID_CURR`` with the columns
        ``PREV_APPROVED_COUNT`` and ``PREV_REFUSED_COUNT`` (0 where a
        customer has no application of that status).
    """
    statuses = ['Approved', 'Refused']
    status_rows = prev[prev['NAME_CONTRACT_STATUS'].isin(statuses)]
    # Group at the finer (customer, status) level, then pivot the status
    # level into columns.
    status_counts = status_rows.groupby(['SK_ID_CURR', 'NAME_CONTRACT_STATUS']).size().unstack()
    # Force both status columns to exist, in a fixed order, so the positional
    # rename below cannot fail or mislabel when one status is absent.
    status_counts = status_counts.reindex(columns=statuses)
    status_counts.columns = ['PREV_APPROVED_COUNT', 'PREV_REFUSED_COUNT']
    # A missing (customer, status) combination means zero applications.
    status_counts = status_counts.fillna(0)

    return status_counts

    

def get_prev_agg(prev):
    """Build the per-customer feature table from previous applications.

    Runs the feature engineering on ``prev``, aggregates amounts per
    ``SK_ID_CURR``, left-joins the approved/refused counts and converts
    those counts into ratios of the customer's total number of previous
    applications.

    Note: because the join is a left join, customers that are absent from
    the approved/refused table get NaN for both ratio columns.

    Args:
        prev: raw previous-application DataFrame.

    Returns:
        DataFrame indexed by ``SK_ID_CURR`` holding the aggregated features
        plus ``PREV_REFUSED_RATIO`` and ``PREV_APPROVED_RATIO``; the raw
        count columns are removed.
    """
    processed = get_prev_processed(prev)
    amount_features = get_prev_amt_agg(processed)
    status_counts = get_prev_refused_appr_agg(processed)

    # Attach the status counts to the amount aggregates.
    prev_agg = amount_features.merge(status_counts, on='SK_ID_CURR', how='left')

    # Share of refused / approved applications among all previous applications.
    total_applications = prev_agg['PREV_SK_ID_CURR_COUNT']
    prev_agg['PREV_REFUSED_RATIO'] = prev_agg['PREV_REFUSED_COUNT'] / total_applications
    prev_agg['PREV_APPROVED_RATIO'] = prev_agg['PREV_APPROVED_COUNT'] / total_applications

    # The raw counts are only needed to derive the ratios.
    return prev_agg.drop(columns=['PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT'])

def get_apps_all_with_prev_agg(apps, prev):
    """Join engineered application features with previous-application aggregates.

    Prints the shapes of the aggregate table and of the application table
    before and after the join.

    Args:
        apps: application DataFrame (train and test stacked).
        prev: raw previous-application DataFrame.

    Returns:
        The application DataFrame left-joined with the per-customer
        aggregates on ``SK_ID_CURR``.
    """
    processed_apps = get_apps_processed(apps)
    prev_features = get_prev_agg(prev)
    print('prev_agg shape:', prev_features.shape)
    print('apps_all before merge shape:', processed_apps.shape)

    # Left join: every application row is kept even without history.
    merged = processed_apps.merge(prev_features, on='SK_ID_CURR', how='left')
    print('apps_all after merge with prev_agg shape:', merged.shape)

    return merged

def get_apps_all_encoded(apps_all):
    """Integer-encode every text column of ``apps_all`` in place.

    Columns with ``object`` dtype or a pandas string dtype are replaced by
    the integer codes from ``pd.factorize`` (codes follow order of first
    appearance; missing values become -1). Matching on the string dtype as
    well keeps text columns from slipping through un-encoded when they are
    not stored as ``object``.

    Args:
        apps_all: DataFrame to encode; it is modified in place.

    Returns:
        The same DataFrame with its text columns replaced by integer codes.
    """
    is_object = pd.api.types.is_object_dtype
    is_string = pd.api.types.is_string_dtype
    text_columns = [
        column
        for column, dtype in apps_all.dtypes.items()
        if is_object(dtype) or is_string(dtype)
    ]
    for column in text_columns:
        apps_all[column] = pd.factorize(apps_all[column])[0]

    return apps_all

def get_apps_all_train_test(apps_all):
    """Split the combined frame back into train and test parts.

    Rows with a non-null ``TARGET`` form the training part; rows whose
    ``TARGET`` is null form the test part, from which the ``TARGET`` column
    is dropped.

    Args:
        apps_all: combined DataFrame containing a ``TARGET`` column.

    Returns:
        tuple: ``(apps_all_train, apps_all_test)``.
    """
    is_test_row = apps_all['TARGET'].isnull()

    labelled = apps_all[~is_test_row]
    unlabelled = apps_all[is_test_row].drop('TARGET', axis=1)

    return labelled, unlabelled
    
def train_apps_all(apps_all_train):
    """Fit a LightGBM classifier on a 70/30 hold-out split of the training frame.

    ``SK_ID_CURR`` and ``TARGET`` are removed from the features, ``TARGET``
    is the label. Both the training and the validation split are passed as
    evaluation sets with AUC as the metric.

    NOTE(review): ``silent`` and the ``verbose`` / ``early_stopping_rounds``
    keywords of ``fit`` depend on the installed LightGBM version -- confirm
    they are still accepted before upgrading.

    Args:
        apps_all_train: labelled DataFrame containing ``SK_ID_CURR`` and
            ``TARGET`` columns.

    Returns:
        The fitted ``LGBMClassifier``.
    """
    target_app = apps_all_train['TARGET']
    ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)

    train_x, valid_x, train_y, valid_y = train_test_split(
        ftr_app, target_app, test_size=0.3, random_state=2020)
    print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)

    model_params = dict(
        nthread=4,
        n_estimators=2000,
        learning_rate=0.01,
        num_leaves=32,
        colsample_bytree=0.8,
        subsample=0.8,
        max_depth=8,
        reg_alpha=0.04,
        reg_lambda=0.07,
        min_child_weight=40,
        silent=-1,
        verbose=-1,
    )
    clf = LGBMClassifier(**model_params)

    # Evaluate on both splits; the last entry is the validation split.
    eval_sets = [(train_x, train_y), (valid_x, valid_y)]
    clf.fit(train_x, train_y, eval_set=eval_sets, eval_metric='auc',
            verbose=100, early_stopping_rounds=100)

    return clf

In [5]:
# Build the merged application + previous-application feature table, then
# integer-encode its text columns.
apps_all = get_apps_all_with_prev_agg(apps, prev)
apps_all = get_apps_all_encoded(apps_all)
# Rows with a TARGET value form the training part, rows without one the test part.
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
# Features exclude the customer ID and the label.
ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
target_app = apps_all_train['TARGET']
# 70/30 hold-out split with a fixed seed. train_x / train_y / valid_x / valid_y
# are module-level names that lgb_roc_eval reads as globals.
train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)


  prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]


prev_agg shape: (338857, 41)
apps_all before merge shape: (356255, 135)
apps_all after merge with prev_agg shape: (356255, 176)


### Bayesian Optimization

In [6]:
from bayes_opt import BayesianOptimization
from sklearn.metrics import roc_auc_score
from lightgbm import LGBMClassifier

In [7]:
# Search range of the objective function's inputs: one (lower, upper) bound
# per hyperparameter. Passed to BayesianOptimization as `pbounds`; the keys
# match the parameter names of lgb_roc_eval.
baysian_params = {
    'max_depth' : (6, 16),
    'num_leaves': (24, 64),
    'min_child_samples': (10, 200),
    'min_child_weight': (1, 50),
    'subsample': (0.5, 1),
    'colsample_bytree': (0.5, 1),
    'max_bin': (10, 500),
    'reg_lambda': (0.001, 10),
    'reg_alpha': (0.01, 50)
}

In [8]:
def lgb_roc_eval(max_depth, num_leaves, min_child_samples, min_child_weight,
                 subsample, colsample_bytree, max_bin, reg_lambda, reg_alpha):
    """Objective for the Bayesian search: validation ROC AUC of one LightGBM fit.

    The optimizer supplies every hyperparameter as a float, so integer-valued
    ones are rounded and the rest are clamped to a valid range before the
    model is built. The model is trained on the module-level
    ``train_x`` / ``train_y`` and scored on ``valid_x`` / ``valid_y``.

    Returns:
        ROC AUC of the predicted positive-class probabilities on the
        validation split.
    """
    def to_int(value):
        # Round to the nearest integer for integer-valued hyperparameters.
        return int(round(value))

    def clamp_fraction(value):
        # Guard against values below 0 or above 1.
        return max(min(value, 1), 0)

    params = {
        'n_estimators': 500,
        'learning_rate': 0.02,
        'max_depth': to_int(max_depth),
        'num_leaves': to_int(num_leaves),
        'min_child_samples': to_int(min_child_samples),
        'min_child_weight': to_int(min_child_weight),
        'subsample': clamp_fraction(subsample),
        'colsample_bytree': clamp_fraction(colsample_bytree),
        'max_bin': max(to_int(max_bin), 10),
        'reg_lambda': max(reg_lambda, 0),
        'reg_alpha': max(reg_alpha, 0),
    }

    lgb_model = LGBMClassifier(**params)
    eval_sets = [(train_x, train_y), (valid_x, valid_y)]
    lgb_model.fit(train_x, train_y, eval_set=eval_sets, eval_metric='auc',
                  verbose=100, early_stopping_rounds=100)

    # Column 1 of predict_proba is the positive-class probability.
    positive_proba = lgb_model.predict_proba(valid_x)[:, 1]
    return roc_auc_score(valid_y, positive_proba)


In [9]:
# Set up the optimizer: maximize lgb_roc_eval over the bounds in
# baysian_params, with a fixed seed.
lbgBO = BayesianOptimization(
    f=lgb_roc_eval,
    pbounds=baysian_params,
    random_state=1
)

# init_points=5 and n_iter=25: the captured output below this cell logs one
# LightGBM fit and one table row per evaluation.
lbgBO.maximize(init_points=5, n_iter=25)

|   iter    |  target   | colsam... |  max_bin  | max_depth | min_ch... | min_ch... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------




[100]	training's auc: 0.762504	training's binary_logloss: 0.2479	valid_1's auc: 0.753638	valid_1's binary_logloss: 0.24943
[200]	training's auc: 0.778797	training's binary_logloss: 0.241178	valid_1's auc: 0.764735	valid_1's binary_logloss: 0.244671
[300]	training's auc: 0.788513	training's binary_logloss: 0.237491	valid_1's auc: 0.770055	valid_1's binary_logloss: 0.242744
[400]	training's auc: 0.795492	training's binary_logloss: 0.234889	valid_1's auc: 0.772857	valid_1's binary_logloss: 0.241745
[500]	training's auc: 0.801482	training's binary_logloss: 0.23268	valid_1's auc: 0.774752	valid_1's binary_logloss: 0.241084
| [0m1        [0m | [0m0.7748   [0m | [0m0.7085   [0m | [0m363.0    [0m | [0m6.001    [0m | [0m67.44    [0m | [0m8.191    [0m | [0m27.69    [0m | [0m9.321    [0m | [0m3.456    [0m | [0m0.6984   [0m |




[100]	training's auc: 0.758305	training's binary_logloss: 0.249178	valid_1's auc: 0.751171	valid_1's binary_logloss: 0.250295
[200]	training's auc: 0.773693	training's binary_logloss: 0.242877	valid_1's auc: 0.762591	valid_1's binary_logloss: 0.245534
[300]	training's auc: 0.782466	training's binary_logloss: 0.239579	valid_1's auc: 0.767914	valid_1's binary_logloss: 0.243582
[400]	training's auc: 0.788481	training's binary_logloss: 0.237356	valid_1's auc: 0.770862	valid_1's binary_logloss: 0.242523
[500]	training's auc: 0.793375	training's binary_logloss: 0.23563	valid_1's auc: 0.77268	valid_1's binary_logloss: 0.241896
| [0m2        [0m | [0m0.7727   [0m | [0m0.7694   [0m | [0m215.4    [0m | [0m12.85    [0m | [0m48.85    [0m | [0m44.03    [0m | [0m25.1     [0m | [0m33.53    [0m | [0m4.174    [0m | [0m0.7793   [0m |




[100]	training's auc: 0.767667	training's binary_logloss: 0.247799	valid_1's auc: 0.754776	valid_1's binary_logloss: 0.250018
[200]	training's auc: 0.781791	training's binary_logloss: 0.240807	valid_1's auc: 0.763695	valid_1's binary_logloss: 0.245306
[300]	training's auc: 0.791375	training's binary_logloss: 0.23701	valid_1's auc: 0.768574	valid_1's binary_logloss: 0.243419
[400]	training's auc: 0.798603	training's binary_logloss: 0.23428	valid_1's auc: 0.771475	valid_1's binary_logloss: 0.242405
[500]	training's auc: 0.804644	training's binary_logloss: 0.232042	valid_1's auc: 0.773122	valid_1's binary_logloss: 0.241831
| [0m3        [0m | [0m0.7731   [0m | [0m0.5702   [0m | [0m107.1    [0m | [0m14.01    [0m | [0m194.0    [0m | [0m16.36    [0m | [0m51.69    [0m | [0m43.82    [0m | [0m8.946    [0m | [0m0.5425   [0m |




[100]	training's auc: 0.773019	training's binary_logloss: 0.246309	valid_1's auc: 0.757109	valid_1's binary_logloss: 0.249334
[200]	training's auc: 0.788164	training's binary_logloss: 0.238668	valid_1's auc: 0.765957	valid_1's binary_logloss: 0.244482
[300]	training's auc: 0.799215	training's binary_logloss: 0.234287	valid_1's auc: 0.770575	valid_1's binary_logloss: 0.242676
[400]	training's auc: 0.807901	training's binary_logloss: 0.230959	valid_1's auc: 0.772958	valid_1's binary_logloss: 0.24179
[500]	training's auc: 0.81573	training's binary_logloss: 0.228057	valid_1's auc: 0.774291	valid_1's binary_logloss: 0.241288
| [0m4        [0m | [0m0.7743   [0m | [0m0.5195   [0m | [0m93.22    [0m | [0m14.78    [0m | [0m28.69    [0m | [0m21.63    [0m | [0m62.32    [0m | [0m26.66    [0m | [0m6.919    [0m | [0m0.6578   [0m |




[100]	training's auc: 0.769114	training's binary_logloss: 0.246552	valid_1's auc: 0.755193	valid_1's binary_logloss: 0.249081
[200]	training's auc: 0.785207	training's binary_logloss: 0.239522	valid_1's auc: 0.765419	valid_1's binary_logloss: 0.244468
[300]	training's auc: 0.794529	training's binary_logloss: 0.235774	valid_1's auc: 0.770098	valid_1's binary_logloss: 0.242675
[400]	training's auc: 0.800966	training's binary_logloss: 0.233216	valid_1's auc: 0.772403	valid_1's binary_logloss: 0.241785
[500]	training's auc: 0.806372	training's binary_logloss: 0.231121	valid_1's auc: 0.773807	valid_1's binary_logloss: 0.241265
| [0m5        [0m | [0m0.7738   [0m | [0m0.8433   [0m | [0m419.0    [0m | [0m6.183    [0m | [0m152.5    [0m | [0m49.45    [0m | [0m53.93    [0m | [0m14.03    [0m | [0m7.893    [0m | [0m0.5516   [0m |




[100]	training's auc: 0.770538	training's binary_logloss: 0.247255	valid_1's auc: 0.756131	valid_1's binary_logloss: 0.249796
[200]	training's auc: 0.784449	training's binary_logloss: 0.24004	valid_1's auc: 0.7644	valid_1's binary_logloss: 0.245069
[300]	training's auc: 0.794206	training's binary_logloss: 0.236081	valid_1's auc: 0.768949	valid_1's binary_logloss: 0.243245
[400]	training's auc: 0.801745	training's binary_logloss: 0.233222	valid_1's auc: 0.771383	valid_1's binary_logloss: 0.242343
[500]	training's auc: 0.808458	training's binary_logloss: 0.230708	valid_1's auc: 0.773123	valid_1's binary_logloss: 0.24173
| [0m6        [0m | [0m0.7731   [0m | [0m0.5021   [0m | [0m93.64    [0m | [0m13.03    [0m | [0m31.74    [0m | [0m24.13    [0m | [0m61.17    [0m | [0m39.98    [0m | [0m3.506    [0m | [0m0.7127   [0m |




[100]	training's auc: 0.758737	training's binary_logloss: 0.248743	valid_1's auc: 0.751098	valid_1's binary_logloss: 0.24996
[200]	training's auc: 0.775102	training's binary_logloss: 0.242311	valid_1's auc: 0.763199	valid_1's binary_logloss: 0.245169
[300]	training's auc: 0.78423	training's binary_logloss: 0.238917	valid_1's auc: 0.76878	valid_1's binary_logloss: 0.243176
[400]	training's auc: 0.790455	training's binary_logloss: 0.236605	valid_1's auc: 0.771435	valid_1's binary_logloss: 0.242221
[500]	training's auc: 0.79584	training's binary_logloss: 0.234674	valid_1's auc: 0.773296	valid_1's binary_logloss: 0.241554
| [0m7        [0m | [0m0.7733   [0m | [0m0.8678   [0m | [0m126.6    [0m | [0m7.921    [0m | [0m28.8     [0m | [0m9.69     [0m | [0m24.02    [0m | [0m16.91    [0m | [0m6.615    [0m | [0m0.9486   [0m |




[100]	training's auc: 0.771173	training's binary_logloss: 0.245083	valid_1's auc: 0.755845	valid_1's binary_logloss: 0.248337
[200]	training's auc: 0.790712	training's binary_logloss: 0.237082	valid_1's auc: 0.767381	valid_1's binary_logloss: 0.243641
[300]	training's auc: 0.803402	training's binary_logloss: 0.23224	valid_1's auc: 0.771905	valid_1's binary_logloss: 0.24199
[400]	training's auc: 0.813874	training's binary_logloss: 0.228376	valid_1's auc: 0.774608	valid_1's binary_logloss: 0.241052
[500]	training's auc: 0.823042	training's binary_logloss: 0.225008	valid_1's auc: 0.776183	valid_1's binary_logloss: 0.240519
| [95m8        [0m | [95m0.7762   [0m | [95m0.9126   [0m | [95m401.5    [0m | [95m8.41     [0m | [95m151.8    [0m | [95m4.134    [0m | [95m43.34    [0m | [95m4.129    [0m | [95m6.242    [0m | [95m0.8421   [0m |




[100]	training's auc: 0.766052	training's binary_logloss: 0.24767	valid_1's auc: 0.753334	valid_1's binary_logloss: 0.250004
[200]	training's auc: 0.781144	training's binary_logloss: 0.240789	valid_1's auc: 0.763497	valid_1's binary_logloss: 0.24527
[300]	training's auc: 0.790478	training's binary_logloss: 0.237083	valid_1's auc: 0.768602	valid_1's binary_logloss: 0.243364
[400]	training's auc: 0.797577	training's binary_logloss: 0.234425	valid_1's auc: 0.771603	valid_1's binary_logloss: 0.242313
[500]	training's auc: 0.803375	training's binary_logloss: 0.232281	valid_1's auc: 0.773245	valid_1's binary_logloss: 0.241729
| [0m9        [0m | [0m0.7732   [0m | [0m0.7513   [0m | [0m375.2    [0m | [0m9.405    [0m | [0m140.4    [0m | [0m15.21    [0m | [0m45.24    [0m | [0m47.31    [0m | [0m3.949    [0m | [0m0.9567   [0m |




[100]	training's auc: 0.762654	training's binary_logloss: 0.247691	valid_1's auc: 0.753472	valid_1's binary_logloss: 0.249357
[200]	training's auc: 0.779928	training's binary_logloss: 0.240725	valid_1's auc: 0.765106	valid_1's binary_logloss: 0.244527
[300]	training's auc: 0.790132	training's binary_logloss: 0.236874	valid_1's auc: 0.770473	valid_1's binary_logloss: 0.242604
[400]	training's auc: 0.798054	training's binary_logloss: 0.234031	valid_1's auc: 0.773156	valid_1's binary_logloss: 0.241669
[500]	training's auc: 0.804825	training's binary_logloss: 0.231577	valid_1's auc: 0.77509	valid_1's binary_logloss: 0.240985
| [0m10       [0m | [0m0.7751   [0m | [0m0.7641   [0m | [0m189.1    [0m | [0m11.26    [0m | [0m87.43    [0m | [0m30.17    [0m | [0m28.59    [0m | [0m8.651    [0m | [0m9.464    [0m | [0m0.875    [0m |




[100]	training's auc: 0.770007	training's binary_logloss: 0.245963	valid_1's auc: 0.753841	valid_1's binary_logloss: 0.249248
[200]	training's auc: 0.787888	training's binary_logloss: 0.238339	valid_1's auc: 0.765022	valid_1's binary_logloss: 0.244582
[300]	training's auc: 0.799279	training's binary_logloss: 0.233956	valid_1's auc: 0.770249	valid_1's binary_logloss: 0.242725
[400]	training's auc: 0.808167	training's binary_logloss: 0.230629	valid_1's auc: 0.772771	valid_1's binary_logloss: 0.241822
[500]	training's auc: 0.815874	training's binary_logloss: 0.227723	valid_1's auc: 0.774539	valid_1's binary_logloss: 0.24121
| [0m11       [0m | [0m0.7745   [0m | [0m0.9634   [0m | [0m481.1    [0m | [0m9.868    [0m | [0m154.2    [0m | [0m16.6     [0m | [0m56.71    [0m | [0m33.91    [0m | [0m3.129    [0m | [0m0.5961   [0m |




[100]	training's auc: 0.763226	training's binary_logloss: 0.247801	valid_1's auc: 0.752095	valid_1's binary_logloss: 0.249798
[200]	training's auc: 0.77946	training's binary_logloss: 0.241003	valid_1's auc: 0.763433	valid_1's binary_logloss: 0.245092
[300]	training's auc: 0.789185	training's binary_logloss: 0.237314	valid_1's auc: 0.768657	valid_1's binary_logloss: 0.243225
[400]	training's auc: 0.796367	training's binary_logloss: 0.234679	valid_1's auc: 0.771542	valid_1's binary_logloss: 0.242257
[500]	training's auc: 0.802338	training's binary_logloss: 0.232511	valid_1's auc: 0.77342	valid_1's binary_logloss: 0.241615
| [0m12       [0m | [0m0.7734   [0m | [0m0.9757   [0m | [0m283.0    [0m | [0m14.43    [0m | [0m42.81    [0m | [0m40.66    [0m | [0m37.7     [0m | [0m37.45    [0m | [0m8.349    [0m | [0m0.6578   [0m |




[100]	training's auc: 0.764623	training's binary_logloss: 0.247222	valid_1's auc: 0.753795	valid_1's binary_logloss: 0.249262
[200]	training's auc: 0.781528	training's binary_logloss: 0.240238	valid_1's auc: 0.764755	valid_1's binary_logloss: 0.244614
[300]	training's auc: 0.792153	training's binary_logloss: 0.236255	valid_1's auc: 0.769977	valid_1's binary_logloss: 0.242765
[400]	training's auc: 0.800162	training's binary_logloss: 0.233345	valid_1's auc: 0.772366	valid_1's binary_logloss: 0.241925
[500]	training's auc: 0.80709	training's binary_logloss: 0.230862	valid_1's auc: 0.773862	valid_1's binary_logloss: 0.24141
| [0m13       [0m | [0m0.7739   [0m | [0m0.8052   [0m | [0m32.11    [0m | [0m10.36    [0m | [0m99.46    [0m | [0m7.165    [0m | [0m36.61    [0m | [0m12.09    [0m | [0m8.402    [0m | [0m0.9259   [0m |




[100]	training's auc: 0.775925	training's binary_logloss: 0.245031	valid_1's auc: 0.75946	valid_1's binary_logloss: 0.248299
[200]	training's auc: 0.793424	training's binary_logloss: 0.236695	valid_1's auc: 0.76858	valid_1's binary_logloss: 0.24342
[300]	training's auc: 0.806262	training's binary_logloss: 0.231617	valid_1's auc: 0.772687	valid_1's binary_logloss: 0.241754
[400]	training's auc: 0.81745	training's binary_logloss: 0.227443	valid_1's auc: 0.7747	valid_1's binary_logloss: 0.24104
[500]	training's auc: 0.827298	training's binary_logloss: 0.223747	valid_1's auc: 0.775997	valid_1's binary_logloss: 0.240552
| [0m14       [0m | [0m0.776    [0m | [0m0.5144   [0m | [0m113.2    [0m | [0m12.86    [0m | [0m133.8    [0m | [0m48.37    [0m | [0m53.47    [0m | [0m5.095    [0m | [0m9.321    [0m | [0m0.7577   [0m |




[100]	training's auc: 0.764634	training's binary_logloss: 0.248309	valid_1's auc: 0.753414	valid_1's binary_logloss: 0.250233
[200]	training's auc: 0.778645	training's binary_logloss: 0.241606	valid_1's auc: 0.76285	valid_1's binary_logloss: 0.24549
[300]	training's auc: 0.787346	training's binary_logloss: 0.23813	valid_1's auc: 0.767485	valid_1's binary_logloss: 0.243675
[400]	training's auc: 0.794185	training's binary_logloss: 0.235575	valid_1's auc: 0.770536	valid_1's binary_logloss: 0.242606
[500]	training's auc: 0.799812	training's binary_logloss: 0.233516	valid_1's auc: 0.772497	valid_1's binary_logloss: 0.241942
| [0m15       [0m | [0m0.7725   [0m | [0m0.595    [0m | [0m180.3    [0m | [0m15.83    [0m | [0m64.95    [0m | [0m22.79    [0m | [0m41.12    [0m | [0m45.48    [0m | [0m8.293    [0m | [0m0.9934   [0m |




[100]	training's auc: 0.761172	training's binary_logloss: 0.24835	valid_1's auc: 0.75341	valid_1's binary_logloss: 0.249572
[200]	training's auc: 0.777152	training's binary_logloss: 0.241735	valid_1's auc: 0.764446	valid_1's binary_logloss: 0.244778
[300]	training's auc: 0.786583	training's binary_logloss: 0.238109	valid_1's auc: 0.769196	valid_1's binary_logloss: 0.24294
[400]	training's auc: 0.793736	training's binary_logloss: 0.235477	valid_1's auc: 0.771777	valid_1's binary_logloss: 0.24202
[500]	training's auc: 0.799829	training's binary_logloss: 0.233293	valid_1's auc: 0.773461	valid_1's binary_logloss: 0.241433
| [0m16       [0m | [0m0.7735   [0m | [0m0.7062   [0m | [0m41.45    [0m | [0m6.223    [0m | [0m179.6    [0m | [0m9.368    [0m | [0m26.71    [0m | [0m1.877    [0m | [0m9.575    [0m | [0m0.6605   [0m |




[100]	training's auc: 0.768084	training's binary_logloss: 0.246523	valid_1's auc: 0.753594	valid_1's binary_logloss: 0.24943
[200]	training's auc: 0.785086	training's binary_logloss: 0.239207	valid_1's auc: 0.76449	valid_1's binary_logloss: 0.244793
[300]	training's auc: 0.795791	training's binary_logloss: 0.235079	valid_1's auc: 0.769624	valid_1's binary_logloss: 0.242965
[400]	training's auc: 0.804105	training's binary_logloss: 0.231989	valid_1's auc: 0.772377	valid_1's binary_logloss: 0.242009
[500]	training's auc: 0.811215	training's binary_logloss: 0.229364	valid_1's auc: 0.773913	valid_1's binary_logloss: 0.241486
| [0m17       [0m | [0m0.7739   [0m | [0m0.8955   [0m | [0m221.6    [0m | [0m9.909    [0m | [0m120.5    [0m | [0m14.62    [0m | [0m50.54    [0m | [0m34.58    [0m | [0m1.319    [0m | [0m0.7147   [0m |




[100]	training's auc: 0.766219	training's binary_logloss: 0.247154	valid_1's auc: 0.754685	valid_1's binary_logloss: 0.249306
[200]	training's auc: 0.78256	training's binary_logloss: 0.240111	valid_1's auc: 0.76543	valid_1's binary_logloss: 0.244564
[300]	training's auc: 0.792787	training's binary_logloss: 0.236177	valid_1's auc: 0.770647	valid_1's binary_logloss: 0.242659
[400]	training's auc: 0.800571	training's binary_logloss: 0.233297	valid_1's auc: 0.77327	valid_1's binary_logloss: 0.241723
[500]	training's auc: 0.807223	training's binary_logloss: 0.23088	valid_1's auc: 0.774683	valid_1's binary_logloss: 0.241215
| [0m18       [0m | [0m0.7747   [0m | [0m0.6853   [0m | [0m155.6    [0m | [0m16.0     [0m | [0m120.8    [0m | [0m40.93    [0m | [0m40.18    [0m | [0m24.13    [0m | [0m2.289    [0m | [0m0.5043   [0m |




[100]	training's auc: 0.765396	training's binary_logloss: 0.247426	valid_1's auc: 0.755146	valid_1's binary_logloss: 0.249334
[200]	training's auc: 0.780913	training's binary_logloss: 0.240536	valid_1's auc: 0.765481	valid_1's binary_logloss: 0.244517
[300]	training's auc: 0.790712	training's binary_logloss: 0.236718	valid_1's auc: 0.770699	valid_1's binary_logloss: 0.24255
[400]	training's auc: 0.797853	training's binary_logloss: 0.234028	valid_1's auc: 0.773339	valid_1's binary_logloss: 0.241593
[500]	training's auc: 0.804101	training's binary_logloss: 0.231729	valid_1's auc: 0.775071	valid_1's binary_logloss: 0.240992
| [0m19       [0m | [0m0.7751   [0m | [0m0.6009   [0m | [0m294.2    [0m | [0m6.992    [0m | [0m61.34    [0m | [0m19.63    [0m | [0m33.36    [0m | [0m12.7     [0m | [0m7.577    [0m | [0m0.7208   [0m |




[100]	training's auc: 0.759603	training's binary_logloss: 0.249118	valid_1's auc: 0.751783	valid_1's binary_logloss: 0.250329
[200]	training's auc: 0.774098	training's binary_logloss: 0.242908	valid_1's auc: 0.762703	valid_1's binary_logloss: 0.245568
[300]	training's auc: 0.782236	training's binary_logloss: 0.239743	valid_1's auc: 0.767983	valid_1's binary_logloss: 0.243591
[400]	training's auc: 0.787941	training's binary_logloss: 0.23759	valid_1's auc: 0.770969	valid_1's binary_logloss: 0.242509
[500]	training's auc: 0.792417	training's binary_logloss: 0.235916	valid_1's auc: 0.77268	valid_1's binary_logloss: 0.241866
| [0m20       [0m | [0m0.7727   [0m | [0m0.8361   [0m | [0m180.4    [0m | [0m6.4      [0m | [0m137.9    [0m | [0m34.23    [0m | [0m28.65    [0m | [0m38.12    [0m | [0m9.619    [0m | [0m0.6246   [0m |




[100]	training's auc: 0.765749	training's binary_logloss: 0.24721	valid_1's auc: 0.752813	valid_1's binary_logloss: 0.24969
[200]	training's auc: 0.782274	training's binary_logloss: 0.240166	valid_1's auc: 0.763962	valid_1's binary_logloss: 0.244975
[300]	training's auc: 0.792311	training's binary_logloss: 0.236308	valid_1's auc: 0.769222	valid_1's binary_logloss: 0.243086
[400]	training's auc: 0.79999	training's binary_logloss: 0.233475	valid_1's auc: 0.772065	valid_1's binary_logloss: 0.242109
[500]	training's auc: 0.806447	training's binary_logloss: 0.231083	valid_1's auc: 0.773818	valid_1's binary_logloss: 0.241504
| [0m21       [0m | [0m0.7738   [0m | [0m0.937    [0m | [0m404.9    [0m | [0m8.822    [0m | [0m146.4    [0m | [0m11.05    [0m | [0m46.34    [0m | [0m41.82    [0m | [0m4.554    [0m | [0m0.8903   [0m |




[100]	training's auc: 0.768877	training's binary_logloss: 0.246325	valid_1's auc: 0.754479	valid_1's binary_logloss: 0.2491
[200]	training's auc: 0.786097	training's binary_logloss: 0.239048	valid_1's auc: 0.765392	valid_1's binary_logloss: 0.244453
[300]	training's auc: 0.796442	training's binary_logloss: 0.234969	valid_1's auc: 0.770484	valid_1's binary_logloss: 0.242593
[400]	training's auc: 0.804156	training's binary_logloss: 0.232014	valid_1's auc: 0.772846	valid_1's binary_logloss: 0.241739
[500]	training's auc: 0.810798	training's binary_logloss: 0.229503	valid_1's auc: 0.774352	valid_1's binary_logloss: 0.24118
| [0m22       [0m | [0m0.7744   [0m | [0m0.9717   [0m | [0m233.6    [0m | [0m7.091    [0m | [0m129.2    [0m | [0m16.19    [0m | [0m54.22    [0m | [0m26.29    [0m | [0m5.379    [0m | [0m0.922    [0m |




[100]	training's auc: 0.767273	training's binary_logloss: 0.246934	valid_1's auc: 0.754059	valid_1's binary_logloss: 0.249432
[200]	training's auc: 0.78356	training's binary_logloss: 0.239832	valid_1's auc: 0.764784	valid_1's binary_logloss: 0.244712
[300]	training's auc: 0.793858	training's binary_logloss: 0.235846	valid_1's auc: 0.770016	valid_1's binary_logloss: 0.242821
[400]	training's auc: 0.801742	training's binary_logloss: 0.232882	valid_1's auc: 0.772783	valid_1's binary_logloss: 0.241847
[500]	training's auc: 0.808444	training's binary_logloss: 0.230439	valid_1's auc: 0.774425	valid_1's binary_logloss: 0.24127
| [0m23       [0m | [0m0.7744   [0m | [0m0.7888   [0m | [0m360.7    [0m | [0m9.317    [0m | [0m77.34    [0m | [0m31.8     [0m | [0m47.15    [0m | [0m33.48    [0m | [0m5.403    [0m | [0m0.9758   [0m |




[100]	training's auc: 0.757725	training's binary_logloss: 0.249271	valid_1's auc: 0.750678	valid_1's binary_logloss: 0.250329
[200]	training's auc: 0.772861	training's binary_logloss: 0.243097	valid_1's auc: 0.762139	valid_1's binary_logloss: 0.245614
[300]	training's auc: 0.781629	training's binary_logloss: 0.239851	valid_1's auc: 0.767759	valid_1's binary_logloss: 0.243595
[400]	training's auc: 0.787434	training's binary_logloss: 0.237708	valid_1's auc: 0.770491	valid_1's binary_logloss: 0.242603
[500]	training's auc: 0.792209	training's binary_logloss: 0.236001	valid_1's auc: 0.772203	valid_1's binary_logloss: 0.241994
| [0m24       [0m | [0m0.7722   [0m | [0m0.8967   [0m | [0m75.06    [0m | [0m13.24    [0m | [0m175.7    [0m | [0m12.26    [0m | [0m26.07    [0m | [0m35.86    [0m | [0m9.655    [0m | [0m0.5453   [0m |




[100]	training's auc: 0.76713	training's binary_logloss: 0.246973	valid_1's auc: 0.753158	valid_1's binary_logloss: 0.249747
[200]	training's auc: 0.783592	training's binary_logloss: 0.239805	valid_1's auc: 0.763924	valid_1's binary_logloss: 0.245071
[300]	training's auc: 0.794051	training's binary_logloss: 0.235812	valid_1's auc: 0.768936	valid_1's binary_logloss: 0.243251
[400]	training's auc: 0.801858	training's binary_logloss: 0.232933	valid_1's auc: 0.77196	valid_1's binary_logloss: 0.242253
[500]	training's auc: 0.808582	training's binary_logloss: 0.230488	valid_1's auc: 0.773651	valid_1's binary_logloss: 0.241662
| [0m25       [0m | [0m0.7737   [0m | [0m0.8264   [0m | [0m482.0    [0m | [0m14.41    [0m | [0m111.9    [0m | [0m1.149    [0m | [0m49.73    [0m | [0m44.7     [0m | [0m0.06606  [0m | [0m0.6456   [0m |




[100]	training's auc: 0.777145	training's binary_logloss: 0.244225	valid_1's auc: 0.758342	valid_1's binary_logloss: 0.248192
[200]	training's auc: 0.796038	training's binary_logloss: 0.235651	valid_1's auc: 0.768443	valid_1's binary_logloss: 0.243431
[300]	training's auc: 0.80981	training's binary_logloss: 0.230226	valid_1's auc: 0.773393	valid_1's binary_logloss: 0.241591
[400]	training's auc: 0.821475	training's binary_logloss: 0.225829	valid_1's auc: 0.775757	valid_1's binary_logloss: 0.24074
[500]	training's auc: 0.831886	training's binary_logloss: 0.221888	valid_1's auc: 0.777286	valid_1's binary_logloss: 0.240192
| [95m26       [0m | [95m0.7773   [0m | [95m0.6791   [0m | [95m495.6    [0m | [95m13.5     [0m | [95m154.9    [0m | [95m41.06    [0m | [95m59.61    [0m | [95m10.89    [0m | [95m7.263    [0m | [95m0.9834   [0m |




[100]	training's auc: 0.779513	training's binary_logloss: 0.242562	valid_1's auc: 0.758305	valid_1's binary_logloss: 0.247564
[200]	training's auc: 0.801996	training's binary_logloss: 0.233156	valid_1's auc: 0.769182	valid_1's binary_logloss: 0.243
[300]	training's auc: 0.817809	training's binary_logloss: 0.227005	valid_1's auc: 0.773205	valid_1's binary_logloss: 0.241551
[400]	training's auc: 0.831313	training's binary_logloss: 0.22181	valid_1's auc: 0.775798	valid_1's binary_logloss: 0.240638
[500]	training's auc: 0.843017	training's binary_logloss: 0.217342	valid_1's auc: 0.777013	valid_1's binary_logloss: 0.240222
| [0m27       [0m | [0m0.777    [0m | [0m0.9566   [0m | [0m402.0    [0m | [0m10.79    [0m | [0m41.55    [0m | [0m26.34    [0m | [0m62.53    [0m | [0m5.918    [0m | [0m2.265    [0m | [0m0.8735   [0m |




[100]	training's auc: 0.782237	training's binary_logloss: 0.241871	valid_1's auc: 0.759836	valid_1's binary_logloss: 0.247288
[200]	training's auc: 0.805036	training's binary_logloss: 0.23209	valid_1's auc: 0.769754	valid_1's binary_logloss: 0.242751
[300]	training's auc: 0.821977	training's binary_logloss: 0.225544	valid_1's auc: 0.773663	valid_1's binary_logloss: 0.241275
[400]	training's auc: 0.836157	training's binary_logloss: 0.220134	valid_1's auc: 0.775896	valid_1's binary_logloss: 0.240522
[500]	training's auc: 0.848054	training's binary_logloss: 0.215562	valid_1's auc: 0.777102	valid_1's binary_logloss: 0.240092
| [0m28       [0m | [0m0.7771   [0m | [0m0.71     [0m | [0m494.0    [0m | [0m10.77    [0m | [0m162.3    [0m | [0m31.09    [0m | [0m60.92    [0m | [0m0.1603   [0m | [0m0.9464   [0m | [0m0.5215   [0m |




[100]	training's auc: 0.761858	training's binary_logloss: 0.247982	valid_1's auc: 0.753428	valid_1's binary_logloss: 0.249459
[200]	training's auc: 0.778724	training's binary_logloss: 0.24109	valid_1's auc: 0.765405	valid_1's binary_logloss: 0.244501
[300]	training's auc: 0.7888	training's binary_logloss: 0.237295	valid_1's auc: 0.770746	valid_1's binary_logloss: 0.242547
[400]	training's auc: 0.796399	training's binary_logloss: 0.234516	valid_1's auc: 0.773817	valid_1's binary_logloss: 0.241459
[500]	training's auc: 0.802752	training's binary_logloss: 0.232224	valid_1's auc: 0.775655	valid_1's binary_logloss: 0.240807
| [0m29       [0m | [0m0.7757   [0m | [0m0.7145   [0m | [0m406.7    [0m | [0m14.3     [0m | [0m133.6    [0m | [0m29.27    [0m | [0m24.92    [0m | [0m6.614    [0m | [0m4.345    [0m | [0m0.5224   [0m |




[100]	training's auc: 0.776723	training's binary_logloss: 0.244378	valid_1's auc: 0.759144	valid_1's binary_logloss: 0.248089
[200]	training's auc: 0.795496	training's binary_logloss: 0.235765	valid_1's auc: 0.769046	valid_1's binary_logloss: 0.243183
[300]	training's auc: 0.809237	training's binary_logloss: 0.230399	valid_1's auc: 0.773284	valid_1's binary_logloss: 0.241536
[400]	training's auc: 0.820955	training's binary_logloss: 0.225945	valid_1's auc: 0.775525	valid_1's binary_logloss: 0.240714
[500]	training's auc: 0.831519	training's binary_logloss: 0.222	valid_1's auc: 0.777019	valid_1's binary_logloss: 0.240179
| [0m30       [0m | [0m0.777    [0m | [0m0.6166   [0m | [0m498.4    [0m | [0m12.9     [0m | [0m170.8    [0m | [0m47.75    [0m | [0m52.59    [0m | [0m4.512    [0m | [0m8.353    [0m | [0m0.9305   [0m |


In [10]:
# Display every optimization step recorded so far: a list of dicts, each holding
# the 'target' score and the 'params' that produced it.
lbgBO.res

[{'target': 0.7747515318161347,
  'params': {'colsample_bytree': 0.7085110023512871,
   'max_bin': 362.9590017866575,
   'max_depth': 6.001143748173449,
   'min_child_samples': 67.44318880004955,
   'min_child_weight': 8.19103865003854,
   'num_leaves': 27.69354379075191,
   'reg_alpha': 9.321147966769768,
   'reg_lambda': 3.4562617097034343,
   'subsample': 0.698383737115335}},
 {'target': 0.7726800872577807,
  'params': {'colsample_bytree': 0.7694083670016785,
   'max_bin': 215.40531205761445,
   'max_depth': 12.852195003967594,
   'min_child_samples': 48.84592744898831,
   'min_child_weight': 44.02775438315633,
   'num_leaves': 25.09550372791705,
   'reg_alpha': 33.52667083381833,
   'reg_lambda': 4.173630718868903,
   'subsample': 0.7793449142228759}},
 {'target': 0.7731216247078669,
  'params': {'colsample_bytree': 0.5701934692976169,
   'max_bin': 107.0697296515906,
   'max_depth': 14.007445686755366,
   'min_child_samples': 193.96969938668553,
   'min_child_weight': 16.357784729

In [11]:
# Collect the 'target' score of every optimization step stored in lbgBO.res.
target_list = [step['target'] for step in lbgBO.res]
print(target_list)

# Use argmax to locate the index of the step with the highest target.
print('maximum target index:', np.argmax(np.array(target_list)))

[0.7747515318161347, 0.7726800872577807, 0.7731216247078669, 0.7742910108353326, 0.7738070427875996, 0.7731230971982839, 0.7732957185047409, 0.7761834961834931, 0.7732448310149904, 0.7750903841065592, 0.7745394471311235, 0.7734197984577427, 0.7738622429110955, 0.7759966455683464, 0.7724974571464506, 0.7734608963483132, 0.7739129381987528, 0.7746834716253075, 0.775070804908199, 0.7726797489185586, 0.7738176107258349, 0.7743523074186134, 0.7744248629145971, 0.7722034149473564, 0.7736508364929756, 0.7772857958381745, 0.7770127624397868, 0.7771019060883321, 0.7756554201380265, 0.7770188493688892]
maximum target index: 25


In [12]:
# Pull out the result entry (score + params) with the largest target value.
best_index = np.argmax(np.array(target_list))
max_dict = lbgBO.res[best_index]
print(max_dict)

{'target': 0.7772857958381745, 'params': {'colsample_bytree': 0.6790874358630945, 'max_bin': 495.5883417872887, 'max_depth': 13.495577949310704, 'min_child_samples': 154.92432612082445, 'min_child_weight': 41.056153505864344, 'num_leaves': 59.614555940082454, 'reg_alpha': 10.88727137584814, 'reg_lambda': 7.263008197687969, 'subsample': 0.9833530340999077}}


In [13]:
# Re-test with the optimized hyperparameters.
def train_apps_all(apps_all_train):
    """Fit an LGBMClassifier with the tuned hyperparameters and return it.

    The hyperparameter values are rounded versions of the best 'params' entry
    printed from the Bayesian optimization results.

    Args:
        apps_all_train: DataFrame holding the features plus the 'SK_ID_CURR'
            and 'TARGET' columns.

    Returns:
        The fitted LGBMClassifier.
    """
    target_app = apps_all_train['TARGET']
    # The id and the label are not features.
    ftr_app = apps_all_train.drop(columns=['SK_ID_CURR', 'TARGET'])

    # Hold out 30% of the rows for validation; the seed keeps the split fixed.
    train_x, valid_x, train_y, valid_y = train_test_split(
        ftr_app, target_app, test_size=0.3, random_state=2020
    )
    print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)

    tuned_params = dict(
        nthread=4,
        n_estimators=1000,
        learning_rate=0.02,
        max_depth=13,
        num_leaves=60,
        colsample_bytree=0.679,
        subsample=0.983,
        max_bin=495,
        reg_alpha=10.887,
        reg_lambda=7.2630,
        min_child_weight=41,
        min_child_samples=154,
        silent=-1,
        verbose=-1,
    )
    clf = LGBMClassifier(**tuned_params)

    # Track AUC on both the train and validation splits, logging every 100
    # rounds, with an early-stopping patience of 100 rounds.
    eval_pairs = [(train_x, train_y), (valid_x, valid_y)]
    clf.fit(
        train_x,
        train_y,
        eval_set=eval_pairs,
        eval_metric='auc',
        verbose=100,
        early_stopping_rounds=100,
    )

    return clf

In [14]:
# Rebuild the combined application + previous-application dataset and run the
# encoding step on it.
apps_all = get_apps_all_encoded(get_apps_all_with_prev_agg(apps, prev))

# Separate the train and test portions, then fit the tuned model on the train part.
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
clf = train_apps_all(apps_all_train)

  prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]


prev_agg shape: (338857, 41)
apps_all before merge shape: (356255, 135)
apps_all after merge with prev_agg shape: (356255, 176)
train shape: (215257, 174) valid shape: (92254, 174)




[100]	training's auc: 0.777229	training's binary_logloss: 0.244199	valid_1's auc: 0.758265	valid_1's binary_logloss: 0.248217
[200]	training's auc: 0.796064	training's binary_logloss: 0.235625	valid_1's auc: 0.768193	valid_1's binary_logloss: 0.243475
[300]	training's auc: 0.809745	training's binary_logloss: 0.230233	valid_1's auc: 0.7729	valid_1's binary_logloss: 0.241713
[400]	training's auc: 0.821329	training's binary_logloss: 0.225846	valid_1's auc: 0.775437	valid_1's binary_logloss: 0.240852
[500]	training's auc: 0.831812	training's binary_logloss: 0.221851	valid_1's auc: 0.777047	valid_1's binary_logloss: 0.240281
[600]	training's auc: 0.8412	training's binary_logloss: 0.218269	valid_1's auc: 0.777948	valid_1's binary_logloss: 0.239961
[700]	training's auc: 0.849701	training's binary_logloss: 0.214961	valid_1's auc: 0.77841	valid_1's binary_logloss: 0.239814
[800]	training's auc: 0.857426	training's binary_logloss: 0.211839	valid_1's auc: 0.778648	valid_1's binary_logloss: 0.2397

In [15]:
# Score the test rows with the tuned model and write the submission file.
# 'TARGET' is dropped together with the id (and ignored when absent) so that
# re-running this cell after it has already added the 'TARGET' prediction
# column does not feed those predictions back to the model as an extra feature.
test_features = apps_all_test.drop(columns=['SK_ID_CURR', 'TARGET'], errors='ignore')

# Keep the second column of predict_proba as the submitted score.
preds = clf.predict_proba(test_features)[:, 1]
apps_all_test['TARGET'] = preds
apps_all_test[['SK_ID_CURR', 'TARGET']].to_csv('prev_baseline_tuning_02.csv', index=False)