In [1]:
# Notebook-wide setup: completer config, third-party imports and display options.
%config Completer.use_jedi = False
import pandas as pd
import numpy as np
# Let wide/long frames render (up to 1000 rows/columns) instead of being truncated.
pd.options.display.max_rows=1000
pd.options.display.max_columns=1000
# NOTE(review): math, re, dt, get_bureau_feats, Pool, copy and f1_score are not
# referenced in the cells visible here — confirm whether later cells need them.
import math
import re
import datetime as dt
from bureau_fc import get_bureau_feats
from multiprocessing import Pool
import warnings
# Silences every warning category for the whole session, including pandas
# chained-assignment and deprecation warnings.
warnings.filterwarnings("ignore")
import copy
from sklearn.metrics import f1_score

In [2]:
import sys
# Extend the module search path with the local 'ml_lib/' directory before the
# imports below (presumably where encoding, custom_classifier_mutliclass and
# hyperopt_multiclass live — verify against the repository layout).
sys.path.append('ml_lib/')
from encoding import FreqeuncyEncoding
from custom_classifier_mutliclass import Estimator
from hyperopt_multiclass import HyperOptModelSelection
from hyperopt import hp
from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMClassifier

In [3]:
import os

# Directory holding train_Data.csv / test_Data.csv.
# The original author's absolute path stays as the default, but it can be
# overridden with the LTFS_DATA_DIR environment variable so the notebook runs
# on other machines without editing this cell.
# os.path.join(..., '') guarantees a trailing path separator, because later
# cells build file paths with plain string concatenation (DATA_DIR + 'file').
DATA_DIR = os.path.join(
    os.environ.get('LTFS_DATA_DIR', '/home/sahil/data/av/AV_LTFS3/data/'),
    '',
)

In [4]:
# Read the raw train/test loan tables from DATA_DIR (train first, then test).
train, test = (
    pd.read_csv(DATA_DIR + csv_name)
    for csv_name in ('train_Data.csv', 'test_Data.csv')
)
# Precomputed bureau features, loaded from the working directory.
# NOTE(review): unpickling executes arbitrary code — only load a trusted file.
bur_df = pd.read_pickle('bureau_future_feats.pkl')

In [5]:
# Integer class codes (0..6) for the top-up bucket labels, in the order listed.
# Labels are matched verbatim against the raw column, including the leading
# space in ' > 48 Months'.
target_map = dict(zip(
    (
        'No Top-up Service',
        '12-18 Months',
        '18-24 Months',
        '24-30 Months',
        '30-36 Months',
        '36-48 Months',
        ' > 48 Months',
    ),
    range(7),
))
# NOTE(review): re-running this cell maps the already-encoded integers a second
# time, which turns every value into NaN.
train['Top-up Month'] = train['Top-up Month'].map(target_map)

In [6]:
# Inverse lookup: integer class code -> original label string.
reverse_map = dict(zip(target_map.values(), target_map.keys()))

In [7]:
# Display the code -> label lookup to eyeball the inversion.
reverse_map

{0: 'No Top-up Service',
 1: '12-18 Months',
 2: '18-24 Months',
 3: '24-30 Months',
 4: '30-36 Months',
 5: '36-48 Months',
 6: ' > 48 Months'}

In [8]:
# Class distribution of the encoded target in the training set.
train['Top-up Month'].value_counts()

0    106677
6      8366
5      3656
3      3492
4      3062
2      2368
1      1034
Name: Top-up Month, dtype: int64

In [9]:
# Stack train on top of test (row-wise) so the feature engineering below is
# applied to both; they are separated again by ID later on.
df = pd.concat([train, test])
df.shape

(143400, 26)

In [10]:
# Rows/columns of the bureau feature table, for comparison with df before merging.
bur_df.shape

(139964, 112)

In [11]:
# Attach the bureau features to every loan row. The left join keeps loans that
# have no matching bureau record (their bureau columns are filled with NaN).
# validate='many_to_one' makes pandas raise MergeError if bur_df ever contains
# more than one row for an ID, instead of silently duplicating loan rows.
df = df.merge(bur_df, on='ID', how='left', validate='many_to_one')

# Convert both date columns to datetime so the .dt accessor works downstream.
df['DisbursalDate'] = pd.to_datetime(df['DisbursalDate'])
df['MaturityDAte'] = pd.to_datetime(df['MaturityDAte'])

In [12]:
# Preview the merged frame: loan columns followed by the bureau feature columns.
df.head()

Unnamed: 0,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,AmountFinance,DisbursalAmount,EMI,DisbursalDate,MaturityDAte,AuthDate,AssetID,ManufacturerID,SupplierID,LTV,SEX,AGE,MonthlyIncome,City,State,ZiPCODE,Top-up Month,individual_accounts,joint_accounts,guarantor_accounts,curr_bal_grtr_0,num_accounts,mean_correctedDISBURSED-AMT/HIGH CREDIT,min_correctedDISBURSED-AMT/HIGH CREDIT,max_correctedDISBURSED-AMT/HIGH CREDIT,sum_correctedDISBURSED-AMT/HIGH CREDIT,mean_correctedCURRENT-BAL,min_correctedCURRENT-BAL,max_correctedCURRENT-BAL,sum_correctedCURRENT-BAL,mean_correctedOVERDUE-AMT,min_correctedOVERDUE-AMT,max_correctedOVERDUE-AMT,sum_correctedOVERDUE-AMT,num_closed_accounts,num_open_accounts,num_delinq_accounts,total_written_off_amount,mean_percent_paid_off,min_percent_paid_off,max_percent_paid_off,overall_percent_paid_off,median_tenor,max_tenor,min_tenor,std_count,ddd_count,xxx_count,late_count,_30_count,_60_count,_90_count,_180_count,total_count,std_count_application_loan,ddd_count_application_loan,xxx_count_application_loan,late_count_application_loan,_30_count_application_loan,_60_count_application_loan,_90_count_application_loan,_180_count_application_loan,total_count_application_loan,mean_dpd_str,min_dpd_str,max_dpd_str,mean_dpd_str_application_loan,min_dpd_str_application_loan,max_dpd_str_application_loan,num_accounts__between_0_and_365_days,total_sanctioned_amount__between_0_and_365_days,total_curr_bal__between_0_and_365_days,overall_percentage_paid_off__between_0_and_365_days,num_accounts__between_365_and_730_days,total_sanctioned_amount__between_365_and_730_days,total_curr_bal__between_365_and_730_days,overall_percentage_paid_off__between_365_and_730_days,num_accounts__between_730_and_1095_days,total_sanctioned_amount__between_730_and_1095_days,total_curr_bal__between_730_and_1095_days,overall_percentage_paid_off__between_730_and_1095_days,num_accounts__between_1095_and_1460_days,total_sanctioned_amount__between_1095_and_14
60_days,total_curr_bal__between_1095_and_1460_days,overall_percentage_paid_off__between_1095_and_1460_days,num_accounts__between_1460_and_3650_days,total_sanctioned_amount__between_1460_and_3650_days,total_curr_bal__between_1460_and_3650_days,overall_percentage_paid_off__between_1460_and_3650_days,num_accounts_Tractor Loan,total_sanctioned_amount_Tractor Loan,total_curr_bal_Tractor Loan,overall_percentage_paid_off_Tractor Loan,num_accounts_Gold Loan,total_sanctioned_amount_Gold Loan,total_curr_bal_Gold Loan,overall_percentage_paid_off_Gold Loan,num_accounts_Business Loan Priority Sector Agriculture,total_sanctioned_amount_Business Loan Priority Sector Agriculture,total_curr_bal_Business Loan Priority Sector Agriculture,overall_percentage_paid_off_Business Loan Priority Sector Agriculture,num_accounts_Kisan Credit Card,total_sanctioned_amount_Kisan Credit Card,total_curr_bal_Kisan Credit Card,overall_percentage_paid_off_Kisan Credit Card,num_accounts_Auto Loan (Personal),total_sanctioned_amount_Auto Loan (Personal),total_curr_bal_Auto Loan (Personal),overall_percentage_paid_off_Auto Loan (Personal),num_accounts_Personal Loan,total_sanctioned_amount_Personal Loan,total_curr_bal_Personal Loan,overall_percentage_paid_off_Personal Loan,num_accounts_Other,total_sanctioned_amount_Other,total_curr_bal_Other,overall_percentage_paid_off_Other,num_accounts_Overdraft,total_sanctioned_amount_Overdraft,total_curr_bal_Overdraft,overall_percentage_paid_off_Overdraft,mean_day_start_day_diff_app_vs_other,min_day_start_day_diff_app_vs_other,max_day_start_day_diff_app_vs_other,mean_days_bw_loans,min_days_bw_loans,max_days_bw_loans,sum_days_bw_loans
0,1,Monthly,Arrear,Closed,PDC_E,1,,48,450000,275000.0,275000.0,24000.0,2012-02-10,2016-01-15,2012-02-10 00:00:00,4022465,1568.0,21946,61.11,M,49.0,35833.33,RAISEN,MADHYA PRADESH,464993.0,6.0,6.0,0.0,0.0,1.0,6.0,252058.666667,0.0,500000.0,1512352.0,6312.166667,0.0,37873.0,37873.0,7574.6,0.0,37873.0,37873.0,5.0,0.0,1.0,0.0,0.79721,-0.013948,1.0,0.974958,1472.0,1704.0,109.0,72.0,3.0,0.0,74.0,5.0,1.0,0.0,0.0,149.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,12.919463,0.0,87.0,0.0,0.0,0.0,2.0,775000.0,0.0,1.0,2.0,700000.0,0.0,1.0,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,1.0,0.0,0.0,,1.0,275000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,1200000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,986.4,355.0,2162.0,432.4,108.0,829.0,2162.0
1,2,Monthly,Advance,Closed,PDC,333,BHOPAL,47,485000,350000.0,350000.0,10500.0,2012-03-31,2016-02-15,2012-03-31 00:00:00,4681175,1062.0,34802,70.0,M,23.0,666.67,SEHORE,MADHYA PRADESH,466001.0,0.0,8.0,0.0,1.0,3.0,9.0,893342.333333,300000.0,3000000.0,8040081.0,58074.555556,0.0,247887.0,522671.0,0.0,0.0,0.0,0.0,6.0,3.0,0.0,0.0,0.948397,0.684584,1.0,0.934992,1401.0,2222.0,392.0,152.0,15.0,6.0,31.0,2.0,0.0,0.0,0.0,204.0,5.0,0.0,0.0,6.0,4.0,4.0,4.0,0.0,35.0,2.77451,0.0,47.0,15.722222,0.0,47.0,1.0,350000.0,0.0,1.0,2.0,654176.0,0.0,1.0,1.0,450000.0,0.0,1.0,4.0,5800000.0,274784.0,0.952623,1.0,785905.0,247887.0,0.684584,2.0,704176.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,2150000.0,0.0,1.0,1202.75,456.0,1736.0,217.0,0.0,456.0,1736.0
2,3,Quatrly,Arrear,Active,Direct Debit,1,,68,690000,519728.0,519728.0,38300.0,2017-06-17,2023-02-10,2017-06-17 00:00:00,25328146,1060.0,127335,69.77,M,39.0,45257.0,BHOPAL,MADHYA PRADESH,462030.0,1.0,9.0,1.0,1.0,7.0,11.0,195769.454545,8703.0,950000.0,2153464.0,134547.909091,0.0,811839.0,1480027.0,0.0,0.0,0.0,0.0,4.0,7.0,0.0,0.0,0.562115,0.005757,1.0,0.312723,290.5,958.0,39.0,93.0,3.0,0.0,6.0,0.0,0.0,0.0,0.0,102.0,3.0,0.0,0.0,5.0,3.0,3.0,3.0,0.0,35.0,0.333333,0.0,15.0,0.0,0.0,0.0,4.0,637655.0,319224.0,0.499378,5.0,1207106.0,862330.0,0.285622,2.0,308703.0,298473.0,0.033139,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,519728.0,307637.0,0.408081,2.0,252000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,487.8,136.0,919.0,91.9,3.0,178.0,919.0
3,7,Monthly,Advance,Closed,Billed,125,GUNA,48,480000,400000.0,400000.0,11600.0,2013-11-29,2017-11-10,2013-11-29 00:00:00,13021591,1060.0,25094,80.92,M,24.0,20833.33,ASHOK NAGAR,MADHYA PRADESH,473335.0,6.0,4.0,0.0,0.0,1.0,4.0,318138.25,100000.0,542553.0,1272553.0,156000.0,0.0,624000.0,624000.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.71247,-0.150118,1.0,0.509647,567.5,1462.0,122.0,79.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,400000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,872553.0,624000.0,0.284857,3.0,1172553.0,624000.0,0.467828,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1682.333333,1522.0,1820.0,606.666667,115.0,1522.0,1820.0
4,8,Monthly,Arrear,Closed,Billed,152,BILASPUR,44,619265,440000.0,440000.0,15000.0,2011-12-08,2015-07-05,2011-12-08 00:00:00,3291320,1046.0,21853,71.05,M,56.0,27313.67,BILASPUR,CHATTISGARH,495442.0,5.0,3.0,1.0,1.0,2.0,5.0,279043.2,100000.0,463975.0,1395216.0,28263.8,0.0,134330.0,141319.0,11728.333333,0.0,35185.0,35185.0,2.0,1.0,1.0,0.0,0.935107,0.71048,1.0,0.898712,1331.0,1567.0,799.0,66.0,8.0,48.0,19.0,16.0,12.0,5.0,0.0,141.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,9.425532,0.0,210.0,0.0,0.0,0.0,1.0,440000.0,0.0,1.0,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,2.0,663975.0,141319.0,0.787162,1.0,191241.0,0.0,1.0,2.0,903975.0,134330.0,0.851401,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,300000.0,6989.0,0.976703,0.0,0.0,0.0,,0.0,0.0,0.0,,1425.0,1051.0,1850.0,462.5,19.0,1051.0,1850.0


In [13]:
# String form of the pin code; missing values become '000000' before slicing.
# NOTE(review): ZiPCODE shows up as a float in the preview (e.g. 464993.0), so
# the string carries a trailing '.0'. Six-digit codes slice cleanly; shorter
# ones would not — confirm none exist.
zip_text = df['ZiPCODE'].fillna('000000').astype('str')
df['pin1'] = zip_text.str[0:2]
df['pin2'] = zip_text.str[2:4]
df['pin3'] = zip_text.str[4:6]

# Loan duration in days between disbursal and maturity.
df['cal_tenor'] = df['MaturityDAte'].sub(df['DisbursalDate']).dt.days
# Instalment relative to income (a zero MonthlyIncome yields inf).
df['emi_sal_ratio'] = df['EMI'].div(df['MonthlyIncome'])
# Disbursed amount as a fraction of the asset cost.
df['ltv2'] = df['DisbursalAmount'].div(df['AssetCost'])

In [14]:
# Calendar components derived from each date column.
# Outer key: source datetime column; inner mapping: new column -> .dt attribute.
date_parts = {
    'DisbursalDate': {
        'disb_year': 'year',
        'disb_mon': 'month',
        'disb_day': 'day',
        'disb_dow': 'dayofweek',
    },
    'MaturityDAte': {
        'mat_year': 'year',
        'mat_mon': 'month',
        'mat_day': 'day',
        'mat_dow': 'dayofweek',
    },
}
for source_col, parts in date_parts.items():
    for new_col, dt_attr in parts.items():
        df[new_col] = getattr(df[source_col].dt, dt_attr)

In [15]:
# Categorical inputs (handed to the frequency encoder in a later cell).
cat_cols = [
    'Frequency', 'InstlmentMode', 'LoanStatus', 'PaymentMode', 'Area',
    'ManufacturerID', 'SupplierID', 'pin1', 'pin2', 'pin3',
    'SEX', 'City', 'State', 'BranchID',
]
target = 'Top-up Month'
# Identifiers and raw date/zip columns that must not be fed to the model.
drop_cols = ['ID', 'DisbursalDate', 'MaturityDAte', 'AuthDate', 'AssetID', 'ZiPCODE']

# Everything that is neither target, dropped nor categorical is treated as
# numeric; the original column order of df is preserved.
_excluded = {target, *drop_cols, *cat_cols}
num_cols = [col for col in df.columns if col not in _excluded]
use_cols = cat_cols + num_cols

In [16]:
# Number of model input columns (categorical + numeric).
# NOTE(review): the recorded output here (144) does not match the feature count
# in the recorded training log further down (140) — the saved outputs appear to
# come from different runs; re-run the notebook top to bottom to confirm.
len(use_cols)

144

In [17]:
# Encode the categorical columns with the project's frequency encoder, fitted
# on the combined train+test frame.
# NOTE(review): presumably each category is replaced by its relative frequency
# (the preview after this cell shows fractions in these columns) — confirm
# against ml_lib/encoding.
fe = FreqeuncyEncoding(
    categorical_columns=cat_cols,
    normalize=True,
    return_df=True,
)
df = fe.fit_transform(df)

In [18]:
# Preview df after the categorical columns were passed through the encoder.
df.head()

Unnamed: 0,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,AmountFinance,DisbursalAmount,EMI,DisbursalDate,MaturityDAte,AuthDate,AssetID,ManufacturerID,SupplierID,LTV,SEX,AGE,MonthlyIncome,City,State,ZiPCODE,Top-up Month,individual_accounts,joint_accounts,guarantor_accounts,curr_bal_grtr_0,num_accounts,mean_correctedDISBURSED-AMT/HIGH CREDIT,min_correctedDISBURSED-AMT/HIGH CREDIT,max_correctedDISBURSED-AMT/HIGH CREDIT,sum_correctedDISBURSED-AMT/HIGH CREDIT,mean_correctedCURRENT-BAL,min_correctedCURRENT-BAL,max_correctedCURRENT-BAL,sum_correctedCURRENT-BAL,mean_correctedOVERDUE-AMT,min_correctedOVERDUE-AMT,max_correctedOVERDUE-AMT,sum_correctedOVERDUE-AMT,num_closed_accounts,num_open_accounts,num_delinq_accounts,total_written_off_amount,mean_percent_paid_off,min_percent_paid_off,max_percent_paid_off,overall_percent_paid_off,median_tenor,max_tenor,min_tenor,std_count,ddd_count,xxx_count,late_count,_30_count,_60_count,_90_count,_180_count,total_count,std_count_application_loan,ddd_count_application_loan,xxx_count_application_loan,late_count_application_loan,_30_count_application_loan,_60_count_application_loan,_90_count_application_loan,_180_count_application_loan,total_count_application_loan,mean_dpd_str,min_dpd_str,max_dpd_str,mean_dpd_str_application_loan,min_dpd_str_application_loan,max_dpd_str_application_loan,num_accounts__between_0_and_365_days,total_sanctioned_amount__between_0_and_365_days,total_curr_bal__between_0_and_365_days,overall_percentage_paid_off__between_0_and_365_days,num_accounts__between_365_and_730_days,total_sanctioned_amount__between_365_and_730_days,total_curr_bal__between_365_and_730_days,overall_percentage_paid_off__between_365_and_730_days,num_accounts__between_730_and_1095_days,total_sanctioned_amount__between_730_and_1095_days,total_curr_bal__between_730_and_1095_days,overall_percentage_paid_off__between_730_and_1095_days,num_accounts__between_1095_and_1460_days,total_sanctioned_amount__between_1095_and_14
60_days,total_curr_bal__between_1095_and_1460_days,overall_percentage_paid_off__between_1095_and_1460_days,num_accounts__between_1460_and_3650_days,total_sanctioned_amount__between_1460_and_3650_days,total_curr_bal__between_1460_and_3650_days,overall_percentage_paid_off__between_1460_and_3650_days,num_accounts_Tractor Loan,total_sanctioned_amount_Tractor Loan,total_curr_bal_Tractor Loan,overall_percentage_paid_off_Tractor Loan,num_accounts_Gold Loan,total_sanctioned_amount_Gold Loan,total_curr_bal_Gold Loan,overall_percentage_paid_off_Gold Loan,num_accounts_Business Loan Priority Sector Agriculture,total_sanctioned_amount_Business Loan Priority Sector Agriculture,total_curr_bal_Business Loan Priority Sector Agriculture,overall_percentage_paid_off_Business Loan Priority Sector Agriculture,num_accounts_Kisan Credit Card,total_sanctioned_amount_Kisan Credit Card,total_curr_bal_Kisan Credit Card,overall_percentage_paid_off_Kisan Credit Card,num_accounts_Auto Loan (Personal),total_sanctioned_amount_Auto Loan (Personal),total_curr_bal_Auto Loan (Personal),overall_percentage_paid_off_Auto Loan (Personal),num_accounts_Personal Loan,total_sanctioned_amount_Personal Loan,total_curr_bal_Personal Loan,overall_percentage_paid_off_Personal Loan,num_accounts_Other,total_sanctioned_amount_Other,total_curr_bal_Other,overall_percentage_paid_off_Other,num_accounts_Overdraft,total_sanctioned_amount_Overdraft,total_curr_bal_Overdraft,overall_percentage_paid_off_Overdraft,mean_day_start_day_diff_app_vs_other,min_day_start_day_diff_app_vs_other,max_day_start_day_diff_app_vs_other,mean_days_bw_loans,min_days_bw_loans,max_days_bw_loans,sum_days_bw_loans,pin1,pin2,pin3,cal_tenor,emi_sal_ratio,ltv2,disb_year,disb_mon,disb_day,disb_dow,mat_year,mat_mon,mat_day,mat_dow
0,1,0.233849,0.952483,0.737259,0.076199,0.004505,0.0,48,450000,275000.0,275000.0,24000.0,2012-02-10,2016-01-15,2012-02-10 00:00:00,4022465,0.122059,0.000404,61.11,0.950724,49.0,35833.33,0.007817,0.156869,464993.0,6.0,6.0,0.0,0.0,1.0,6.0,252058.666667,0.0,500000.0,1512352.0,6312.166667,0.0,37873.0,37873.0,7574.6,0.0,37873.0,37873.0,5.0,0.0,1.0,0.0,0.79721,-0.013948,1.0,0.974958,1472.0,1704.0,109.0,72.0,3.0,0.0,74.0,5.0,1.0,0.0,0.0,149.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,12.919463,0.0,87.0,0.0,0.0,0.0,2.0,775000.0,0.0,1.0,2.0,700000.0,0.0,1.0,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,1.0,0.0,0.0,,1.0,275000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,1200000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,986.4,355.0,2162.0,432.4,108.0,829.0,2162.0,0.053445,0.001876,0.001722,1435.0,0.669768,0.611111,2012,2,10,4,2016.0,1.0,15.0,4.0
1,2,0.233849,0.047517,0.737259,0.210237,0.004372,0.016303,47,485000,350000.0,350000.0,10500.0,2012-03-31,2016-02-15,2012-03-31 00:00:00,4681175,0.139653,0.000404,70.0,0.950724,23.0,666.67,0.007795,0.156869,466001.0,0.0,8.0,0.0,1.0,3.0,9.0,893342.333333,300000.0,3000000.0,8040081.0,58074.555556,0.0,247887.0,522671.0,0.0,0.0,0.0,0.0,6.0,3.0,0.0,0.0,0.948397,0.684584,1.0,0.934992,1401.0,2222.0,392.0,152.0,15.0,6.0,31.0,2.0,0.0,0.0,0.0,204.0,5.0,0.0,0.0,6.0,4.0,4.0,4.0,0.0,35.0,2.77451,0.0,47.0,15.722222,0.0,47.0,1.0,350000.0,0.0,1.0,2.0,654176.0,0.0,1.0,1.0,450000.0,0.0,1.0,4.0,5800000.0,274784.0,0.952623,1.0,785905.0,247887.0,0.684584,2.0,704176.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,2150000.0,0.0,1.0,1202.75,456.0,1736.0,217.0,0.0,456.0,1736.0,0.053445,0.013033,0.133229,1416.0,15.749921,0.721649,2012,3,31,5,2016.0,2.0,15.0,0.0
2,3,0.15553,0.952483,0.262741,0.244582,0.004505,0.0,68,690000,519728.0,519728.0,38300.0,2017-06-17,2023-02-10,2017-06-17 00:00:00,25328146,0.120887,4.9e-05,69.77,0.950724,39.0,45257.0,0.001286,0.156869,462030.0,1.0,9.0,1.0,1.0,7.0,11.0,195769.454545,8703.0,950000.0,2153464.0,134547.909091,0.0,811839.0,1480027.0,0.0,0.0,0.0,0.0,4.0,7.0,0.0,0.0,0.562115,0.005757,1.0,0.312723,290.5,958.0,39.0,93.0,3.0,0.0,6.0,0.0,0.0,0.0,0.0,102.0,3.0,0.0,0.0,5.0,3.0,3.0,3.0,0.0,35.0,0.333333,0.0,15.0,0.0,0.0,0.0,4.0,637655.0,319224.0,0.499378,5.0,1207106.0,862330.0,0.285622,2.0,308703.0,298473.0,0.033139,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,519728.0,307637.0,0.408081,2.0,252000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,487.8,136.0,919.0,91.9,3.0,178.0,919.0,0.053445,0.029972,0.017483,2064.0,0.846278,0.753229,2017,6,17,5,2023.0,2.0,10.0,4.0
3,7,0.233849,0.047517,0.737259,0.2044,0.004972,0.007226,48,480000,400000.0,400000.0,11600.0,2013-11-29,2017-11-10,2013-11-29 00:00:00,13021591,0.120887,0.001374,80.92,0.950724,24.0,20833.33,0.004291,0.156869,473335.0,6.0,4.0,0.0,0.0,1.0,4.0,318138.25,100000.0,542553.0,1272553.0,156000.0,0.0,624000.0,624000.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.71247,-0.150118,1.0,0.509647,567.5,1462.0,122.0,79.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,400000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,872553.0,624000.0,0.284857,3.0,1172553.0,624000.0,0.467828,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1682.333333,1522.0,1820.0,606.666667,115.0,1522.0,1820.0,0.021946,0.020635,0.013605,1442.0,0.5568,0.833333,2013,11,29,4,2017.0,11.0,10.0,4.0
4,8,0.233849,0.952483,0.737259,0.2044,0.006555,0.013064,44,619265,440000.0,440000.0,15000.0,2011-12-08,2015-07-05,2011-12-08 00:00:00,3291320,0.197987,0.000314,71.05,0.950724,56.0,27313.67,0.005267,0.039603,495442.0,5.0,3.0,1.0,1.0,2.0,5.0,279043.2,100000.0,463975.0,1395216.0,28263.8,0.0,134330.0,141319.0,11728.333333,0.0,35185.0,35185.0,2.0,1.0,1.0,0.0,0.935107,0.71048,1.0,0.898712,1331.0,1567.0,799.0,66.0,8.0,48.0,19.0,16.0,12.0,5.0,0.0,141.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,9.425532,0.0,210.0,0.0,0.0,0.0,1.0,440000.0,0.0,1.0,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,2.0,663975.0,141319.0,0.787162,1.0,191241.0,0.0,1.0,2.0,903975.0,134330.0,0.851401,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,300000.0,6989.0,0.976703,0.0,0.0,0.0,,0.0,0.0,0.0,,1425.0,1051.0,1850.0,462.5,19.0,1051.0,1850.0,0.03977,0.018354,0.004296,1305.0,0.549176,0.71052,2011,12,8,3,2015.0,7.0,5.0,6.0


In [19]:
# Recover the train / test partitions from the engineered frame by ID
# membership, each ordered by ascending ID.
train = df.loc[df['ID'].isin(train['ID'])].sort_values('ID', ascending=True)
test = df.loc[df['ID'].isin(test['ID'])].sort_values('ID', ascending=True)
# Cast the target to integer on the training partition only.
train[target] = train[target].astype('int')

In [20]:
# Preview the training partition after the target was cast to integer.
train.head()

Unnamed: 0,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,AmountFinance,DisbursalAmount,EMI,DisbursalDate,MaturityDAte,AuthDate,AssetID,ManufacturerID,SupplierID,LTV,SEX,AGE,MonthlyIncome,City,State,ZiPCODE,Top-up Month,individual_accounts,joint_accounts,guarantor_accounts,curr_bal_grtr_0,num_accounts,mean_correctedDISBURSED-AMT/HIGH CREDIT,min_correctedDISBURSED-AMT/HIGH CREDIT,max_correctedDISBURSED-AMT/HIGH CREDIT,sum_correctedDISBURSED-AMT/HIGH CREDIT,mean_correctedCURRENT-BAL,min_correctedCURRENT-BAL,max_correctedCURRENT-BAL,sum_correctedCURRENT-BAL,mean_correctedOVERDUE-AMT,min_correctedOVERDUE-AMT,max_correctedOVERDUE-AMT,sum_correctedOVERDUE-AMT,num_closed_accounts,num_open_accounts,num_delinq_accounts,total_written_off_amount,mean_percent_paid_off,min_percent_paid_off,max_percent_paid_off,overall_percent_paid_off,median_tenor,max_tenor,min_tenor,std_count,ddd_count,xxx_count,late_count,_30_count,_60_count,_90_count,_180_count,total_count,std_count_application_loan,ddd_count_application_loan,xxx_count_application_loan,late_count_application_loan,_30_count_application_loan,_60_count_application_loan,_90_count_application_loan,_180_count_application_loan,total_count_application_loan,mean_dpd_str,min_dpd_str,max_dpd_str,mean_dpd_str_application_loan,min_dpd_str_application_loan,max_dpd_str_application_loan,num_accounts__between_0_and_365_days,total_sanctioned_amount__between_0_and_365_days,total_curr_bal__between_0_and_365_days,overall_percentage_paid_off__between_0_and_365_days,num_accounts__between_365_and_730_days,total_sanctioned_amount__between_365_and_730_days,total_curr_bal__between_365_and_730_days,overall_percentage_paid_off__between_365_and_730_days,num_accounts__between_730_and_1095_days,total_sanctioned_amount__between_730_and_1095_days,total_curr_bal__between_730_and_1095_days,overall_percentage_paid_off__between_730_and_1095_days,num_accounts__between_1095_and_1460_days,total_sanctioned_amount__between_1095_and_14
60_days,total_curr_bal__between_1095_and_1460_days,overall_percentage_paid_off__between_1095_and_1460_days,num_accounts__between_1460_and_3650_days,total_sanctioned_amount__between_1460_and_3650_days,total_curr_bal__between_1460_and_3650_days,overall_percentage_paid_off__between_1460_and_3650_days,num_accounts_Tractor Loan,total_sanctioned_amount_Tractor Loan,total_curr_bal_Tractor Loan,overall_percentage_paid_off_Tractor Loan,num_accounts_Gold Loan,total_sanctioned_amount_Gold Loan,total_curr_bal_Gold Loan,overall_percentage_paid_off_Gold Loan,num_accounts_Business Loan Priority Sector Agriculture,total_sanctioned_amount_Business Loan Priority Sector Agriculture,total_curr_bal_Business Loan Priority Sector Agriculture,overall_percentage_paid_off_Business Loan Priority Sector Agriculture,num_accounts_Kisan Credit Card,total_sanctioned_amount_Kisan Credit Card,total_curr_bal_Kisan Credit Card,overall_percentage_paid_off_Kisan Credit Card,num_accounts_Auto Loan (Personal),total_sanctioned_amount_Auto Loan (Personal),total_curr_bal_Auto Loan (Personal),overall_percentage_paid_off_Auto Loan (Personal),num_accounts_Personal Loan,total_sanctioned_amount_Personal Loan,total_curr_bal_Personal Loan,overall_percentage_paid_off_Personal Loan,num_accounts_Other,total_sanctioned_amount_Other,total_curr_bal_Other,overall_percentage_paid_off_Other,num_accounts_Overdraft,total_sanctioned_amount_Overdraft,total_curr_bal_Overdraft,overall_percentage_paid_off_Overdraft,mean_day_start_day_diff_app_vs_other,min_day_start_day_diff_app_vs_other,max_day_start_day_diff_app_vs_other,mean_days_bw_loans,min_days_bw_loans,max_days_bw_loans,sum_days_bw_loans,pin1,pin2,pin3,cal_tenor,emi_sal_ratio,ltv2,disb_year,disb_mon,disb_day,disb_dow,mat_year,mat_mon,mat_day,mat_dow
0,1,0.233849,0.952483,0.737259,0.076199,0.004505,0.0,48,450000,275000.0,275000.0,24000.0,2012-02-10,2016-01-15,2012-02-10 00:00:00,4022465,0.122059,0.000404,61.11,0.950724,49.0,35833.33,0.007817,0.156869,464993.0,6,6.0,0.0,0.0,1.0,6.0,252058.666667,0.0,500000.0,1512352.0,6312.166667,0.0,37873.0,37873.0,7574.6,0.0,37873.0,37873.0,5.0,0.0,1.0,0.0,0.79721,-0.013948,1.0,0.974958,1472.0,1704.0,109.0,72.0,3.0,0.0,74.0,5.0,1.0,0.0,0.0,149.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,12.919463,0.0,87.0,0.0,0.0,0.0,2.0,775000.0,0.0,1.0,2.0,700000.0,0.0,1.0,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,1.0,0.0,0.0,,1.0,275000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,1200000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,986.4,355.0,2162.0,432.4,108.0,829.0,2162.0,0.053445,0.001876,0.001722,1435.0,0.669768,0.611111,2012,2,10,4,2016.0,1.0,15.0,4.0
1,2,0.233849,0.047517,0.737259,0.210237,0.004372,0.016303,47,485000,350000.0,350000.0,10500.0,2012-03-31,2016-02-15,2012-03-31 00:00:00,4681175,0.139653,0.000404,70.0,0.950724,23.0,666.67,0.007795,0.156869,466001.0,0,8.0,0.0,1.0,3.0,9.0,893342.333333,300000.0,3000000.0,8040081.0,58074.555556,0.0,247887.0,522671.0,0.0,0.0,0.0,0.0,6.0,3.0,0.0,0.0,0.948397,0.684584,1.0,0.934992,1401.0,2222.0,392.0,152.0,15.0,6.0,31.0,2.0,0.0,0.0,0.0,204.0,5.0,0.0,0.0,6.0,4.0,4.0,4.0,0.0,35.0,2.77451,0.0,47.0,15.722222,0.0,47.0,1.0,350000.0,0.0,1.0,2.0,654176.0,0.0,1.0,1.0,450000.0,0.0,1.0,4.0,5800000.0,274784.0,0.952623,1.0,785905.0,247887.0,0.684584,2.0,704176.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,2150000.0,0.0,1.0,1202.75,456.0,1736.0,217.0,0.0,456.0,1736.0,0.053445,0.013033,0.133229,1416.0,15.749921,0.721649,2012,3,31,5,2016.0,2.0,15.0,0.0
2,3,0.15553,0.952483,0.262741,0.244582,0.004505,0.0,68,690000,519728.0,519728.0,38300.0,2017-06-17,2023-02-10,2017-06-17 00:00:00,25328146,0.120887,4.9e-05,69.77,0.950724,39.0,45257.0,0.001286,0.156869,462030.0,1,9.0,1.0,1.0,7.0,11.0,195769.454545,8703.0,950000.0,2153464.0,134547.909091,0.0,811839.0,1480027.0,0.0,0.0,0.0,0.0,4.0,7.0,0.0,0.0,0.562115,0.005757,1.0,0.312723,290.5,958.0,39.0,93.0,3.0,0.0,6.0,0.0,0.0,0.0,0.0,102.0,3.0,0.0,0.0,5.0,3.0,3.0,3.0,0.0,35.0,0.333333,0.0,15.0,0.0,0.0,0.0,4.0,637655.0,319224.0,0.499378,5.0,1207106.0,862330.0,0.285622,2.0,308703.0,298473.0,0.033139,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,519728.0,307637.0,0.408081,2.0,252000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,487.8,136.0,919.0,91.9,3.0,178.0,919.0,0.053445,0.029972,0.017483,2064.0,0.846278,0.753229,2017,6,17,5,2023.0,2.0,10.0,4.0
3,7,0.233849,0.047517,0.737259,0.2044,0.004972,0.007226,48,480000,400000.0,400000.0,11600.0,2013-11-29,2017-11-10,2013-11-29 00:00:00,13021591,0.120887,0.001374,80.92,0.950724,24.0,20833.33,0.004291,0.156869,473335.0,6,4.0,0.0,0.0,1.0,4.0,318138.25,100000.0,542553.0,1272553.0,156000.0,0.0,624000.0,624000.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.71247,-0.150118,1.0,0.509647,567.5,1462.0,122.0,79.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,400000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,872553.0,624000.0,0.284857,3.0,1172553.0,624000.0,0.467828,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1682.333333,1522.0,1820.0,606.666667,115.0,1522.0,1820.0,0.021946,0.020635,0.013605,1442.0,0.5568,0.833333,2013,11,29,4,2017.0,11.0,10.0,4.0
4,8,0.233849,0.952483,0.737259,0.2044,0.006555,0.013064,44,619265,440000.0,440000.0,15000.0,2011-12-08,2015-07-05,2011-12-08 00:00:00,3291320,0.197987,0.000314,71.05,0.950724,56.0,27313.67,0.005267,0.039603,495442.0,5,3.0,1.0,1.0,2.0,5.0,279043.2,100000.0,463975.0,1395216.0,28263.8,0.0,134330.0,141319.0,11728.333333,0.0,35185.0,35185.0,2.0,1.0,1.0,0.0,0.935107,0.71048,1.0,0.898712,1331.0,1567.0,799.0,66.0,8.0,48.0,19.0,16.0,12.0,5.0,0.0,141.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,9.425532,0.0,210.0,0.0,0.0,0.0,1.0,440000.0,0.0,1.0,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,2.0,663975.0,141319.0,0.787162,1.0,191241.0,0.0,1.0,2.0,903975.0,134330.0,0.851401,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,300000.0,6989.0,0.976703,0.0,0.0,0.0,,0.0,0.0,0.0,,1425.0,1051.0,1850.0,462.5,19.0,1051.0,1850.0,0.03977,0.018354,0.004296,1305.0,0.549176,0.71052,2011,12,8,3,2015.0,7.0,5.0,6.0


In [21]:
# Materialize 5 stratified, shuffled CV splits (fixed seed) as a list of
# (train_indices, validation_indices) tuples, stratified on the target.
folds = list(
    StratifiedKFold(n_splits=5, shuffle=True, random_state=2).split(train, train[target])
)

In [22]:
# HyperOpt search space for LightGBM. Plain values are held fixed; the
# hp.quniform entries are sampled on a (low, high, step) grid.
lgbm_space = dict(
    n_estimators=5000,
    num_leaves=hp.quniform("num_leaves", 32, 128, 32),
    min_child_weight=hp.quniform("min_child_weight", 10, 100, 20),
    subsample=hp.quniform("subsample", 0.5, 1, 0.1),
    colsample_bytree=hp.quniform("colsample_bytree", 0.5, 1, 0.1),
    subsample_freq=5,
    objective="multiclass",
    boosting_type="gbdt",
    learning_rate=0.1,
    n_jobs=-1,
)

In [23]:
# Project wrapper around LightGBM, configured with the precomputed CV folds and
# a patience of 100 rounds for early stopping.
est = Estimator(
    model=LGBMClassifier(),
    validation_scheme=folds,
    early_stopping_rounds=100,
    n_jobs=-1,
)

In [22]:
%%time
hyp= HyperOptModelSelection(model=est,space=lgbm_space,max_evals=20,is_maximize=True,log_file_path='hyp.txt')

CPU times: user 167 µs, sys: 25 µs, total: 192 µs
Wall time: 147 µs


In [23]:
%%time
hyp.fit(train[use_cols].values,train[target].values)

Starting HyperOpt 20 Evals with Dataset of Shape ((128655, 140),(128655,))


  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]


Iteration: 1, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.1, 'min_child_weight': 100.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.6000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.386726	valid_1's multi_logloss: 0.339749
[200]	valid_0's multi_logloss: 0.38297	valid_1's multi_logloss: 0.298577
[300]	valid_0's multi_logloss: 0.383247	valid_1's multi_logloss: 0.267356
Early stopping, best iteration is:                    
[247]	valid_0's multi_logloss: 0.382912	valid_1's multi_logloss: 0.283153
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.385503	valid_1's multi_logloss: 0.339835
[200]	valid_0's multi_logloss: 0.381962	valid_1's multi_logloss: 0.298276
[300]	valid_0's multi_logloss: 0.382253	valid_1's multi_logloss: 0.266993
Early stopping, best iteration is:                    
[249]	valid_0's multi_logloss: 0.381808	valid_1's multi_logloss: 0.282088
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.382665	valid_1's multi_logloss: 0.340781
[200]	valid_0's multi_logloss: 0.378551

Score - 0.5861428785530058, Std - 0.009641915924062758, Eval Score - 0.5861428785530058
Score across folds - [0.5733636017570625, 0.5831165768629223, 0.6016444891498589, 0.5811949523635097, 0.5913947726316756].


  5%|▌         | 1/20 [02:18<43:50, 138.47s/trial, best loss: 0.41385712144699416]


Iteration: 2, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.6000000000000001, 'learning_rate': 0.1, 'min_child_weight': 40.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.7000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.376155	valid_1's multi_logloss: 0.28877          
[200]	valid_0's multi_logloss: 0.375224	valid_1's multi_logloss: 0.225423         
Early stopping, best iteration is:                                                
[132]	valid_0's multi_logloss: 0.374725	valid_1's multi_logloss: 0.265511
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.375302	valid_1's multi_logloss: 0.289366         
[200]	valid_0's multi_logloss: 0.374919	valid_1's multi_logloss: 0.226122         
Early stopping, best iteration is:                                                
[159]	valid_0's multi_logloss: 0.374178	valid_1's multi_logloss: 0.249097
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.370718	valid_1's multi_logloss: 0.290128         
[200]	valid_0's multi_

Score - 0.5934086935109631, Std - 0.0074537398690435065, Eval Score - 0.5934086935109631
Score across folds - [0.5890921633398474, 0.5863774012851829, 0.6026281144438232, 0.5866480326352251, 0.6022977558507366].


 10%|█         | 2/20 [04:30<40:56, 136.49s/trial, best loss: 0.40659130648903685]


Iteration: 3, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'min_child_weight': 80.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.6000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.384207	valid_1's multi_logloss: 0.327608         
[200]	valid_0's multi_logloss: 0.382115	valid_1's multi_logloss: 0.280778         
[300]	valid_0's multi_logloss: 0.383421	valid_1's multi_logloss: 0.245457         
Early stopping, best iteration is:                                                
[206]	valid_0's multi_logloss: 0.381985	valid_1's multi_logloss: 0.278331
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.382623	valid_1's multi_logloss: 0.327971         
[200]	valid_0's multi_logloss: 0.380609	valid_1's multi_logloss: 0.280399         
Early stopping, best iteration is:                                                
[178]	valid_0's multi_logloss: 0.380237	valid_1's multi_logloss: 0.289305
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_

Score - 0.587433732117594, Std - 0.005942636941413724, Eval Score - 0.587433732117594
Score across folds - [0.5800603093782295, 0.5864147386636953, 0.5940559566869253, 0.5821646297244548, 0.5944730261346648].


 15%|█▌        | 3/20 [07:01<39:57, 141.02s/trial, best loss: 0.40659130648903685]


Iteration: 4, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'min_child_weight': 80.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 96, 'objective': 'multiclass', 'subsample': 0.7000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.380549	valid_1's multi_logloss: 0.310853         
[200]	valid_0's multi_logloss: 0.37921	valid_1's multi_logloss: 0.255563          
Early stopping, best iteration is:                                                
[139]	valid_0's multi_logloss: 0.37879	valid_1's multi_logloss: 0.286768
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.378204	valid_1's multi_logloss: 0.311192         
[200]	valid_0's multi_logloss: 0.375839	valid_1's multi_logloss: 0.255319         
Early stopping, best iteration is:                                                
[197]	valid_0's multi_logloss: 0.375777	valid_1's multi_logloss: 0.256672
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.376021	valid_1's multi_logloss: 0.311601         
[200]	valid_0's multi_l

Score - 0.5901552373192429, Std - 0.008764932554550131, Eval Score - 0.5901552373192429
Score across folds - [0.5774967211588706, 0.5911654009750167, 0.6020862182263611, 0.5836706784190043, 0.5963571678169612].


 20%|██        | 4/20 [09:58<40:29, 151.83s/trial, best loss: 0.40659130648903685]


Iteration: 5, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.6000000000000001, 'learning_rate': 0.1, 'min_child_weight': 60.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.9, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.373933	valid_1's multi_logloss: 0.296102         
[200]	valid_0's multi_logloss: 0.372747	valid_1's multi_logloss: 0.236445         
Early stopping, best iteration is:                                                
[169]	valid_0's multi_logloss: 0.372205	valid_1's multi_logloss: 0.252475
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.372891	valid_1's multi_logloss: 0.296483         
[200]	valid_0's multi_logloss: 0.371393	valid_1's multi_logloss: 0.237762         
Early stopping, best iteration is:                                                
[179]	valid_0's multi_logloss: 0.370849	valid_1's multi_logloss: 0.248321
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.370681	valid_1's multi_logloss: 0.29723          
[200]	valid_0's multi_

Score - 0.5975204981210367, Std - 0.006590883958804602, Eval Score - 0.5975204981210367
Score across folds - [0.5919303598940734, 0.5924032288684753, 0.600708406920908, 0.5934915224663787, 0.6090689724553481].


 25%|██▌       | 5/20 [12:53<39:39, 158.61s/trial, best loss: 0.4024795018789633] 


Iteration: 6, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.6000000000000001, 'learning_rate': 0.1, 'min_child_weight': 20.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 96, 'objective': 'multiclass', 'subsample': 0.7000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.374939	valid_1's multi_logloss: 0.231824        
Early stopping, best iteration is:                                               
[91]	valid_0's multi_logloss: 0.374625	valid_1's multi_logloss: 0.241943
Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.37491	valid_1's multi_logloss: 0.232585         
Early stopping, best iteration is:                                               
[97]	valid_0's multi_logloss: 0.374858	valid_1's multi_logloss: 0.235728
Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.371208	valid_1's multi_logloss: 0.232744        
[200]	valid_0's multi_logloss: 0.373642	valid_1's multi_logloss: 0.152828        
Early stopping, best iteration is:                                               
[121]	valid_0's multi_logloss: 0.3

Score - 0.592234992204406, Std - 0.006930632425256945, Eval Score - 0.592234992204406
Score across folds - [0.5890477535850834, 0.5826479483455685, 0.6009397473219218, 0.5890714320694534, 0.599468079700003].


 30%|███       | 6/20 [15:11<35:33, 152.37s/trial, best loss: 0.4024795018789633]


Iteration: 7, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.8, 'learning_rate': 0.1, 'min_child_weight': 20.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 96, 'objective': 'multiclass', 'subsample': 0.6000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.377187	valid_1's multi_logloss: 0.238005        
Early stopping, best iteration is:                                               
[93]	valid_0's multi_logloss: 0.376851	valid_1's multi_logloss: 0.245521
Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.373914	valid_1's multi_logloss: 0.237851        
Early stopping, best iteration is:                                               
[93]	valid_0's multi_logloss: 0.373683	valid_1's multi_logloss: 0.245334
Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.373172	valid_1's multi_logloss: 0.238435        
Early stopping, best iteration is:                                               
[96]	valid_0's multi_logloss: 0.37306	valid_1's multi_logloss: 0.242624
Training until validation scores don't impro

Score - 0.5906542885912521, Std - 0.004784456042562038, Eval Score - 0.5906542885912521
Score across folds - [0.5845588948551173, 0.5864980211541262, 0.593676972616937, 0.5907501808334471, 0.597787373496633].


 35%|███▌      | 7/20 [17:36<32:33, 150.25s/trial, best loss: 0.4024795018789633]


Iteration: 8, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.5, 'learning_rate': 0.1, 'min_child_weight': 80.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 96, 'objective': 'multiclass', 'subsample': 0.8, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.380449	valid_1's multi_logloss: 0.310209        
[200]	valid_0's multi_logloss: 0.377848	valid_1's multi_logloss: 0.254221        
Early stopping, best iteration is:                                               
[147]	valid_0's multi_logloss: 0.377533	valid_1's multi_logloss: 0.281166
Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.378835	valid_1's multi_logloss: 0.310124        
[200]	valid_0's multi_logloss: 0.376322	valid_1's multi_logloss: 0.254129        
Early stopping, best iteration is:                                               
[184]	valid_0's multi_logloss: 0.375938	valid_1's multi_logloss: 0.261833
Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.376185	valid_1's multi_logloss: 0.311569        
[200]	valid_0's multi_logloss: 0

Score - 0.5920911370626133, Std - 0.007171536526485912, Eval Score - 0.5920911370626133
Score across folds - [0.5836803085459774, 0.5881290263595842, 0.6002946161158622, 0.5872938232787155, 0.601057911012927].


 40%|████      | 8/20 [19:44<28:41, 143.44s/trial, best loss: 0.4024795018789633]


Iteration: 9, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'min_child_weight': 80.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 32, 'objective': 'multiclass', 'subsample': 0.8, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.379646	valid_1's multi_logloss: 0.331163        
[200]	valid_0's multi_logloss: 0.376845	valid_1's multi_logloss: 0.287974        
[300]	valid_0's multi_logloss: 0.37684	valid_1's multi_logloss: 0.255277         
Early stopping, best iteration is:                                               
[263]	valid_0's multi_logloss: 0.376465	valid_1's multi_logloss: 0.266457
Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.379507	valid_1's multi_logloss: 0.330613        
[200]	valid_0's multi_logloss: 0.375972	valid_1's multi_logloss: 0.287655        
[300]	valid_0's multi_logloss: 0.376466	valid_1's multi_logloss: 0.255585        
Early stopping, best iteration is:                                               
[236]	valid_0's multi_logloss: 0.375616	valid_1's multi_logloss: 0.275183
Training until validation scores

Score - 0.5901676522722545, Std - 0.008858429854965042, Eval Score - 0.5901676522722545
Score across folds - [0.5794565840982624, 0.5860476932747287, 0.6027214541648546, 0.5842187041725254, 0.5983938256509015].


 45%|████▌     | 9/20 [22:53<28:51, 157.37s/trial, best loss: 0.4024795018789633]


Iteration: 10, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 1.0, 'learning_rate': 0.1, 'min_child_weight': 60.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.8, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.375622	valid_1's multi_logloss: 0.296552        
[200]	valid_0's multi_logloss: 0.374915	valid_1's multi_logloss: 0.236164        
Early stopping, best iteration is:                                               
[149]	valid_0's multi_logloss: 0.374394	valid_1's multi_logloss: 0.264052
Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.375122	valid_1's multi_logloss: 0.297235        
[200]	valid_0's multi_logloss: 0.374189	valid_1's multi_logloss: 0.236898        
Early stopping, best iteration is:                                               
[154]	valid_0's multi_logloss: 0.373839	valid_1's multi_logloss: 0.261869
Training until validation scores don't improve for 100 rounds                    
[100]	valid_0's multi_logloss: 0.372552	valid_1's multi_logloss: 0.297829        
[200]	valid_0's multi_logloss: 0

Score - 0.5923559569889054, Std - 0.004101502503883834, Eval Score - 0.5923559569889054
Score across folds - [0.5878545001259522, 0.5902632157976256, 0.5915353721546995, 0.5921069566266522, 0.6000197402395975].


 50%|█████     | 10/20 [25:48<27:06, 162.64s/trial, best loss: 0.4024795018789633]


Iteration: 11, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'min_child_weight': 20.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.9, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.372006	valid_1's multi_logloss: 0.246023         
[200]	valid_0's multi_logloss: 0.373798	valid_1's multi_logloss: 0.1704           
Early stopping, best iteration is:                                                
[117]	valid_0's multi_logloss: 0.371385	valid_1's multi_logloss: 0.230138
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.370514	valid_1's multi_logloss: 0.245671         
[200]	valid_0's multi_logloss: 0.373475	valid_1's multi_logloss: 0.17124          
Early stopping, best iteration is:                                                
[102]	valid_0's multi_logloss: 0.370341	valid_1's multi_logloss: 0.243755
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.367966	valid_1's multi_logloss: 0.245844         
[200]	valid_0's multi_

Score - 0.5978993155627059, Std - 0.006392028341682928, Eval Score - 0.5978993155627059
Score across folds - [0.5939514494350895, 0.5949751631043727, 0.6064259764156568, 0.589789105193946, 0.6043548836644648].


 55%|█████▌    | 11/20 [28:44<24:58, 166.48s/trial, best loss: 0.4021006844372941]


Iteration: 12, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'min_child_weight': 80.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 32, 'objective': 'multiclass', 'subsample': 0.9, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.37871	valid_1's multi_logloss: 0.326492          
[200]	valid_0's multi_logloss: 0.374979	valid_1's multi_logloss: 0.281606         
[300]	valid_0's multi_logloss: 0.375787	valid_1's multi_logloss: 0.247651         
Early stopping, best iteration is:                                                
[220]	valid_0's multi_logloss: 0.374891	valid_1's multi_logloss: 0.274171
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.377675	valid_1's multi_logloss: 0.326875         
[200]	valid_0's multi_logloss: 0.374198	valid_1's multi_logloss: 0.282003         
[300]	valid_0's multi_logloss: 0.374682	valid_1's multi_logloss: 0.248224         
Early stopping, best iteration is:                                                
[233]	valid_0's multi_logloss: 0.374059	valid_1's multi_logloss: 0.269965
Training until validat

Score - 0.5942602562565229, Std - 0.009456676895162917, Eval Score - 0.5942602562565229
Score across folds - [0.5860352488979778, 0.5891661147816512, 0.6075857834402489, 0.5848769559141829, 0.6036371782485537].


 60%|██████    | 12/20 [31:54<23:09, 173.69s/trial, best loss: 0.4021006844372941]


Iteration: 13, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.1, 'min_child_weight': 40.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.7000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.375009	valid_1's multi_logloss: 0.286721         
[200]	valid_0's multi_logloss: 0.374698	valid_1's multi_logloss: 0.222826         
Early stopping, best iteration is:                                                
[149]	valid_0's multi_logloss: 0.373764	valid_1's multi_logloss: 0.252214
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.374509	valid_1's multi_logloss: 0.28708          
[200]	valid_0's multi_logloss: 0.374411	valid_1's multi_logloss: 0.223149         
Early stopping, best iteration is:                                                
[157]	valid_0's multi_logloss: 0.373998	valid_1's multi_logloss: 0.247446
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.371283	valid_1's multi_logloss: 0.287399         
[200]	valid_0's multi_

Score - 0.5931968869688692, Std - 0.006855976504024081, Eval Score - 0.5931968869688692
Score across folds - [0.5894559928759138, 0.5869348129314694, 0.5999163117920657, 0.5867370319038665, 0.6029402853410305].


 65%|██████▌   | 13/20 [34:14<19:05, 163.60s/trial, best loss: 0.4021006844372941]


Iteration: 14, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.1, 'min_child_weight': 60.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 1.0, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.371958	valid_1's multi_logloss: 0.289158         
[200]	valid_0's multi_logloss: 0.371847	valid_1's multi_logloss: 0.228452         
Early stopping, best iteration is:                                                
[137]	valid_0's multi_logloss: 0.370991	valid_1's multi_logloss: 0.263734
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.371714	valid_1's multi_logloss: 0.289593         
[200]	valid_0's multi_logloss: 0.370429	valid_1's multi_logloss: 0.229263         
Early stopping, best iteration is:                                                
[173]	valid_0's multi_logloss: 0.36977	valid_1's multi_logloss: 0.243109
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.368888	valid_1's multi_logloss: 0.290371         
[200]	valid_0's multi_l

Score - 0.5981019888921185, Std - 0.006013429967604631, Eval Score - 0.5981019888921185
Score across folds - [0.5952248997493763, 0.5906973675737738, 0.6050445238654066, 0.5941179582040507, 0.6054251950679859].


 70%|███████   | 14/20 [37:00<16:26, 164.34s/trial, best loss: 0.4018980111078815]


Iteration: 15, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 1.0, 'learning_rate': 0.1, 'min_child_weight': 40.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 32, 'objective': 'multiclass', 'subsample': 0.8, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.376403	valid_1's multi_logloss: 0.312054         
[200]	valid_0's multi_logloss: 0.374739	valid_1's multi_logloss: 0.258948         
Early stopping, best iteration is:                                                
[139]	valid_0's multi_logloss: 0.374486	valid_1's multi_logloss: 0.28891
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.375094	valid_1's multi_logloss: 0.311663         
[200]	valid_0's multi_logloss: 0.373284	valid_1's multi_logloss: 0.259282         
Early stopping, best iteration is:                                                
[163]	valid_0's multi_logloss: 0.372962	valid_1's multi_logloss: 0.276152
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.372106	valid_1's multi_logloss: 0.312406         
[200]	valid_0's multi_l

Score - 0.5918779095193496, Std - 0.006068199785616109, Eval Score - 0.5918779095193496
Score across folds - [0.5817496426736428, 0.5907982887124495, 0.5995262114775864, 0.5908705869397827, 0.5964448177932858].


 75%|███████▌  | 15/20 [39:47<13:44, 164.87s/trial, best loss: 0.4018980111078815]


Iteration: 16, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.6000000000000001, 'learning_rate': 0.1, 'min_child_weight': 20.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.8, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.372059	valid_1's multi_logloss: 0.253638         
[200]	valid_0's multi_logloss: 0.374586	valid_1's multi_logloss: 0.179319         
Early stopping, best iteration is:                                                
[118]	valid_0's multi_logloss: 0.371477	valid_1's multi_logloss: 0.237074
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.370999	valid_1's multi_logloss: 0.254206         
[200]	valid_0's multi_logloss: 0.372914	valid_1's multi_logloss: 0.180079         
Early stopping, best iteration is:                                                
[122]	valid_0's multi_logloss: 0.370714	valid_1's multi_logloss: 0.234483
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.36871	valid_1's multi_logloss: 0.254818          
[200]	valid_0's multi_

Score - 0.5959853680553256, Std - 0.006036047046364535, Eval Score - 0.5959853680553256
Score across folds - [0.5976629956735727, 0.5866762239400318, 0.6046610586917566, 0.5925579442315653, 0.598368617739702].


 80%|████████  | 16/20 [42:03<10:25, 156.38s/trial, best loss: 0.4018980111078815]


Iteration: 17, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.8, 'learning_rate': 0.1, 'min_child_weight': 40.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 96, 'objective': 'multiclass', 'subsample': 0.5, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.381207	valid_1's multi_logloss: 0.292883         
[200]	valid_0's multi_logloss: 0.382349	valid_1's multi_logloss: 0.229586         
Early stopping, best iteration is:                                                
[137]	valid_0's multi_logloss: 0.380795	valid_1's multi_logloss: 0.26645
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.380915	valid_1's multi_logloss: 0.293283         
[200]	valid_0's multi_logloss: 0.382113	valid_1's multi_logloss: 0.229817         
Early stopping, best iteration is:                                                
[139]	valid_0's multi_logloss: 0.380604	valid_1's multi_logloss: 0.26548
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.376475	valid_1's multi_logloss: 0.293821         
[200]	valid_0's multi_lo

Score - 0.5869974756827933, Std - 0.005623500094808942, Eval Score - 0.5869974756827933
Score across folds - [0.5867918847510678, 0.5802845780266213, 0.592741360634057, 0.5812867046679658, 0.5938828503342546].


 85%|████████▌ | 17/20 [44:11<07:23, 147.76s/trial, best loss: 0.4018980111078815]


Iteration: 18, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.1, 'min_child_weight': 60.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 128, 'objective': 'multiclass', 'subsample': 0.6000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.382123	valid_1's multi_logloss: 0.301004         
[200]	valid_0's multi_logloss: 0.381983	valid_1's multi_logloss: 0.242373         
Early stopping, best iteration is:                                                
[154]	valid_0's multi_logloss: 0.381183	valid_1's multi_logloss: 0.266457
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.379451	valid_1's multi_logloss: 0.301146         
[200]	valid_0's multi_logloss: 0.378837	valid_1's multi_logloss: 0.241963         
Early stopping, best iteration is:                                                
[158]	valid_0's multi_logloss: 0.378492	valid_1's multi_logloss: 0.263936
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.375665	valid_1's multi_logloss: 0.301859         
[200]	valid_0's multi_

Score - 0.588483114352507, Std - 0.007144516200621854, Eval Score - 0.588483114352507
Score across folds - [0.5793835420576006, 0.5846066681231206, 0.5961754289949618, 0.5846341635899394, 0.5976157689969132].


 90%|█████████ | 18/20 [46:23<04:46, 143.01s/trial, best loss: 0.4018980111078815]


Iteration: 19, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'min_child_weight': 20.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 32, 'objective': 'multiclass', 'subsample': 0.9, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.373349	valid_1's multi_logloss: 0.301737         
[200]	valid_0's multi_logloss: 0.370883	valid_1's multi_logloss: 0.246579         
Early stopping, best iteration is:                                                
[193]	valid_0's multi_logloss: 0.370695	valid_1's multi_logloss: 0.249835
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.372799	valid_1's multi_logloss: 0.301787         
[200]	valid_0's multi_logloss: 0.371719	valid_1's multi_logloss: 0.247637         
Early stopping, best iteration is:                                                
[183]	valid_0's multi_logloss: 0.371384	valid_1's multi_logloss: 0.255601
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.369558	valid_1's multi_logloss: 0.302281         
[200]	valid_0's multi_

Score - 0.5946369395506742, Std - 0.004550978037876724, Eval Score - 0.5946369395506742
Score across folds - [0.5942837972344568, 0.5888746520336813, 0.5986114015251692, 0.5906015386933641, 0.6008133082666998].


 95%|█████████▌| 19/20 [49:20<02:33, 153.26s/trial, best loss: 0.4018980111078815]


Iteration: 20, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 1.0, 'learning_rate': 0.1, 'min_child_weight': 40.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.7000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.376324	valid_1's multi_logloss: 0.283561         
[200]	valid_0's multi_logloss: 0.377764	valid_1's multi_logloss: 0.217724         
Early stopping, best iteration is:                                                
[141]	valid_0's multi_logloss: 0.375156	valid_1's multi_logloss: 0.253233
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.37476	valid_1's multi_logloss: 0.283877          
[200]	valid_0's multi_logloss: 0.375443	valid_1's multi_logloss: 0.217994         
Early stopping, best iteration is:                                                
[150]	valid_0's multi_logloss: 0.374262	valid_1's multi_logloss: 0.247271
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.372729	valid_1's multi_logloss: 0.284171         
[200]	valid_0's multi_

Score - 0.592015451502973, Std - 0.0054581528993644665, Eval Score - 0.592015451502973
Score across folds - [0.5840151198770185, 0.592478622232907, 0.5954325391326148, 0.588393351989207, 0.5997576242831176].


100%|██████████| 20/20 [52:10<00:00, 156.54s/trial, best loss: 0.4018980111078815]

Best Score- 0.5981019888921185, Best Params- {'boosting_type': 'gbdt', 'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.1, 'min_child_weight': 60.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 1.0, 'subsample_freq': 5}



CPU times: user 10h 17min 53s, sys: 2min 39s, total: 10h 20min 32s
Wall time: 52min 10s


<hyperopt_multiclass.HyperOptModelSelection at 0x7fe00f83bdd0>

In [25]:
# Take the best estimator found by the hyper-parameter search object `hyp`
# (presumably the HyperOptModelSelection whose log is printed above — confirm).
est = hyp.best_estimator

In [28]:
# Show the wrapped model's repr to read off its hyper-parameters.
est.model

LGBMClassifier(colsample_bytree=0.7000000000000001, min_child_weight=60.0,
               n_estimators=5000, num_leaves=64, objective='multiclass',
               subsample_freq=5)

In [24]:

# Rebuild the cross-validated estimator around a LightGBM multiclass model,
# using the hyper-parameters shown in the `est.model` repr above.
est = Estimator(
    model=LGBMClassifier(
        objective='multiclass',
        n_estimators=5000,
        num_leaves=64,
        min_child_weight=60.0,
        colsample_bytree=0.7000000000000001,
        subsample_freq=5,
    ),
    validation_scheme=folds,
    early_stopping_rounds=100,
    n_jobs=-1,
)

In [25]:
# Fit `est` on the training features/labels (passed as plain NumPy arrays) and
# keep the returned array in `temp`.
# NOTE(review): `temp` is presumably the out-of-fold class-probability matrix —
# later cells take argmax over axis 1 and the resulting macro-F1 matches
# est.overall_cv_score. Confirm against Estimator.fit_transform.
temp = est.fit_transform(train[use_cols].values,train[target].values)

Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.368877	valid_1's multi_logloss: 0.285773
[200]	valid_0's multi_logloss: 0.368643	valid_1's multi_logloss: 0.226002
Early stopping, best iteration is:
[133]	valid_0's multi_logloss: 0.367803	valid_1's multi_logloss: 0.263242
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.366863	valid_1's multi_logloss: 0.286554
[200]	valid_0's multi_logloss: 0.366283	valid_1's multi_logloss: 0.226743
Early stopping, best iteration is:
[142]	valid_0's multi_logloss: 0.365394	valid_1's multi_logloss: 0.258266
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.363922	valid_1's multi_logloss: 0.28657
[200]	valid_0's multi_logloss: 0.363344	valid_1's multi_logloss: 0.226571
Early stopping, best iteration is:
[152]	valid_0's multi_logloss: 0.362625	valid_1's multi_logloss: 0.252403
Training until validation scores don't impro

In [26]:
# Display the per-fold scores together with the overall CV score stored on `est`.
est.cv_scores,est.overall_cv_score

([0.5912962042616193,
  0.5957715842452403,
  0.6107143305149728,
  0.5953119760766492,
  0.6069270465163694],
 0.6002773353920517)

In [25]:
# Same display as the previous cell: per-fold scores and overall CV score.
# NOTE(review): the recorded output differs from the cell above, so it appears
# to come from a different fit; consider removing this duplicate cell.
est.cv_scores,est.overall_cv_score

([0.5934327991858405,
  0.5950261041479538,
  0.6066884451930313,
  0.591713788937269,
  0.6098318741845684],
 0.5995219417799544)

In [27]:
# Tabulate feature importances, labelling each feature with its name from `use_cols`.
est.feature_importance_df(use_cols)

Unnamed: 0,column,feature_importance,rank
48,max_tenor,0.026789,1
21,MonthlyIncome,0.026525,2
133,cal_tenor,0.024521,3
49,min_tenor,0.021925,4
99,total_sanctioned_amount_Gold Loan,0.020212,5
47,median_tenor,0.019565,6
134,emi_sal_ratio,0.019018,7
11,City,0.018861,8
12,State,0.018692,9
6,SupplierID,0.017934,10


In [28]:
# Relative frequency of each predicted class, taking the argmax over axis 1 of `temp`.
pd.Series(temp.argmax(axis=1)).value_counts(normalize=True)

0    0.901745
6    0.038856
5    0.016027
3    0.014893
4    0.012895
2    0.010835
1    0.004749
dtype: float64

In [29]:
# Macro-averaged F1 of the argmax class from `temp` against the training labels.
f1_score(
    y_true=train[target],
    y_pred=temp.argmax(axis=1),
    average='macro',
)

0.6002773353920517

In [30]:
# Score the test set once per fitted fold model. The loop variable is named
# `fold_model` to keep it distinct from the outer `est` wrapper it iterates over.
test_preds = [
    fold_model.predict_proba(test[use_cols].values)
    for fold_model in est.fitted_models
]

In [31]:
# Add the per-fold `predict_proba` outputs element-wise (axis 0 runs over the
# fold models) and store, for every test row, the index of the class with the
# largest summed score.
test['Top-up Month'] = np.sum(test_preds,axis=0).argmax(axis=1)

In [62]:
# Relative frequency of each predicted class on the test set.
test['Top-up Month'].value_counts(normalize=True)

0    0.927840
6    0.038725
5    0.013428
4    0.006850
2    0.005629
3    0.004747
1    0.002781
Name: Top-up Month, dtype: float64

In [32]:
# Relative frequency of each predicted class on the test set.
test['Top-up Month'].value_counts(normalize=True)

0    0.924788
6    0.040624
5    0.013021
4    0.007189
3    0.006307
2    0.005290
1    0.002781
Name: Top-up Month, dtype: float64

In [33]:
# Take an explicit copy of the two submission columns so that later column
# assignments on `sub` write to an independent frame instead of a slice of
# `test`. Without the copy pandas raises SettingWithCopyWarning, which this
# notebook silences globally via warnings.filterwarnings("ignore").
sub = test[['ID','Top-up Month']].copy()

In [34]:
# Translate the integer class codes back to the original label strings using
# `reverse_map` (the inverse of `target_map`).
# NOTE: not idempotent — running this cell a second time looks up the already
# decoded strings in `reverse_map`, finds no match and yields NaN.
sub['Top-up Month'] = sub['Top-up Month'].map(reverse_map)

In [35]:
# Write the submission to CSV in the working directory, without the index column.
# (The file name presumably records the ~0.60 cross-validated macro-F1.)
sub.to_csv('LGBM_CV0.60.csv',index=False)

In [79]:
# est.transform(test[use_cols].values)

In [61]:
# est.feature_importance_df(use_cols)

CPU times: user 312 µs, sys: 1 µs, total: 313 µs
Wall time: 221 µs


In [None]:
# NOTE(review): this cell was left as a bare `f1_score(average='macro')`, which
# raises TypeError because the required y_true / y_pred arguments are missing.
# The arguments below mirror the macro-F1 scoring cell earlier in the notebook.
# The HyperOpt log displayed under this cell appears to be stale output from an
# earlier version of the cell — clear it or delete the cell if it is scratch.
f1_score(train[target], temp.argmax(axis=1), average='macro')

Starting HyperOpt 50 Evals with Dataset of Shape ((128655, 140),(128655,))


  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]


Iteration: 1, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.1, 'min_child_weight': 100.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.6000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.386726	valid_1's multi_logloss: 0.339749
[200]	valid_0's multi_logloss: 0.38297	valid_1's multi_logloss: 0.298577
[300]	valid_0's multi_logloss: 0.383247	valid_1's multi_logloss: 0.267356
Early stopping, best iteration is:                    
[247]	valid_0's multi_logloss: 0.382912	valid_1's multi_logloss: 0.283153
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.385503	valid_1's multi_logloss: 0.339835
[200]	valid_0's multi_logloss: 0.381962	valid_1's multi_logloss: 0.298276
[300]	valid_0's multi_logloss: 0.382253	valid_1's multi_logloss: 0.266993
Early stopping, best iteration is:                    
[249]	valid_0's multi_logloss: 0.381808	valid_1's multi_logloss: 0.282088
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.382665	valid_1's multi_logloss: 0.340781
[200]	valid_0's multi_logloss: 0.378551

Score - 0.8875597528273289, Std - 0.0014793520349300454, Eval Score - 0.8875597528273289
Score across folds - [0.8853134351560374, 0.8879561618281451, 0.8896661614395087, 0.8866347984920913, 0.888228207220862].


  2%|▏         | 1/50 [02:34<2:05:59, 154.27s/trial, best loss: 0.8875597528273289]


Iteration: 2, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.6000000000000001, 'learning_rate': 0.1, 'min_child_weight': 40.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.7000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.376155	valid_1's multi_logloss: 0.28877           
[200]	valid_0's multi_logloss: 0.375224	valid_1's multi_logloss: 0.225423          
Early stopping, best iteration is:                                                 
[132]	valid_0's multi_logloss: 0.374725	valid_1's multi_logloss: 0.265511
Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.375302	valid_1's multi_logloss: 0.289366          
[200]	valid_0's multi_logloss: 0.374919	valid_1's multi_logloss: 0.226122          
Early stopping, best iteration is:                                                 
[159]	valid_0's multi_logloss: 0.374178	valid_1's multi_logloss: 0.249097
Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.370718	valid_1's multi_logloss: 0.290128          
[200]	valid_

Score - 0.889596206909953, Std - 0.0010380008568623045, Eval Score - 0.889596206909953
Score across folds - [0.888461388986048, 0.8898993432046948, 0.8908709338929696, 0.888305934475924, 0.8904434339901286].


  4%|▍         | 2/50 [04:50<1:59:07, 148.90s/trial, best loss: 0.8875597528273289]


Iteration: 3, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'min_child_weight': 80.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.6000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.384207	valid_1's multi_logloss: 0.327608          
[200]	valid_0's multi_logloss: 0.382115	valid_1's multi_logloss: 0.280778          
[300]	valid_0's multi_logloss: 0.383421	valid_1's multi_logloss: 0.245457          
Early stopping, best iteration is:                                                 
[206]	valid_0's multi_logloss: 0.381985	valid_1's multi_logloss: 0.278331
Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.382623	valid_1's multi_logloss: 0.327971          
[200]	valid_0's multi_logloss: 0.380609	valid_1's multi_logloss: 0.280399          
Early stopping, best iteration is:                                                 
[178]	valid_0's multi_logloss: 0.380237	valid_1's multi_logloss: 0.289305
Training until validation scores don't improve for 100 rounds                      
  4%|▍      

KeyboardInterrupt: 