In [1]:
%config Completer.use_jedi = False
import pandas as pd
import numpy as np
pd.options.display.max_rows=1000
pd.options.display.max_columns=1000
import math
import re
import datetime as dt
from bureau_fc import get_bureau_feats
from multiprocessing import Pool
import warnings
warnings.filterwarnings("ignore")
import copy
from sklearn.metrics import f1_score

In [2]:
import sys
sys.path.append('ml_lib/')
from encoding import FreqeuncyEncoding
from custom_classifier_mutliclass import Estimator
from hyperopt_multiclass import HyperOptModelSelection
from hyperopt import hp
from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMClassifier

In [3]:
import os

# Directory that holds train_Data.csv / test_Data.csv.
# The original hard-coded path remains the default so existing runs are unchanged;
# set the AV_LTFS3_DATA_DIR environment variable to point the notebook elsewhere.
# os.path.join(..., '') guarantees a trailing separator, because later cells build
# file paths with plain string concatenation (DATA_DIR + 'train_Data.csv').
DATA_DIR = os.path.join(
    os.environ.get('AV_LTFS3_DATA_DIR') or '/home/sahil/data/av/AV_LTFS3/data/',
    '',
)

In [4]:
# Load the raw train / test tables from DATA_DIR (same reader, same file names).
train, test = (
    pd.read_csv(DATA_DIR + file_name)
    for file_name in ('train_Data.csv', 'test_Data.csv')
)
# Pre-computed bureau features, read from the working directory.
# NOTE(review): pickle files execute code on load; only read pickles produced by this project.
bur_df = pd.read_pickle('bureau_future_feats.pkl')

In [5]:
# Integer code for every target label (note the leading space in ' > 48 Months',
# which is part of the raw label text).
target_map = {
    'No Top-up Service': 0,
    '12-18 Months': 1,
    '18-24 Months': 2,
    '24-30 Months': 3,
    '30-36 Months': 4,
    '36-48 Months': 5,
    ' > 48 Months': 6,
}

# Encode the training labels. Series.map turns any value that is not a key of
# target_map into NaN, so an unexpected label, or re-running this cell on the
# already-encoded column, would silently wipe the target. Fail loudly instead.
encoded_target = train['Top-up Month'].map(target_map)
unmapped_labels = train.loc[encoded_target.isna(), 'Top-up Month'].unique()
if len(unmapped_labels):
    raise ValueError(
        "Cannot encode 'Top-up Month': values not in target_map "
        f"(was this cell already run?): {list(unmapped_labels)!r}"
    )
train['Top-up Month'] = encoded_target

In [6]:
# Inverse lookup (integer code -> original label text), used to decode predictions.
reverse_map = dict((code, label) for label, code in target_map.items())

In [7]:
# Display the code -> label mapping to eyeball that all 7 classes round-trip.
reverse_map

{0: 'No Top-up Service',
 1: '12-18 Months',
 2: '18-24 Months',
 3: '24-30 Months',
 4: '30-36 Months',
 5: '36-48 Months',
 6: ' > 48 Months'}

In [8]:
# Class distribution of the encoded target; the output below shows class 0
# ('No Top-up Service') dominating, i.e. the problem is heavily imbalanced.
train['Top-up Month'].value_counts()

0    106677
6      8366
5      3656
3      3492
4      3062
2      2368
1      1034
Name: Top-up Month, dtype: int64

In [9]:
# Stack train on top of test so every feature transform below is applied to both
# in one pass; the rows are separated again later by ID.
df = pd.concat([train, test], axis=0)
df.shape

(143400, 26)

In [10]:
# Size of the bureau feature table; the output shows fewer rows than df has,
# so presumably not every loan ID has bureau features — confirm.
bur_df.shape

(139964, 112)

In [11]:
# Left-join the bureau features onto the loan rows by ID. how='left' keeps every
# loan row even when bur_df has no match (its feature columns are then NaN).
# validate='many_to_one' makes pandas raise MergeError if bur_df contains a
# repeated ID, which would otherwise silently duplicate loan rows.
# NOTE: re-running this cell on an already-merged df would add suffixed duplicate
# columns; re-run from the concat cell instead.
df = df.merge(bur_df, on='ID', how='left', validate='many_to_one')

# Parse the date columns so they support date arithmetic (used for cal_tenor).
df['DisbursalDate'] = pd.to_datetime(df['DisbursalDate'])
df['MaturityDAte'] = pd.to_datetime(df['MaturityDAte'])

In [12]:
# Preview the merged loan + bureau frame.
df.head()

Unnamed: 0,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,AmountFinance,DisbursalAmount,EMI,DisbursalDate,MaturityDAte,AuthDate,AssetID,ManufacturerID,SupplierID,LTV,SEX,AGE,MonthlyIncome,City,State,ZiPCODE,Top-up Month,individual_accounts,joint_accounts,guarantor_accounts,curr_bal_grtr_0,num_accounts,mean_correctedDISBURSED-AMT/HIGH CREDIT,min_correctedDISBURSED-AMT/HIGH CREDIT,max_correctedDISBURSED-AMT/HIGH CREDIT,sum_correctedDISBURSED-AMT/HIGH CREDIT,mean_correctedCURRENT-BAL,min_correctedCURRENT-BAL,max_correctedCURRENT-BAL,sum_correctedCURRENT-BAL,mean_correctedOVERDUE-AMT,min_correctedOVERDUE-AMT,max_correctedOVERDUE-AMT,sum_correctedOVERDUE-AMT,num_closed_accounts,num_open_accounts,num_delinq_accounts,total_written_off_amount,mean_percent_paid_off,min_percent_paid_off,max_percent_paid_off,overall_percent_paid_off,median_tenor,max_tenor,min_tenor,std_count,ddd_count,xxx_count,late_count,_30_count,_60_count,_90_count,_180_count,total_count,std_count_application_loan,ddd_count_application_loan,xxx_count_application_loan,late_count_application_loan,_30_count_application_loan,_60_count_application_loan,_90_count_application_loan,_180_count_application_loan,total_count_application_loan,mean_dpd_str,min_dpd_str,max_dpd_str,mean_dpd_str_application_loan,min_dpd_str_application_loan,max_dpd_str_application_loan,num_accounts__between_0_and_365_days,total_sanctioned_amount__between_0_and_365_days,total_curr_bal__between_0_and_365_days,overall_percentage_paid_off__between_0_and_365_days,num_accounts__between_365_and_730_days,total_sanctioned_amount__between_365_and_730_days,total_curr_bal__between_365_and_730_days,overall_percentage_paid_off__between_365_and_730_days,num_accounts__between_730_and_1095_days,total_sanctioned_amount__between_730_and_1095_days,total_curr_bal__between_730_and_1095_days,overall_percentage_paid_off__between_730_and_1095_days,num_accounts__between_1095_and_1460_days,total_sanctioned_amount__between_1095_and_14
60_days,total_curr_bal__between_1095_and_1460_days,overall_percentage_paid_off__between_1095_and_1460_days,num_accounts__between_1460_and_3650_days,total_sanctioned_amount__between_1460_and_3650_days,total_curr_bal__between_1460_and_3650_days,overall_percentage_paid_off__between_1460_and_3650_days,num_accounts_Tractor Loan,total_sanctioned_amount_Tractor Loan,total_curr_bal_Tractor Loan,overall_percentage_paid_off_Tractor Loan,num_accounts_Gold Loan,total_sanctioned_amount_Gold Loan,total_curr_bal_Gold Loan,overall_percentage_paid_off_Gold Loan,num_accounts_Business Loan Priority Sector Agriculture,total_sanctioned_amount_Business Loan Priority Sector Agriculture,total_curr_bal_Business Loan Priority Sector Agriculture,overall_percentage_paid_off_Business Loan Priority Sector Agriculture,num_accounts_Kisan Credit Card,total_sanctioned_amount_Kisan Credit Card,total_curr_bal_Kisan Credit Card,overall_percentage_paid_off_Kisan Credit Card,num_accounts_Auto Loan (Personal),total_sanctioned_amount_Auto Loan (Personal),total_curr_bal_Auto Loan (Personal),overall_percentage_paid_off_Auto Loan (Personal),num_accounts_Personal Loan,total_sanctioned_amount_Personal Loan,total_curr_bal_Personal Loan,overall_percentage_paid_off_Personal Loan,num_accounts_Other,total_sanctioned_amount_Other,total_curr_bal_Other,overall_percentage_paid_off_Other,num_accounts_Overdraft,total_sanctioned_amount_Overdraft,total_curr_bal_Overdraft,overall_percentage_paid_off_Overdraft,mean_day_start_day_diff_app_vs_other,min_day_start_day_diff_app_vs_other,max_day_start_day_diff_app_vs_other,mean_days_bw_loans,min_days_bw_loans,max_days_bw_loans,sum_days_bw_loans
0,1,Monthly,Arrear,Closed,PDC_E,1,,48,450000,275000.0,275000.0,24000.0,2012-02-10,2016-01-15,2012-02-10 00:00:00,4022465,1568.0,21946,61.11,M,49.0,35833.33,RAISEN,MADHYA PRADESH,464993.0,6.0,6.0,0.0,0.0,1.0,6.0,252058.666667,0.0,500000.0,1512352.0,6312.166667,0.0,37873.0,37873.0,7574.6,0.0,37873.0,37873.0,5.0,0.0,1.0,0.0,0.79721,-0.013948,1.0,0.974958,1472.0,1704.0,109.0,72.0,3.0,0.0,74.0,5.0,1.0,0.0,0.0,149.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,12.919463,0.0,87.0,0.0,0.0,0.0,2.0,775000.0,0.0,1.0,2.0,700000.0,0.0,1.0,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,1.0,0.0,0.0,,1.0,275000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,1200000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,986.4,355.0,2162.0,432.4,108.0,829.0,2162.0
1,2,Monthly,Advance,Closed,PDC,333,BHOPAL,47,485000,350000.0,350000.0,10500.0,2012-03-31,2016-02-15,2012-03-31 00:00:00,4681175,1062.0,34802,70.0,M,23.0,666.67,SEHORE,MADHYA PRADESH,466001.0,0.0,8.0,0.0,1.0,3.0,9.0,893342.333333,300000.0,3000000.0,8040081.0,58074.555556,0.0,247887.0,522671.0,0.0,0.0,0.0,0.0,6.0,3.0,0.0,0.0,0.948397,0.684584,1.0,0.934992,1401.0,2222.0,392.0,152.0,15.0,6.0,31.0,2.0,0.0,0.0,0.0,204.0,5.0,0.0,0.0,6.0,4.0,4.0,4.0,0.0,35.0,2.77451,0.0,47.0,15.722222,0.0,47.0,1.0,350000.0,0.0,1.0,2.0,654176.0,0.0,1.0,1.0,450000.0,0.0,1.0,4.0,5800000.0,274784.0,0.952623,1.0,785905.0,247887.0,0.684584,2.0,704176.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,2150000.0,0.0,1.0,1202.75,456.0,1736.0,217.0,0.0,456.0,1736.0
2,3,Quatrly,Arrear,Active,Direct Debit,1,,68,690000,519728.0,519728.0,38300.0,2017-06-17,2023-02-10,2017-06-17 00:00:00,25328146,1060.0,127335,69.77,M,39.0,45257.0,BHOPAL,MADHYA PRADESH,462030.0,1.0,9.0,1.0,1.0,7.0,11.0,195769.454545,8703.0,950000.0,2153464.0,134547.909091,0.0,811839.0,1480027.0,0.0,0.0,0.0,0.0,4.0,7.0,0.0,0.0,0.562115,0.005757,1.0,0.312723,290.5,958.0,39.0,93.0,3.0,0.0,6.0,0.0,0.0,0.0,0.0,102.0,3.0,0.0,0.0,5.0,3.0,3.0,3.0,0.0,35.0,0.333333,0.0,15.0,0.0,0.0,0.0,4.0,637655.0,319224.0,0.499378,5.0,1207106.0,862330.0,0.285622,2.0,308703.0,298473.0,0.033139,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,519728.0,307637.0,0.408081,2.0,252000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,487.8,136.0,919.0,91.9,3.0,178.0,919.0
3,7,Monthly,Advance,Closed,Billed,125,GUNA,48,480000,400000.0,400000.0,11600.0,2013-11-29,2017-11-10,2013-11-29 00:00:00,13021591,1060.0,25094,80.92,M,24.0,20833.33,ASHOK NAGAR,MADHYA PRADESH,473335.0,6.0,4.0,0.0,0.0,1.0,4.0,318138.25,100000.0,542553.0,1272553.0,156000.0,0.0,624000.0,624000.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.71247,-0.150118,1.0,0.509647,567.5,1462.0,122.0,79.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,400000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,872553.0,624000.0,0.284857,3.0,1172553.0,624000.0,0.467828,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1682.333333,1522.0,1820.0,606.666667,115.0,1522.0,1820.0
4,8,Monthly,Arrear,Closed,Billed,152,BILASPUR,44,619265,440000.0,440000.0,15000.0,2011-12-08,2015-07-05,2011-12-08 00:00:00,3291320,1046.0,21853,71.05,M,56.0,27313.67,BILASPUR,CHATTISGARH,495442.0,5.0,3.0,1.0,1.0,2.0,5.0,279043.2,100000.0,463975.0,1395216.0,28263.8,0.0,134330.0,141319.0,11728.333333,0.0,35185.0,35185.0,2.0,1.0,1.0,0.0,0.935107,0.71048,1.0,0.898712,1331.0,1567.0,799.0,66.0,8.0,48.0,19.0,16.0,12.0,5.0,0.0,141.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,9.425532,0.0,210.0,0.0,0.0,0.0,1.0,440000.0,0.0,1.0,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,2.0,663975.0,141319.0,0.787162,1.0,191241.0,0.0,1.0,2.0,903975.0,134330.0,0.851401,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,300000.0,6989.0,0.976703,0.0,0.0,0.0,,0.0,0.0,0.0,,1425.0,1051.0,1850.0,462.5,19.0,1051.0,1850.0


In [13]:
# ZIP code as text (missing values become '000000'), split into three 2-character
# chunks. Columns are added in the same order as before: pin1, pin2, pin3,
# cal_tenor, emi_sal_ratio, ltv2.
zip_text = df['ZiPCODE'].fillna('000000').astype('str')
df['pin1'] = zip_text.str.slice(0, 2)
df['pin2'] = zip_text.str.slice(2, 4)
df['pin3'] = zip_text.str.slice(4, 6)

# Loan length in days, from disbursal to maturity.
df['cal_tenor'] = (df['MaturityDAte'] - df['DisbursalDate']).dt.days

# Ratio features: EMI relative to income, and disbursed amount relative to asset cost.
df['emi_sal_ratio'] = df['EMI'].div(df['MonthlyIncome'])
df['ltv2'] = df['DisbursalAmount'].div(df['AssetCost'])

In [14]:
target = 'Top-up Month'

# Columns handed to the frequency encoder.
cat_cols = [
    'Frequency', 'InstlmentMode', 'LoanStatus', 'PaymentMode', 'Area',
    'ManufacturerID', 'SupplierID', 'pin1', 'pin2', 'pin3',
    'DisbursalDate', 'MaturityDAte', 'AuthDate', 'SEX', 'City', 'State', 'BranchID',
]

# Columns kept out of num_cols.
# NOTE(review): the three date columns are listed here AND in cat_cols, so they are
# only excluded from num_cols and still reach use_cols through cat_cols. Confirm
# whether they were meant to be model features.
drop_cols = ['ID', 'DisbursalDate', 'MaturityDAte', 'AuthDate', 'AssetID']

# Everything that is neither target, dropped, nor categorical, in df column order.
num_cols = [col for col in df.columns if col not in [target] + drop_cols + cat_cols]

# Final model feature list: categoricals first, then the remaining columns.
use_cols = cat_cols + num_cols

In [15]:
# Replace each categorical column with an encoded numeric value (the preview in the
# next cell shows fractions in these columns). The encoder is fitted on the combined
# train + test frame.
# NOTE: this overwrites df, so re-running the cell would encode the encoded values.
fe = FreqeuncyEncoding(
    categorical_columns=cat_cols,
    normalize=True,
    return_df=True,
)
df = fe.fit_transform(df)

In [16]:
# Preview after encoding: the categorical columns now hold numeric values and the
# engineered columns (pin1..ltv2) appear at the end.
df.head()

Unnamed: 0,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,AmountFinance,DisbursalAmount,EMI,DisbursalDate,MaturityDAte,AuthDate,AssetID,ManufacturerID,SupplierID,LTV,SEX,AGE,MonthlyIncome,City,State,ZiPCODE,Top-up Month,individual_accounts,joint_accounts,guarantor_accounts,curr_bal_grtr_0,num_accounts,mean_correctedDISBURSED-AMT/HIGH CREDIT,min_correctedDISBURSED-AMT/HIGH CREDIT,max_correctedDISBURSED-AMT/HIGH CREDIT,sum_correctedDISBURSED-AMT/HIGH CREDIT,mean_correctedCURRENT-BAL,min_correctedCURRENT-BAL,max_correctedCURRENT-BAL,sum_correctedCURRENT-BAL,mean_correctedOVERDUE-AMT,min_correctedOVERDUE-AMT,max_correctedOVERDUE-AMT,sum_correctedOVERDUE-AMT,num_closed_accounts,num_open_accounts,num_delinq_accounts,total_written_off_amount,mean_percent_paid_off,min_percent_paid_off,max_percent_paid_off,overall_percent_paid_off,median_tenor,max_tenor,min_tenor,std_count,ddd_count,xxx_count,late_count,_30_count,_60_count,_90_count,_180_count,total_count,std_count_application_loan,ddd_count_application_loan,xxx_count_application_loan,late_count_application_loan,_30_count_application_loan,_60_count_application_loan,_90_count_application_loan,_180_count_application_loan,total_count_application_loan,mean_dpd_str,min_dpd_str,max_dpd_str,mean_dpd_str_application_loan,min_dpd_str_application_loan,max_dpd_str_application_loan,num_accounts__between_0_and_365_days,total_sanctioned_amount__between_0_and_365_days,total_curr_bal__between_0_and_365_days,overall_percentage_paid_off__between_0_and_365_days,num_accounts__between_365_and_730_days,total_sanctioned_amount__between_365_and_730_days,total_curr_bal__between_365_and_730_days,overall_percentage_paid_off__between_365_and_730_days,num_accounts__between_730_and_1095_days,total_sanctioned_amount__between_730_and_1095_days,total_curr_bal__between_730_and_1095_days,overall_percentage_paid_off__between_730_and_1095_days,num_accounts__between_1095_and_1460_days,total_sanctioned_amount__between_1095_and_14
60_days,total_curr_bal__between_1095_and_1460_days,overall_percentage_paid_off__between_1095_and_1460_days,num_accounts__between_1460_and_3650_days,total_sanctioned_amount__between_1460_and_3650_days,total_curr_bal__between_1460_and_3650_days,overall_percentage_paid_off__between_1460_and_3650_days,num_accounts_Tractor Loan,total_sanctioned_amount_Tractor Loan,total_curr_bal_Tractor Loan,overall_percentage_paid_off_Tractor Loan,num_accounts_Gold Loan,total_sanctioned_amount_Gold Loan,total_curr_bal_Gold Loan,overall_percentage_paid_off_Gold Loan,num_accounts_Business Loan Priority Sector Agriculture,total_sanctioned_amount_Business Loan Priority Sector Agriculture,total_curr_bal_Business Loan Priority Sector Agriculture,overall_percentage_paid_off_Business Loan Priority Sector Agriculture,num_accounts_Kisan Credit Card,total_sanctioned_amount_Kisan Credit Card,total_curr_bal_Kisan Credit Card,overall_percentage_paid_off_Kisan Credit Card,num_accounts_Auto Loan (Personal),total_sanctioned_amount_Auto Loan (Personal),total_curr_bal_Auto Loan (Personal),overall_percentage_paid_off_Auto Loan (Personal),num_accounts_Personal Loan,total_sanctioned_amount_Personal Loan,total_curr_bal_Personal Loan,overall_percentage_paid_off_Personal Loan,num_accounts_Other,total_sanctioned_amount_Other,total_curr_bal_Other,overall_percentage_paid_off_Other,num_accounts_Overdraft,total_sanctioned_amount_Overdraft,total_curr_bal_Overdraft,overall_percentage_paid_off_Overdraft,mean_day_start_day_diff_app_vs_other,min_day_start_day_diff_app_vs_other,max_day_start_day_diff_app_vs_other,mean_days_bw_loans,min_days_bw_loans,max_days_bw_loans,sum_days_bw_loans,pin1,pin2,pin3,cal_tenor,emi_sal_ratio,ltv2
0,1,0.233849,0.952483,0.737259,0.076199,0.004505,0.0,48,450000,275000.0,275000.0,24000.0,0.000314,0.001562,0.000286,4022465,0.122059,0.000404,61.11,0.950724,49.0,35833.33,0.007817,0.156869,464993.0,6.0,6.0,0.0,0.0,1.0,6.0,252058.666667,0.0,500000.0,1512352.0,6312.166667,0.0,37873.0,37873.0,7574.6,0.0,37873.0,37873.0,5.0,0.0,1.0,0.0,0.79721,-0.013948,1.0,0.974958,1472.0,1704.0,109.0,72.0,3.0,0.0,74.0,5.0,1.0,0.0,0.0,149.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,12.919463,0.0,87.0,0.0,0.0,0.0,2.0,775000.0,0.0,1.0,2.0,700000.0,0.0,1.0,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,1.0,0.0,0.0,,1.0,275000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,1200000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,986.4,355.0,2162.0,432.4,108.0,829.0,2162.0,0.053445,0.001876,0.001722,1435.0,0.669768,0.611111
1,2,0.233849,0.047517,0.737259,0.210237,0.004372,0.016303,47,485000,350000.0,350000.0,10500.0,0.002789,0.000858,0.002559,4681175,0.139653,0.000404,70.0,0.950724,23.0,666.67,0.007795,0.156869,466001.0,0.0,8.0,0.0,1.0,3.0,9.0,893342.333333,300000.0,3000000.0,8040081.0,58074.555556,0.0,247887.0,522671.0,0.0,0.0,0.0,0.0,6.0,3.0,0.0,0.0,0.948397,0.684584,1.0,0.934992,1401.0,2222.0,392.0,152.0,15.0,6.0,31.0,2.0,0.0,0.0,0.0,204.0,5.0,0.0,0.0,6.0,4.0,4.0,4.0,0.0,35.0,2.77451,0.0,47.0,15.722222,0.0,47.0,1.0,350000.0,0.0,1.0,2.0,654176.0,0.0,1.0,1.0,450000.0,0.0,1.0,4.0,5800000.0,274784.0,0.952623,1.0,785905.0,247887.0,0.684584,2.0,704176.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,2150000.0,0.0,1.0,1202.75,456.0,1736.0,217.0,0.0,456.0,1736.0,0.053445,0.013033,0.133229,1416.0,15.749921,0.721649
2,3,0.15553,0.952483,0.262741,0.244582,0.004505,0.0,68,690000,519728.0,519728.0,38300.0,0.001144,0.000837,0.001046,25328146,0.120887,4.9e-05,69.77,0.950724,39.0,45257.0,0.001286,0.156869,462030.0,1.0,9.0,1.0,1.0,7.0,11.0,195769.454545,8703.0,950000.0,2153464.0,134547.909091,0.0,811839.0,1480027.0,0.0,0.0,0.0,0.0,4.0,7.0,0.0,0.0,0.562115,0.005757,1.0,0.312723,290.5,958.0,39.0,93.0,3.0,0.0,6.0,0.0,0.0,0.0,0.0,102.0,3.0,0.0,0.0,5.0,3.0,3.0,3.0,0.0,35.0,0.333333,0.0,15.0,0.0,0.0,0.0,4.0,637655.0,319224.0,0.499378,5.0,1207106.0,862330.0,0.285622,2.0,308703.0,298473.0,0.033139,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,519728.0,307637.0,0.408081,2.0,252000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,487.8,136.0,919.0,91.9,3.0,178.0,919.0,0.053445,0.029972,0.017483,2064.0,0.846278,0.753229
3,7,0.233849,0.047517,0.737259,0.2044,0.004972,0.007226,48,480000,400000.0,400000.0,11600.0,0.000809,0.009463,0.000656,13021591,0.120887,0.001374,80.92,0.950724,24.0,20833.33,0.004291,0.156869,473335.0,6.0,4.0,0.0,0.0,1.0,4.0,318138.25,100000.0,542553.0,1272553.0,156000.0,0.0,624000.0,624000.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.71247,-0.150118,1.0,0.509647,567.5,1462.0,122.0,79.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,400000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,872553.0,624000.0,0.284857,3.0,1172553.0,624000.0,0.467828,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1682.333333,1522.0,1820.0,606.666667,115.0,1522.0,1820.0,0.021946,0.020635,0.013605,1442.0,0.5568,0.833333
4,8,0.233849,0.952483,0.737259,0.2044,0.006555,0.013064,44,619265,440000.0,440000.0,15000.0,0.000209,0.00136,0.000195,3291320,0.197987,0.000314,71.05,0.950724,56.0,27313.67,0.005267,0.039603,495442.0,5.0,3.0,1.0,1.0,2.0,5.0,279043.2,100000.0,463975.0,1395216.0,28263.8,0.0,134330.0,141319.0,11728.333333,0.0,35185.0,35185.0,2.0,1.0,1.0,0.0,0.935107,0.71048,1.0,0.898712,1331.0,1567.0,799.0,66.0,8.0,48.0,19.0,16.0,12.0,5.0,0.0,141.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,9.425532,0.0,210.0,0.0,0.0,0.0,1.0,440000.0,0.0,1.0,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,2.0,663975.0,141319.0,0.787162,1.0,191241.0,0.0,1.0,2.0,903975.0,134330.0,0.851401,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,300000.0,6989.0,0.976703,0.0,0.0,0.0,,0.0,0.0,0.0,,1425.0,1051.0,1850.0,462.5,19.0,1051.0,1850.0,0.03977,0.018354,0.004296,1305.0,0.549176,0.71052


In [17]:
# Separate the combined frame back into train and test via the IDs of the raw
# frames, ordered by ID.
train = df[df.ID.isin(train.ID)].sort_values('ID', ascending=True)
test = df[df.ID.isin(test.ID)].sort_values('ID', ascending=True)

# The target came back from the concat as float; restore integer class codes for
# the labelled rows.
train[target] = train[target].astype('int')

In [18]:
# Preview the final training frame (target column is integer-coded again).
train.head()

Unnamed: 0,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,AmountFinance,DisbursalAmount,EMI,DisbursalDate,MaturityDAte,AuthDate,AssetID,ManufacturerID,SupplierID,LTV,SEX,AGE,MonthlyIncome,City,State,ZiPCODE,Top-up Month,individual_accounts,joint_accounts,guarantor_accounts,curr_bal_grtr_0,num_accounts,mean_correctedDISBURSED-AMT/HIGH CREDIT,min_correctedDISBURSED-AMT/HIGH CREDIT,max_correctedDISBURSED-AMT/HIGH CREDIT,sum_correctedDISBURSED-AMT/HIGH CREDIT,mean_correctedCURRENT-BAL,min_correctedCURRENT-BAL,max_correctedCURRENT-BAL,sum_correctedCURRENT-BAL,mean_correctedOVERDUE-AMT,min_correctedOVERDUE-AMT,max_correctedOVERDUE-AMT,sum_correctedOVERDUE-AMT,num_closed_accounts,num_open_accounts,num_delinq_accounts,total_written_off_amount,mean_percent_paid_off,min_percent_paid_off,max_percent_paid_off,overall_percent_paid_off,median_tenor,max_tenor,min_tenor,std_count,ddd_count,xxx_count,late_count,_30_count,_60_count,_90_count,_180_count,total_count,std_count_application_loan,ddd_count_application_loan,xxx_count_application_loan,late_count_application_loan,_30_count_application_loan,_60_count_application_loan,_90_count_application_loan,_180_count_application_loan,total_count_application_loan,mean_dpd_str,min_dpd_str,max_dpd_str,mean_dpd_str_application_loan,min_dpd_str_application_loan,max_dpd_str_application_loan,num_accounts__between_0_and_365_days,total_sanctioned_amount__between_0_and_365_days,total_curr_bal__between_0_and_365_days,overall_percentage_paid_off__between_0_and_365_days,num_accounts__between_365_and_730_days,total_sanctioned_amount__between_365_and_730_days,total_curr_bal__between_365_and_730_days,overall_percentage_paid_off__between_365_and_730_days,num_accounts__between_730_and_1095_days,total_sanctioned_amount__between_730_and_1095_days,total_curr_bal__between_730_and_1095_days,overall_percentage_paid_off__between_730_and_1095_days,num_accounts__between_1095_and_1460_days,total_sanctioned_amount__between_1095_and_14
60_days,total_curr_bal__between_1095_and_1460_days,overall_percentage_paid_off__between_1095_and_1460_days,num_accounts__between_1460_and_3650_days,total_sanctioned_amount__between_1460_and_3650_days,total_curr_bal__between_1460_and_3650_days,overall_percentage_paid_off__between_1460_and_3650_days,num_accounts_Tractor Loan,total_sanctioned_amount_Tractor Loan,total_curr_bal_Tractor Loan,overall_percentage_paid_off_Tractor Loan,num_accounts_Gold Loan,total_sanctioned_amount_Gold Loan,total_curr_bal_Gold Loan,overall_percentage_paid_off_Gold Loan,num_accounts_Business Loan Priority Sector Agriculture,total_sanctioned_amount_Business Loan Priority Sector Agriculture,total_curr_bal_Business Loan Priority Sector Agriculture,overall_percentage_paid_off_Business Loan Priority Sector Agriculture,num_accounts_Kisan Credit Card,total_sanctioned_amount_Kisan Credit Card,total_curr_bal_Kisan Credit Card,overall_percentage_paid_off_Kisan Credit Card,num_accounts_Auto Loan (Personal),total_sanctioned_amount_Auto Loan (Personal),total_curr_bal_Auto Loan (Personal),overall_percentage_paid_off_Auto Loan (Personal),num_accounts_Personal Loan,total_sanctioned_amount_Personal Loan,total_curr_bal_Personal Loan,overall_percentage_paid_off_Personal Loan,num_accounts_Other,total_sanctioned_amount_Other,total_curr_bal_Other,overall_percentage_paid_off_Other,num_accounts_Overdraft,total_sanctioned_amount_Overdraft,total_curr_bal_Overdraft,overall_percentage_paid_off_Overdraft,mean_day_start_day_diff_app_vs_other,min_day_start_day_diff_app_vs_other,max_day_start_day_diff_app_vs_other,mean_days_bw_loans,min_days_bw_loans,max_days_bw_loans,sum_days_bw_loans,pin1,pin2,pin3,cal_tenor,emi_sal_ratio,ltv2
0,1,0.233849,0.952483,0.737259,0.076199,0.004505,0.0,48,450000,275000.0,275000.0,24000.0,0.000314,0.001562,0.000286,4022465,0.122059,0.000404,61.11,0.950724,49.0,35833.33,0.007817,0.156869,464993.0,6,6.0,0.0,0.0,1.0,6.0,252058.666667,0.0,500000.0,1512352.0,6312.166667,0.0,37873.0,37873.0,7574.6,0.0,37873.0,37873.0,5.0,0.0,1.0,0.0,0.79721,-0.013948,1.0,0.974958,1472.0,1704.0,109.0,72.0,3.0,0.0,74.0,5.0,1.0,0.0,0.0,149.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,12.919463,0.0,87.0,0.0,0.0,0.0,2.0,775000.0,0.0,1.0,2.0,700000.0,0.0,1.0,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,1.0,0.0,0.0,,1.0,275000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,1200000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,37352.0,37873.0,-0.013948,986.4,355.0,2162.0,432.4,108.0,829.0,2162.0,0.053445,0.001876,0.001722,1435.0,0.669768,0.611111
1,2,0.233849,0.047517,0.737259,0.210237,0.004372,0.016303,47,485000,350000.0,350000.0,10500.0,0.002789,0.000858,0.002559,4681175,0.139653,0.000404,70.0,0.950724,23.0,666.67,0.007795,0.156869,466001.0,0,8.0,0.0,1.0,3.0,9.0,893342.333333,300000.0,3000000.0,8040081.0,58074.555556,0.0,247887.0,522671.0,0.0,0.0,0.0,0.0,6.0,3.0,0.0,0.0,0.948397,0.684584,1.0,0.934992,1401.0,2222.0,392.0,152.0,15.0,6.0,31.0,2.0,0.0,0.0,0.0,204.0,5.0,0.0,0.0,6.0,4.0,4.0,4.0,0.0,35.0,2.77451,0.0,47.0,15.722222,0.0,47.0,1.0,350000.0,0.0,1.0,2.0,654176.0,0.0,1.0,1.0,450000.0,0.0,1.0,4.0,5800000.0,274784.0,0.952623,1.0,785905.0,247887.0,0.684584,2.0,704176.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,2150000.0,0.0,1.0,1202.75,456.0,1736.0,217.0,0.0,456.0,1736.0,0.053445,0.013033,0.133229,1416.0,15.749921,0.721649
2,3,0.15553,0.952483,0.262741,0.244582,0.004505,0.0,68,690000,519728.0,519728.0,38300.0,0.001144,0.000837,0.001046,25328146,0.120887,4.9e-05,69.77,0.950724,39.0,45257.0,0.001286,0.156869,462030.0,1,9.0,1.0,1.0,7.0,11.0,195769.454545,8703.0,950000.0,2153464.0,134547.909091,0.0,811839.0,1480027.0,0.0,0.0,0.0,0.0,4.0,7.0,0.0,0.0,0.562115,0.005757,1.0,0.312723,290.5,958.0,39.0,93.0,3.0,0.0,6.0,0.0,0.0,0.0,0.0,102.0,3.0,0.0,0.0,5.0,3.0,3.0,3.0,0.0,35.0,0.333333,0.0,15.0,0.0,0.0,0.0,4.0,637655.0,319224.0,0.499378,5.0,1207106.0,862330.0,0.285622,2.0,308703.0,298473.0,0.033139,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,519728.0,307637.0,0.408081,2.0,252000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,487.8,136.0,919.0,91.9,3.0,178.0,919.0,0.053445,0.029972,0.017483,2064.0,0.846278,0.753229
3,7,0.233849,0.047517,0.737259,0.2044,0.004972,0.007226,48,480000,400000.0,400000.0,11600.0,0.000809,0.009463,0.000656,13021591,0.120887,0.001374,80.92,0.950724,24.0,20833.33,0.004291,0.156869,473335.0,6,4.0,0.0,0.0,1.0,4.0,318138.25,100000.0,542553.0,1272553.0,156000.0,0.0,624000.0,624000.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.71247,-0.150118,1.0,0.509647,567.5,1462.0,122.0,79.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,400000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,3.0,872553.0,624000.0,0.284857,3.0,1172553.0,624000.0,0.467828,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,,1682.333333,1522.0,1820.0,606.666667,115.0,1522.0,1820.0,0.021946,0.020635,0.013605,1442.0,0.5568,0.833333
4,8,0.233849,0.952483,0.737259,0.2044,0.006555,0.013064,44,619265,440000.0,440000.0,15000.0,0.000209,0.00136,0.000195,3291320,0.197987,0.000314,71.05,0.950724,56.0,27313.67,0.005267,0.039603,495442.0,5,3.0,1.0,1.0,2.0,5.0,279043.2,100000.0,463975.0,1395216.0,28263.8,0.0,134330.0,141319.0,11728.333333,0.0,35185.0,35185.0,2.0,1.0,1.0,0.0,0.935107,0.71048,1.0,0.898712,1331.0,1567.0,799.0,66.0,8.0,48.0,19.0,16.0,12.0,5.0,0.0,141.0,7.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,35.0,9.425532,0.0,210.0,0.0,0.0,0.0,1.0,440000.0,0.0,1.0,0.0,0.0,0.0,,1.0,100000.0,0.0,1.0,2.0,663975.0,141319.0,0.787162,1.0,191241.0,0.0,1.0,2.0,903975.0,134330.0,0.851401,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,2.0,300000.0,6989.0,0.976703,0.0,0.0,0.0,,0.0,0.0,0.0,,1425.0,1051.0,1850.0,462.5,19.0,1051.0,1850.0,0.03977,0.018354,0.004296,1305.0,0.549176,0.71052


In [19]:
# Materialise 5 stratified, shuffled CV splits once (fixed seed) as a list of
# (train_indices, validation_indices) pairs so every model sees the same folds.
folds = list(
    StratifiedKFold(n_splits=5, shuffle=True, random_state=2).split(train, train[target])
)

In [20]:
# Hyperopt search space for LightGBM. Entries built with hp.quniform are searched
# (quantised uniform over [low, high] in steps of q); plain values are held fixed.
lgbm_space = dict(
    n_estimators=5000,  # upper bound only; training uses early stopping
    num_leaves=hp.quniform("num_leaves", 32, 128, 32),
    min_child_weight=hp.quniform("min_child_weight", 10, 100, 20),
    subsample=hp.quniform("subsample", 0.5, 1, 0.1),
    colsample_bytree=hp.quniform("colsample_bytree", 0.5, 1, 0.1),
    subsample_freq=5,
    objective="multiclass",
    boosting_type="gbdt",
    learning_rate=0.1,
    n_jobs=-1,
)

In [21]:
# Cross-validating wrapper around a default LGBMClassifier, evaluated on the
# pre-built folds with a 100-round early-stopping patience.
est = Estimator(
    model=LGBMClassifier(),
    validation_scheme=folds,
    early_stopping_rounds=100,
    n_jobs=-1,
)

In [22]:
%%time
hyp= HyperOptModelSelection(model=est,space=lgbm_space,max_evals=20,is_maximize=True,log_file_path='hyp.txt')

CPU times: user 167 µs, sys: 25 µs, total: 192 µs
Wall time: 147 µs


In [None]:
%%time
# Run the hyperparameter search on the training matrix (the log below reports
# 20 evals on a (128655, 140) dataset, roughly 2+ minutes per trial).
# NOTE(review): this cell shows `In [None]` and a truncated log, i.e. the search was
# not run to completion in the saved notebook.
hyp.fit(train[use_cols].values,train[target].values)

Starting HyperOpt 20 Evals with Dataset of Shape ((128655, 140),(128655,))


  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]


Iteration: 1, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.1, 'min_child_weight': 100.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.6000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.386726	valid_1's multi_logloss: 0.339749
[200]	valid_0's multi_logloss: 0.38297	valid_1's multi_logloss: 0.298577
[300]	valid_0's multi_logloss: 0.383247	valid_1's multi_logloss: 0.267356
Early stopping, best iteration is:                    
[247]	valid_0's multi_logloss: 0.382912	valid_1's multi_logloss: 0.283153
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.385503	valid_1's multi_logloss: 0.339835
[200]	valid_0's multi_logloss: 0.381962	valid_1's multi_logloss: 0.298276
[300]	valid_0's multi_logloss: 0.382253	valid_1's multi_logloss: 0.266993
Early stopping, best iteration is:                    
[249]	valid_0's multi_logloss: 0.381808	valid_1's multi_logloss: 0.282088
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.382665	valid_1's multi_logloss: 0.340781
[200]	valid_0's multi_logloss: 0.378551

Score - 0.5861428785530058, Std - 0.009641915924062758, Eval Score - 0.5861428785530058
Score across folds - [0.5733636017570625, 0.5831165768629223, 0.6016444891498589, 0.5811949523635097, 0.5913947726316756].


  5%|▌         | 1/20 [02:18<43:50, 138.47s/trial, best loss: 0.41385712144699416]


Iteration: 2, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.6000000000000001, 'learning_rate': 0.1, 'min_child_weight': 40.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.7000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.376155	valid_1's multi_logloss: 0.28877          
[200]	valid_0's multi_logloss: 0.375224	valid_1's multi_logloss: 0.225423         
Early stopping, best iteration is:                                                
[132]	valid_0's multi_logloss: 0.374725	valid_1's multi_logloss: 0.265511
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.375302	valid_1's multi_logloss: 0.289366         
[200]	valid_0's multi_logloss: 0.374919	valid_1's multi_logloss: 0.226122         
Early stopping, best iteration is:                                                
[159]	valid_0's multi_logloss: 0.374178	valid_1's multi_logloss: 0.249097
Training until validation scores don't improve for 100 rounds                     
[100]	valid_0's multi_logloss: 0.370718	valid_1's multi_logloss: 0.290128         
[200]	valid_0's multi_

In [20]:
# Cross-validated fit of `est`; `temp` holds fit_transform's return value, which
# later cells argmax over axis=1 (presumably out-of-fold class probabilities —
# confirm in custom_classifier_mutliclass).
# NOTE(review): the log shows every fold ending at ~100 iterations with
# "Did not meet early stopping", which looks like LGBMClassifier() defaults rather
# than tuned parameters from the search above — confirm that is intended.
temp = est.fit_transform(train[use_cols].values,train[target].values)

Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.376074	valid_1's multi_logloss: 0.283575
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.376074	valid_1's multi_logloss: 0.283575
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.376098	valid_1's multi_logloss: 0.283661
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.376098	valid_1's multi_logloss: 0.283661
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.373734	valid_1's multi_logloss: 0.283949
Did not meet early stopping. Best iteration is:
[98]	valid_0's multi_logloss: 0.373696	valid_1's multi_logloss: 0.285394
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.378215	valid_1's multi_logloss: 0.283217
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.378215	val

In [30]:
# Per-fold scores and the overall CV score; the overall value shown below equals the
# macro-F1 computed with f1_score further down.
est.cv_scores,est.overall_cv_score

([0.5869312154882586,
  0.5791403813182364,
  0.5965691092217057,
  0.5831439986973833,
  0.5931938870208837],
 0.5880172423235558)

In [28]:
# Share of each predicted class among the training-set predictions.
pd.Series(temp.argmax(axis=1)).value_counts(normalize=True)

0    0.904069
6    0.038731
5    0.015406
3    0.013983
4    0.012654
2    0.010516
1    0.004640
dtype: float64

In [29]:
# Macro-averaged F1 of the arg-max predictions in `temp` against the true labels.
f1_score(y_true=train[target], y_pred=temp.argmax(axis=1), average='macro')

0.5880172423235558

In [52]:
# One probability matrix per fitted fold model. The loop variable gets its own name
# so it no longer shadows the `est` wrapper inside the comprehension.
test_preds = [
    fold_model.predict_proba(test[use_cols].values)
    for fold_model in est.fitted_models
]

In [58]:
# Ensemble by summing the per-fold probability matrices, then take the most
# probable class code for each test row.
test['Top-up Month'] = np.asarray(test_preds).sum(axis=0).argmax(axis=1)

In [62]:
# Share of each predicted class on the test set (compare with the training-set
# prediction distribution above).
test['Top-up Month'].value_counts(normalize=True)

0    0.927840
6    0.038725
5    0.013428
4    0.006850
2    0.005629
3    0.004747
1    0.002781
Name: Top-up Month, dtype: float64

In [65]:
# Submission frame: just the row ID and the predicted class.
# Take an explicit copy so that later column assignments on `sub` operate on
# an independent frame instead of a slice of `test` (which would be a
# chained-assignment / SettingWithCopy situation, currently hidden because
# warnings are globally silenced at the top of the notebook).
sub = test[['ID', 'Top-up Month']].copy()

In [74]:
# Translate the integer class predictions back to the original label strings.
# The labels are mapped from test['Top-up Month'] (still integer-coded) rather
# than from sub's own column, so re-running this cell is idempotent: mapping an
# already-translated column through reverse_map would turn every value into NaN.
# assign() returns a new frame, so nothing is written into a slice of `test`.
sub = sub.assign(**{'Top-up Month': test['Top-up Month'].map(reverse_map)})

In [76]:
# Write the submission file to the working directory, without the index column.
sub.to_csv(path_or_buf='baseline.csv', index=False)

In [79]:
# est.transform(test[use_cols].values)

In [61]:
# est.feature_importance_df(use_cols)

CPU times: user 312 µs, sys: 1 µs, total: 313 µs
Wall time: 221 µs


In [None]:
# NOTE(review): this call passes only `average` and no y_true / y_pred, so it
# cannot compute a score as written. The output recorded under this cell is a
# HyperOpt model-selection log, so the cell's original code was presumably
# overwritten after it was run -- TODO restore the hyperopt search call or
# remove this cell.
f1_score(average='macro')

Starting HyperOpt 50 Evals with Dataset of Shape ((128655, 140),(128655,))


  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]


Iteration: 1, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.1, 'min_child_weight': 100.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.6000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.386726	valid_1's multi_logloss: 0.339749
[200]	valid_0's multi_logloss: 0.38297	valid_1's multi_logloss: 0.298577
[300]	valid_0's multi_logloss: 0.383247	valid_1's multi_logloss: 0.267356
Early stopping, best iteration is:                    
[247]	valid_0's multi_logloss: 0.382912	valid_1's multi_logloss: 0.283153
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.385503	valid_1's multi_logloss: 0.339835
[200]	valid_0's multi_logloss: 0.381962	valid_1's multi_logloss: 0.298276
[300]	valid_0's multi_logloss: 0.382253	valid_1's multi_logloss: 0.266993
Early stopping, best iteration is:                    
[249]	valid_0's multi_logloss: 0.381808	valid_1's multi_logloss: 0.282088
Training until validation scores don't improve for 100 rounds
[100]	valid_0's multi_logloss: 0.382665	valid_1's multi_logloss: 0.340781
[200]	valid_0's multi_logloss: 0.378551

Score - 0.8875597528273289, Std - 0.0014793520349300454, Eval Score - 0.8875597528273289
Score across folds - [0.8853134351560374, 0.8879561618281451, 0.8896661614395087, 0.8866347984920913, 0.888228207220862].


  2%|▏         | 1/50 [02:34<2:05:59, 154.27s/trial, best loss: 0.8875597528273289]


Iteration: 2, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.6000000000000001, 'learning_rate': 0.1, 'min_child_weight': 40.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.7000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.376155	valid_1's multi_logloss: 0.28877           
[200]	valid_0's multi_logloss: 0.375224	valid_1's multi_logloss: 0.225423          
Early stopping, best iteration is:                                                 
[132]	valid_0's multi_logloss: 0.374725	valid_1's multi_logloss: 0.265511
Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.375302	valid_1's multi_logloss: 0.289366          
[200]	valid_0's multi_logloss: 0.374919	valid_1's multi_logloss: 0.226122          
Early stopping, best iteration is:                                                 
[159]	valid_0's multi_logloss: 0.374178	valid_1's multi_logloss: 0.249097
Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.370718	valid_1's multi_logloss: 0.290128          
[200]	valid_

Score - 0.889596206909953, Std - 0.0010380008568623045, Eval Score - 0.889596206909953
Score across folds - [0.888461388986048, 0.8898993432046948, 0.8908709338929696, 0.888305934475924, 0.8904434339901286].


  4%|▍         | 2/50 [04:50<1:59:07, 148.90s/trial, best loss: 0.8875597528273289]


Iteration: 3, Training with params: {'boosting_type': 'gbdt', 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'min_child_weight': 80.0, 'n_estimators': 5000, 'n_jobs': -1, 'num_leaves': 64, 'objective': 'multiclass', 'subsample': 0.6000000000000001, 'subsample_freq': 5}


Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.384207	valid_1's multi_logloss: 0.327608          
[200]	valid_0's multi_logloss: 0.382115	valid_1's multi_logloss: 0.280778          
[300]	valid_0's multi_logloss: 0.383421	valid_1's multi_logloss: 0.245457          
Early stopping, best iteration is:                                                 
[206]	valid_0's multi_logloss: 0.381985	valid_1's multi_logloss: 0.278331
Training until validation scores don't improve for 100 rounds                      
[100]	valid_0's multi_logloss: 0.382623	valid_1's multi_logloss: 0.327971          
[200]	valid_0's multi_logloss: 0.380609	valid_1's multi_logloss: 0.280399          
Early stopping, best iteration is:                                                 
[178]	valid_0's multi_logloss: 0.380237	valid_1's multi_logloss: 0.289305
Training until validation scores don't improve for 100 rounds                      
  4%|▍      

KeyboardInterrupt: 