# IMPORT LIBRARIES

In [1]:
!pip install lightgbm
!pip install optuna



In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, RobustScaler, StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

import lightgbm as lgb
import optuna

# GET THE DATA

In [3]:
train= pd.read_csv('dataset/train.csv')
test= pd.read_csv('dataset/test.csv')
test_ids= test['customer_id']

# PREPROCESS

In [4]:
train.loc[28498, 'gender']= 'M'

In [5]:
train[train['gender']== 'XNA']

Unnamed: 0,customer_id,name,age,gender,owns_car,owns_house,no_of_children,net_yearly_income,no_of_days_employed,occupation_type,total_family_members,migrant_worker,yearly_debt_payments,credit_limit,credit_limit_used(%),credit_score,prev_defaults,default_in_last_6months,credit_card_default


In [6]:
train[train['net_yearly_income']>= 1e8]

Unnamed: 0,customer_id,name,age,gender,owns_car,owns_house,no_of_children,net_yearly_income,no_of_days_employed,occupation_type,total_family_members,migrant_worker,yearly_debt_payments,credit_limit,credit_limit_used(%),credit_score,prev_defaults,default_in_last_6months,credit_card_default
26662,CST_114967,Jonathan Stempel,51,F,N,Y,1.0,140759000.0,924.0,Laborers,3.0,0.0,32825.6,31129970.49,82,526.0,0,1,1


In [7]:
train[train['credit_limit']>= 1e7]

Unnamed: 0,customer_id,name,age,gender,owns_car,owns_house,no_of_children,net_yearly_income,no_of_days_employed,occupation_type,total_family_members,migrant_worker,yearly_debt_payments,credit_limit,credit_limit_used(%),credit_score,prev_defaults,default_in_last_6months,credit_card_default
26662,CST_114967,Jonathan Stempel,51,F,N,Y,1.0,140759000.0,924.0,Laborers,3.0,0.0,32825.6,31129970.49,82,526.0,0,1,1


In [8]:
train= train.drop(26662, axis= 0)

In [9]:
t= train['no_of_days_employed']

In [10]:
u= t.loc[t < 300000]

In [11]:
train.loc[train['no_of_days_employed']> 300000, 'no_of_days_employed']= u.mean()

In [12]:
categorical_features= ['gender', 'owns_car', 'owns_house', 'occupation_type']

In [13]:
for i in categorical_features:
    df1= pd.get_dummies(train[i], prefix= '{}'.format(i))
    train= pd.concat([train, df1], axis= 1)
    train= train.drop([i], axis= 1)

In [14]:
train= train.drop(['gender_M', 'owns_car_N', 'owns_house_N'], axis= 1)

In [15]:
test.loc[3510, 'gender']= 'F'

In [16]:
for i in categorical_features:
    df1= pd.get_dummies(test[i], prefix= '{}'.format(i))
    test= pd.concat([test, df1], axis= 1)
    test= test.drop([i], axis= 1)
    
test= test.drop(['gender_M', 'owns_car_N', 'owns_house_N'], axis= 1)

In [17]:
y= train['credit_card_default']
X= train.drop(['customer_id', 'name', 'credit_card_default'], axis= 1)
test= test.drop(['customer_id', 'name'], axis= 1)

In [18]:
scaler= RobustScaler()
scaler.fit(X)
X= scaler.transform(X)
test= scaler.transform(test)

# TRAIN AND MAKE PREDS

In [19]:
oof_preds= np.empty(train.shape[0])
test_preds= np.empty((test.shape[0], 10))

def hyper_tune(trial, data= X, target= y, oof_preds= oof_preds, test_preds= test_preds):
    kfold= StratifiedKFold(n_splits= 10, shuffle= True, random_state= 2021)
    
    params= {'objective': 'binary',
             'metric': 'binary_logloss',
             'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
             'learning_rate': trial.suggest_float('learning_rate', 0.001, 1),
             'num_leaves': trial.suggest_int('num_leaves', 30, 3000),
             'max_depth': trial.suggest_int('max_depth', 3, 50),
             "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 10000, step=100),
             "max_bin": trial.suggest_int("max_bin", 200, 300),
             "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
             "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
             "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
             "bagging_fraction": trial.suggest_float("bagging_fraction", 0.2, 0.95, step=0.1),
             "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
             "feature_fraction": trial.suggest_float("feature_fraction", 0.2, 0.95, step=0.1),
             'random_state': 2021
            }
    
    for fold, (train_idx, val_idx) in enumerate(kfold.split(X, y)):
        X_train, X_val= X[train_idx], X[val_idx]
        y_train, y_val= y.iloc[train_idx], y.iloc[val_idx]
        model= lgb.LGBMClassifier(**params)
        model.fit(X_train, 
                  y_train,
                  eval_set= [(X_val, y_val)],
                  eval_metric= 'binary_logloss',
                  callbacks= [lgb.early_stopping(stopping_rounds= trial.suggest_int('stopping_rounds', 20, 200)),
                             lgb.log_evaluation(period= 50)])
        
        oof_preds[val_idx]= model.predict(X_val)
        test_preds[:, fold]= model.predict(test)
        
    score= 100 * f1_score(y, oof_preds, average= 'macro')
    return score

In [20]:
study= optuna.create_study(direction= 'maximize')
study.optimize(hyper_tune, n_trials= 10)

[32m[I 2021-12-25 23:20:27,630][0m A new study created in memory with name: no-name-4b500cbd-29ca-4ed5-9a2d-0a732efd6b1f[0m


Training until validation scores don't improve for 93 rounds
[50]	valid_0's binary_logloss: 0.07837
[100]	valid_0's binary_logloss: 0.0782639
[150]	valid_0's binary_logloss: 0.0780526
[200]	valid_0's binary_logloss: 0.0773145
[250]	valid_0's binary_logloss: 0.0773145
Early stopping, best iteration is:
[189]	valid_0's binary_logloss: 0.0773145
Training until validation scores don't improve for 93 rounds
[50]	valid_0's binary_logloss: 0.075913
[100]	valid_0's binary_logloss: 0.075913
Early stopping, best iteration is:
[47]	valid_0's binary_logloss: 0.075913
Training until validation scores don't improve for 93 rounds
[50]	valid_0's binary_logloss: 0.0738679
[100]	valid_0's binary_logloss: 0.0738679
Early stopping, best iteration is:
[48]	valid_0's binary_logloss: 0.0738679
Training until validation scores don't improve for 93 rounds
[50]	valid_0's binary_logloss: 0.0699588
[100]	valid_0's binary_logloss: 0.0696395
[150]	valid_0's binary_logloss: 0.0696395
Early stopping, best iteration i

[50]	valid_0's binary_logloss: 0.0783114
[100]	valid_0's binary_logloss: 0.0777527
[150]	valid_0's binary_logloss: 0.0777527
Early stopping, best iteration is:
[95]	valid_0's binary_logloss: 0.0777527
Training until validation scores don't improve for 93 rounds
[50]	valid_0's binary_logloss: 0.0749053
[100]	valid_0's binary_logloss: 0.074515
[150]	valid_0's binary_logloss: 0.0738476
[200]	valid_0's binary_logloss: 0.0738476
Early stopping, best iteration is:
[131]	valid_0's binary_logloss: 0.0738476
Training until validation scores don't improve for 93 rounds
[50]	valid_0's binary_logloss: 0.0739836
[100]	valid_0's binary_logloss: 0.0729957
[150]	valid_0's binary_logloss: 0.0722179


[32m[I 2021-12-25 23:20:32,895][0m Trial 0 finished with value: 91.45246886538582 and parameters: {'n_estimators': 703, 'learning_rate': 0.19305426632035, 'num_leaves': 1896, 'max_depth': 32, 'min_data_in_leaf': 7300, 'max_bin': 285, 'lambda_l1': 80, 'lambda_l2': 5, 'min_gain_to_split': 5.688896301002334, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.8, 'stopping_rounds': 93}. Best is trial 0 with value: 91.45246886538582.[0m


[200]	valid_0's binary_logloss: 0.0720401
[250]	valid_0's binary_logloss: 0.0720401
Early stopping, best iteration is:
[184]	valid_0's binary_logloss: 0.0720401




Training until validation scores don't improve for 95 rounds
[50]	valid_0's binary_logloss: 0.402156
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.402156
Training until validation scores don't improve for 95 rounds
[50]	valid_0's binary_logloss: 0.402156
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.402156
Training until validation scores don't improve for 95 rounds
[50]	valid_0's binary_logloss: 0.402156
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.402156
Training until validation scores don't improve for 95 rounds
[50]	valid_0's binary_logloss: 0.402156
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.402156
Training until validation scores don't improve for 95 rounds
[50]	valid_0's binary_logloss: 0.402156
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.402156
Training until validation scores don't improve for 95 rounds
[50]	valid_0's binary_logloss: 0.402156
Early stopping, best ite

Training until validation scores don't improve for 95 rounds
[50]	valid_0's binary_logloss: 0.401132
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.401132


[32m[I 2021-12-25 23:20:35,518][0m Trial 1 finished with value: 47.884566954371664 and parameters: {'n_estimators': 832, 'learning_rate': 0.15240881819196186, 'num_leaves': 2361, 'max_depth': 42, 'min_data_in_leaf': 9000, 'max_bin': 262, 'lambda_l1': 90, 'lambda_l2': 80, 'min_gain_to_split': 13.734482720015313, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8, 'stopping_rounds': 95}. Best is trial 0 with value: 91.45246886538582.[0m


Training until validation scores don't improve for 95 rounds
[50]	valid_0's binary_logloss: 0.401132
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.401132




Training until validation scores don't improve for 175 rounds
[50]	valid_0's binary_logloss: 0.0673673
[100]	valid_0's binary_logloss: 0.0548431
[150]	valid_0's binary_logloss: 0.054585
[200]	valid_0's binary_logloss: 0.054585
[250]	valid_0's binary_logloss: 0.0545055
[300]	valid_0's binary_logloss: 0.0545055
[350]	valid_0's binary_logloss: 0.0545055
Early stopping, best iteration is:
[222]	valid_0's binary_logloss: 0.0545055
Training until validation scores don't improve for 175 rounds
[50]	valid_0's binary_logloss: 0.065895
[100]	valid_0's binary_logloss: 0.0536082
[150]	valid_0's binary_logloss: 0.0531068
[200]	valid_0's binary_logloss: 0.053078
[250]	valid_0's binary_logloss: 0.0530024
[300]	valid_0's binary_logloss: 0.0530024
[350]	valid_0's binary_logloss: 0.0530024
Early stopping, best iteration is:
[214]	valid_0's binary_logloss: 0.0530024
Training until validation scores don't improve for 175 rounds
[50]	valid_0's binary_logloss: 0.0643443
[100]	valid_0's binary_logloss: 0.052

[50]	valid_0's binary_logloss: 0.0608153
[100]	valid_0's binary_logloss: 0.0481382
[150]	valid_0's binary_logloss: 0.0475901
[200]	valid_0's binary_logloss: 0.0475222
[250]	valid_0's binary_logloss: 0.0475222
[300]	valid_0's binary_logloss: 0.0475222
Early stopping, best iteration is:
[160]	valid_0's binary_logloss: 0.0475222
Training until validation scores don't improve for 175 rounds
[50]	valid_0's binary_logloss: 0.0664183
[100]	valid_0's binary_logloss: 0.0537377
[150]	valid_0's binary_logloss: 0.0533951
[200]	valid_0's binary_logloss: 0.0532888
[250]	valid_0's binary_logloss: 0.0532888
[300]	valid_0's binary_logloss: 0.0532129
[350]	valid_0's binary_logloss: 0.0532129
[400]	valid_0's binary_logloss: 0.0532129
[450]	valid_0's binary_logloss: 0.0532129
Early stopping, best iteration is:
[276]	valid_0's binary_logloss: 0.0532129
Training until validation scores don't improve for 175 rounds
[50]	valid_0's binary_logloss: 0.0626153
[100]	valid_0's binary_logloss: 0.051027
[150]	valid_

[32m[I 2021-12-25 23:20:43,682][0m Trial 2 finished with value: 92.37623451327973 and parameters: {'n_estimators': 789, 'learning_rate': 0.09372475878137862, 'num_leaves': 525, 'max_depth': 32, 'min_data_in_leaf': 7000, 'max_bin': 208, 'lambda_l1': 75, 'lambda_l2': 50, 'min_gain_to_split': 3.932612368618828, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5, 'stopping_rounds': 175}. Best is trial 2 with value: 92.37623451327973.[0m


Training until validation scores don't improve for 177 rounds
[50]	valid_0's binary_logloss: 0.084912
[100]	valid_0's binary_logloss: 0.0707851
[150]	valid_0's binary_logloss: 0.0697599
Did not meet early stopping. Best iteration is:
[140]	valid_0's binary_logloss: 0.0697599
Training until validation scores don't improve for 177 rounds
[50]	valid_0's binary_logloss: 0.0841866
[100]	valid_0's binary_logloss: 0.0675791
[150]	valid_0's binary_logloss: 0.0664534
Did not meet early stopping. Best iteration is:
[149]	valid_0's binary_logloss: 0.0664534
Training until validation scores don't improve for 177 rounds
[50]	valid_0's binary_logloss: 0.0850209
[100]	valid_0's binary_logloss: 0.0674219
[150]	valid_0's binary_logloss: 0.0661216
Did not meet early stopping. Best iteration is:
[137]	valid_0's binary_logloss: 0.0661216
Training until validation scores don't improve for 177 rounds
[50]	valid_0's binary_logloss: 0.078736
[100]	valid_0's binary_logloss: 0.0628269
[150]	valid_0's binary_log

Training until validation scores don't improve for 177 rounds
[50]	valid_0's binary_logloss: 0.0848494
[100]	valid_0's binary_logloss: 0.0683987
[150]	valid_0's binary_logloss: 0.0675815
Did not meet early stopping. Best iteration is:
[155]	valid_0's binary_logloss: 0.0669272
Training until validation scores don't improve for 177 rounds
[50]	valid_0's binary_logloss: 0.0838427
[100]	valid_0's binary_logloss: 0.0673039
[150]	valid_0's binary_logloss: 0.0661622
Did not meet early stopping. Best iteration is:
[152]	valid_0's binary_logloss: 0.0660098
Training until validation scores don't improve for 177 rounds
[50]	valid_0's binary_logloss: 0.0827906


[32m[I 2021-12-25 23:20:47,992][0m Trial 3 finished with value: 92.12732911312804 and parameters: {'n_estimators': 163, 'learning_rate': 0.12961371793031198, 'num_leaves': 1920, 'max_depth': 47, 'min_data_in_leaf': 7000, 'max_bin': 238, 'lambda_l1': 40, 'lambda_l2': 0, 'min_gain_to_split': 9.336420220709048, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004, 'stopping_rounds': 177}. Best is trial 2 with value: 92.37623451327973.[0m


[100]	valid_0's binary_logloss: 0.0663455
[150]	valid_0's binary_logloss: 0.0651422
Did not meet early stopping. Best iteration is:
[158]	valid_0's binary_logloss: 0.0649566




Training until validation scores don't improve for 91 rounds
[50]	valid_0's binary_logloss: 0.0749591
[100]	valid_0's binary_logloss: 0.0749591
[150]	valid_0's binary_logloss: 0.0742259
[200]	valid_0's binary_logloss: 0.0743792
Did not meet early stopping. Best iteration is:
[117]	valid_0's binary_logloss: 0.0742259
Training until validation scores don't improve for 91 rounds
[50]	valid_0's binary_logloss: 0.0729158
[100]	valid_0's binary_logloss: 0.0729036
[150]	valid_0's binary_logloss: 0.0729036
Early stopping, best iteration is:
[69]	valid_0's binary_logloss: 0.0729036
Training until validation scores don't improve for 91 rounds
[50]	valid_0's binary_logloss: 0.0724924
[100]	valid_0's binary_logloss: 0.0721281
[150]	valid_0's binary_logloss: 0.0721554
[200]	valid_0's binary_logloss: 0.0719453
Did not meet early stopping. Best iteration is:
[190]	valid_0's binary_logloss: 0.0719453
Training until validation scores don't improve for 91 rounds
[50]	valid_0's binary_logloss: 0.0660569


[100]	valid_0's binary_logloss: 0.075451
[150]	valid_0's binary_logloss: 0.075085
[200]	valid_0's binary_logloss: 0.075085
Did not meet early stopping. Best iteration is:
[136]	valid_0's binary_logloss: 0.075085
Training until validation scores don't improve for 91 rounds
[50]	valid_0's binary_logloss: 0.0750946
[100]	valid_0's binary_logloss: 0.0750946
[150]	valid_0's binary_logloss: 0.0748427
[200]	valid_0's binary_logloss: 0.0748427
Did not meet early stopping. Best iteration is:
[139]	valid_0's binary_logloss: 0.0748427
Training until validation scores don't improve for 91 rounds
[50]	valid_0's binary_logloss: 0.0725839


[32m[I 2021-12-25 23:20:52,314][0m Trial 4 finished with value: 91.56146725942278 and parameters: {'n_estimators': 201, 'learning_rate': 0.8392266724985119, 'num_leaves': 2452, 'max_depth': 7, 'min_data_in_leaf': 8900, 'max_bin': 290, 'lambda_l1': 20, 'lambda_l2': 90, 'min_gain_to_split': 2.6490122313813727, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.8, 'stopping_rounds': 91}. Best is trial 2 with value: 92.37623451327973.[0m


[100]	valid_0's binary_logloss: 0.0710298
[150]	valid_0's binary_logloss: 0.0710298
Early stopping, best iteration is:
[69]	valid_0's binary_logloss: 0.0710298




Training until validation scores don't improve for 196 rounds
[50]	valid_0's binary_logloss: 0.402156
[100]	valid_0's binary_logloss: 0.402156
[150]	valid_0's binary_logloss: 0.402156
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.402156
Training until validation scores don't improve for 196 rounds
[50]	valid_0's binary_logloss: 0.402156
[100]	valid_0's binary_logloss: 0.402156
[150]	valid_0's binary_logloss: 0.402156
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.402156
Training until validation scores don't improve for 196 rounds
[50]	valid_0's binary_logloss: 0.402156
[100]	valid_0's binary_logloss: 0.402156
[150]	valid_0's binary_logloss: 0.402156
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.402156
Training until validation scores don't improve for 196 rounds
[50]	valid_0's binary_logloss: 0.402156
[100]	valid_0's binary_logloss: 0.402156
[150]	valid_0's binary_logloss: 0.402156
Early stopping, best iteration is:
[1]	val

[150]	valid_0's binary_logloss: 0.401132
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.401132
Training until validation scores don't improve for 196 rounds
[50]	valid_0's binary_logloss: 0.401132
[100]	valid_0's binary_logloss: 0.401132
[150]	valid_0's binary_logloss: 0.401132
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.401132
Training until validation scores don't improve for 196 rounds
[50]	valid_0's binary_logloss: 0.401132
[100]	valid_0's binary_logloss: 0.401132
[150]	valid_0's binary_logloss: 0.401132
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.401132

[32m[I 2021-12-25 23:20:55,309][0m Trial 5 finished with value: 47.884566954371664 and parameters: {'n_estimators': 445, 'learning_rate': 0.016198257557585975, 'num_leaves': 587, 'max_depth': 43, 'min_data_in_leaf': 9900, 'max_bin': 254, 'lambda_l1': 20, 'lambda_l2': 45, 'min_gain_to_split': 7.549437853167168, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001, 'stopping_rounds': 196}. Best is trial 2 with value: 92.37623451327973.[0m



Training until validation scores don't improve for 35 rounds




[50]	valid_0's binary_logloss: 0.0546641
Early stopping, best iteration is:
[49]	valid_0's binary_logloss: 0.0546641
Training until validation scores don't improve for 35 rounds
[50]	valid_0's binary_logloss: 0.0524911
Early stopping, best iteration is:
[45]	valid_0's binary_logloss: 0.0524911
Training until validation scores don't improve for 35 rounds
[50]	valid_0's binary_logloss: 0.0512758
[100]	valid_0's binary_logloss: 0.051158
Early stopping, best iteration is:
[76]	valid_0's binary_logloss: 0.051158
Training until validation scores don't improve for 35 rounds
[50]	valid_0's binary_logloss: 0.0471379
Early stopping, best iteration is:
[32]	valid_0's binary_logloss: 0.0471379
Training until validation scores don't improve for 35 rounds
[50]	valid_0's binary_logloss: 0.050757
Early stopping, best iteration is:
[30]	valid_0's binary_logloss: 0.050757
Training until validation scores don't improve for 35 rounds
[50]	valid_0's binary_logloss: 0.0479727
Early stopping, best iteration 

[32m[I 2021-12-25 23:20:57,964][0m Trial 6 finished with value: 92.46325623519564 and parameters: {'n_estimators': 465, 'learning_rate': 0.2510975318910183, 'num_leaves': 2510, 'max_depth': 34, 'min_data_in_leaf': 2300, 'max_bin': 269, 'lambda_l1': 100, 'lambda_l2': 15, 'min_gain_to_split': 4.590018688309162, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5, 'stopping_rounds': 35}. Best is trial 6 with value: 92.46325623519564.[0m


[50]	valid_0's binary_logloss: 0.0507976
Early stopping, best iteration is:
[32]	valid_0's binary_logloss: 0.0507976
Training until validation scores don't improve for 119 rounds




[50]	valid_0's binary_logloss: 0.0608227
[100]	valid_0's binary_logloss: 0.0569052
[150]	valid_0's binary_logloss: 0.0566914
[200]	valid_0's binary_logloss: 0.0565962
[250]	valid_0's binary_logloss: 0.0564916
[300]	valid_0's binary_logloss: 0.0564916
[350]	valid_0's binary_logloss: 0.0563973
[400]	valid_0's binary_logloss: 0.0563973
[450]	valid_0's binary_logloss: 0.0563973
Early stopping, best iteration is:
[333]	valid_0's binary_logloss: 0.0563973
Training until validation scores don't improve for 119 rounds
[50]	valid_0's binary_logloss: 0.0599794
[100]	valid_0's binary_logloss: 0.0557317
[150]	valid_0's binary_logloss: 0.0556289
[200]	valid_0's binary_logloss: 0.0555048
[250]	valid_0's binary_logloss: 0.0554517
[300]	valid_0's binary_logloss: 0.0554517
[350]	valid_0's binary_logloss: 0.0553135
[400]	valid_0's binary_logloss: 0.0553135
[450]	valid_0's binary_logloss: 0.0553135
Early stopping, best iteration is:
[331]	valid_0's binary_logloss: 0.0553135
Training until validation scor

[100]	valid_0's binary_logloss: 0.0496007
[150]	valid_0's binary_logloss: 0.0496177
[200]	valid_0's binary_logloss: 0.0495572
[250]	valid_0's binary_logloss: 0.0495572
Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.0495572
Training until validation scores don't improve for 119 rounds
[50]	valid_0's binary_logloss: 0.0601121
[100]	valid_0's binary_logloss: 0.0558357
[150]	valid_0's binary_logloss: 0.0558357
[200]	valid_0's binary_logloss: 0.0556893
[250]	valid_0's binary_logloss: 0.0555999
[300]	valid_0's binary_logloss: 0.0555999
[350]	valid_0's binary_logloss: 0.0555346
[400]	valid_0's binary_logloss: 0.0555346
[450]	valid_0's binary_logloss: 0.0554542
[500]	valid_0's binary_logloss: 0.0554542
Early stopping, best iteration is:
[401]	valid_0's binary_logloss: 0.0554542
Training until validation scores don't improve for 119 rounds
[50]	valid_0's binary_logloss: 0.0569742
[100]	valid_0's binary_logloss: 0.0532027
[150]	valid_0's binary_logloss: 0.0530647
[200]	vali

[32m[I 2021-12-25 23:21:03,785][0m Trial 7 finished with value: 92.40418561291547 and parameters: {'n_estimators': 891, 'learning_rate': 0.11125705961271147, 'num_leaves': 98, 'max_depth': 29, 'min_data_in_leaf': 5000, 'max_bin': 266, 'lambda_l1': 25, 'lambda_l2': 100, 'min_gain_to_split': 12.115764105334023, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.9, 'stopping_rounds': 119}. Best is trial 6 with value: 92.46325623519564.[0m


[300]	valid_0's binary_logloss: 0.0526272
[350]	valid_0's binary_logloss: 0.0525532
[400]	valid_0's binary_logloss: 0.0525532
[450]	valid_0's binary_logloss: 0.0525532
Early stopping, best iteration is:
[348]	valid_0's binary_logloss: 0.0525532




Training until validation scores don't improve for 78 rounds
[50]	valid_0's binary_logloss: 0.10334
[100]	valid_0's binary_logloss: 0.0781175
[150]	valid_0's binary_logloss: 0.073282
[200]	valid_0's binary_logloss: 0.0712031
[250]	valid_0's binary_logloss: 0.0702315
[300]	valid_0's binary_logloss: 0.0701301
Did not meet early stopping. Best iteration is:
[318]	valid_0's binary_logloss: 0.0698938
Training until validation scores don't improve for 78 rounds
[50]	valid_0's binary_logloss: 0.102087
[100]	valid_0's binary_logloss: 0.0770556
[150]	valid_0's binary_logloss: 0.0717485
[200]	valid_0's binary_logloss: 0.0694805
[250]	valid_0's binary_logloss: 0.0688544
[300]	valid_0's binary_logloss: 0.0685336
Did not meet early stopping. Best iteration is:
[324]	valid_0's binary_logloss: 0.0684466
Training until validation scores don't improve for 78 rounds
[50]	valid_0's binary_logloss: 0.0993657
[100]	valid_0's binary_logloss: 0.0745262
[150]	valid_0's binary_logloss: 0.0689782
[200]	valid_0'

[50]	valid_0's binary_logloss: 0.0972169
[100]	valid_0's binary_logloss: 0.0721553
[150]	valid_0's binary_logloss: 0.0663371
[200]	valid_0's binary_logloss: 0.0640822
[250]	valid_0's binary_logloss: 0.0633393
[300]	valid_0's binary_logloss: 0.0630841
Did not meet early stopping. Best iteration is:
[328]	valid_0's binary_logloss: 0.0629187
Training until validation scores don't improve for 78 rounds
[50]	valid_0's binary_logloss: 0.100785
[100]	valid_0's binary_logloss: 0.0768935
[150]	valid_0's binary_logloss: 0.0718124
[200]	valid_0's binary_logloss: 0.069579
[250]	valid_0's binary_logloss: 0.0690407
[300]	valid_0's binary_logloss: 0.0686208
Did not meet early stopping. Best iteration is:
[320]	valid_0's binary_logloss: 0.0682737
Training until validation scores don't improve for 78 rounds
[50]	valid_0's binary_logloss: 0.0979155
[100]	valid_0's binary_logloss: 0.0733942
[150]	valid_0's binary_logloss: 0.0684402
[200]	valid_0's binary_logloss: 0.0662006
[250]	valid_0's binary_logloss:

[32m[I 2021-12-25 23:21:10,738][0m Trial 8 finished with value: 92.48844494674942 and parameters: {'n_estimators': 339, 'learning_rate': 0.04682638928330207, 'num_leaves': 2185, 'max_depth': 23, 'min_data_in_leaf': 500, 'max_bin': 287, 'lambda_l1': 95, 'lambda_l2': 80, 'min_gain_to_split': 5.351756498903151, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.5, 'stopping_rounds': 78}. Best is trial 8 with value: 92.48844494674942.[0m


Training until validation scores don't improve for 24 rounds
[50]	valid_0's binary_logloss: 0.149074
[100]	valid_0's binary_logloss: 0.102668
[150]	valid_0's binary_logloss: 0.0865443
[200]	valid_0's binary_logloss: 0.0818836
[250]	valid_0's binary_logloss: 0.0807004
[300]	valid_0's binary_logloss: 0.0804653
[350]	valid_0's binary_logloss: 0.0803246
Early stopping, best iteration is:
[332]	valid_0's binary_logloss: 0.0803246
Training until validation scores don't improve for 24 rounds
[50]	valid_0's binary_logloss: 0.149689
[100]	valid_0's binary_logloss: 0.101064
[150]	valid_0's binary_logloss: 0.0843233
[200]	valid_0's binary_logloss: 0.0796482
[250]	valid_0's binary_logloss: 0.0788503
[300]	valid_0's binary_logloss: 0.07858
[350]	valid_0's binary_logloss: 0.0783528
Early stopping, best iteration is:
[332]	valid_0's binary_logloss: 0.0783528
Training until validation scores don't improve for 24 rounds
[50]	valid_0's binary_logloss: 0.148717
[100]	valid_0's binary_logloss: 0.100591
[1

[50]	valid_0's binary_logloss: 0.145369
[100]	valid_0's binary_logloss: 0.0966722
[150]	valid_0's binary_logloss: 0.0786343
[200]	valid_0's binary_logloss: 0.0732854
[250]	valid_0's binary_logloss: 0.0718217
Early stopping, best iteration is:
[249]	valid_0's binary_logloss: 0.0718217
Training until validation scores don't improve for 24 rounds
[50]	valid_0's binary_logloss: 0.149495
[100]	valid_0's binary_logloss: 0.102351
[150]	valid_0's binary_logloss: 0.0851989
[200]	valid_0's binary_logloss: 0.0802743
[250]	valid_0's binary_logloss: 0.07926
[300]	valid_0's binary_logloss: 0.0789101
Early stopping, best iteration is:
[312]	valid_0's binary_logloss: 0.0788214
Training until validation scores don't improve for 24 rounds
[50]	valid_0's binary_logloss: 0.147956
[100]	valid_0's binary_logloss: 0.0996992
[150]	valid_0's binary_logloss: 0.0829773
[200]	valid_0's binary_logloss: 0.0780276
[250]	valid_0's binary_logloss: 0.0771097
[300]	valid_0's binary_logloss: 0.0767909
Early stopping, bes

[32m[I 2021-12-25 23:21:18,464][0m Trial 9 finished with value: 91.76796362840845 and parameters: {'n_estimators': 851, 'learning_rate': 0.029841058300664556, 'num_leaves': 2903, 'max_depth': 45, 'min_data_in_leaf': 7200, 'max_bin': 232, 'lambda_l1': 85, 'lambda_l2': 40, 'min_gain_to_split': 11.986863146721895, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.8, 'stopping_rounds': 24}. Best is trial 8 with value: 92.48844494674942.[0m


[250]	valid_0's binary_logloss: 0.0763002
Early stopping, best iteration is:
[255]	valid_0's binary_logloss: 0.0762609


In [21]:
study.best_trial.params

{'n_estimators': 339,
 'learning_rate': 0.04682638928330207,
 'num_leaves': 2185,
 'max_depth': 23,
 'min_data_in_leaf': 500,
 'max_bin': 287,
 'lambda_l1': 95,
 'lambda_l2': 80,
 'min_gain_to_split': 5.351756498903151,
 'bagging_fraction': 0.30000000000000004,
 'bagging_freq': 1,
 'feature_fraction': 0.5,
 'stopping_rounds': 78}

In [22]:
study.best_value

92.48844494674942

In [23]:
test_preds= pd.DataFrame(test_preds).mode(axis= 1)[0]

In [24]:
sub= pd.DataFrame({'customer_id': test_ids, 'credit_card_default': test_preds})
sub.to_csv('submission_lgbm.csv', index= False)