In [1]:
import datatable as dt
import gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import seaborn as sns

import lightgbm as lgb
from fastprogress import progress_bar
from collections import defaultdict
from sklearn.model_selection import GroupKFold

#import riiideducation

In [2]:
data_path = '../input/riiid-test-answer-prediction/train.csv'
questions_path = '../input/riiid-test-answer-prediction/questions.csv'
data_types_dict = {
    'user_id': 'int32',
    'content_id': 'int16',
    'answered_correctly': 'int8',
    'prior_question_elapsed_time': 'float32',
    'prior_question_had_explanation': 'bool'
}
target = 'answered_correctly'

In [3]:
train_df = dt.fread(data_path, columns=set(data_types_dict.keys())).to_pandas()
questions_df = pd.read_csv(
    questions_path,
    usecols=[0, 3],
    dtype={'question_id': 'int16', 'part': 'int8'}
)

In [4]:
# preprocess
train_df = train_df[train_df[target] != -1].reset_index(drop=True)
train_df['prior_question_had_explanation'].fillna(False, inplace=True)
train_df = train_df.astype(data_types_dict)

train_df['lag'] = train_df.groupby('user_id')[target].shift()
cum = train_df.groupby('user_id')['lag'].agg(['cumsum', 'cumcount'])
train_df['user_correctness'] = cum['cumsum'] / cum['cumcount']
train_df.drop(columns=['lag'], inplace=True)

user_agg = train_df.groupby('user_id')[target].agg(['sum', 'count'])
content_agg = train_df.groupby('content_id')[target].agg(['sum', 'count'])

train_df = train_df.groupby('user_id').tail(30).reset_index(drop=True)

train_df = pd.merge(train_df, 
                    questions_df,
                    left_on='content_id', 
                    right_on='question_id', 
                    how='left')
train_df.drop(columns=['question_id'], inplace=True)

train_df['content_count'] = train_df['content_id'].map(content_agg['count']).astype('int32')
train_df['content_id'] = train_df['content_id'].map(content_agg['sum'] / content_agg['count'])

train_df

Unnamed: 0,user_id,content_id,answered_correctly,prior_question_elapsed_time,prior_question_had_explanation,user_correctness,part,content_count
0,115,0.784906,1,21000.0,False,0.812500,1,21307
1,115,0.730689,0,20000.0,False,0.823529,1,31057
2,115,0.766426,1,18000.0,False,0.777778,1,22708
3,115,0.727708,1,17000.0,False,0.789474,1,36314
4,115,0.613215,0,29000.0,False,0.800000,1,31736
...,...,...,...,...,...,...,...,...
10746450,2147482888,0.741063,1,18000.0,True,0.500000,5,4364
10746451,2147482888,0.527789,1,14000.0,True,0.521739,5,10220
10746452,2147482888,0.616202,1,14000.0,True,0.541667,5,31415
10746453,2147482888,0.661683,0,22000.0,True,0.560000,5,5752


In [5]:
gkf = GroupKFold(5)
for n, fold_idx in enumerate(gkf.split(train_df, groups=train_df.user_id)):
    print(n, fold_idx)

0 (array([       0,        1,        2, ..., 10746452, 10746453, 10746454]), array([     109,      110,      111, ..., 10746335, 10746336, 10746337]))
1 (array([       0,        1,        2, ..., 10746452, 10746453, 10746454]), array([     156,      157,      158, ..., 10746305, 10746306, 10746307]))
2 (array([       0,        1,        2, ..., 10746452, 10746453, 10746454]), array([     139,      140,      141, ..., 10746365, 10746366, 10746367]))
3 (array([       0,        1,        2, ..., 10746452, 10746453, 10746454]), array([      30,       31,       32, ..., 10746041, 10746042, 10746043]))
4 (array([      30,       31,       32, ..., 10746365, 10746366, 10746367]), array([       0,        1,        2, ..., 10746452, 10746453, 10746454]))


In [6]:
features = [
    'content_id', 
    'prior_question_elapsed_time',
    'prior_question_had_explanation', 
    'user_correctness', 
    'part',
    'content_count'
]

trn_idx, val_idx = fold_idx

trn_data = (train_df.loc[trn_idx][features], train_df.loc[trn_idx][target])
val_data = (train_df.loc[val_idx][features], train_df.loc[val_idx][target])

# 1.

In [None]:
# 1.
params = {
    'objective': 'binary',
    'num_rounds': 1000,
    'seed': 42,
    'metric': 'auc',
    'learning_rate': 0.1,
    'force_row_wise': True,
    
    'max_depth': 15,
    'num_leaves': 800,
}

search_cv = []
search_param = {
    'max_depth': [-1],
    'num_leaves': [5, 100, 200, 400, 1000],
}
best = 0.

p0, p1 = search_param.items()

for i in progress_bar(p0[1]):
    for j in progress_bar(p1[1]):
        params[p0[0]] = i
        params[p1[0]] = j
        
        model = lgb.LGBMClassifier(**params)
        model.fit(
            *trn_data,
            eval_set=[trn_data, val_data], 
            verbose=50,
            early_stopping_rounds=50,
        )
        score = model.evals_result_['valid_1']['auc'][0]
        search_cv.append([{p0[0]: i}, {p1[0]: j}, score])
        if score > best:
            best = score
            best_param = [{p0[0]: i}, {p1[0]: j}, score]

# 2.

In [14]:
# 2.
params = {
    'objective': 'binary',
    'num_rounds': 1000,
    'seed': 42,
    'metric': 'auc',
    'learning_rate': 0.1,
    'force_row_wise': True,
    
    'max_depth': -1,
    'num_leaves': 1000,

}

search_cv = []
search_param = {
    'min_child_samples': [10, 50, 100, 150, 800],
    'min_child_weight': [0.001, 0.005],
}
best = 0.

p0, p1 = search_param.items()

for i in progress_bar(p0[1]):
    for j in progress_bar(p1[1]):
        params[p0[0]] = i
        params[p1[0]] = j
        
        model = lgb.LGBMClassifier(**params)
        model.fit(
            *trn_data,
            eval_set=[trn_data, val_data], 
            verbose=50,
            early_stopping_rounds=50,
        )
        score = model.evals_result_['valid_1']['auc'][0]
        search_cv.append([{p0[0]: i}, {p1[0]: j}, score])
        if score > best:
            best = score
            best_param = [{p0[0]: i}, {p1[0]: j}, score]
search_cv

Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.753014	valid_1's auc: 0.749961
[100]	training's auc: 0.755935	valid_1's auc: 0.750062
Early stopping, best iteration is:
[69]	training's auc: 0.754361	valid_1's auc: 0.750092
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.753014	valid_1's auc: 0.749961
[100]	training's auc: 0.755935	valid_1's auc: 0.750062
Early stopping, best iteration is:
[69]	training's auc: 0.754361	valid_1's auc: 0.750092


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.753014	valid_1's auc: 0.750085
[100]	training's auc: 0.755769	valid_1's auc: 0.750246
Early stopping, best iteration is:
[92]	training's auc: 0.755472	valid_1's auc: 0.750258
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.753014	valid_1's auc: 0.750085
[100]	training's auc: 0.755769	valid_1's auc: 0.750246
Early stopping, best iteration is:
[92]	training's auc: 0.755472	valid_1's auc: 0.750258


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752792	valid_1's auc: 0.75006
[100]	training's auc: 0.75536	valid_1's auc: 0.750222
Early stopping, best iteration is:
[92]	training's auc: 0.755024	valid_1's auc: 0.750236
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752792	valid_1's auc: 0.75006
[100]	training's auc: 0.75536	valid_1's auc: 0.750222
Early stopping, best iteration is:
[92]	training's auc: 0.755024	valid_1's auc: 0.750236


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752328	valid_1's auc: 0.749993
[100]	training's auc: 0.754625	valid_1's auc: 0.75032
[150]	training's auc: 0.756373	valid_1's auc: 0.750297
Early stopping, best iteration is:
[106]	training's auc: 0.754863	valid_1's auc: 0.750336
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752328	valid_1's auc: 0.749993
[100]	training's auc: 0.754625	valid_1's auc: 0.75032
[150]	training's auc: 0.756373	valid_1's auc: 0.750297
Early stopping, best iteration is:
[106]	training's auc: 0.754863	valid_1's auc: 0.750336


# 3.

In [19]:
# 2.
params = {
    'objective': 'binary',
    'num_rounds': 1000,
    'seed': 42,
    'metric': 'auc',
    'learning_rate': 0.1,
    'force_row_wise': True,
    
    'max_depth': -1,
    'num_leaves': 1000,
    
    'min_child_samples': 100,
    
    
}

search_cv = []
search_param = {
    'bagging_fraction': [0.5, 0.7, 0.9, 1],
    'bagging_freq': [0, 2, 4, 6],
}
best = 0.

p0, p1 = search_param.items()

for i in progress_bar(p0[1]):
    for j in progress_bar(p1[1]):
        params[p0[0]] = i
        params[p1[0]] = j
        
        model = lgb.LGBMClassifier(**params)
        model.fit(
            *trn_data,
            eval_set=[trn_data, val_data], 
            verbose=50,
            early_stopping_rounds=50,
        )
        score = model.evals_result_['valid_1']['auc'][0]
        search_cv.append([{p0[0]: i}, {p1[0]: j}, score])
        if score > best:
            best = score
            best_param = [{p0[0]: i}, {p1[0]: j}, score]
search_cv

Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752751	valid_1's auc: 0.749854
[100]	training's auc: 0.755544	valid_1's auc: 0.749927
Early stopping, best iteration is:
[69]	training's auc: 0.753935	valid_1's auc: 0.74997
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752668	valid_1's auc: 0.749747
[100]	training's auc: 0.755413	valid_1's auc: 0.749822
Early stopping, best iteration is:
[76]	training's auc: 0.754278	valid_1's auc: 0.749867
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752641	valid_1's auc: 0.749774
[100]	training's auc: 0.75526	valid_1's auc: 0.749824
Early stopping, best iteration is:
[75]	training's auc: 0

Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752818	valid_1's auc: 0.750001
[100]	training's auc: 0.755791	valid_1's auc: 0.750229
[150]	training's auc: 0.758133	valid_1's auc: 0.750082
Early stopping, best iteration is:
[100]	training's auc: 0.755791	valid_1's auc: 0.750229
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752768	valid_1's auc: 0.749957
[100]	training's auc: 0.755438	valid_1's auc: 0.750081
Early stopping, best iteration is:
[91]	training's auc: 0.755068	valid_1's auc: 0.750108
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752786	valid_1's auc: 0.749946
[100]	training's auc: 0.755575	valid_1's auc: 0.750101

Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752855	valid_1's auc: 0.750009
[100]	training's auc: 0.755528	valid_1's auc: 0.750174
Early stopping, best iteration is:
[95]	training's auc: 0.755289	valid_1's auc: 0.750188
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.75287	valid_1's auc: 0.750034
[100]	training's auc: 0.755487	valid_1's auc: 0.750198
[150]	training's auc: 0.757748	valid_1's auc: 0.750162
Early stopping, best iteration is:
[123]	training's auc: 0.756542	valid_1's auc: 0.750215
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752861	valid_1's auc: 0.750027
[100]	training's auc: 0.755573	valid_1's auc: 0.750203


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.75

[[{'bagging_fraction': 0.5}, {'bagging_freq': 0}, 0.74703312099168],
 [{'bagging_fraction': 0.5}, {'bagging_freq': 2}, 0.7466035384108708],
 [{'bagging_fraction': 0.5}, {'bagging_freq': 4}, 0.7466035384108708],
 [{'bagging_fraction': 0.5}, {'bagging_freq': 6}, 0.7466035384108708],
 [{'bagging_fraction': 0.7}, {'bagging_freq': 0}, 0.74703312099168],
 [{'bagging_fraction': 0.7}, {'bagging_freq': 2}, 0.7468983637933233],
 [{'bagging_fraction': 0.7}, {'bagging_freq': 4}, 0.7468983637933233],
 [{'bagging_fraction': 0.7}, {'bagging_freq': 6}, 0.7468983637933233],
 [{'bagging_fraction': 0.9}, {'bagging_freq': 0}, 0.74703312099168],
 [{'bagging_fraction': 0.9}, {'bagging_freq': 2}, 0.7470224813893984],
 [{'bagging_fraction': 0.9}, {'bagging_freq': 4}, 0.7470224813893984],
 [{'bagging_fraction': 0.9}, {'bagging_freq': 6}, 0.7470224813893984],
 [{'bagging_fraction': 1}, {'bagging_freq': 0}, 0.74703312099168],
 [{'bagging_fraction': 1}, {'bagging_freq': 2}, 0.74703312099168],
 [{'bagging_fraction

# 4.

In [20]:
# 2.
params = {
    'objective': 'binary',
    'num_rounds': 1000,
    'seed': 42,
    'metric': 'auc',
    'learning_rate': 0.1,
    'force_row_wise': True,
    
    'max_depth': -1,
    'num_leaves': 1000,
    
    'min_child_samples': 100,
    
    
}

search_cv = []
search_param = {
    'reg_alpha': np.arange(0, 2.1, 0.5),
    'reg_lambda': np.arange(0, 2.1, 0.5),
}
best = 0.

p0, p1 = search_param.items()

for i in progress_bar(p0[1]):
    for j in progress_bar(p1[1]):
        params[p0[0]] = i
        params[p1[0]] = j
        
        model = lgb.LGBMClassifier(**params)
        model.fit(
            *trn_data,
            eval_set=[trn_data, val_data], 
            verbose=50,
            early_stopping_rounds=50,
        )
        score = model.evals_result_['valid_1']['auc'][0]
        search_cv.append([{p0[0]: i}, {p1[0]: j}, score])
        if score > best:
            best = score
            best_param = [{p0[0]: i}, {p1[0]: j}, score]
search_cv



Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752892	valid_1's auc: 0.750056
[100]	training's auc: 0.755547	valid_1's auc: 0.750209
Early stopping, best iteration is:
[87]	training's auc: 0.754984	valid_1's auc: 0.750232
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752863	valid_1's auc: 0.750087
[100]	training's auc: 0.755392	valid_1's auc: 0.750261
Early stopping, best iteration is:
[83]	training's auc: 0.754664	valid_1's auc: 0.750271
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752858	valid_1's auc: 0.750075
[100]	training's auc: 0.75546	valid_1's auc: 0.750259
[150]	training's auc: 0.757481	valid_1's auc: 0.750219
E

Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752904	valid_1's auc: 0.750087
[100]	training's auc: 0.755484	valid_1's auc: 0.750279
Early stopping, best iteration is:
[99]	training's auc: 0.755438	valid_1's auc: 0.750283
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752904	valid_1's auc: 0.750119
[100]	training's auc: 0.755393	valid_1's auc: 0.750245
Early stopping, best iteration is:
[79]	training's auc: 0.754577	valid_1's auc: 0.75027
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752832	valid_1's auc: 0.75008
[100]	training's auc: 0.7554	valid_1's auc: 0.750245
[150]	training's auc: 0.75731	valid_1's auc: 0.750219
Early stopping, best iteration is:
[109]	training's auc: 0.755767	valid_1's auc: 0.750262
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752858	valid_1's auc: 0.750096
[100]	training's auc: 0.755519	valid_1's auc: 0.750263
[1

Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752805	valid_1's auc: 0.750053
[100]	training's auc: 0.755464	valid_1's auc: 0.750241
[150]	training's auc: 0.757405	valid_1's auc: 0.750227
Early stopping, best iteration is:
[128]	training's auc: 0.756576	valid_1's auc: 0.750268
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752811	valid_1's auc: 0.750072
[100]	training's auc: 0.75549	valid_1's auc: 0.750289
Early stopping, best iteration is:
[91]	training's auc: 0.755076	valid_1's auc: 0.7503
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752795	valid_1's auc: 0.750084
[100]	training's auc: 0.755389	valid_1's auc: 0.75028
[150]	training's auc: 0.757543	valid_1's auc: 0.750252
Early stopping, best iteration is:
[106]	training's auc: 0.755692	valid_1's auc: 0.750281
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752785	valid_1's auc: 0.750049


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752881	valid_1's auc: 0.750121
[100]	training's auc: 0.755506	valid_1's auc: 0.750268
Early stopping, best iteration is:
[81]	training's auc: 0.754672	valid_1's auc: 0.750279
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752765	valid_1's auc: 0.750066
[100]	training's auc: 0.755506	valid_1's auc: 0.750286
[150]	training's auc: 0.757625	valid_1's auc: 0.750252
Early stopping, best iteration is:
[133]	training's auc: 0.756934	valid_1's auc: 0.750296
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.75281	valid_1's auc: 0.750104
[100]	training's auc: 0.755485	valid_1's auc: 0.75029
[150]	training's auc: 0.757502	valid_1's auc: 0.750245
Early stopping, best iteration is:
[118]	training's auc: 0.756213	valid_1's auc: 0.750301
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752834	valid_1's auc: 0.7501


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752823	valid_1's auc: 0.750062
[100]	training's auc: 0.75547	valid_1's auc: 0.750292
[150]	training's auc: 0.757369	valid_1's auc: 0.750228
Early stopping, best iteration is:
[111]	training's auc: 0.755934	valid_1's auc: 0.7503
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752788	valid_1's auc: 0.750118
[100]	training's auc: 0.755377	valid_1's auc: 0.750344
[150]	training's auc: 0.7574	valid_1's auc: 0.750317
Early stopping, best iteration is:
[116]	training's auc: 0.756049	valid_1's auc: 0.750358
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752809	valid_1's auc: 0.750099
[100]	training's auc: 0.755403	valid_1's auc: 0.750319
Early stopping, best iteration is:
[89]	training's auc: 0.754967	valid_1's auc: 0.750324
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752764	valid_1's auc: 0.750096
[

[[{'reg_alpha': 0.0}, {'reg_lambda': 0.0}, 0.74703312099168],
 [{'reg_alpha': 0.0}, {'reg_lambda': 0.5}, 0.7469860602844426],
 [{'reg_alpha': 0.0}, {'reg_lambda': 1.0}, 0.746979167775397],
 [{'reg_alpha': 0.0}, {'reg_lambda': 1.5}, 0.7469837756478445],
 [{'reg_alpha': 0.0}, {'reg_lambda': 2.0}, 0.7469737702967091],
 [{'reg_alpha': 0.5}, {'reg_lambda': 0.0}, 0.7469767678271023],
 [{'reg_alpha': 0.5}, {'reg_lambda': 0.5}, 0.7469853239187755],
 [{'reg_alpha': 0.5}, {'reg_lambda': 1.0}, 0.7469673977084317],
 [{'reg_alpha': 0.5}, {'reg_lambda': 1.5}, 0.7469509249063483],
 [{'reg_alpha': 0.5}, {'reg_lambda': 2.0}, 0.7469520598380246],
 [{'reg_alpha': 1.0}, {'reg_lambda': 0.0}, 0.7469706571018007],
 [{'reg_alpha': 1.0}, {'reg_lambda': 0.5}, 0.7469567301536987],
 [{'reg_alpha': 1.0}, {'reg_lambda': 1.0}, 0.7469557305856592],
 [{'reg_alpha': 1.0}, {'reg_lambda': 1.5}, 0.7469561324107497],
 [{'reg_alpha': 1.0}, {'reg_lambda': 2.0}, 0.746957356607769],
 [{'reg_alpha': 1.5}, {'reg_lambda': 0.0}, 0

# 5.

In [23]:
# 2.
params = {
    'objective': 'binary',
    'num_rounds': 1000,
    'seed': 42,
    'metric': 'auc',
    'learning_rate': 0.1,
    'force_row_wise': True,
    
    'max_depth': -1,
    'num_leaves': 1000,
    
    'min_child_samples': 100,
      
}

search_cv = []
search_param = {
    'learning_rate': [1.5, 0.5, 0.1, 0.05, 0.01, 0.005],
    'max_depth':[-1],
}
best = 0.

p0, p1 = search_param.items()

for i in progress_bar(p0[1]):
    for j in progress_bar(p1[1]):
        params[p0[0]] = i
        params[p1[0]] = j
        
        model = lgb.LGBMClassifier(**params)
        model.fit(
            *trn_data,
            eval_set=[trn_data, val_data], 
            verbose=50,
            early_stopping_rounds=50,
        )
        score = model.evals_result_['valid_1']['auc'][0]
        search_cv.append([{p0[0]: i}, {p1[0]: j}, score])
        if score > best:
            best = score
            best_param = [{p0[0]: i}, {p1[0]: j}, score]
search_cv



Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.75706	valid_1's auc: 0.735243
Early stopping, best iteration is:
[1]	training's auc: 0.747795	valid_1's auc: 0.747033


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.758993	valid_1's auc: 0.748098
Early stopping, best iteration is:
[10]	training's auc: 0.752384	valid_1's auc: 0.749445


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.752884	valid_1's auc: 0.750062
[100]	training's auc: 0.755495	valid_1's auc: 0.75028
Early stopping, best iteration is:
[95]	training's auc: 0.755374	valid_1's auc: 0.75029


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.750837	valid_1's auc: 0.749346
[100]	training's auc: 0.75292	valid_1's auc: 0.750134
[150]	training's auc: 0.75443	valid_1's auc: 0.750303
[200]	training's auc: 0.755632	valid_1's auc: 0.75035
[250]	training's auc: 0.756651	valid_1's auc: 0.750328
Early stopping, best iteration is:
[206]	training's auc: 0.755756	valid_1's auc: 0.750352


Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.749033	valid_1's auc: 0.748184
[100]	training's auc: 0.749528	valid_1's auc: 0.748554
[150]	training's auc: 0.749974	valid_1's auc: 0.74885
[200]	training's auc: 0.750401	valid_1's auc: 0.749104
[250]	training's auc: 0.750846	valid_1's auc: 0.749355
[300]	training's auc: 0.751284	valid_1's auc: 0.749566
[350]	training's auc: 0.751711	valid_1's auc: 0.749752
[400]	training's auc: 0.752131	valid_1's auc: 0.749916
[450]	training's auc: 0.752524	valid_1's auc: 0.750044
[500]	training's auc: 0.75292	valid_1's auc: 0.750152
[550]	training's auc: 0.753264	valid_1's auc: 0.75022
[600]	training's auc: 0.753603	valid_1's auc: 0.750275
[650]	training's auc: 0.753921	valid_1's auc: 0.750314
[700]	training's auc: 0.75419	valid_1's auc: 0.750334
[750]	training's auc: 0.754454	valid_1's auc: 0.750349
[800]	training's auc: 0.754704	valid_1's auc: 0.75036
[850]	training's auc: 0.75496	valid_1's auc: 0.750368
[900]	train

Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.748653	valid_1's auc: 0.747865
[100]	training's auc: 0.749032	valid_1's auc: 0.748184
[150]	training's auc: 0.749295	valid_1's auc: 0.74839
[200]	training's auc: 0.749529	valid_1's auc: 0.748552
[250]	training's auc: 0.749752	valid_1's auc: 0.748705
[300]	training's auc: 0.749973	valid_1's auc: 0.748853
[350]	training's auc: 0.750186	valid_1's auc: 0.748982
[400]	training's auc: 0.750398	valid_1's auc: 0.749101
[450]	training's auc: 0.750623	valid_1's auc: 0.74923
[500]	training's auc: 0.750844	valid_1's auc: 0.74935
[550]	training's auc: 0.751057	valid_1's auc: 0.749458
[600]	training's auc: 0.751284	valid_1's auc: 0.749566
[650]	training's auc: 0.751506	valid_1's auc: 0.74967
[700]	training's auc: 0.751719	valid_1's auc: 0.749758
[750]	training's auc: 0.751913	valid_1's auc: 0.749826
[800]	training's auc: 0.752116	valid_1's auc: 0.749897
[850]	training's auc: 0.752319	valid_1's auc: 0.74997
[900]	trai

[[{'learning_rate': 1.5}, {'max_depth': -1}, 0.74703312099168],
 [{'learning_rate': 0.5}, {'max_depth': -1}, 0.74703312099168],
 [{'learning_rate': 0.1}, {'max_depth': -1}, 0.74703312099168],
 [{'learning_rate': 0.05}, {'max_depth': -1}, 0.74703312099168],
 [{'learning_rate': 0.01}, {'max_depth': -1}, 0.74703312099168],
 [{'learning_rate': 0.005}, {'max_depth': -1}, 0.74703312099168]]

In [None]:
#lgb.plot_importance(model, importance_type='gain')

In [64]:
score = model.evals_result_['valid_1']['auc'][0]
search_cv.append([{p0[0]: i}, {p1[0]: j}, score])
if score > best:
    best = score
    best_param = [{p0[0]: i}, {p1[0]: j}, score]

0.7467622224838212

In [None]:
#save_path = f'lgb_fold{n}.txt'
#model.save_model(save_path)

In [None]:
#a = lgb.Booster(model_file=save_path)

In [None]:
#a.predict(train_df.loc[val_idx][features])

In [None]:
#model.predict(train_df.loc[val_idx][features])

# inference