In [None]:
# Core numeric / tabular stack.
import numpy as np
# NOTE(review): private NumPy helper that is not referenced in the cells visible here —
# looks like an accidental auto-import; confirm before removing.
from numpy.core.fromnumeric import _all_dispatcher
import pandas as pd
# NOTE(review): joblib is not used in the cells visible here — presumably kept for
# persisting results elsewhere; confirm.
import joblib
# Seed NumPy's legacy global RNG.
np.random.seed(2021)
import warnings
# Silence every warning from here on (this also hides deprecation notices from the libraries).
warnings.filterwarnings('ignore')

In [None]:
# Read the gzip-compressed training CSV; column names are inferred from the file's header.
df = pd.read_csv(
    "train_modified.gz",
    header='infer',
    compression='gzip',
)
# The `click` column of the full (not yet resampled) frame.
Y = df['click']

In [None]:
# input your bootstrap random_state
# It has to be an explicit value: `_` is only IPython's "last cell output"
# variable (and is undefined outside IPython), so using it as a seed is neither
# valid nor reproducible under "Restart & Run All".
BOOTSTRAP_RANDOM_STATE = 2021  # replace with your own bootstrap seed

# Bootstrap resample: draw len(df) rows from df with replacement.
df_bootstrapped_lgb = df.sample(
    n=len(df),
    replace=True,
    random_state=BOOTSTRAP_RANDOM_STATE,
)
# `click` values aligned row-for-row with the bootstrapped frame.
Y_lgb = df_bootstrapped_lgb['click']

In [None]:
# Columns of the bootstrapped frame that are re-typed to `object`.
column_list = [
    'C1', 'banner_pos', 'device_type', 'device_conn_type',
    'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21',
]
# Cast each listed column to object dtype and write the result back into the frame.
df_bootstrapped_lgb[column_list] = (
    df_bootstrapped_lgb
    .loc[:, column_list]
    .astype(dict.fromkeys(column_list, 'object'))
)

In [None]:
def convert_obj_to_int(self):
    """Replace every object-dtype column of a DataFrame with an integer hash column.

    For each column whose dtype is ``object``, a new column named
    ``<column>_int`` is appended holding one integer per value, and the
    original column is dropped. Columns of any other dtype are left untouched.

    String values are hashed with an unsalted 64-bit BLAKE2b digest instead of
    the built-in ``hash``: Python salts ``str`` hashes per process, so the
    built-in would produce different codes after every kernel restart.
    Non-string values still go through the built-in ``hash``.

    Parameters
    ----------
    self : pandas.DataFrame
        Frame to convert. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``self`` object, after modification.
    """
    import hashlib  # function-scope import so the cell needs no extra top-level import

    def _stable_hash(value):
        # Deterministic signed 64-bit code for strings; built-in hash otherwise.
        if isinstance(value, str):
            digest = hashlib.blake2b(
                value.encode('utf-8', errors='surrogatepass'), digest_size=8
            ).digest()
            return int.from_bytes(digest, 'big', signed=True)
        return hash(value)

    new_col_suffix = '_int'
    # Snapshot the object columns up front because the frame is mutated in the loop.
    # Iterating (label, dtype) pairs avoids positional integer lookups on the
    # label-indexed dtypes Series, which pandas has deprecated.
    object_columns = [name for name, dtype in self.dtypes.items() if dtype == object]
    for name in object_columns:
        self[name + new_col_suffix] = self[name].map(_stable_hash)
        self.drop([name], inplace=True, axis=1)
    return self

# Hash-encode the object columns of the bootstrapped frame. The helper mutates the
# frame it receives and returns that same frame, so the rebinding keeps the same object.
df_bootstrapped_lgb = convert_obj_to_int(df_bootstrapped_lgb)

In [None]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.metrics import make_scorer, auc, log_loss, roc_auc_score, get_scorer

import lightgbm as lgb

# Base estimator whose hyper-parameters are tuned below.
lgb_model = lgb.LGBMClassifier(boosting_type='gbdt', objective='binary', metric='binary_logloss')

# Search space handed to BayesSearchCV: one skopt dimension per LightGBM parameter.
param = {
    'max_depth': Integer(3, 10),
    'learning_rate': Real(0.01, 0.3),
    'feature_fraction': Real(0.2, 0.9, 'uniform'),
    'bagging_fraction': Real(0.2, 0.9),
    'max_bin': Integer(20, 255, 'uniform'),
    'n_estimators': Integer(100, 1000, 'uniform'),
    'num_leaves': Integer(24, 80, 'uniform'),
    'min_sum_hessian_in_leaf':Integer(0,100, 'uniform'),
    'min_data_in_leaf': Integer(20, 100, 'uniform'),
    'min_split_gain': Real(0.001, 0.1),
    'lambda_l1': Real(1e-8, 10.0),
    'lambda_l2': Real(1e-8, 10.0),
    'bagging_freq': Integer(1,7, 'uniform')
}

# Negated log-loss computed on predicted probabilities (higher is better).
# The built-in scorer is used instead of make_scorer(..., needs_proba=True)
# because the `needs_proba` keyword is deprecated and later removed in newer
# scikit-learn releases; the built-in scorer computes the same quantity.
LogLoss = get_scorer('neg_log_loss')

# input your random_state
# It has to be an explicit value: `_` is only IPython's "last cell output"
# variable, so it is neither a valid nor a reproducible seed.
SEARCH_RANDOM_STATE = 2021  # replace with your own search seed

# Feature matrix: the bootstrapped frame still carries the target column
# `click`; fitting on it would leak the label into the features and make the
# cross-validated log-loss meaningless. `errors='ignore'` keeps this cell
# working if the column was already removed upstream.
X_lgb = df_bootstrapped_lgb.drop(columns=['click'], errors='ignore')

opt_l = BayesSearchCV(
    lgb_model,
    param,
    scoring=LogLoss,
    n_iter=32,
    cv=5,
    random_state=SEARCH_RANDOM_STATE,
)

opt_l.fit(X_lgb, Y_lgb)

In [None]:
# Display the best hyper-parameter combination found by the search.
opt_l.best_params_