In [1]:
import numpy as np
# NOTE(review): `_all_dispatcher` is a private NumPy helper that no visible cell
# references; it looks like an accidental editor auto-import -- confirm and remove.
from numpy.core.fromnumeric import _all_dispatcher
import pandas as pd
import joblib
# Seed NumPy's legacy global RNG. The visible cells that sample or search pass
# their own explicit random_state, so this only affects code that draws from
# the global NumPy generator.
np.random.seed(2021)
import warnings
# Silence every Python-level warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by pandas/scikit-learn.
warnings.filterwarnings('ignore')

In [None]:
# Read the gzip-compressed training CSV; column names are inferred from the file.
df = pd.read_csv("train_modified.gz", compression='gzip', header='infer')
# Labels taken from the 'click' column of the full, non-resampled frame.
Y = df['click']

In [None]:
# Bootstrap resample: draw len(df) rows from df with replacement.
# Change random_state to obtain a different (but still reproducible) resample.
df_bootstrapped_lgb = df.sample(n=len(df), replace=True, random_state=70)
# Labels aligned row-for-row with the resampled frame.
# NOTE(review): 'click' is read here but not removed, so it is still a column
# of df_bootstrapped_lgb after this cell.
Y_lgb = df_bootstrapped_lgb['click']

In [None]:
# Columns to re-type as 'object'. convert_obj_to_int only rewrites object-dtype
# columns, so this cast is what makes it pick these columns up.
# NOTE(review): presumably these are integer-coded categoricals in the CSV -- confirm.
column_list = ['C1', 'banner_pos', 'device_type', 'device_conn_type', 'C14', 'C15', 'C16', 'C17', 'C18', 
               'C19', 'C20', 'C21']
# Replace the listed columns with object-dtype copies of themselves.
df_bootstrapped_lgb[column_list] = df_bootstrapped_lgb[column_list].astype('object')

In [None]:
def _stable_hash(value):
    """Return an integer code for ``value`` that is the same in every Python process.

    The built-in ``hash()`` of ``str``/``bytes`` is salted per interpreter
    process (see PYTHONHASHSEED), so using it as a feature encoding yields
    different numbers after every kernel restart. Text is therefore hashed
    with BLAKE2b truncated to a signed 64-bit integer. Every other value keeps
    the built-in ``hash()``, so integer codes still map to themselves.
    """
    import hashlib  # local import: the notebook's import cell does not provide it

    if isinstance(value, str):
        value = value.encode('utf-8')
    if isinstance(value, bytes):
        digest = hashlib.blake2b(value, digest_size=8).digest()
        # signed=True keeps the result inside the int64 range pandas stores.
        return int.from_bytes(digest, 'big', signed=True)
    return hash(value)


def convert_obj_to_int(self):
    """Replace every object-dtype column of a DataFrame with an integer-hashed copy.

    For each column whose dtype is ``object``, a new column named
    ``<original name>_int`` is appended holding a process-independent integer
    hash of each value, and the original column is dropped.

    Parameters
    ----------
    self : pandas.DataFrame
        Frame to convert. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same frame object that was passed in.
    """
    new_col_suffix = '_int'
    # Snapshot the (label, dtype) pairs first: the frame gains and loses columns
    # inside the loop. Iterating by label also avoids positional indexing of the
    # label-indexed dtypes Series, which pandas has deprecated.
    for column, dtype in list(self.dtypes.items()):
        if dtype == object:
            self[f'{column}{new_col_suffix}'] = self[column].map(_stable_hash)
            self.drop([column], inplace=True, axis=1)
    return self

# convert_obj_to_int edits the frame in place and returns that same frame, so
# this rebinding leaves df_bootstrapped_lgb pointing at the same object.
df_bootstrapped_lgb = convert_obj_to_int(df_bootstrapped_lgb)

In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import log_loss
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.metrics import make_scorer
from sklearn.metrics import get_scorer

# Baseline estimator. Every hyper-parameter given here is also a key of `param`,
# so these values are only starting defaults that the search overrides.
xgb_model = XGBClassifier(max_depth=3, learning_rate=0.1, alpha=0, colsample_bytree=0.5,
                          subsample=0.1, n_estimators=100, gamma=0)

# Search space explored by BayesSearchCV.
# 'iterations' was removed: it is not an XGBoost parameter (the number of
# boosting rounds is 'n_estimators'), so it only added a dimension the model ignores.
param = {
    'max_depth': Integer(3, 8, 'uniform'),
    # log-uniform: the exponent is sampled uniformly, so each order of magnitude
    # between the bounds is equally likely.
    'learning_rate': Real(0.01, 0.3, 'log-uniform'),
    'alpha': Real(0, 10.0, 'uniform'),
    'colsample_bytree': Real(0.5, 1.0, 'uniform'),
    'subsample': Real(0.1, 1.0, 'uniform'),
    'n_estimators': Integer(100, 300, 'uniform'),
    'gamma': Real(0, 10.0, 'uniform')
}

# Negated log-loss on predicted probabilities (higher is better for the search).
# The built-in scorer replaces make_scorer(..., needs_proba=True), whose
# `needs_proba` argument is deprecated and later removed in scikit-learn.
LogLoss = get_scorer('neg_log_loss')

opt_x = BayesSearchCV(
    xgb_model,
    param,
    scoring=LogLoss,
    n_iter=40,
    cv=5,
    random_state=70
)

# The label column must not be a feature: fitting on a frame that still holds
# 'click' lets the model read the answer directly. errors='ignore' keeps this
# safe if the column has already been removed.
X_lgb = df_bootstrapped_lgb.drop(columns=['click'], errors='ignore')

# Run the Bayesian optimisation (40 candidate settings, 5-fold CV each).
# NOTE(review): the rows were drawn with replacement, so identical rows can fall
# in both the train and validation folds; CV scores may be optimistic.
opt_x.fit(X_lgb, Y_lgb)