In [60]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler, LabelEncoder
from sklearn.compose import make_column_transformer
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from lightgbm.sklearn import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier

from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

from sklearn.feature_selection import RFE

In [61]:
DIR = "../../data/"
SMOOTHIE = "Smoothie King/"

smoothie_demographic = pd.read_csv(DIR + SMOOTHIE + "processed_demographic.csv")
smoothie_stores = pd.read_csv(DIR + SMOOTHIE + "smoothie_king_stores.csv")
smoothie_poi_variables = pd.read_csv(DIR + SMOOTHIE + "processed_poi.csv")
# smoothie_sister = pd.read_csv(DIR + SMOOTHIE + "competition_sister_variables.csv")
smoothie_trade_area = pd.read_csv(DIR + SMOOTHIE + "processed_trade_area.csv").rename(columns={"store_num": "store"})

In [62]:
smoothie_merged = smoothie_stores.merge(
    smoothie_demographic, on="store", how="outer"
).merge(
    smoothie_poi_variables, on="store", how="outer"
).merge(
    smoothie_trade_area, on="store", how="outer"
)
smoothie_merged

Unnamed: 0,store,longitude,latitude,category,cbsa_name,dma_name,state_name,market_size,store_density,age0018_p_10mi,...,popgrfy_ta,popgrpy_ta,poverty_inpoverty_p_ta,spend_breakfastbrunch_ta,spend_dinner_ta,spend_foodbev_ta,spend_lunch_ta,wealth_hhavg_ta,wealth_hhtotal_ta,white_p_ta
0,SK 1504,-97.650392,30.519353,SHOPPING,"Austin-Round Rock, TX","Austin, TX",Texas,Large Metro (2),Light Suburban,0.2805,...,8.3789,3.9235,0.0611,7069439.0,40790484.0,230383651.0,23166216.0,240573.0,25223.0,0.4897
1,SK 0057,-88.171150,30.672501,SHOPPING,"Mobile, AL","Mobile et al, AL-FL",Alabama,Medium City (4),Light Suburban,0.2264,...,0.6017,0.3932,0.1830,4724526.0,25460067.0,160135521.0,14653701.0,217054.0,22216.0,0.5129
2,SK 1415,-90.535722,38.784250,HOME,"St. Louis, MO-IL","St. Louis, MO",Missouri,Very Large Metro (1),Light Suburban,0.2129,...,2.5003,0.7142,0.0639,4501211.0,24794631.0,151609187.0,14129014.0,245860.0,19907.0,0.8459
3,SK 1231,-80.134700,26.100737,TRAVEL,"Miami-Fort Lauderdale et al, FL","Miami-Ft. Lauderdale, FL",Florida,Very Large Metro (1),Suburban,0.2174,...,7.6482,12.7188,0.0831,4038906.0,23214366.0,146417939.0,12859709.0,255812.0,22124.0,0.7020
4,SK 1535,-96.856651,32.996408,WORK,"Dallas-Fort Worth-Arlington, TX","Dallas-Ft. Worth, TX",Texas,Very Large Metro (1),Light Suburban,0.2620,...,2.1871,-5.7794,0.0800,5919218.0,32751951.0,207616741.0,18696840.0,216763.0,29251.0,0.4593
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
791,SK 1536,-96.872596,32.647809,HOME,"Dallas-Fort Worth-Arlington, TX","Dallas-Ft. Worth, TX",Texas,Very Large Metro (1),Exurban,0.2664,...,2.0540,-0.2994,0.1833,7614977.0,40037412.0,264008408.0,23332500.0,199437.0,29430.0,0.1490
792,SK 1886,-105.077634,40.564695,OTHER,"Fort Collins, CO","Denver, CO",Colorado,Medium City (4),Light Suburban,0.2224,...,4.7691,2.3442,0.2202,5118356.0,28458285.0,197532602.0,16637181.0,216659.0,25712.0,0.7874
793,SK 0162,-95.478001,30.316531,SHOPPING,"Houston-The Woodlands et al, TX","Houston, TX",Texas,Very Large Metro (1),Exurban,0.2412,...,9.9822,8.1691,0.1202,4677280.0,25445615.0,161088758.0,14627532.0,216855.0,19407.0,0.5041
794,SK 1449,-78.968258,35.064994,SHOPPING,"Fayetteville, NC","Raleigh et al, NC",North Carolina,Medium City (4),Exurban,0.2614,...,-0.3816,-1.0761,0.1809,10318478.0,54577513.0,349542567.0,31683602.0,208448.0,41681.0,0.3933


In [63]:
train_df, test_df = train_test_split(smoothie_merged, test_size=0.1, random_state=42)
X_train = train_df.drop(columns=["category"])
y_train = train_df["category"]
X_test = test_df.drop(columns=["category"])
y_test = test_df["category"]

In [64]:
drop_features = ['store',
 'longitude',
 'latitude',
 'cbsa_name',
 'dma_name',
 'state_name',
 ]

ordinal_features_oth = [
    "market_size",
    "store_density",
]
ordering_ordinal_oth = [
    ["Very Large Metro (1)", "Large Metro (2)", "Large City (3)", "Medium City (4)", "Small City (5)", "Small Town (6)"],
    ["Rural", "Exurban", "Suburban", "Light Suburban", "Light Urban", "Urban", "Super Urban"],
]
numeric_features = list(set(smoothie_merged.select_dtypes(include=np.number).columns.tolist()) - {"longitude", "latitude"})

In [65]:
numeric_transformer = make_pipeline(
    SimpleImputer(strategy="median"), 
    StandardScaler()
)

ordinal_transformer_oth = make_pipeline(
    SimpleImputer(strategy="most_frequent"),
    OrdinalEncoder(categories=ordering_ordinal_oth),
)

preprocessor = make_column_transformer(
    ("drop", drop_features),
    (numeric_transformer, numeric_features),
    (ordinal_transformer_oth, ordinal_features_oth),
)

In [66]:
preprocessor.fit(X_train)

In [67]:
column_names = (
    numeric_features
    + preprocessor.named_transformers_['pipeline-2']['ordinalencoder'].get_feature_names_out().tolist()
)
len(column_names)

920

In [86]:
transformed_X_train = preprocessor.transform(X_train)

In [89]:
transformed_X_train_df = pd.DataFrame(transformed_X_train, columns=column_names)
transformed_X_train_df

Unnamed: 0,spend_breakfastbrunch_ta,emp_accommodation_foodserv_p_3mi,hh_7pers_p_ta,hh_type_male_nochild_p_ta,hh_1vehicle_p_5mi,age0018_p_1mi,hh_type_1pers_p_10mi,hu_ownerocc_1mi,military_installations_2mi,medsalcy_3mi,...,emp_manfacturing_p_3mi,spend_lunch_3mi,inrix_dinner_ns,dtpop_students_9th_12th_p_1mi,hh_4vehicle_p_5mi,hh_type_male_child_p_1mi,emp_retail_trade_p_5mi,hh_type_fam_p_10mi,x0,x1
0,3.006608,-0.578451,-0.255387,-1.411746,-2.293129,-0.445402,-1.025068,-0.285635,-0.12555,0.479451,...,0.366232,-0.101906,0.488229,2.459765,1.252747,1.189833,-0.180202,0.623062,1.0,0.0
1,1.082548,-0.527091,-0.993526,-0.277228,0.427386,-0.169020,0.926991,1.572752,-0.12555,1.109466,...,-0.013909,0.699866,1.207545,-0.411251,-0.563990,-0.350107,-0.694044,-0.642321,0.0,3.0
2,-0.690315,-0.116207,1.053132,-1.075928,-1.771367,-0.042033,-2.235070,0.105572,-0.12555,0.197425,...,-0.287299,-0.132011,1.333107,0.999073,0.851812,0.416935,-1.089913,2.168023,0.0,1.0
3,0.045323,-0.013486,0.516304,0.811909,-1.020539,0.874261,-0.935003,0.332584,-0.12555,1.169469,...,0.353214,0.031376,-0.865277,-0.814200,1.046014,1.154701,-0.796288,0.979815,0.0,3.0
4,1.011306,-0.146167,-0.344859,0.258264,-1.070029,-1.184911,-0.696135,-0.336635,-0.12555,0.302638,...,-0.594537,0.007517,-0.165114,-0.411251,1.183836,-0.309120,-0.961452,0.735184,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711,-0.973485,0.003634,-1.317860,-1.257451,1.277194,0.702456,1.056214,1.756228,-0.12555,0.746686,...,-0.521633,0.527092,-0.354165,-1.116412,-1.453564,-0.976622,-1.441213,-1.067513,0.0,2.0
712,0.224869,0.495839,-0.937607,0.212884,0.259121,-1.003146,0.578479,-0.512025,-0.12555,-1.196983,...,-0.607556,-0.385537,-0.902875,-0.663094,0.275468,-0.437936,1.128000,-0.206936,3.0,1.0
713,0.266487,-0.069126,1.008396,0.076742,-0.880554,1.616260,-1.959002,0.103706,-0.12555,0.805167,...,-0.081606,-0.016699,-0.956788,0.847967,0.306791,0.434501,0.278586,1.956883,0.0,1.0
714,-1.391657,-0.920854,-0.031709,-0.141086,1.183871,0.500771,0.453171,-0.129525,-0.12555,-0.154384,...,-1.091846,-0.409191,-0.480791,-0.461619,-0.620371,0.171014,-1.328482,-0.205480,2.0,1.0


In [90]:
import optuna
from optuna.integration import LightGBMPruningCallback

In [101]:
best_booster = None
lgbm = None

In [102]:
def objective(trial, X, y):
    global lgbm
    param_grid = {
        #         "device_type": trial.suggest_categorical("device_type", ['gpu']),
        "n_estimators": trial.suggest_categorical("n_estimators", [200]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 20, 400, step=20),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 500, step=100),
        "max_bin": trial.suggest_int("max_bin", 200, 300),
        "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        "bagging_fraction": trial.suggest_float(
            "bagging_fraction", 0.2, 0.95, step=0.1
        ),
        "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
        "feature_fraction": trial.suggest_float(
            "feature_fraction", 0.2, 0.95, step=0.1
        ),
    }
    
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

    cv_scores = np.empty(5)
    for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):
        # print(idx)
        # print(train_idx)
        # print(test_idx)
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        # print('X_train shape: ', X_train.shape)
        # print('X_test shape: ', X_test.shape)
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
        # print('y_train shape: ', y_train.shape)
        # print('y_test shape: ', y_test.shape)

        lgbm = LGBMClassifier(objective="multiclass", **param_grid)
        lgbm.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            eval_metric="multi_logloss",
            early_stopping_rounds=100,
            callbacks=[
                LightGBMPruningCallback(trial, "multi_logloss")
            ],  # Add a pruning callback
        )
        preds = lgbm.predict_proba(X_test)
        cv_scores[idx] = log_loss(y_test, preds)

    return np.mean(cv_scores)

In [103]:
def callback(study, trial):
    global best_booster
    if study.best_trial == trial:
        best_booster = lgbm

In [104]:
study = optuna.create_study(direction="minimize", study_name="LGBM Classifier")
func = lambda trial: objective(trial, transformed_X_train_df, y_train)
study.optimize(func, n_trials=100, callbacks=[callback])

[32m[I 2023-05-17 00:06:41,138][0m A new study created in memory with name: LGBM Classifier[0m




[32m[I 2023-05-17 00:06:41,441][0m Trial 0 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.07463154424972845, 'num_leaves': 60, 'max_depth': 11, 'min_data_in_leaf': 300, 'max_bin': 247, 'lambda_l1': 80, 'lambda_l2': 50, 'min_gain_to_split': 0.05629586349575666, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 00:06:42,098][0m Trial 1 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.27593943765791296, 'num_leaves': 140, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 269, 'lambda_l1': 95, 'lambda_l2': 65, 'min_gain_to_split': 0.12748039993506322, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:42,403][0m Trial 2 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.24882881836407544, 'num_leaves': 20, 'max_depth': 9, 'min_data_in_leaf': 300, 'max_bin': 290, 'lambda_l1': 80, 'lambda_l2': 80, 'min_gain_to_split': 10.332366814077044, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 00:06:42,711][0m Trial 3 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.27493119133277727, 'num_leaves': 340, 'max_depth': 8, 'min_data_in_leaf': 500, 'max_bin': 299, 'lambda_l1': 55, 'lambda_l2': 70, 'min_gain_to_split': 8.554074277347079, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:43,011][0m Trial 4 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.05114862660512073, 'num_leaves': 40, 'max_depth': 10, 'min_data_in_leaf': 500, 'max_bin': 269, 'lambda_l1': 10, 'lambda_l2': 5, 'min_gain_to_split': 4.378055337830201, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:43,372][0m Trial 5 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.043076317296794335, 'num_leaves': 300, 'max_depth': 10, 'min_data_in_leaf': 400, 'max_bin': 273, 'lambda_l1': 55, 'lambda_l2': 50, 'min_gain_to_split': 11.83330215368848, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m












[32m[I 2023-05-17 00:06:44,061][0m Trial 6 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.06944573894914238, 'num_leaves': 320, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 235, 'lambda_l1': 55, 'lambda_l2': 75, 'min_gain_to_split': 11.437904238114696, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:44,410][0m Trial 7 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.22734872411381885, 'num_leaves': 280, 'max_depth': 9, 'min_data_in_leaf': 300, 'max_bin': 217, 'lambda_l1': 5, 'lambda_l2': 30, 'min_gain_to_split': 8.51959361301691, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m




















[32m[I 2023-05-17 00:06:45,578][0m Trial 8 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2661385284715809, 'num_leaves': 200, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 212, 'lambda_l1': 95, 'lambda_l2': 45, 'min_gain_to_split': 7.578952040836537, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:45,940][0m Trial 9 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29781521579748504, 'num_leaves': 200, 'max_depth': 6, 'min_data_in_leaf': 500, 'max_bin': 206, 'lambda_l1': 20, 'lambda_l2': 5, 'min_gain_to_split': 8.245879327511652, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 00:06:46,356][0m Trial 10 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1211946895350334, 'num_leaves': 100, 'max_depth': 4, 'min_data_in_leaf': 400, 'max_bin': 243, 'lambda_l1': 75, 'lambda_l2': 100, 'min_gain_to_split': 14.268216283299077, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m












[32m[I 2023-05-17 00:06:47,076][0m Trial 11 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.16532205875111677, 'num_leaves': 140, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 262, 'lambda_l1': 100, 'lambda_l2': 35, 'min_gain_to_split': 0.11296461508973474, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:47,480][0m Trial 12 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.19413021068404412, 'num_leaves': 100, 'max_depth': 12, 'min_data_in_leaf': 300, 'max_bin': 253, 'lambda_l1': 80, 'lambda_l2': 60, 'min_gain_to_split': 0.07999166264188799, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 1.4718720941213552.[0m












[32m[I 2023-05-17 00:06:48,213][0m Trial 13 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.11042829440457322, 'num_leaves': 140, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 230, 'lambda_l1': 75, 'lambda_l2': 95, 'min_gain_to_split': 2.6160470328150396, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:48,628][0m Trial 14 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.016491087062291382, 'num_leaves': 80, 'max_depth': 11, 'min_data_in_leaf': 300, 'max_bin': 281, 'lambda_l1': 35, 'lambda_l2': 25, 'min_gain_to_split': 4.24386962936142, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 00:06:49,004][0m Trial 15 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.20700357532633312, 'num_leaves': 240, 'max_depth': 3, 'min_data_in_leaf': 400, 'max_bin': 256, 'lambda_l1': 90, 'lambda_l2': 65, 'min_gain_to_split': 1.757651456572022, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m












[32m[I 2023-05-17 00:06:49,756][0m Trial 16 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.15899544299212165, 'num_leaves': 160, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 244, 'lambda_l1': 65, 'lambda_l2': 85, 'min_gain_to_split': 5.1058600953066895, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:50,123][0m Trial 17 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.10144422982767773, 'num_leaves': 60, 'max_depth': 10, 'min_data_in_leaf': 300, 'max_bin': 279, 'lambda_l1': 35, 'lambda_l2': 55, 'min_gain_to_split': 1.7304815693115296, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 1.4718720941213552.[0m




















[32m[I 2023-05-17 00:06:51,361][0m Trial 18 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1403118414836315, 'num_leaves': 400, 'max_depth': 5, 'min_data_in_leaf': 200, 'max_bin': 227, 'lambda_l1': 90, 'lambda_l2': 20, 'min_gain_to_split': 5.897094475091821, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:51,776][0m Trial 19 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1847189283127838, 'num_leaves': 160, 'max_depth': 11, 'min_data_in_leaf': 300, 'max_bin': 264, 'lambda_l1': 65, 'lambda_l2': 45, 'min_gain_to_split': 2.837307484330975, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 00:06:52,195][0m Trial 20 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2376219915361109, 'num_leaves': 20, 'max_depth': 9, 'min_data_in_leaf': 400, 'max_bin': 246, 'lambda_l1': 100, 'lambda_l2': 85, 'min_gain_to_split': 0.10396523361502139, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 00:06:52,573][0m Trial 21 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.24345497765747132, 'num_leaves': 40, 'max_depth': 9, 'min_data_in_leaf': 300, 'max_bin': 293, 'lambda_l1': 85, 'lambda_l2': 80, 'min_gain_to_split': 6.639446736193989, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 00:06:52,954][0m Trial 22 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29378248531342366, 'num_leaves': 20, 'max_depth': 11, 'min_data_in_leaf': 300, 'max_bin': 288, 'lambda_l1': 70, 'lambda_l2': 70, 'min_gain_to_split': 3.5089025291145663, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 00:06:53,339][0m Trial 23 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.25502729966893045, 'num_leaves': 100, 'max_depth': 8, 'min_data_in_leaf': 300, 'max_bin': 284, 'lambda_l1': 80, 'lambda_l2': 90, 'min_gain_to_split': 1.2637912486247327, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 1.4718720941213552.[0m




















[32m[I 2023-05-17 00:06:55,350][0m Trial 24 finished with value: 1.455207988082923 and parameters: {'n_estimators': 200, 'learning_rate': 0.22353658680604294, 'num_leaves': 60, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 300, 'lambda_l1': 40, 'lambda_l2': 60, 'min_gain_to_split': 3.0111960961327013, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 24 with value: 1.455207988082923.[0m




















[32m[I 2023-05-17 00:06:57,598][0m Trial 25 finished with value: 1.447877878130811 and parameters: {'n_estimators': 200, 'learning_rate': 0.22656800972790245, 'num_leaves': 120, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 300, 'lambda_l1': 40, 'lambda_l2': 60, 'min_gain_to_split': 1.3048200358871256, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 25 with value: 1.447877878130811.[0m




















[32m[I 2023-05-17 00:06:59,471][0m Trial 26 finished with value: 1.4513387961825777 and parameters: {'n_estimators': 200, 'learning_rate': 0.2134848136984868, 'num_leaves': 80, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 300, 'lambda_l1': 40, 'lambda_l2': 40, 'min_gain_to_split': 3.1564193283883726, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 25 with value: 1.447877878130811.[0m




[32m[I 2023-05-17 00:06:59,761][0m Trial 27 pruned. Trial was pruned at iteration 115.[0m
[32m[I 2023-05-17 00:06:59,842][0m Trial 28 pruned. Trial was pruned at iteration 0.[0m
















[32m[I 2023-05-17 00:07:02,074][0m Trial 29 finished with value: 1.4443629952414048 and parameters: {'n_estimators': 200, 'learning_rate': 0.21632618959763175, 'num_leaves': 60, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 277, 'lambda_l1': 40, 'lambda_l2': 45, 'min_gain_to_split': 1.9752780528754583, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 29 with value: 1.4443629952414048.[0m




[32m[I 2023-05-17 00:07:02,350][0m Trial 30 pruned. Trial was pruned at iteration 101.[0m
[32m[I 2023-05-17 00:07:02,418][0m Trial 31 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2023-05-17 00:07:02,681][0m Trial 32 pruned. Trial was pruned at iteration 101.[0m
[32m[I 2023-05-17 00:07:02,749][0m Trial 33 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2023-05-17 00:07:02,818][0m Trial 34 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2023-05-17 00:07:03,099][0m Trial 35 pruned. Trial was pruned at iteration 101.[0m








[32m[I 2023-05-17 00:07:04,096][0m Trial 36 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.24088514159023472, 'num_leaves': 60, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 287, 'lambda_l1': 50, 'lambda_l2': 60, 'min_gain_to_split': 2.3644166079379487, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 29 with value: 1.4443629952414048.[0m




















[32m[I 2023-05-17 00:07:06,320][0m Trial 37 finished with value: 1.4468215326703362 and parameters: {'n_estimators': 200, 'learning_rate': 0.19638853785781463, 'num_leaves': 120, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 300, 'lambda_l1': 35, 'lambda_l2': 45, 'min_gain_to_split': 3.92265780146684, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 29 with value: 1.4443629952414048.[0m












[32m[I 2023-05-17 00:07:07,155][0m Trial 38 pruned. Trial was pruned at iteration 101.[0m
















[32m[I 2023-05-17 00:07:09,499][0m Trial 39 finished with value: 1.4302233419305235 and parameters: {'n_estimators': 200, 'learning_rate': 0.2665204950154079, 'num_leaves': 180, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 274, 'lambda_l1': 30, 'lambda_l2': 15, 'min_gain_to_split': 4.718376367265989, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 39 with value: 1.4302233419305235.[0m




[32m[I 2023-05-17 00:07:09,879][0m Trial 40 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2736005117290489, 'num_leaves': 240, 'max_depth': 12, 'min_data_in_leaf': 300, 'max_bin': 273, 'lambda_l1': 30, 'lambda_l2': 5, 'min_gain_to_split': 4.3844837847424705, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 39 with value: 1.4302233419305235.[0m




















[32m[I 2023-05-17 00:07:12,207][0m Trial 41 finished with value: 1.4342512434377752 and parameters: {'n_estimators': 200, 'learning_rate': 0.2378072479826459, 'num_leaves': 160, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 264, 'lambda_l1': 35, 'lambda_l2': 15, 'min_gain_to_split': 1.9706508156596731, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 39 with value: 1.4302233419305235.[0m




















[32m[I 2023-05-17 00:07:14,788][0m Trial 42 finished with value: 1.249397211472323 and parameters: {'n_estimators': 200, 'learning_rate': 0.2642970892641992, 'num_leaves': 180, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 265, 'lambda_l1': 0, 'lambda_l2': 15, 'min_gain_to_split': 1.9436790687521488, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:17,274][0m Trial 43 finished with value: 1.2715736319905109 and parameters: {'n_estimators': 200, 'learning_rate': 0.2587532132427031, 'num_leaves': 180, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 261, 'lambda_l1': 5, 'lambda_l2': 10, 'min_gain_to_split': 2.0959686348045095, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:07:17,650][0m Trial 44 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2626411522518104, 'num_leaves': 220, 'max_depth': 11, 'min_data_in_leaf': 500, 'max_bin': 264, 'lambda_l1': 0, 'lambda_l2': 10, 'min_gain_to_split': 1.8828168078759342, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:20,511][0m Trial 45 finished with value: 1.5338322471481605 and parameters: {'n_estimators': 200, 'learning_rate': 0.27850266757332515, 'num_leaves': 180, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 259, 'lambda_l1': 10, 'lambda_l2': 15, 'min_gain_to_split': 0.758361578401912, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:23,031][0m Trial 46 finished with value: 1.2529835930759348 and parameters: {'n_estimators': 200, 'learning_rate': 0.28571598168752566, 'num_leaves': 180, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 268, 'lambda_l1': 0, 'lambda_l2': 10, 'min_gain_to_split': 2.1620841119388854, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:07:23,398][0m Trial 47 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.28521770810153463, 'num_leaves': 180, 'max_depth': 10, 'min_data_in_leaf': 300, 'max_bin': 268, 'lambda_l1': 0, 'lambda_l2': 10, 'min_gain_to_split': 2.6237823248403775, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:25,544][0m Trial 48 finished with value: 1.3901485011220585 and parameters: {'n_estimators': 200, 'learning_rate': 0.27155435399642186, 'num_leaves': 220, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 250, 'lambda_l1': 10, 'lambda_l2': 0, 'min_gain_to_split': 0.7451588814452084, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:27,753][0m Trial 49 finished with value: 1.3789029930558836 and parameters: {'n_estimators': 200, 'learning_rate': 0.2995297129630382, 'num_leaves': 280, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 251, 'lambda_l1': 10, 'lambda_l2': 10, 'min_gain_to_split': 0.5740279305551583, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:07:28,152][0m Trial 50 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29467052903010826, 'num_leaves': 280, 'max_depth': 7, 'min_data_in_leaf': 300, 'max_bin': 252, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 0.6504302555223898, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:30,329][0m Trial 51 finished with value: 1.3883746382805306 and parameters: {'n_estimators': 200, 'learning_rate': 0.26610913866026925, 'num_leaves': 220, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 237, 'lambda_l1': 10, 'lambda_l2': 0, 'min_gain_to_split': 0.859490006917724, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m








[32m[I 2023-05-17 00:07:31,209][0m Trial 52 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.28397547308374127, 'num_leaves': 220, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 239, 'lambda_l1': 10, 'lambda_l2': 0, 'min_gain_to_split': 0.521797462433008, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:33,416][0m Trial 53 finished with value: 1.3852136209148176 and parameters: {'n_estimators': 200, 'learning_rate': 0.2703517504038666, 'num_leaves': 260, 'max_depth': 8, 'min_data_in_leaf': 200, 'max_bin': 248, 'lambda_l1': 15, 'lambda_l2': 0, 'min_gain_to_split': 0.12661413832643653, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:36,105][0m Trial 54 finished with value: 1.3803548487507558 and parameters: {'n_estimators': 200, 'learning_rate': 0.2976606065078326, 'num_leaves': 260, 'max_depth': 8, 'min_data_in_leaf': 200, 'max_bin': 237, 'lambda_l1': 5, 'lambda_l2': 10, 'min_gain_to_split': 0.2057539825882061, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m








[32m[I 2023-05-17 00:07:36,958][0m Trial 55 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2964864263834026, 'num_leaves': 280, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 256, 'lambda_l1': 5, 'lambda_l2': 25, 'min_gain_to_split': 0.13751241686755655, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:07:37,220][0m Trial 56 pruned. Trial was pruned at iteration 101.[0m








[32m[I 2023-05-17 00:07:37,999][0m Trial 57 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29809653276994813, 'num_leaves': 300, 'max_depth': 6, 'min_data_in_leaf': 200, 'max_bin': 240, 'lambda_l1': 0, 'lambda_l2': 20, 'min_gain_to_split': 0.13117665788739363, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:07:38,400][0m Trial 58 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2798707188603234, 'num_leaves': 260, 'max_depth': 8, 'min_data_in_leaf': 300, 'max_bin': 220, 'lambda_l1': 15, 'lambda_l2': 25, 'min_gain_to_split': 1.3320737121050925, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m








[32m[I 2023-05-17 00:07:38,887][0m Trial 59 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.256859287862353, 'num_leaves': 260, 'max_depth': 8, 'min_data_in_leaf': 500, 'max_bin': 233, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 2.28843295948961, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m












[32m[I 2023-05-17 00:07:39,752][0m Trial 60 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.24932875329314594, 'num_leaves': 360, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 268, 'lambda_l1': 20, 'lambda_l2': 10, 'min_gain_to_split': 2.975111478535675, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:42,035][0m Trial 61 finished with value: 1.3870363928430387 and parameters: {'n_estimators': 200, 'learning_rate': 0.27140565601947486, 'num_leaves': 260, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 248, 'lambda_l1': 0, 'lambda_l2': 0, 'min_gain_to_split': 1.2843044670639365, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:44,391][0m Trial 62 finished with value: 1.4097413908205532 and parameters: {'n_estimators': 200, 'learning_rate': 0.2751802877529294, 'num_leaves': 200, 'max_depth': 8, 'min_data_in_leaf': 200, 'max_bin': 248, 'lambda_l1': 0, 'lambda_l2': 0, 'min_gain_to_split': 1.5882926360551384, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m








[32m[I 2023-05-17 00:07:45,333][0m Trial 63 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2827070566353426, 'num_leaves': 260, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 259, 'lambda_l1': 5, 'lambda_l2': 15, 'min_gain_to_split': 0.02880214868018313, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m








[32m[I 2023-05-17 00:07:45,774][0m Trial 64 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2619356167895425, 'num_leaves': 300, 'max_depth': 9, 'min_data_in_leaf': 400, 'max_bin': 243, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 1.1576436894436015, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:07:45,995][0m Trial 65 pruned. Trial was pruned at iteration 101.[0m








[32m[I 2023-05-17 00:07:46,778][0m Trial 66 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2896273082722914, 'num_leaves': 320, 'max_depth': 6, 'min_data_in_leaf': 200, 'max_bin': 259, 'lambda_l1': 0, 'lambda_l2': 20, 'min_gain_to_split': 0.38610292125487167, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m
[32m[I 2023-05-17 00:07:46,860][0m Trial 67 pruned. Trial was pruned at iteration 0.[0m












[32m[I 2023-05-17 00:07:47,865][0m Trial 68 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.27275638743610686, 'num_leaves': 280, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 243, 'lambda_l1': 10, 'lambda_l2': 30, 'min_gain_to_split': 2.620423766008229, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m
























[32m[I 2023-05-17 00:07:50,339][0m Trial 69 finished with value: 1.3775029909972676 and parameters: {'n_estimators': 200, 'learning_rate': 0.29141512391193986, 'num_leaves': 160, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 250, 'lambda_l1': 5, 'lambda_l2': 0, 'min_gain_to_split': 0.5748883453612956, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:07:50,796][0m Trial 70 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2889492486386413, 'num_leaves': 160, 'max_depth': 3, 'min_data_in_leaf': 300, 'max_bin': 261, 'lambda_l1': 20, 'lambda_l2': 15, 'min_gain_to_split': 0.45720455954433575, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:52,976][0m Trial 71 finished with value: 1.3917024695299431 and parameters: {'n_estimators': 200, 'learning_rate': 0.27726059953757576, 'num_leaves': 140, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 250, 'lambda_l1': 5, 'lambda_l2': 0, 'min_gain_to_split': 1.1057072298015433, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:55,249][0m Trial 72 finished with value: 1.3810550822406715 and parameters: {'n_estimators': 200, 'learning_rate': 0.28950533730889005, 'num_leaves': 140, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 256, 'lambda_l1': 0, 'lambda_l2': 10, 'min_gain_to_split': 1.6470605579312165, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:07:57,796][0m Trial 73 finished with value: 1.3295855355117656 and parameters: {'n_estimators': 200, 'learning_rate': 0.2997956908194161, 'num_leaves': 140, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 256, 'lambda_l1': 5, 'lambda_l2': 10, 'min_gain_to_split': 2.027015685775273, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:00,506][0m Trial 74 finished with value: 1.3276584053837635 and parameters: {'n_estimators': 200, 'learning_rate': 0.2913336016029288, 'num_leaves': 140, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 255, 'lambda_l1': 5, 'lambda_l2': 10, 'min_gain_to_split': 2.200643117664521, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:08:00,887][0m Trial 75 pruned. Trial was pruned at iteration 133.[0m
[32m[I 2023-05-17 00:08:00,992][0m Trial 76 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2023-05-17 00:08:01,319][0m Trial 77 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 00:08:01,783][0m Trial 78 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2586303477929015, 'num_leaves': 200, 'max_depth': 9, 'min_data_in_leaf': 400, 'max_bin': 261, 'lambda_l1': 0, 'lambda_l2': 10, 'min_gain_to_split': 1.8548916530309572, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m
[32m[I 2023-05-17 00:08:01,861][0m Trial 79 pruned. Trial was pruned at iteration 0.[0m




















[32m[I 2023-05-17 00:08:04,171][0m Trial 80 finished with value: 1.2642506864910508 and parameters: {'n_estimators': 200, 'learning_rate': 0.29158535839014454, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 266, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 2.05392669772488, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:06,661][0m Trial 81 finished with value: 1.2647952186833649 and parameters: {'n_estimators': 200, 'learning_rate': 0.2913905329887974, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 200, 'max_bin': 266, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 2.2853596946057055, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:08:06,981][0m Trial 82 pruned. Trial was pruned at iteration 101.[0m
















[32m[I 2023-05-17 00:08:09,661][0m Trial 83 finished with value: 1.2509294836221705 and parameters: {'n_estimators': 200, 'learning_rate': 0.28065673691648224, 'num_leaves': 140, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 271, 'lambda_l1': 0, 'lambda_l2': 15, 'min_gain_to_split': 2.600547519553177, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:12,243][0m Trial 84 finished with value: 1.255553196107148 and parameters: {'n_estimators': 200, 'learning_rate': 0.2825905007808819, 'num_leaves': 140, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 271, 'lambda_l1': 0, 'lambda_l2': 15, 'min_gain_to_split': 2.6174188998527184, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:14,752][0m Trial 85 finished with value: 1.2591827733481664 and parameters: {'n_estimators': 200, 'learning_rate': 0.26598053501938335, 'num_leaves': 140, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 281, 'lambda_l1': 0, 'lambda_l2': 20, 'min_gain_to_split': 2.5204097975906468, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:17,063][0m Trial 86 finished with value: 1.2609629130735986 and parameters: {'n_estimators': 200, 'learning_rate': 0.28004638304284163, 'num_leaves': 100, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 276, 'lambda_l1': 0, 'lambda_l2': 20, 'min_gain_to_split': 3.6346584623897757, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:08:17,413][0m Trial 87 pruned. Trial was pruned at iteration 133.[0m
















[32m[I 2023-05-17 00:08:20,104][0m Trial 88 finished with value: 1.256378834104181 and parameters: {'n_estimators': 200, 'learning_rate': 0.25351228891192096, 'num_leaves': 120, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 276, 'lambda_l1': 0, 'lambda_l2': 20, 'min_gain_to_split': 2.694182351111449, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:23,287][0m Trial 89 finished with value: 1.2533539194637175 and parameters: {'n_estimators': 200, 'learning_rate': 0.2319757087169976, 'num_leaves': 80, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 276, 'lambda_l1': 0, 'lambda_l2': 20, 'min_gain_to_split': 2.6651355428318593, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:08:23,739][0m Trial 90 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.24709630551295517, 'num_leaves': 80, 'max_depth': 12, 'min_data_in_leaf': 300, 'max_bin': 276, 'lambda_l1': 0, 'lambda_l2': 25, 'min_gain_to_split': 2.789516256412657, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m
























[32m[I 2023-05-17 00:08:26,807][0m Trial 91 finished with value: 1.2587621184571973 and parameters: {'n_estimators': 200, 'learning_rate': 0.2663755007934507, 'num_leaves': 100, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 282, 'lambda_l1': 0, 'lambda_l2': 20, 'min_gain_to_split': 2.510923250148272, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:29,905][0m Trial 92 finished with value: 1.2585034897213339 and parameters: {'n_estimators': 200, 'learning_rate': 0.2528126585836306, 'num_leaves': 120, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 283, 'lambda_l1': 0, 'lambda_l2': 30, 'min_gain_to_split': 3.2162279717943214, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:33,040][0m Trial 93 finished with value: 1.2571553701280753 and parameters: {'n_estimators': 200, 'learning_rate': 0.2537863405505069, 'num_leaves': 120, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 283, 'lambda_l1': 0, 'lambda_l2': 20, 'min_gain_to_split': 3.2816473325720983, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




[32m[I 2023-05-17 00:08:33,577][0m Trial 94 pruned. Trial was pruned at iteration 141.[0m
















[32m[I 2023-05-17 00:08:36,698][0m Trial 95 finished with value: 1.2564426845746668 and parameters: {'n_estimators': 200, 'learning_rate': 0.2534427060287845, 'num_leaves': 120, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 280, 'lambda_l1': 0, 'lambda_l2': 25, 'min_gain_to_split': 2.5980522399387644, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:39,470][0m Trial 96 finished with value: 1.264356875555785 and parameters: {'n_estimators': 200, 'learning_rate': 0.2533892136793229, 'num_leaves': 120, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 285, 'lambda_l1': 0, 'lambda_l2': 25, 'min_gain_to_split': 4.001669160471103, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:42,690][0m Trial 97 finished with value: 1.2557480389136333 and parameters: {'n_estimators': 200, 'learning_rate': 0.23314824075994434, 'num_leaves': 80, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 279, 'lambda_l1': 0, 'lambda_l2': 30, 'min_gain_to_split': 3.167833037563834, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m




















[32m[I 2023-05-17 00:08:45,662][0m Trial 98 finished with value: 1.2578980594006655 and parameters: {'n_estimators': 200, 'learning_rate': 0.23251440476100999, 'num_leaves': 120, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 271, 'lambda_l1': 0, 'lambda_l2': 30, 'min_gain_to_split': 3.072654914209787, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 42 with value: 1.249397211472323.[0m
[32m[I 2023-05-17 00:08:45,729][0m Trial 99 pruned. Trial was pruned at iteration 0.[0m




In [106]:
print(f"\tBest value (multi_logloss): {study.best_value:.5f}")
print(f"\tBest params:")

for key, value in study.best_params.items():
    print(f"\t\t{key}: {value}")

	Best value (multi_logloss): 1.24940
	Best params:
		n_estimators: 200
		learning_rate: 0.2642970892641992
		num_leaves: 180
		max_depth: 11
		min_data_in_leaf: 200
		max_bin: 265
		lambda_l1: 0
		lambda_l2: 15
		min_gain_to_split: 1.9436790687521488
		bagging_fraction: 0.9
		bagging_freq: 1
		feature_fraction: 0.9


In [110]:
best_booster

In [None]:
def evaluate_models(X_train, y_train,
		            X_test, y_test,
		    		rfe_estimator, 
		    		rfe_num_features_start,  rfe_num_features_end, rfe_num_features_step, 
				    pipe_model,
					random_state=42):
	
	for i in range(rfe_num_features_start, rfe_num_features_end+1, rfe_num_features_step):
		rfe = RFE(estimator=rfe_estimator, n_features_to_select=i)
		pipe_rfe_model = make_pipeline(preprocessor, 
			                           rfe, 
				                       pipe_model)
		
		pipe_rfe_model.fit(X_train, y_train)
		
		print('---- RFE with ', i, ' features selected ----')
		print(pipe_rfe_model.score(X_train, y_train))
		print(pipe_rfe_model.score(X_test, y_test))	