In [131]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler, LabelEncoder
from sklearn.compose import make_column_transformer
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from lightgbm.sklearn import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier

from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

from sklearn.feature_selection import RFE

In [132]:
DIR = "../../data/"
SMOOTHIE = "Smoothie King/"

smoothie_demographic = pd.read_csv(DIR + SMOOTHIE + "processed_demographic.csv")
smoothie_stores = pd.read_csv(DIR + SMOOTHIE + "smoothie_king_stores.csv")
smoothie_poi_variables = pd.read_csv(DIR + SMOOTHIE + "processed_poi.csv")
# smoothie_sister = pd.read_csv(DIR + SMOOTHIE + "competition_sister_variables.csv")
smoothie_trade_area = pd.read_csv(DIR + SMOOTHIE + "processed_trade_area.csv").rename(columns={"store_num": "store"})

In [133]:
smoothie_merged = smoothie_stores.merge(
    smoothie_demographic, on="store", how="outer"
).merge(
    smoothie_poi_variables, on="store", how="outer"
).merge(
    smoothie_trade_area, on="store", how="outer"
)
smoothie_merged

Unnamed: 0,store,longitude,latitude,category,cbsa_name,dma_name,state_name,market_size,store_density,age0018_p_10mi,...,popgrfy_ta,popgrpy_ta,poverty_inpoverty_p_ta,spend_breakfastbrunch_ta,spend_dinner_ta,spend_foodbev_ta,spend_lunch_ta,wealth_hhavg_ta,wealth_hhtotal_ta,white_p_ta
0,SK 1504,-97.650392,30.519353,SHOPPING,"Austin-Round Rock, TX","Austin, TX",Texas,Large Metro (2),Light Suburban,0.2805,...,8.3789,3.9235,0.0611,7069439.0,40790484.0,230383651.0,23166216.0,240573.0,25223.0,0.4897
1,SK 0057,-88.171150,30.672501,SHOPPING,"Mobile, AL","Mobile et al, AL-FL",Alabama,Medium City (4),Light Suburban,0.2264,...,0.6017,0.3932,0.1830,4724526.0,25460067.0,160135521.0,14653701.0,217054.0,22216.0,0.5129
2,SK 1415,-90.535722,38.784250,HOME,"St. Louis, MO-IL","St. Louis, MO",Missouri,Very Large Metro (1),Light Suburban,0.2129,...,2.5003,0.7142,0.0639,4501211.0,24794631.0,151609187.0,14129014.0,245860.0,19907.0,0.8459
3,SK 1231,-80.134700,26.100737,TRAVEL,"Miami-Fort Lauderdale et al, FL","Miami-Ft. Lauderdale, FL",Florida,Very Large Metro (1),Suburban,0.2174,...,7.6482,12.7188,0.0831,4038906.0,23214366.0,146417939.0,12859709.0,255812.0,22124.0,0.7020
4,SK 1535,-96.856651,32.996408,WORK,"Dallas-Fort Worth-Arlington, TX","Dallas-Ft. Worth, TX",Texas,Very Large Metro (1),Light Suburban,0.2620,...,2.1871,-5.7794,0.0800,5919218.0,32751951.0,207616741.0,18696840.0,216763.0,29251.0,0.4593
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
791,SK 1536,-96.872596,32.647809,HOME,"Dallas-Fort Worth-Arlington, TX","Dallas-Ft. Worth, TX",Texas,Very Large Metro (1),Exurban,0.2664,...,2.0540,-0.2994,0.1833,7614977.0,40037412.0,264008408.0,23332500.0,199437.0,29430.0,0.1490
792,SK 1886,-105.077634,40.564695,OTHER,"Fort Collins, CO","Denver, CO",Colorado,Medium City (4),Light Suburban,0.2224,...,4.7691,2.3442,0.2202,5118356.0,28458285.0,197532602.0,16637181.0,216659.0,25712.0,0.7874
793,SK 0162,-95.478001,30.316531,SHOPPING,"Houston-The Woodlands et al, TX","Houston, TX",Texas,Very Large Metro (1),Exurban,0.2412,...,9.9822,8.1691,0.1202,4677280.0,25445615.0,161088758.0,14627532.0,216855.0,19407.0,0.5041
794,SK 1449,-78.968258,35.064994,SHOPPING,"Fayetteville, NC","Raleigh et al, NC",North Carolina,Medium City (4),Exurban,0.2614,...,-0.3816,-1.0761,0.1809,10318478.0,54577513.0,349542567.0,31683602.0,208448.0,41681.0,0.3933


In [134]:
train_df, test_df = train_test_split(smoothie_merged, test_size=0.1, random_state=42)
X_train = train_df.drop(columns=["category"])
y_train = train_df["category"]
X_test = test_df.drop(columns=["category"])
y_test = test_df["category"]

In [135]:
drop_features = ['store',
 'longitude',
 'latitude',
 'cbsa_name',
 'dma_name',
 'state_name',
 ]

ordinal_features_oth = [
    "market_size",
    "store_density",
]
ordering_ordinal_oth = [
    ["Very Large Metro (1)", "Large Metro (2)", "Large City (3)", "Medium City (4)", "Small City (5)", "Small Town (6)"],
    ["Rural", "Exurban", "Suburban", "Light Suburban", "Light Urban", "Urban", "Super Urban"],
]
numeric_features = list(set(smoothie_merged.select_dtypes(include=np.number).columns.tolist()) - {"longitude", "latitude"})

In [136]:
numeric_transformer = make_pipeline(
    SimpleImputer(strategy="median"), 
    StandardScaler()
)

ordinal_transformer_oth = make_pipeline(
    SimpleImputer(strategy="most_frequent"),
    OrdinalEncoder(categories=ordering_ordinal_oth),
)

preprocessor = make_column_transformer(
    ("drop", drop_features),
    (numeric_transformer, numeric_features),
    (ordinal_transformer_oth, ordinal_features_oth),
)

In [137]:
preprocessor.fit(X_train)

In [138]:
column_names = (
    numeric_features
    + preprocessor.named_transformers_['pipeline-2']['ordinalencoder'].get_feature_names_out().tolist()
)
len(column_names)

920

In [139]:
transformed_X_train = preprocessor.transform(X_train)

In [140]:
transformed_X_train_df = pd.DataFrame(transformed_X_train, columns=column_names)
transformed_X_train_df

Unnamed: 0,spend_breakfastbrunch_ta,emp_accommodation_foodserv_p_3mi,hh_7pers_p_ta,hh_type_male_nochild_p_ta,hh_1vehicle_p_5mi,age0018_p_1mi,hh_type_1pers_p_10mi,hu_ownerocc_1mi,military_installations_2mi,medsalcy_3mi,...,emp_manfacturing_p_3mi,spend_lunch_3mi,inrix_dinner_ns,dtpop_students_9th_12th_p_1mi,hh_4vehicle_p_5mi,hh_type_male_child_p_1mi,emp_retail_trade_p_5mi,hh_type_fam_p_10mi,x0,x1
0,3.006608,-0.578451,-0.255387,-1.411746,-2.293129,-0.445402,-1.025068,-0.285635,-0.12555,0.479451,...,0.366232,-0.101906,0.488229,2.459765,1.252747,1.189833,-0.180202,0.623062,1.0,0.0
1,1.082548,-0.527091,-0.993526,-0.277228,0.427386,-0.169020,0.926991,1.572752,-0.12555,1.109466,...,-0.013909,0.699866,1.207545,-0.411251,-0.563990,-0.350107,-0.694044,-0.642321,0.0,3.0
2,-0.690315,-0.116207,1.053132,-1.075928,-1.771367,-0.042033,-2.235070,0.105572,-0.12555,0.197425,...,-0.287299,-0.132011,1.333107,0.999073,0.851812,0.416935,-1.089913,2.168023,0.0,1.0
3,0.045323,-0.013486,0.516304,0.811909,-1.020539,0.874261,-0.935003,0.332584,-0.12555,1.169469,...,0.353214,0.031376,-0.865277,-0.814200,1.046014,1.154701,-0.796288,0.979815,0.0,3.0
4,1.011306,-0.146167,-0.344859,0.258264,-1.070029,-1.184911,-0.696135,-0.336635,-0.12555,0.302638,...,-0.594537,0.007517,-0.165114,-0.411251,1.183836,-0.309120,-0.961452,0.735184,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711,-0.973485,0.003634,-1.317860,-1.257451,1.277194,0.702456,1.056214,1.756228,-0.12555,0.746686,...,-0.521633,0.527092,-0.354165,-1.116412,-1.453564,-0.976622,-1.441213,-1.067513,0.0,2.0
712,0.224869,0.495839,-0.937607,0.212884,0.259121,-1.003146,0.578479,-0.512025,-0.12555,-1.196983,...,-0.607556,-0.385537,-0.902875,-0.663094,0.275468,-0.437936,1.128000,-0.206936,3.0,1.0
713,0.266487,-0.069126,1.008396,0.076742,-0.880554,1.616260,-1.959002,0.103706,-0.12555,0.805167,...,-0.081606,-0.016699,-0.956788,0.847967,0.306791,0.434501,0.278586,1.956883,0.0,1.0
714,-1.391657,-0.920854,-0.031709,-0.141086,1.183871,0.500771,0.453171,-0.129525,-0.12555,-0.154384,...,-1.091846,-0.409191,-0.480791,-0.461619,-0.620371,0.171014,-1.328482,-0.205480,2.0,1.0


In [141]:
import optuna
from optuna.integration import LightGBMPruningCallback

In [142]:
best_booster = None
lgbm = None

In [143]:
def objective(trial, X, y):
    global lgbm
    param_grid = {
        #         "device_type": trial.suggest_categorical("device_type", ['gpu']),
        "n_estimators": trial.suggest_categorical("n_estimators", [200]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 20, 400, step=20),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 500, step=100),
        "max_bin": trial.suggest_int("max_bin", 200, 300),
        "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        "bagging_fraction": trial.suggest_float(
            "bagging_fraction", 0.2, 0.95, step=0.1
        ),
        "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
        "feature_fraction": trial.suggest_float(
            "feature_fraction", 0.2, 0.95, step=0.1
        ),
    }
    
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

    cv_scores = np.empty(5)
    for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):
        # print(idx)
        # print(train_idx)
        # print(test_idx)
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        # print('X_train shape: ', X_train.shape)
        # print('X_test shape: ', X_test.shape)
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
        # print('y_train shape: ', y_train.shape)
        # print('y_test shape: ', y_test.shape)

        lgbm = LGBMClassifier(objective="multiclass", **param_grid)
        lgbm.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            eval_metric="multi_logloss",
            early_stopping_rounds=100,
            callbacks=[
                LightGBMPruningCallback(trial, "multi_logloss")
            ],  # Add a pruning callback
        )
        preds = lgbm.predict_proba(X_test)
        cv_scores[idx] = log_loss(y_test, preds)

    return np.mean(cv_scores)

In [144]:
def callback(study, trial):
    global best_booster
    if study.best_trial == trial:
        best_booster = lgbm

In [145]:
study = optuna.create_study(direction="minimize", study_name="LGBM Classifier")
func = lambda trial: objective(trial, transformed_X_train_df, y_train)
study.optimize(func, n_trials=100, callbacks=[callback])

[32m[I 2023-05-17 01:06:43,867][0m A new study created in memory with name: LGBM Classifier[0m




[32m[I 2023-05-17 01:06:44,268][0m Trial 0 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2981906101103282, 'num_leaves': 320, 'max_depth': 6, 'min_data_in_leaf': 400, 'max_bin': 248, 'lambda_l1': 35, 'lambda_l2': 10, 'min_gain_to_split': 13.713612872028758, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 01:06:44,609][0m Trial 1 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.19668066989960853, 'num_leaves': 320, 'max_depth': 12, 'min_data_in_leaf': 400, 'max_bin': 209, 'lambda_l1': 10, 'lambda_l2': 50, 'min_gain_to_split': 2.8292802740206504, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 01:06:44,935][0m Trial 2 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.14185224033660476, 'num_leaves': 160, 'max_depth': 5, 'min_data_in_leaf': 300, 'max_bin': 270, 'lambda_l1': 100, 'lambda_l2': 55, 'min_gain_to_split': 9.97914814951265, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 01:06:45,263][0m Trial 3 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.24955314628151598, 'num_leaves': 340, 'max_depth': 11, 'min_data_in_leaf': 500, 'max_bin': 261, 'lambda_l1': 90, 'lambda_l2': 5, 'min_gain_to_split': 10.367776356124201, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 01:06:45,596][0m Trial 4 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.17236994117906504, 'num_leaves': 320, 'max_depth': 4, 'min_data_in_leaf': 400, 'max_bin': 210, 'lambda_l1': 40, 'lambda_l2': 65, 'min_gain_to_split': 7.978288145468277, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 1.4718720941213552.[0m












[32m[I 2023-05-17 01:06:46,407][0m Trial 5 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.22058223391218157, 'num_leaves': 300, 'max_depth': 8, 'min_data_in_leaf': 200, 'max_bin': 263, 'lambda_l1': 25, 'lambda_l2': 20, 'min_gain_to_split': 12.885246425298075, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 01:06:46,788][0m Trial 6 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1146922000014254, 'num_leaves': 360, 'max_depth': 8, 'min_data_in_leaf': 500, 'max_bin': 300, 'lambda_l1': 10, 'lambda_l2': 75, 'min_gain_to_split': 8.873975192624654, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 01:06:47,167][0m Trial 7 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29893060855527015, 'num_leaves': 400, 'max_depth': 9, 'min_data_in_leaf': 500, 'max_bin': 218, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 14.91709471279361, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 01:06:47,536][0m Trial 8 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.09997204698359101, 'num_leaves': 340, 'max_depth': 3, 'min_data_in_leaf': 300, 'max_bin': 223, 'lambda_l1': 55, 'lambda_l2': 30, 'min_gain_to_split': 3.101332464667779, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m








[32m[I 2023-05-17 01:06:47,870][0m Trial 9 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.047273916430556596, 'num_leaves': 260, 'max_depth': 9, 'min_data_in_leaf': 500, 'max_bin': 240, 'lambda_l1': 15, 'lambda_l2': 5, 'min_gain_to_split': 5.376004521596055, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 1.4718720941213552.[0m




















[32m[I 2023-05-17 01:06:49,730][0m Trial 10 finished with value: 1.4595932755857146 and parameters: {'n_estimators': 200, 'learning_rate': 0.29403473659982116, 'num_leaves': 20, 'max_depth': 6, 'min_data_in_leaf': 200, 'max_bin': 286, 'lambda_l1': 65, 'lambda_l2': 35, 'min_gain_to_split': 0.18913594983057536, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 10 with value: 1.4595932755857146.[0m




[32m[I 2023-05-17 01:06:49,960][0m Trial 11 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 01:06:50,367][0m Trial 12 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.24906370989649076, 'num_leaves': 140, 'max_depth': 6, 'min_data_in_leaf': 300, 'max_bin': 238, 'lambda_l1': 75, 'lambda_l2': 30, 'min_gain_to_split': 5.874185991381581, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 10 with value: 1.4595932755857146.[0m








[32m[I 2023-05-17 01:06:50,820][0m Trial 13 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2747684209404674, 'num_leaves': 20, 'max_depth': 6, 'min_data_in_leaf': 400, 'max_bin': 280, 'lambda_l1': 40, 'lambda_l2': 35, 'min_gain_to_split': 11.813854480651635, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 10 with value: 1.4595932755857146.[0m




















[32m[I 2023-05-17 01:06:52,723][0m Trial 14 finished with value: 1.4512192310455259 and parameters: {'n_estimators': 200, 'learning_rate': 0.23194425799968763, 'num_leaves': 100, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 248, 'lambda_l1': 35, 'lambda_l2': 20, 'min_gain_to_split': 6.695065109567382, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:06:53,041][0m Trial 15 pruned. Trial was pruned at iteration 131.[0m
















[32m[I 2023-05-17 01:06:54,339][0m Trial 16 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.25973016016068734, 'num_leaves': 80, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 251, 'lambda_l1': 80, 'lambda_l2': 20, 'min_gain_to_split': 6.229949027383812, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:06:54,725][0m Trial 17 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.21613515127363492, 'num_leaves': 80, 'max_depth': 4, 'min_data_in_leaf': 300, 'max_bin': 281, 'lambda_l1': 65, 'lambda_l2': 80, 'min_gain_to_split': 4.205778324978674, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:06:54,796][0m Trial 18 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2023-05-17 01:06:55,064][0m Trial 19 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 01:06:55,432][0m Trial 20 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.23520314826685074, 'num_leaves': 40, 'max_depth': 9, 'min_data_in_leaf': 300, 'max_bin': 255, 'lambda_l1': 50, 'lambda_l2': 60, 'min_gain_to_split': 7.117060511713066, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:06:55,810][0m Trial 21 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.299807311828118, 'num_leaves': 220, 'max_depth': 7, 'min_data_in_leaf': 400, 'max_bin': 245, 'lambda_l1': 35, 'lambda_l2': 15, 'min_gain_to_split': 7.595061023018954, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:06:56,202][0m Trial 22 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2740763735077565, 'num_leaves': 200, 'max_depth': 5, 'min_data_in_leaf': 400, 'max_bin': 230, 'lambda_l1': 25, 'lambda_l2': 0, 'min_gain_to_split': 4.6056285463957645, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:06:56,629][0m Trial 23 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2731495716033664, 'num_leaves': 60, 'max_depth': 6, 'min_data_in_leaf': 300, 'max_bin': 248, 'lambda_l1': 45, 'lambda_l2': 15, 'min_gain_to_split': 9.072823658279997, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:06:56,891][0m Trial 24 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 01:06:57,325][0m Trial 25 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.28167500361140246, 'num_leaves': 120, 'max_depth': 4, 'min_data_in_leaf': 300, 'max_bin': 255, 'lambda_l1': 60, 'lambda_l2': 10, 'min_gain_to_split': 13.32072173328007, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:06:57,755][0m Trial 26 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2479180138034475, 'num_leaves': 160, 'max_depth': 7, 'min_data_in_leaf': 400, 'max_bin': 225, 'lambda_l1': 45, 'lambda_l2': 25, 'min_gain_to_split': 7.272640026113555, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:06:58,454][0m Trial 27 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.28365851350661675, 'num_leaves': 280, 'max_depth': 5, 'min_data_in_leaf': 200, 'max_bin': 278, 'lambda_l1': 20, 'lambda_l2': 40, 'min_gain_to_split': 8.94880547511271, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:06:58,846][0m Trial 28 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2526492442097243, 'num_leaves': 100, 'max_depth': 3, 'min_data_in_leaf': 300, 'max_bin': 241, 'lambda_l1': 85, 'lambda_l2': 0, 'min_gain_to_split': 1.9062718501119518, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:06:59,238][0m Trial 29 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.20422904210012122, 'num_leaves': 40, 'max_depth': 12, 'min_data_in_leaf': 400, 'max_bin': 259, 'lambda_l1': 35, 'lambda_l2': 45, 'min_gain_to_split': 3.3832150344729324, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:06:59,530][0m Trial 30 pruned. Trial was pruned at iteration 131.[0m




[32m[I 2023-05-17 01:06:59,930][0m Trial 31 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.18622347117671936, 'num_leaves': 180, 'max_depth': 12, 'min_data_in_leaf': 400, 'max_bin': 213, 'lambda_l1': 0, 'lambda_l2': 55, 'min_gain_to_split': 3.5737021912628055, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:00,317][0m Trial 32 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.16630809894834267, 'num_leaves': 360, 'max_depth': 10, 'min_data_in_leaf': 400, 'max_bin': 272, 'lambda_l1': 10, 'lambda_l2': 50, 'min_gain_to_split': 10.078340934386238, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:00,716][0m Trial 33 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.20209069121190723, 'num_leaves': 280, 'max_depth': 11, 'min_data_in_leaf': 500, 'max_bin': 202, 'lambda_l1': 90, 'lambda_l2': 35, 'min_gain_to_split': 4.9706384013715645, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:01,135][0m Trial 34 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2624061225794501, 'num_leaves': 320, 'max_depth': 8, 'min_data_in_leaf': 400, 'max_bin': 290, 'lambda_l1': 15, 'lambda_l2': 25, 'min_gain_to_split': 1.2182415066089192, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:01,526][0m Trial 35 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.177844982670738, 'num_leaves': 360, 'max_depth': 7, 'min_data_in_leaf': 400, 'max_bin': 231, 'lambda_l1': 35, 'lambda_l2': 60, 'min_gain_to_split': 2.345948642719211, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:01,925][0m Trial 36 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.23104150140284258, 'num_leaves': 300, 'max_depth': 5, 'min_data_in_leaf': 500, 'max_bin': 263, 'lambda_l1': 100, 'lambda_l2': 10, 'min_gain_to_split': 6.262776338522217, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:02,364][0m Trial 37 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.14734396538662858, 'num_leaves': 240, 'max_depth': 8, 'min_data_in_leaf': 300, 'max_bin': 216, 'lambda_l1': 20, 'lambda_l2': 75, 'min_gain_to_split': 4.24402204204744, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:02,805][0m Trial 38 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.28649624412019303, 'num_leaves': 320, 'max_depth': 9, 'min_data_in_leaf': 400, 'max_bin': 222, 'lambda_l1': 0, 'lambda_l2': 50, 'min_gain_to_split': 8.488596962810767, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:03,242][0m Trial 39 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.25819953897829373, 'num_leaves': 160, 'max_depth': 4, 'min_data_in_leaf': 300, 'max_bin': 244, 'lambda_l1': 30, 'lambda_l2': 25, 'min_gain_to_split': 9.804596684122206, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:03,653][0m Trial 40 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2162588595530699, 'num_leaves': 380, 'max_depth': 6, 'min_data_in_leaf': 500, 'max_bin': 251, 'lambda_l1': 10, 'lambda_l2': 100, 'min_gain_to_split': 1.0759335316326513, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:04,367][0m Trial 41 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.14384797107799713, 'num_leaves': 140, 'max_depth': 5, 'min_data_in_leaf': 200, 'max_bin': 288, 'lambda_l1': 100, 'lambda_l2': 55, 'min_gain_to_split': 2.8918430602048986, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:04,814][0m Trial 42 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2911633181074196, 'num_leaves': 340, 'max_depth': 6, 'min_data_in_leaf': 300, 'max_bin': 295, 'lambda_l1': 95, 'lambda_l2': 70, 'min_gain_to_split': 14.138051734556566, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:05,513][0m Trial 43 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29918405173524776, 'num_leaves': 100, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 275, 'lambda_l1': 75, 'lambda_l2': 40, 'min_gain_to_split': 11.553824373802506, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:06,240][0m Trial 44 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2409527555302843, 'num_leaves': 160, 'max_depth': 5, 'min_data_in_leaf': 200, 'max_bin': 283, 'lambda_l1': 45, 'lambda_l2': 85, 'min_gain_to_split': 12.632261717103383, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:06,648][0m Trial 45 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.12826970799429233, 'num_leaves': 40, 'max_depth': 6, 'min_data_in_leaf': 400, 'max_bin': 266, 'lambda_l1': 60, 'lambda_l2': 35, 'min_gain_to_split': 0.03354669760368156, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:07,505][0m Trial 46 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1855165576745206, 'num_leaves': 260, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 260, 'lambda_l1': 85, 'lambda_l2': 65, 'min_gain_to_split': 10.764203155021786, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:07,956][0m Trial 47 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2633596904035487, 'num_leaves': 20, 'max_depth': 3, 'min_data_in_leaf': 300, 'max_bin': 284, 'lambda_l1': 30, 'lambda_l2': 45, 'min_gain_to_split': 14.771150358531958, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:08,408][0m Trial 48 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.15779021494405332, 'num_leaves': 60, 'max_depth': 10, 'min_data_in_leaf': 300, 'max_bin': 238, 'lambda_l1': 40, 'lambda_l2': 10, 'min_gain_to_split': 12.279557080925763, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:08,820][0m Trial 49 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.28035908533329595, 'num_leaves': 180, 'max_depth': 7, 'min_data_in_leaf': 400, 'max_bin': 275, 'lambda_l1': 75, 'lambda_l2': 55, 'min_gain_to_split': 5.265232210771593, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 14 with value: 1.4512192310455259.[0m
[32m[I 2023-05-17 01:07:08,886][0m Trial 50 pruned. Trial was pruned at iteration 0.[0m








[32m[I 2023-05-17 01:07:09,367][0m Trial 51 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.25016541318224494, 'num_leaves': 300, 'max_depth': 11, 'min_data_in_leaf': 500, 'max_bin': 263, 'lambda_l1': 95, 'lambda_l2': 0, 'min_gain_to_split': 6.7132799782826895, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:09,817][0m Trial 52 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.26665945375259115, 'num_leaves': 340, 'max_depth': 11, 'min_data_in_leaf': 500, 'max_bin': 253, 'lambda_l1': 95, 'lambda_l2': 5, 'min_gain_to_split': 7.849306879451223, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:10,284][0m Trial 53 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2375090238573491, 'num_leaves': 380, 'max_depth': 12, 'min_data_in_leaf': 500, 'max_bin': 258, 'lambda_l1': 85, 'lambda_l2': 20, 'min_gain_to_split': 5.563771886010626, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:10,764][0m Trial 54 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29160757024649464, 'num_leaves': 280, 'max_depth': 11, 'min_data_in_leaf': 500, 'max_bin': 248, 'lambda_l1': 90, 'lambda_l2': 15, 'min_gain_to_split': 13.178837521680885, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 14 with value: 1.4512192310455259.[0m




















[32m[I 2023-05-17 01:07:12,367][0m Trial 55 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2703555519792459, 'num_leaves': 380, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 235, 'lambda_l1': 70, 'lambda_l2': 20, 'min_gain_to_split': 9.473358318294206, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:12,787][0m Trial 56 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.25538954317381823, 'num_leaves': 140, 'max_depth': 5, 'min_data_in_leaf': 400, 'max_bin': 265, 'lambda_l1': 55, 'lambda_l2': 30, 'min_gain_to_split': 10.628059134632254, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:13,265][0m Trial 57 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.22356843818020536, 'num_leaves': 60, 'max_depth': 9, 'min_data_in_leaf': 300, 'max_bin': 271, 'lambda_l1': 80, 'lambda_l2': 10, 'min_gain_to_split': 11.974178572204298, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:13,664][0m Trial 58 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2775791660916343, 'num_leaves': 300, 'max_depth': 6, 'min_data_in_leaf': 400, 'max_bin': 245, 'lambda_l1': 40, 'lambda_l2': 5, 'min_gain_to_split': 7.741312831276362, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:14,472][0m Trial 59 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.24776686003643955, 'num_leaves': 220, 'max_depth': 6, 'min_data_in_leaf': 200, 'max_bin': 255, 'lambda_l1': 65, 'lambda_l2': 50, 'min_gain_to_split': 8.709064168512374, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:14,886][0m Trial 60 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.28981572866595096, 'num_leaves': 120, 'max_depth': 7, 'min_data_in_leaf': 300, 'max_bin': 294, 'lambda_l1': 60, 'lambda_l2': 25, 'min_gain_to_split': 0.5321683627091214, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:15,377][0m Trial 61 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.20922301697191498, 'num_leaves': 340, 'max_depth': 4, 'min_data_in_leaf': 400, 'max_bin': 210, 'lambda_l1': 25, 'lambda_l2': 65, 'min_gain_to_split': 8.284862728423143, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:15,795][0m Trial 62 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.23237605695038593, 'num_leaves': 320, 'max_depth': 4, 'min_data_in_leaf': 400, 'max_bin': 203, 'lambda_l1': 50, 'lambda_l2': 60, 'min_gain_to_split': 10.412023812156136, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:16,274][0m Trial 63 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.18641830999005288, 'num_leaves': 360, 'max_depth': 5, 'min_data_in_leaf': 400, 'max_bin': 209, 'lambda_l1': 45, 'lambda_l2': 45, 'min_gain_to_split': 9.208113938555824, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:16,680][0m Trial 64 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1975690768389531, 'num_leaves': 320, 'max_depth': 3, 'min_data_in_leaf': 400, 'max_bin': 221, 'lambda_l1': 35, 'lambda_l2': 65, 'min_gain_to_split': 6.897867131053812, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:17,169][0m Trial 65 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2772817345775552, 'num_leaves': 260, 'max_depth': 6, 'min_data_in_leaf': 400, 'max_bin': 217, 'lambda_l1': 40, 'lambda_l2': 85, 'min_gain_to_split': 11.085483836419906, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:17,618][0m Trial 66 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.298811570129314, 'num_leaves': 340, 'max_depth': 12, 'min_data_in_leaf': 300, 'max_bin': 231, 'lambda_l1': 30, 'lambda_l2': 40, 'min_gain_to_split': 6.067972885088144, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:18,110][0m Trial 67 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2225352420319387, 'num_leaves': 400, 'max_depth': 10, 'min_data_in_leaf': 500, 'max_bin': 228, 'lambda_l1': 55, 'lambda_l2': 70, 'min_gain_to_split': 9.673670938739704, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:19,109][0m Trial 68 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2425273592274189, 'num_leaves': 240, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 212, 'lambda_l1': 100, 'lambda_l2': 0, 'min_gain_to_split': 9.98316979923783, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:19,517][0m Trial 69 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2577423829925184, 'num_leaves': 180, 'max_depth': 5, 'min_data_in_leaf': 400, 'max_bin': 278, 'lambda_l1': 5, 'lambda_l2': 15, 'min_gain_to_split': 8.853181688848066, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:19,941][0m Trial 70 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2678830198516783, 'num_leaves': 300, 'max_depth': 4, 'min_data_in_leaf': 300, 'max_bin': 200, 'lambda_l1': 15, 'lambda_l2': 30, 'min_gain_to_split': 7.26464515089719, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:20,807][0m Trial 71 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.20810686182194332, 'num_leaves': 280, 'max_depth': 6, 'min_data_in_leaf': 200, 'max_bin': 248, 'lambda_l1': 25, 'lambda_l2': 20, 'min_gain_to_split': 10.28098702550125, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m
















[32m[I 2023-05-17 01:07:21,863][0m Trial 72 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.19443147188174578, 'num_leaves': 320, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 241, 'lambda_l1': 40, 'lambda_l2': 10, 'min_gain_to_split': 11.34113161497186, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m
















[32m[I 2023-05-17 01:07:22,873][0m Trial 73 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.16921427174772152, 'num_leaves': 360, 'max_depth': 8, 'min_data_in_leaf': 200, 'max_bin': 269, 'lambda_l1': 35, 'lambda_l2': 60, 'min_gain_to_split': 9.223361222057559, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:23,735][0m Trial 74 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.23278808510976173, 'num_leaves': 80, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 258, 'lambda_l1': 30, 'lambda_l2': 55, 'min_gain_to_split': 3.9235166880935552, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:23,802][0m Trial 75 pruned. Trial was pruned at iteration 0.[0m




[32m[I 2023-05-17 01:07:24,308][0m Trial 76 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2829849525289484, 'num_leaves': 340, 'max_depth': 5, 'min_data_in_leaf': 400, 'max_bin': 274, 'lambda_l1': 25, 'lambda_l2': 35, 'min_gain_to_split': 0.895240253241524, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:24,825][0m Trial 77 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.20843707518793952, 'num_leaves': 260, 'max_depth': 7, 'min_data_in_leaf': 300, 'max_bin': 266, 'lambda_l1': 5, 'lambda_l2': 25, 'min_gain_to_split': 1.5692201762433517, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:25,076][0m Trial 78 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 01:07:25,607][0m Trial 79 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.27343468864098897, 'num_leaves': 20, 'max_depth': 8, 'min_data_in_leaf': 500, 'max_bin': 252, 'lambda_l1': 50, 'lambda_l2': 50, 'min_gain_to_split': 4.837511345811398, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:26,066][0m Trial 80 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.26147379270268023, 'num_leaves': 120, 'max_depth': 10, 'min_data_in_leaf': 400, 'max_bin': 256, 'lambda_l1': 45, 'lambda_l2': 20, 'min_gain_to_split': 11.056876916214211, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:26,556][0m Trial 81 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.11252719654286367, 'num_leaves': 360, 'max_depth': 8, 'min_data_in_leaf': 500, 'max_bin': 291, 'lambda_l1': 10, 'lambda_l2': 90, 'min_gain_to_split': 7.907340755364761, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:27,042][0m Trial 82 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2291300428963149, 'num_leaves': 320, 'max_depth': 7, 'min_data_in_leaf': 500, 'max_bin': 297, 'lambda_l1': 10, 'lambda_l2': 75, 'min_gain_to_split': 9.4792384323756, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:27,522][0m Trial 83 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29305958363554097, 'num_leaves': 340, 'max_depth': 6, 'min_data_in_leaf': 500, 'max_bin': 279, 'lambda_l1': 90, 'lambda_l2': 75, 'min_gain_to_split': 8.557565382999531, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:27,979][0m Trial 84 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.08972157112790671, 'num_leaves': 40, 'max_depth': 8, 'min_data_in_leaf': 500, 'max_bin': 214, 'lambda_l1': 0, 'lambda_l2': 70, 'min_gain_to_split': 7.445283171617757, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:29,046][0m Trial 85 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.24983868964455114, 'num_leaves': 360, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 206, 'lambda_l1': 35, 'lambda_l2': 80, 'min_gain_to_split': 2.8641824562817693, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:29,462][0m Trial 86 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.21508664882480766, 'num_leaves': 400, 'max_depth': 3, 'min_data_in_leaf': 500, 'max_bin': 285, 'lambda_l1': 15, 'lambda_l2': 10, 'min_gain_to_split': 9.879703246967278, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:30,201][0m Trial 87 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2825202495074512, 'num_leaves': 380, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 281, 'lambda_l1': 95, 'lambda_l2': 65, 'min_gain_to_split': 8.231756848435099, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:30,710][0m Trial 88 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.23744170241181714, 'num_leaves': 320, 'max_depth': 5, 'min_data_in_leaf': 400, 'max_bin': 263, 'lambda_l1': 80, 'lambda_l2': 60, 'min_gain_to_split': 8.87954303544342, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:31,225][0m Trial 89 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.19347051406360563, 'num_leaves': 300, 'max_depth': 7, 'min_data_in_leaf': 400, 'max_bin': 290, 'lambda_l1': 35, 'lambda_l2': 30, 'min_gain_to_split': 0.12743756786930627, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:31,706][0m Trial 90 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.20310569486810823, 'num_leaves': 280, 'max_depth': 5, 'min_data_in_leaf': 300, 'max_bin': 300, 'lambda_l1': 30, 'lambda_l2': 15, 'min_gain_to_split': 10.638261404832887, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:32,212][0m Trial 91 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.27252917535729804, 'num_leaves': 140, 'max_depth': 9, 'min_data_in_leaf': 500, 'max_bin': 209, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 14.235101945276117, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:32,688][0m Trial 92 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2926561885299662, 'num_leaves': 380, 'max_depth': 9, 'min_data_in_leaf': 500, 'max_bin': 217, 'lambda_l1': 20, 'lambda_l2': 0, 'min_gain_to_split': 13.233011250458146, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:33,184][0m Trial 93 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2976507212079131, 'num_leaves': 400, 'max_depth': 8, 'min_data_in_leaf': 500, 'max_bin': 244, 'lambda_l1': 70, 'lambda_l2': 10, 'min_gain_to_split': 14.952240264939581, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:33,712][0m Trial 94 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.28194655926164724, 'num_leaves': 80, 'max_depth': 11, 'min_data_in_leaf': 500, 'max_bin': 204, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 14.325161426837806, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m












[32m[I 2023-05-17 01:07:34,250][0m Trial 95 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29020109726769827, 'num_leaves': 340, 'max_depth': 7, 'min_data_in_leaf': 500, 'max_bin': 236, 'lambda_l1': 10, 'lambda_l2': 40, 'min_gain_to_split': 13.90895609419404, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:34,748][0m Trial 96 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2862429580310557, 'num_leaves': 60, 'max_depth': 10, 'min_data_in_leaf': 500, 'max_bin': 224, 'lambda_l1': 0, 'lambda_l2': 50, 'min_gain_to_split': 6.42231802977199, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 14 with value: 1.4512192310455259.[0m








[32m[I 2023-05-17 01:07:35,219][0m Trial 97 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.29940092037037286, 'num_leaves': 220, 'max_depth': 8, 'min_data_in_leaf': 400, 'max_bin': 221, 'lambda_l1': 100, 'lambda_l2': 55, 'min_gain_to_split': 13.601961290238792, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 14 with value: 1.4512192310455259.[0m




















[32m[I 2023-05-17 01:07:36,830][0m Trial 98 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.26969925847372095, 'num_leaves': 100, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 249, 'lambda_l1': 40, 'lambda_l2': 70, 'min_gain_to_split': 12.843926169188096, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 14 with value: 1.4512192310455259.[0m




[32m[I 2023-05-17 01:07:37,235][0m Trial 99 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2547406078020643, 'num_leaves': 380, 'max_depth': 6, 'min_data_in_leaf': 400, 'max_bin': 276, 'lambda_l1': 10, 'lambda_l2': 20, 'min_gain_to_split': 14.54279834246002, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 14 with value: 1.4512192310455259.[0m




In [146]:
print(f"\tBest value (multi_logloss): {study.best_value:.5f}")
print(f"\tBest params:")

for key, value in study.best_params.items():
    print(f"\t\t{key}: {value}")

	Best value (multi_logloss): 1.45122
	Best params:
		n_estimators: 200
		learning_rate: 0.23194425799968763
		num_leaves: 100
		max_depth: 7
		min_data_in_leaf: 200
		max_bin: 248
		lambda_l1: 35
		lambda_l2: 20
		min_gain_to_split: 6.695065109567382
		bagging_fraction: 0.9
		bagging_freq: 1
		feature_fraction: 0.6000000000000001


In [147]:
best_booster

In [148]:
from sklearn.preprocessing import LabelEncoder

In [149]:
le = LabelEncoder()
smoothie_merged["category"] = le.fit_transform(smoothie_merged["category"])

In [150]:
class_weight = {
    "HOME": 0.31,
    "OTHER": 0.13,
    "SHOPPING": 0.22,
    "TRAVEL": 0.15,
    "WORK": 0.28
}
encoded_class_weights = {i: class_weight[label] for i, label in enumerate(le.classes_)}
class_weight = encoded_class_weights

In [151]:
pipe_rf = make_pipeline(
    preprocessor,
    RandomForestClassifier(n_jobs=-1, random_state=42)
)
pipe_rf.fit(X_train, y_train);

In [152]:
rf_param_grid = {
    "randomforestclassifier__n_estimators": [25, 50, 100, 150],
    "randomforestclassifier__max_features": ["sqrt", "log2", None],
    "randomforestclassifier__max_depth": [5, 10, 20, 50],
    "randomforestclassifier__max_leaf_nodes": [30, 50, 70],
    "randomforestclassifier__class_weight": ["balanced", class_weight]
}
rf_grid_search = GridSearchCV(
    pipe_rf, rf_param_grid, cv=5, n_jobs=-1, return_train_score=True
)
rf_grid_search.fit(X_train, y_train);

720 fits failed out of a total of 1440.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
720 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/clin404/miniconda3/envs/575/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/clin404/miniconda3/envs/575/lib/python3.10/site-packages/sklearn/pipeline.py", line 405, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/Users/clin404/miniconda3/envs/575/lib/python3.10/site-packages/sklearn/ensemble/_forest.py", line 385, in fit
    y, expanded_class_weight = self._validate_y_class_weight(y)
  File "/Users/clin404/miniconda3/en

In [157]:
rf_tuned = rf_grid_search.best_estimator_
best_rf = rf_tuned.named_steps["randomforestclassifier"]
best_rf

In [154]:
def evaluate_models(X_train, y_train,
		            X_test, y_test,
		    		rfe_estimator, 
		    		rfe_num_features_start, rfe_num_features_end, rfe_num_features_step, 
				    pipe_model):
	
	for i in range(rfe_num_features_start, rfe_num_features_end+1, rfe_num_features_step):
		rfe = RFE(estimator=rfe_estimator, n_features_to_select=i)
		pipe_rfe_model = make_pipeline(preprocessor, 
			                           rfe, 
				                       pipe_model)
		
		pipe_rfe_model.fit(X_train, y_train)
		
		print('---- RFE with ', i, ' features selected ----')
		print(pipe_rfe_model.score(X_train, y_train))
		print(pipe_rfe_model.score(X_test, y_test))	

In [158]:
evaluate_models(X_train, y_train,
		        X_test, y_test,
		    	best_booster, 
		    	10, 50, 2, 
				rf_tuned)

ValueError: Specifying the columns using strings is only supported for pandas DataFrames