In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler, LabelEncoder
from sklearn.compose import make_column_transformer
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from lightgbm.sklearn import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier

from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

from sklearn.feature_selection import RFE

import warnings
warnings.filterwarnings("ignore")

In [2]:
DIR = "../../data/"
SMOOTHIE = "Smoothie King/"

smoothie_demographic = pd.read_csv(DIR + SMOOTHIE + "processed_demographic.csv")
smoothie_stores = pd.read_csv(DIR + SMOOTHIE + "smoothie_king_stores.csv")
smoothie_poi_variables = pd.read_csv(DIR + SMOOTHIE + "processed_poi.csv")
# smoothie_sister = pd.read_csv(DIR + SMOOTHIE + "competition_sister_variables.csv")
smoothie_trade_area = pd.read_csv(DIR + SMOOTHIE + "processed_trade_area.csv").rename(columns={"store_num": "store"})

In [3]:
smoothie_merged = smoothie_stores.merge(
    smoothie_demographic, on="store", how="outer"
).merge(
    smoothie_poi_variables, on="store", how="outer"
).merge(
    smoothie_trade_area, on="store", how="outer"
)
smoothie_merged

Unnamed: 0,store,longitude,latitude,category,cbsa_name,dma_name,state_name,market_size,store_density,age0018_p_10mi,...,popgrfy_ta,popgrpy_ta,poverty_inpoverty_p_ta,spend_breakfastbrunch_ta,spend_dinner_ta,spend_foodbev_ta,spend_lunch_ta,wealth_hhavg_ta,wealth_hhtotal_ta,white_p_ta
0,SK 1504,-97.650392,30.519353,SHOPPING,"Austin-Round Rock, TX","Austin, TX",Texas,Large Metro (2),Light Suburban,0.2805,...,8.3789,3.9235,0.0611,7069439.0,40790484.0,230383651.0,23166216.0,240573.0,25223.0,0.4897
1,SK 0057,-88.171150,30.672501,SHOPPING,"Mobile, AL","Mobile et al, AL-FL",Alabama,Medium City (4),Light Suburban,0.2264,...,0.6017,0.3932,0.1830,4724526.0,25460067.0,160135521.0,14653701.0,217054.0,22216.0,0.5129
2,SK 1415,-90.535722,38.784250,HOME,"St. Louis, MO-IL","St. Louis, MO",Missouri,Very Large Metro (1),Light Suburban,0.2129,...,2.5003,0.7142,0.0639,4501211.0,24794631.0,151609187.0,14129014.0,245860.0,19907.0,0.8459
3,SK 1231,-80.134700,26.100737,TRAVEL,"Miami-Fort Lauderdale et al, FL","Miami-Ft. Lauderdale, FL",Florida,Very Large Metro (1),Suburban,0.2174,...,7.6482,12.7188,0.0831,4038906.0,23214366.0,146417939.0,12859709.0,255812.0,22124.0,0.7020
4,SK 1535,-96.856651,32.996408,WORK,"Dallas-Fort Worth-Arlington, TX","Dallas-Ft. Worth, TX",Texas,Very Large Metro (1),Light Suburban,0.2620,...,2.1871,-5.7794,0.0800,5919218.0,32751951.0,207616741.0,18696840.0,216763.0,29251.0,0.4593
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
791,SK 1536,-96.872596,32.647809,HOME,"Dallas-Fort Worth-Arlington, TX","Dallas-Ft. Worth, TX",Texas,Very Large Metro (1),Exurban,0.2664,...,2.0540,-0.2994,0.1833,7614977.0,40037412.0,264008408.0,23332500.0,199437.0,29430.0,0.1490
792,SK 1886,-105.077634,40.564695,OTHER,"Fort Collins, CO","Denver, CO",Colorado,Medium City (4),Light Suburban,0.2224,...,4.7691,2.3442,0.2202,5118356.0,28458285.0,197532602.0,16637181.0,216659.0,25712.0,0.7874
793,SK 0162,-95.478001,30.316531,SHOPPING,"Houston-The Woodlands et al, TX","Houston, TX",Texas,Very Large Metro (1),Exurban,0.2412,...,9.9822,8.1691,0.1202,4677280.0,25445615.0,161088758.0,14627532.0,216855.0,19407.0,0.5041
794,SK 1449,-78.968258,35.064994,SHOPPING,"Fayetteville, NC","Raleigh et al, NC",North Carolina,Medium City (4),Exurban,0.2614,...,-0.3816,-1.0761,0.1809,10318478.0,54577513.0,349542567.0,31683602.0,208448.0,41681.0,0.3933


In [4]:
train_df, test_df = train_test_split(smoothie_merged, test_size=0.1, random_state=42)
X_train = train_df.drop(columns=["category"])
y_train = train_df["category"]
X_test = test_df.drop(columns=["category"])
y_test = test_df["category"]

In [5]:
drop_features = ['store',
 'longitude',
 'latitude',
 'cbsa_name',
 'dma_name',
 'state_name',
 ]

passthrough_features = []

ordinal_features_oth = [
    "market_size",
    "store_density",
]
ordering_ordinal_oth = [
    ["Very Large Metro (1)", "Large Metro (2)", "Large City (3)", "Medium City (4)", "Small City (5)", "Small Town (6)"],
    ["Rural", "Exurban", "Suburban", "Light Suburban", "Light Urban", "Urban", "Super Urban"],
]
numeric_features = list(set(smoothie_merged.select_dtypes(include=np.number).columns.tolist()) - {"longitude", "latitude"})

In [6]:
numeric_transformer = make_pipeline(
    SimpleImputer(strategy="median"), 
    StandardScaler()
)

ordinal_transformer_oth = make_pipeline(
    SimpleImputer(strategy="most_frequent"),
    OrdinalEncoder(categories=ordering_ordinal_oth),
)

preprocessor = make_column_transformer(
    ("drop", drop_features),
    (numeric_transformer, numeric_features),
    (ordinal_transformer_oth, ordinal_features_oth),
    ("passthrough", passthrough_features),
)

In [7]:
preprocessor.fit(X_train)

In [8]:
column_names = (
    numeric_features
    + preprocessor.named_transformers_['pipeline-2']['ordinalencoder'].get_feature_names_out().tolist()
)
len(column_names)

920

In [9]:
transformed_X_train = preprocessor.transform(X_train)

In [10]:
transformed_X_train_df = pd.DataFrame(transformed_X_train, columns=column_names)
transformed_X_train_df

Unnamed: 0,gq_other_p_3mi,emp_military_10mi,hh_2pers_p_10mi,hu_vacant_5mi,com0205_p_1mi,other_p_ta,genx_p_10mi,com0002_p_10mi,emp_retail_trade_p_5mi,emp_educ_services_p_10mi,...,dtpop_students_p_ta,pop_migration_10mi,com12pl_p_ta,hh_type_male_child_p_5mi,medsalcy_ta,occhu_2mi,percapita_inc_3mi,asian_p_3mi,x0,x1
0,3.531649,-0.260143,0.069611,-0.619462,-0.823084,0.261417,1.121923,-0.775356,-0.180202,0.170414,...,0.589739,1.036752,0.864670,0.747918,0.784573,-0.479087,0.486845,-0.354294,1.0,0.0
1,0.045627,-0.071340,-0.347581,0.276204,0.286615,-0.034546,-0.212734,-0.425355,-0.694044,-0.915408,...,-0.755550,0.433997,-0.303647,-0.924081,0.730122,0.872324,1.025312,-0.095207,0.0,3.0
2,2.314058,-0.260143,-1.344205,-0.616595,-1.608248,-0.004950,0.133288,-1.633164,-1.089913,0.913859,...,0.139076,1.261008,2.269661,-1.324808,0.035313,-0.156323,0.652047,4.134616,0.0,1.0
3,-0.068669,-0.228676,-1.662066,-0.414359,-0.593641,-0.315711,-0.181839,-1.449059,-0.796288,0.030203,...,2.698979,-0.575687,1.114112,0.540646,1.531321,-0.012966,-0.436286,-0.594188,0.0,3.0
4,0.531384,-0.139996,-0.675374,-0.547670,-0.167035,-0.789252,-0.880063,0.503262,-0.961452,0.574744,...,-0.494199,0.202209,0.396340,-0.606263,0.315890,-0.372332,1.044339,0.048729,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711,0.377426,-0.260143,-0.291293,1.045726,0.445393,-0.278716,1.702746,0.072335,-1.441213,-1.084965,...,0.802506,0.534032,-1.148648,-0.633899,0.683691,1.258779,2.252722,-0.446413,0.0,2.0
712,-0.665310,0.226167,-0.086009,-0.465605,0.249102,-0.559880,-0.039723,0.199793,1.128000,-0.762153,...,-0.100496,-0.173001,-0.718167,0.167555,-1.458898,-0.360386,0.134349,0.327007,3.0,1.0
713,0.855080,-0.260143,-0.874037,-0.370759,-1.548052,0.305811,1.912831,-1.716112,0.278586,1.491007,...,2.129367,1.656586,2.385033,-0.067354,0.817108,-0.236059,0.121644,-0.310153,0.0,1.0
714,-0.272951,-0.260143,-0.314471,-0.247602,1.863051,-1.551356,-0.138587,-0.018705,-1.328482,0.189978,...,-0.515978,-0.146693,-1.266301,-0.550990,-0.703299,-0.274677,-0.631956,-0.822568,2.0,1.0


In [11]:
import optuna
from optuna.integration import LightGBMPruningCallback

In [12]:
best_booster = None
lgbm = None

In [13]:
def objective(trial, X, y):
    global lgbm
    param_grid = {
        #         "device_type": trial.suggest_categorical("device_type", ['gpu']),
        "n_estimators": trial.suggest_categorical("n_estimators", [200]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 20, 400, step=20),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 500, step=100),
        "max_bin": trial.suggest_int("max_bin", 200, 300),
        "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        "bagging_fraction": trial.suggest_float(
            "bagging_fraction", 0.2, 0.95, step=0.1
        ),
        "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
        "feature_fraction": trial.suggest_float(
            "feature_fraction", 0.2, 0.95, step=0.1
        ),
    }
    
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

    cv_scores = np.empty(5)
    for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):
        # print(idx)
        # print(train_idx)
        # print(test_idx)
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        # print('X_train shape: ', X_train.shape)
        # print('X_test shape: ', X_test.shape)
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
        # print('y_train shape: ', y_train.shape)
        # print('y_test shape: ', y_test.shape)

        lgbm = LGBMClassifier(objective="multiclass", **param_grid)
        lgbm.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            eval_metric="multi_logloss",
            early_stopping_rounds=100,
            callbacks=[
                LightGBMPruningCallback(trial, "multi_logloss")
            ],  # Add a pruning callback
        )
        preds = lgbm.predict_proba(X_test)
        cv_scores[idx] = log_loss(y_test, preds)

    return np.mean(cv_scores)

In [14]:
def callback(study, trial):
    global best_booster
    if study.best_trial == trial:
        best_booster = lgbm

In [15]:
study = optuna.create_study(direction="minimize", study_name="LGBM Classifier")
func = lambda trial: objective(trial, transformed_X_train_df, y_train)
study.optimize(func, n_trials=100, callbacks=[callback])

[32m[I 2023-05-17 11:54:17,549][0m A new study created in memory with name: LGBM Classifier[0m




[32m[I 2023-05-17 11:54:17,955][0m Trial 0 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.0414085392272303, 'num_leaves': 180, 'max_depth': 3, 'min_data_in_leaf': 300, 'max_bin': 210, 'lambda_l1': 95, 'lambda_l2': 85, 'min_gain_to_split': 14.849322381913252, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:18,251][0m Trial 1 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.030150295512012225, 'num_leaves': 240, 'max_depth': 5, 'min_data_in_leaf': 500, 'max_bin': 231, 'lambda_l1': 15, 'lambda_l2': 70, 'min_gain_to_split': 5.4202395139428745, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:18,852][0m Trial 2 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.052205669669439014, 'num_leaves': 280, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 220, 'lambda_l1': 25, 'lambda_l2': 95, 'min_gain_to_split': 2.025251889273854, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:19,137][0m Trial 3 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1898749245730347, 'num_leaves': 360, 'max_depth': 10, 'min_data_in_leaf': 300, 'max_bin': 225, 'lambda_l1': 50, 'lambda_l2': 90, 'min_gain_to_split': 2.5958792892478497, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:19,465][0m Trial 4 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.15045473146488267, 'num_leaves': 100, 'max_depth': 3, 'min_data_in_leaf': 400, 'max_bin': 295, 'lambda_l1': 20, 'lambda_l2': 60, 'min_gain_to_split': 2.7738156028618604, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:19,788][0m Trial 5 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.12716494013732393, 'num_leaves': 180, 'max_depth': 5, 'min_data_in_leaf': 400, 'max_bin': 244, 'lambda_l1': 65, 'lambda_l2': 95, 'min_gain_to_split': 5.806713861567888, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:20,154][0m Trial 6 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.09137604682351338, 'num_leaves': 100, 'max_depth': 7, 'min_data_in_leaf': 300, 'max_bin': 242, 'lambda_l1': 70, 'lambda_l2': 65, 'min_gain_to_split': 7.783948388363343, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:20,476][0m Trial 7 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.17801490443516665, 'num_leaves': 260, 'max_depth': 11, 'min_data_in_leaf': 400, 'max_bin': 222, 'lambda_l1': 35, 'lambda_l2': 15, 'min_gain_to_split': 13.41929107879192, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:20,842][0m Trial 8 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.026980199050035184, 'num_leaves': 60, 'max_depth': 9, 'min_data_in_leaf': 400, 'max_bin': 280, 'lambda_l1': 35, 'lambda_l2': 80, 'min_gain_to_split': 5.2024808084574845, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:21,166][0m Trial 9 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.11371394333290695, 'num_leaves': 40, 'max_depth': 5, 'min_data_in_leaf': 300, 'max_bin': 264, 'lambda_l1': 55, 'lambda_l2': 20, 'min_gain_to_split': 2.6387872695163357, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:21,743][0m Trial 10 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.2734029254979041, 'num_leaves': 160, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 202, 'lambda_l1': 100, 'lambda_l2': 40, 'min_gain_to_split': 14.60808539862417, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:22,081][0m Trial 11 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.013763256853803592, 'num_leaves': 260, 'max_depth': 5, 'min_data_in_leaf': 500, 'max_bin': 200, 'lambda_l1': 0, 'lambda_l2': 75, 'min_gain_to_split': 11.222885216095191, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:22,464][0m Trial 12 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.06223234735106899, 'num_leaves': 360, 'max_depth': 4, 'min_data_in_leaf': 500, 'max_bin': 231, 'lambda_l1': 100, 'lambda_l2': 45, 'min_gain_to_split': 10.086421330601201, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:22,802][0m Trial 13 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.07016696584022424, 'num_leaves': 220, 'max_depth': 6, 'min_data_in_leaf': 500, 'max_bin': 259, 'lambda_l1': 80, 'lambda_l2': 75, 'min_gain_to_split': 8.703502430028898, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:23,184][0m Trial 14 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.010316515224074632, 'num_leaves': 140, 'max_depth': 3, 'min_data_in_leaf': 300, 'max_bin': 211, 'lambda_l1': 5, 'lambda_l2': 55, 'min_gain_to_split': 12.49879239165553, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:23,966][0m Trial 15 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.045533923559022214, 'num_leaves': 320, 'max_depth': 8, 'min_data_in_leaf': 200, 'max_bin': 236, 'lambda_l1': 85, 'lambda_l2': 35, 'min_gain_to_split': 14.58301869734088, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:24,327][0m Trial 16 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.0854059261930733, 'num_leaves': 200, 'max_depth': 4, 'min_data_in_leaf': 500, 'max_bin': 211, 'lambda_l1': 15, 'lambda_l2': 0, 'min_gain_to_split': 9.999789591113341, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:24,759][0m Trial 17 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.041974005631691236, 'num_leaves': 240, 'max_depth': 6, 'min_data_in_leaf': 400, 'max_bin': 254, 'lambda_l1': 45, 'lambda_l2': 85, 'min_gain_to_split': 6.382044499720114, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:25,168][0m Trial 18 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.08860887457135687, 'num_leaves': 400, 'max_depth': 6, 'min_data_in_leaf': 300, 'max_bin': 211, 'lambda_l1': 90, 'lambda_l2': 70, 'min_gain_to_split': 0.07203895711795738, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:26,319][0m Trial 19 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.03220027920238086, 'num_leaves': 300, 'max_depth': 12, 'min_data_in_leaf': 200, 'max_bin': 276, 'lambda_l1': 70, 'lambda_l2': 60, 'min_gain_to_split': 12.006622965548136, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:26,701][0m Trial 20 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.056803435574319, 'num_leaves': 120, 'max_depth': 4, 'min_data_in_leaf': 500, 'max_bin': 235, 'lambda_l1': 10, 'lambda_l2': 100, 'min_gain_to_split': 7.783323799356963, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:27,553][0m Trial 21 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.04495615931944835, 'num_leaves': 300, 'max_depth': 8, 'min_data_in_leaf': 200, 'max_bin': 219, 'lambda_l1': 25, 'lambda_l2': 100, 'min_gain_to_split': 4.177739105442212, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:28,333][0m Trial 22 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.015040000231696103, 'num_leaves': 200, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 218, 'lambda_l1': 35, 'lambda_l2': 85, 'min_gain_to_split': 4.329052920211943, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:28,728][0m Trial 23 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.07154915835052529, 'num_leaves': 260, 'max_depth': 5, 'min_data_in_leaf': 300, 'max_bin': 227, 'lambda_l1': 25, 'lambda_l2': 90, 'min_gain_to_split': 6.777608823604574, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:29,351][0m Trial 24 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.039217009360093855, 'num_leaves': 220, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 208, 'lambda_l1': 40, 'lambda_l2': 80, 'min_gain_to_split': 9.343560890948911, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:29,747][0m Trial 25 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.01061410564485665, 'num_leaves': 300, 'max_depth': 7, 'min_data_in_leaf': 300, 'max_bin': 242, 'lambda_l1': 25, 'lambda_l2': 70, 'min_gain_to_split': 10.936275053725605, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:30,145][0m Trial 26 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.06012033287825998, 'num_leaves': 160, 'max_depth': 6, 'min_data_in_leaf': 400, 'max_bin': 217, 'lambda_l1': 60, 'lambda_l2': 90, 'min_gain_to_split': 8.138734660414155, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:31,004][0m Trial 27 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.10472098065029786, 'num_leaves': 340, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 230, 'lambda_l1': 5, 'lambda_l2': 100, 'min_gain_to_split': 6.5865111548338895, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:31,397][0m Trial 28 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.07870805459455645, 'num_leaves': 280, 'max_depth': 3, 'min_data_in_leaf': 300, 'max_bin': 250, 'lambda_l1': 15, 'lambda_l2': 50, 'min_gain_to_split': 8.667802148362526, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 1.4718720941213552.[0m




[32m[I 2023-05-17 11:54:33,487][0m Trial 29 finished with value: 1.4570103740538678 and parameters: {'n_estimators': 200, 'learning_rate': 0.05513242181197879, 'num_leaves': 240, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 224, 'lambda_l1': 50, 'lambda_l2': 85, 'min_gain_to_split': 1.2630345632871307, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:34,737][0m Trial 30 finished with value: 1.4718320480900926 and parameters: {'n_estimators': 200, 'learning_rate': 0.057176191491686554, 'num_leaves': 180, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 206, 'lambda_l1': 75, 'lambda_l2': 65, 'min_gain_to_split': 1.003034467734408, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:35,874][0m Trial 31 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.032401619897783285, 'num_leaves': 180, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 205, 'lambda_l1': 80, 'lambda_l2': 65, 'min_gain_to_split': 0.7297888111965705, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:36,978][0m Trial 32 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.058533240320331525, 'num_leaves': 220, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 217, 'lambda_l1': 95, 'lambda_l2': 75, 'min_gain_to_split': 1.6447645128810395, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:37,340][0m Trial 33 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.05329718125526253, 'num_leaves': 180, 'max_depth': 4, 'min_data_in_leaf': 300, 'max_bin': 225, 'lambda_l1': 75, 'lambda_l2': 85, 'min_gain_to_split': 1.6941200210024108, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:37,567][0m Trial 34 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 11:54:37,937][0m Trial 35 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.10010660393528248, 'num_leaves': 140, 'max_depth': 3, 'min_data_in_leaf': 400, 'max_bin': 205, 'lambda_l1': 90, 'lambda_l2': 65, 'min_gain_to_split': 1.2414802758877344, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:38,290][0m Trial 36 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1212526389096409, 'num_leaves': 80, 'max_depth': 5, 'min_data_in_leaf': 300, 'max_bin': 224, 'lambda_l1': 70, 'lambda_l2': 80, 'min_gain_to_split': 2.8107251643583, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:39,428][0m Trial 37 finished with value: 1.4714409654411962 and parameters: {'n_estimators': 200, 'learning_rate': 0.1346054310039032, 'num_leaves': 240, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 238, 'lambda_l1': 60, 'lambda_l2': 95, 'min_gain_to_split': 0.027138594322444476, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:40,308][0m Trial 38 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.07396438335051342, 'num_leaves': 240, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 235, 'lambda_l1': 60, 'lambda_l2': 95, 'min_gain_to_split': 0.8158543151444011, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:40,914][0m Trial 39 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.14540796217955015, 'num_leaves': 160, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 248, 'lambda_l1': 60, 'lambda_l2': 90, 'min_gain_to_split': 0.19856949298689808, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:41,827][0m Trial 40 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.09543997941674882, 'num_leaves': 120, 'max_depth': 5, 'min_data_in_leaf': 200, 'max_bin': 299, 'lambda_l1': 50, 'lambda_l2': 95, 'min_gain_to_split': 2.3042677892313526, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:42,893][0m Trial 41 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.13583575663283584, 'num_leaves': 200, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 244, 'lambda_l1': 65, 'lambda_l2': 70, 'min_gain_to_split': 1.4892326593168788, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:43,244][0m Trial 42 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1676529400393993, 'num_leaves': 280, 'max_depth': 3, 'min_data_in_leaf': 400, 'max_bin': 230, 'lambda_l1': 75, 'lambda_l2': 80, 'min_gain_to_split': 2.0811100053231746, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:43,603][0m Trial 43 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.11000475077063317, 'num_leaves': 260, 'max_depth': 5, 'min_data_in_leaf': 300, 'max_bin': 239, 'lambda_l1': 55, 'lambda_l2': 85, 'min_gain_to_split': 0.04443381573039096, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:45,141][0m Trial 44 finished with value: 1.4714605548556976 and parameters: {'n_estimators': 200, 'learning_rate': 0.02097115173215184, 'num_leaves': 180, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 200, 'lambda_l1': 45, 'lambda_l2': 60, 'min_gain_to_split': 5.517908400726741, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:45,388][0m Trial 45 pruned. Trial was pruned at iteration 101.[0m
[32m[I 2023-05-17 11:54:45,570][0m Trial 46 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 11:54:46,831][0m Trial 47 finished with value: 1.4715000108693077 and parameters: {'n_estimators': 200, 'learning_rate': 0.04881475419547232, 'num_leaves': 20, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 212, 'lambda_l1': 55, 'lambda_l2': 50, 'min_gain_to_split': 5.366637835804582, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:47,065][0m Trial 48 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 11:54:48,181][0m Trial 49 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.06588360866699566, 'num_leaves': 80, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 222, 'lambda_l1': 65, 'lambda_l2': 55, 'min_gain_to_split': 2.9906619269521144, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:48,429][0m Trial 50 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 11:54:49,601][0m Trial 51 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.04091120468791162, 'num_leaves': 220, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 208, 'lambda_l1': 55, 'lambda_l2': 50, 'min_gain_to_split': 7.191284771784511, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:50,733][0m Trial 52 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.06464020547755518, 'num_leaves': 140, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 214, 'lambda_l1': 65, 'lambda_l2': 60, 'min_gain_to_split': 5.034267853471068, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:51,350][0m Trial 53 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.08895121408803447, 'num_leaves': 200, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 209, 'lambda_l1': 40, 'lambda_l2': 75, 'min_gain_to_split': 6.086844843568123, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:51,711][0m Trial 54 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.05031063851224056, 'num_leaves': 240, 'max_depth': 10, 'min_data_in_leaf': 300, 'max_bin': 200, 'lambda_l1': 85, 'lambda_l2': 45, 'min_gain_to_split': 5.731633780239444, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:51,964][0m Trial 55 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 11:54:52,821][0m Trial 56 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.034480441932193746, 'num_leaves': 260, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 215, 'lambda_l1': 50, 'lambda_l2': 65, 'min_gain_to_split': 5.564304310207572, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:53,942][0m Trial 57 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.07539662923439651, 'num_leaves': 120, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 274, 'lambda_l1': 100, 'lambda_l2': 25, 'min_gain_to_split': 4.356964179976041, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:54,301][0m Trial 58 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.0190777339223173, 'num_leaves': 180, 'max_depth': 6, 'min_data_in_leaf': 300, 'max_bin': 210, 'lambda_l1': 75, 'lambda_l2': 50, 'min_gain_to_split': 2.469481999609751, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:55,956][0m Trial 59 finished with value: 1.4701891138423286 and parameters: {'n_estimators': 200, 'learning_rate': 0.04167377028619196, 'num_leaves': 400, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 228, 'lambda_l1': 55, 'lambda_l2': 5, 'min_gain_to_split': 3.7519881314127685, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:56,195][0m Trial 60 pruned. Trial was pruned at iteration 101.[0m
[32m[I 2023-05-17 11:54:56,439][0m Trial 61 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 11:54:57,059][0m Trial 62 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.029019687583017144, 'num_leaves': 380, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 232, 'lambda_l1': 60, 'lambda_l2': 10, 'min_gain_to_split': 6.131897308807414, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:57,922][0m Trial 63 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.058629461416398757, 'num_leaves': 360, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 238, 'lambda_l1': 50, 'lambda_l2': 25, 'min_gain_to_split': 13.43771475655338, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:59,053][0m Trial 64 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.07028087173692922, 'num_leaves': 280, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 224, 'lambda_l1': 70, 'lambda_l2': 15, 'min_gain_to_split': 3.6482850729274228, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:54:59,411][0m Trial 65 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.040481204181831265, 'num_leaves': 320, 'max_depth': 5, 'min_data_in_leaf': 300, 'max_bin': 206, 'lambda_l1': 95, 'lambda_l2': 0, 'min_gain_to_split': 0.6436914496822373, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m
[32m[I 2023-05-17 11:54:59,469][0m Trial 66 pruned. Trial was pruned at iteration 0.[0m




[32m[I 2023-05-17 11:55:00,104][0m Trial 67 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.055010560870966424, 'num_leaves': 380, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 203, 'lambda_l1': 40, 'lambda_l2': 70, 'min_gain_to_split': 1.4137879661248147, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:01,240][0m Trial 68 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.03229747910659733, 'num_leaves': 100, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 217, 'lambda_l1': 80, 'lambda_l2': 35, 'min_gain_to_split': 4.757522080157985, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:01,602][0m Trial 69 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.08642151169938543, 'num_leaves': 220, 'max_depth': 4, 'min_data_in_leaf': 300, 'max_bin': 233, 'lambda_l1': 35, 'lambda_l2': 85, 'min_gain_to_split': 0.42492544072222527, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:02,683][0m Trial 70 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.011280718360971557, 'num_leaves': 20, 'max_depth': 5, 'min_data_in_leaf': 200, 'max_bin': 226, 'lambda_l1': 60, 'lambda_l2': 80, 'min_gain_to_split': 3.044147304986648, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:03,043][0m Trial 71 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.026875419615751447, 'num_leaves': 200, 'max_depth': 4, 'min_data_in_leaf': 500, 'max_bin': 240, 'lambda_l1': 0, 'lambda_l2': 75, 'min_gain_to_split': 6.688335708291042, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:03,412][0m Trial 72 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.03885790088719646, 'num_leaves': 240, 'max_depth': 12, 'min_data_in_leaf': 400, 'max_bin': 260, 'lambda_l1': 85, 'lambda_l2': 55, 'min_gain_to_split': 3.851374165805942, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:03,774][0m Trial 73 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.06168524841458493, 'num_leaves': 160, 'max_depth': 5, 'min_data_in_leaf': 500, 'max_bin': 211, 'lambda_l1': 15, 'lambda_l2': 60, 'min_gain_to_split': 5.49082174361393, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:04,175][0m Trial 74 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.047791342177540067, 'num_leaves': 180, 'max_depth': 3, 'min_data_in_leaf': 400, 'max_bin': 246, 'lambda_l1': 65, 'lambda_l2': 65, 'min_gain_to_split': 4.28881672043029, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:04,446][0m Trial 75 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2023-05-17 11:55:04,828][0m Trial 76 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.06671523100601197, 'num_leaves': 300, 'max_depth': 4, 'min_data_in_leaf': 300, 'max_bin': 255, 'lambda_l1': 55, 'lambda_l2': 75, 'min_gain_to_split': 0.4227947576933973, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:05,194][0m Trial 77 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.034645885658252086, 'num_leaves': 60, 'max_depth': 4, 'min_data_in_leaf': 400, 'max_bin': 236, 'lambda_l1': 20, 'lambda_l2': 70, 'min_gain_to_split': 1.1409787458701464, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:05,553][0m Trial 78 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.07757688159817223, 'num_leaves': 240, 'max_depth': 3, 'min_data_in_leaf': 500, 'max_bin': 212, 'lambda_l1': 95, 'lambda_l2': 80, 'min_gain_to_split': 5.157525706793674, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:06,402][0m Trial 79 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.05561974656264011, 'num_leaves': 260, 'max_depth': 8, 'min_data_in_leaf': 200, 'max_bin': 204, 'lambda_l1': 50, 'lambda_l2': 65, 'min_gain_to_split': 4.653576790247435, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:07,329][0m Trial 80 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.09642406506497608, 'num_leaves': 280, 'max_depth': 5, 'min_data_in_leaf': 200, 'max_bin': 223, 'lambda_l1': 70, 'lambda_l2': 85, 'min_gain_to_split': 3.573998930233788e-05, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:08,107][0m Trial 81 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.045454166215239136, 'num_leaves': 200, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 219, 'lambda_l1': 20, 'lambda_l2': 95, 'min_gain_to_split': 3.1405440192387966, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:08,925][0m Trial 82 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.024777850454118396, 'num_leaves': 220, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 216, 'lambda_l1': 10, 'lambda_l2': 95, 'min_gain_to_split': 3.9956410598321948, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:09,787][0m Trial 83 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.037449276895614364, 'num_leaves': 320, 'max_depth': 11, 'min_data_in_leaf': 200, 'max_bin': 233, 'lambda_l1': 30, 'lambda_l2': 60, 'min_gain_to_split': 1.630053230508913, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:10,439][0m Trial 84 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.050262328230340145, 'num_leaves': 260, 'max_depth': 5, 'min_data_in_leaf': 200, 'max_bin': 229, 'lambda_l1': 10, 'lambda_l2': 100, 'min_gain_to_split': 2.629822253568933, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:11,070][0m Trial 85 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.06367604044068251, 'num_leaves': 240, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 208, 'lambda_l1': 5, 'lambda_l2': 90, 'min_gain_to_split': 5.869962459616556, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:11,927][0m Trial 86 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.07220853455948073, 'num_leaves': 300, 'max_depth': 9, 'min_data_in_leaf': 200, 'max_bin': 221, 'lambda_l1': 60, 'lambda_l2': 100, 'min_gain_to_split': 2.1805558952511612, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:12,335][0m Trial 87 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.0810754861567102, 'num_leaves': 340, 'max_depth': 7, 'min_data_in_leaf': 300, 'max_bin': 201, 'lambda_l1': 90, 'lambda_l2': 85, 'min_gain_to_split': 1.2307680035911792, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:13,505][0m Trial 88 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.026536791831994026, 'num_leaves': 160, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 226, 'lambda_l1': 75, 'lambda_l2': 80, 'min_gain_to_split': 3.5222081907147658, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:14,778][0m Trial 89 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.05486642271774857, 'num_leaves': 280, 'max_depth': 6, 'min_data_in_leaf': 200, 'max_bin': 207, 'lambda_l1': 50, 'lambda_l2': 95, 'min_gain_to_split': 4.574487027833643, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:15,391][0m Trial 90 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.044681233088065084, 'num_leaves': 200, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 213, 'lambda_l1': 65, 'lambda_l2': 70, 'min_gain_to_split': 0.7739078621763833, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:15,761][0m Trial 91 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.1873206621449973, 'num_leaves': 400, 'max_depth': 10, 'min_data_in_leaf': 300, 'max_bin': 223, 'lambda_l1': 45, 'lambda_l2': 90, 'min_gain_to_split': 1.874802852948838, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:16,124][0m Trial 92 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.016660413806372974, 'num_leaves': 380, 'max_depth': 8, 'min_data_in_leaf': 300, 'max_bin': 217, 'lambda_l1': 55, 'lambda_l2': 95, 'min_gain_to_split': 2.73559177677698, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:16,489][0m Trial 93 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.02973480959360299, 'num_leaves': 180, 'max_depth': 10, 'min_data_in_leaf': 300, 'max_bin': 231, 'lambda_l1': 40, 'lambda_l2': 85, 'min_gain_to_split': 6.417670159985854, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:16,863][0m Trial 94 finished with value: 1.4718720941213552 and parameters: {'n_estimators': 200, 'learning_rate': 0.03794178944366624, 'num_leaves': 360, 'max_depth': 11, 'min_data_in_leaf': 400, 'max_bin': 219, 'lambda_l1': 50, 'lambda_l2': 55, 'min_gain_to_split': 3.2733005702245865, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 29 with value: 1.4570103740538678.[0m




[32m[I 2023-05-17 11:55:19,046][0m Trial 95 finished with value: 1.4525581067058875 and parameters: {'n_estimators': 200, 'learning_rate': 0.05808046623123695, 'num_leaves': 380, 'max_depth': 3, 'min_data_in_leaf': 200, 'max_bin': 237, 'lambda_l1': 35, 'lambda_l2': 90, 'min_gain_to_split': 2.3189034978614576, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 95 with value: 1.4525581067058875.[0m




[32m[I 2023-05-17 11:55:19,288][0m Trial 96 pruned. Trial was pruned at iteration 101.[0m
[32m[I 2023-05-17 11:55:19,350][0m Trial 97 pruned. Trial was pruned at iteration 0.[0m




[32m[I 2023-05-17 11:55:21,048][0m Trial 98 finished with value: 1.4324071573871202 and parameters: {'n_estimators': 200, 'learning_rate': 0.054649202445685975, 'num_leaves': 220, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 244, 'lambda_l1': 20, 'lambda_l2': 50, 'min_gain_to_split': 2.230443979234765, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 98 with value: 1.4324071573871202.[0m




[32m[I 2023-05-17 11:55:22,742][0m Trial 99 finished with value: 1.430884803889265 and parameters: {'n_estimators': 200, 'learning_rate': 0.0542703507698215, 'num_leaves': 240, 'max_depth': 4, 'min_data_in_leaf': 200, 'max_bin': 241, 'lambda_l1': 20, 'lambda_l2': 45, 'min_gain_to_split': 2.1627231830949416, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 99 with value: 1.430884803889265.[0m


In [16]:
print(f"\tBest value (multi_logloss): {study.best_value:.5f}")
print(f"\tBest params:")

for key, value in study.best_params.items():
    print(f"\t\t{key}: {value}")

	Best value (multi_logloss): 1.43088
	Best params:
		n_estimators: 200
		learning_rate: 0.0542703507698215
		num_leaves: 240
		max_depth: 4
		min_data_in_leaf: 200
		max_bin: 241
		lambda_l1: 20
		lambda_l2: 45
		min_gain_to_split: 2.1627231830949416
		bagging_fraction: 0.7
		bagging_freq: 1
		feature_fraction: 0.4


In [17]:
best_booster

In [18]:
from sklearn.preprocessing import LabelEncoder

In [19]:
le = LabelEncoder()
smoothie_merged["category"] = le.fit_transform(smoothie_merged["category"])

In [20]:
class_weight = {
    "HOME": 0.31,
    "OTHER": 0.13,
    "SHOPPING": 0.22,
    "TRAVEL": 0.15,
    "WORK": 0.28
}
encoded_class_weights = {i: class_weight[label] for i, label in enumerate(le.classes_)}
class_weight = encoded_class_weights

In [21]:
pipe_rf = make_pipeline(
    preprocessor,
    RandomForestClassifier(n_jobs=-1, random_state=42)
)
pipe_rf.fit(X_train, y_train);

In [22]:
rf_param_grid = {
    "randomforestclassifier__n_estimators": [25, 50, 100, 150],
    "randomforestclassifier__max_features": ["sqrt", "log2", None],
    "randomforestclassifier__max_depth": [5, 10, 20, 50],
    "randomforestclassifier__max_leaf_nodes": [30, 50, 70],
    "randomforestclassifier__class_weight": ["balanced", [class_weight]]
}
rf_grid_search = GridSearchCV(
    pipe_rf, rf_param_grid, cv=5, n_jobs=-1, return_train_score=True
)
rf_grid_search.fit(X_train, y_train);

In [23]:
rf_tuned = rf_grid_search.best_estimator_
best_rf = rf_tuned.named_steps["randomforestclassifier"]
type(best_rf)
best_rf

In [24]:
def evaluate_models(X_train, y_train,
		            X_test, y_test,
		    		rfe_estimator, 
		    		rfe_num_features_start, rfe_num_features_end, rfe_num_features_step, 
				    pipe_model):
	
	for i in range(rfe_num_features_start, rfe_num_features_end+1, rfe_num_features_step):
		print('---- RFE with ', i, ' features selected ----')
		
		rfe = RFE(estimator=rfe_estimator, n_features_to_select=i)
		pipe_rfe_model = make_pipeline(preprocessor, 
			                           rfe, 
				                       pipe_model)
		
		# print(pipe_rfe_model)
		
		pipe_rfe_model.fit(X_train, pd.DataFrame(y_train))
		
		print(pipe_rfe_model.score(X_train, pd.DataFrame(y_train)))
		print(pipe_rfe_model.score(X_test, pd.DataFrame(y_test)))	

In [25]:
evaluate_models(X_train, y_train,
		        X_test, y_test,
		    	best_booster, 
		    	8, 25, 1, 
				rf_tuned)

---- RFE with  8  features selected ----


ValueError: Specifying the columns using strings is only supported for pandas DataFrames