In [203]:
import pandas as pd
from glob import glob

MAIN_SOURCE = True

LIVE_PRED_START = pd.Timestamp.today() + pd.to_timedelta(1, "d")
LIVE_PRED_START = LIVE_PRED_START.strftime("%Y-%m-%d")

LIVE_PRED_END = pd.to_datetime(LIVE_PRED_START) + pd.to_timedelta(1, "d")
LIVE_PRED_END = LIVE_PRED_END.date().strftime("%Y-%m-%d")

START_HOUR = 9
END_HOUR = 18

print("Pred Date Start: ", LIVE_PRED_START, "Pred Date End: ", LIVE_PRED_END)

stock_price_files = glob("../Dataset/*")

stock_price_list = [pd.read_csv(v) for v in stock_price_files]
stock_price_df = pd.concat(stock_price_list, ignore_index=True)
stock_price_df["timestamp"] = pd.to_datetime(
    stock_price_df.timestamp).dt.tz_localize(None)
stock_price_df.sort_values(
    by=["short_name", "timestamp"], ignore_index=True, inplace=True
)
stock_price_df.drop_duplicates(ignore_index=True, inplace=True)
stock_price_df["date"] = stock_price_df.timestamp.dt.date
stock_price_df["hour"] = stock_price_df.timestamp.dt.hour

stock_names = list(stock_price_df.short_name.unique())

processed_data = pd.read_csv("./stock_price_processed_data_live.csv.gz")
processed_data.head()

Pred Date Start:  2024-01-02 Pred Date End:  2024-01-03


Unnamed: 0,timestamp,date,hour,AKBNK,ARCLK,ASELS,BIMAS,DOHOL,EKGYO,EREGL,...,LAG_5DAY_YKBNK,year,month,day,dow,quarter,doy,woy,is_monday_morning,is_friday_noon
0,2018-01-02 09:00:00,2018-01-02,9,0.016222,0.004632,0.001813,0.010882,0.011668,0.01063,0.002966,...,,2018,1,2,1,1,2,1,0,0
1,2018-01-02 10:00:00,2018-01-02,10,0.004986,-0.006459,-0.00181,-0.003807,0.0,-0.003463,-0.003937,...,,2018,1,2,1,1,2,1,0,0
2,2018-01-02 11:00:00,2018-01-02,11,-0.001973,-0.00093,0.00364,-0.005713,0.0,0.003475,0.0,...,,2018,1,2,1,1,2,1,0,0
3,2018-01-02 12:00:00,2018-01-02,12,0.002966,0.0,0.000602,0.00447,0.011111,0.0,0.001968,...,,2018,1,2,1,1,2,1,0,0
4,2018-01-02 13:00:00,2018-01-02,13,0.00497,0.005582,-0.001204,0.004456,-0.010989,0.003463,0.003945,...,,2018,1,2,1,1,2,1,0,0


In [204]:
TEST_START = LIVE_PRED_START

drop_cols = ["timestamp", "date"] + stock_names

train_data = processed_data[processed_data.date < TEST_START].reset_index(drop=True)
test_data = processed_data[processed_data.date >= TEST_START].reset_index(drop=True)

X_train = train_data.drop(columns=drop_cols)
y_train = train_data[stock_names].copy()
X_test = test_data.drop(columns=drop_cols)

X_test.tail()

Unnamed: 0,hour,AKBNK_CLOSE,AKBNK_DIVIDENDS,AKBNK_HIGH,AKBNK_LOW,AKBNK_OPEN,AKBNK_STOCKSPLITS,AKBNK_VOLUME,ARCLK_CLOSE,ARCLK_DIVIDENDS,...,LAG_5DAY_YKBNK,year,month,day,dow,quarter,doy,woy,is_monday_morning,is_friday_noon
5,14,,,,,,,,,,...,19.87,2024,1,2,1,1,2,1,0,0
6,15,,,,,,,,,,...,19.89,2024,1,2,1,1,2,1,0,0
7,16,,,,,,,,,,...,19.84,2024,1,2,1,1,2,1,0,0
8,17,,,,,,,,,,...,20.0,2024,1,2,1,1,2,1,0,0
9,18,,,,,,,,,,...,20.06,2024,1,2,1,1,2,1,0,0


In [17]:
import catboost as cb
READ_MODEL_FIT = True

if READ_MODEL_FIT:
    cbregr = cb.CatBoostRegressor()
    cbregr.load_model("./catboost_fit")
else:
    cbregr = cb.CatBoostRegressor(objective="MultiRMSE", random_seed=6969)
    cbregr.fit(X_train, y_train)
    cbregr.save_model("./catboost_fit")
     
cb_preds = cbregr.predict(X_test)
cb_preds

0:	learn: 0.0483117	total: 2.81s	remaining: 46m 51s
1:	learn: 0.0482845	total: 5.34s	remaining: 44m 24s
2:	learn: 0.0482628	total: 7.89s	remaining: 43m 41s
3:	learn: 0.0482434	total: 10.4s	remaining: 43m 12s
4:	learn: 0.0482193	total: 12.9s	remaining: 42m 56s
5:	learn: 0.0481986	total: 15.5s	remaining: 42m 43s
6:	learn: 0.0481806	total: 18.1s	remaining: 42m 42s
7:	learn: 0.0481529	total: 20.6s	remaining: 42m 33s
8:	learn: 0.0481306	total: 23.1s	remaining: 42m 27s
9:	learn: 0.0481140	total: 25.7s	remaining: 42m 27s
10:	learn: 0.0481065	total: 28.4s	remaining: 42m 33s
11:	learn: 0.0480982	total: 30.9s	remaining: 42m 27s
12:	learn: 0.0480820	total: 33.5s	remaining: 42m 20s
13:	learn: 0.0480628	total: 36.1s	remaining: 42m 22s
14:	learn: 0.0480477	total: 38.7s	remaining: 42m 18s
15:	learn: 0.0480337	total: 41.3s	remaining: 42m 17s
16:	learn: 0.0480232	total: 44.2s	remaining: 42m 35s
17:	learn: 0.0480046	total: 47.2s	remaining: 42m 52s
18:	learn: 0.0479917	total: 50.1s	remaining: 43m 7s
19:	

array([[ 1.94571449e-03, -1.18783167e-03,  2.88493443e-03, ...,
         7.45234466e-04,  1.12227752e-03,  3.02184092e-03],
       [ 1.13450686e-03, -6.94243227e-04,  1.18361892e-03, ...,
        -1.52150010e-03,  7.89900212e-04,  1.54923355e-03],
       [ 6.77917976e-04, -1.02518363e-03,  3.47151907e-04, ...,
        -1.74631356e-03, -1.78199744e-04,  9.49722648e-04],
       ...,
       [ 6.95107142e-05,  3.49211003e-04,  7.73537655e-04, ...,
        -6.16256684e-04, -3.66806028e-05,  9.39091114e-05],
       [ 5.66340494e-04,  8.26692117e-04,  1.21008715e-03, ...,
        -3.69361212e-04,  2.67828861e-04,  4.57134587e-04],
       [ 5.33546937e-03,  4.88980719e-03,  3.79075696e-03, ...,
         5.68885487e-03,  4.37082419e-03,  4.18664322e-03]])

In [51]:
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor

lgbm_model = lgb.LGBMRegressor(random_state=6969, objective="regression_l1", importance_type="gain")
model = MultiOutputRegressor(lgbm_model, n_jobs=-1)
model.fit(X_train, y_train)
lgb_preds = model.predict(X_test)

In [136]:
import optuna

VALID_START = "2023-01-01"

tuning_train = processed_data[processed_data.date < VALID_START].reset_index(drop=True)
tuning_valid = processed_data[(processed_data.date >= VALID_START) & (processed_data.date < TEST_START)].reset_index(drop=True)

def pred_to_result_dataframe(preds, close_hour = 18):

    result = test_data[["date", "hour"]].copy()
    pred_df = pd.DataFrame(preds, columns=stock_names)
    result = pd.concat([result, pred_df], axis=1)
    result = result.melt(id_vars=["date", "hour"], value_name="prediction_return", var_name="short_name")
    result["date"] = pd.to_datetime(result.date)

    stock_price_df["date"] = pd.to_datetime(stock_price_df.date)
    yesterday_closes = stock_price_df[stock_price_df["hour"] == close_hour].reset_index(drop=True)
    result = result.merge(yesterday_closes[["date", "hour", "short_name", "price"]], how="left")
    result["price"] = result["price"].bfill()
    result.rename(columns={"price": "yesterday_close"}, inplace=True)
    result["yesterday_close"] = result["yesterday_close"].shift(10)
    result["prediction_price"] = result.yesterday_close * (1 + result.prediction_return)

    result = result.merge(stock_price_df[["date", "hour", "short_name", "price"]], how="left").dropna(axis=0)
    
    return result

def objective(trial):

    x_tuning_train = tuning_train.drop(columns=drop_cols)
    y_tuning_train = tuning_train[stock_names].copy()
    x_tuning_valid = tuning_valid.drop(columns=drop_cols)

    param = {
        "objective": trial.suggest_categorical("objective", ["regression", "regression_l1", "mape"]),
        "metric": "mae",
        "verbosity": -1,
        "boosting_type": trial.suggest_categorical("boosting_type", ["gbdt", "dart", "rf"]),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": 1,
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "max_depth": trial.suggest_int("max_depth", 5, 8),
        "feature_pre_filter": False,
        "num_iterations": trial.suggest_int("num_iterations", 100, 750),
        "boost_from_average": trial.suggest_categorical("boost_from_average", [True, False]),
        "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.15),
    }

    model = MultiOutputRegressor(lgb.LGBMRegressor(**param), n_jobs=-1)
    model.fit(x_tuning_train, y_tuning_train)
    preds = model.predict(x_tuning_valid)

    tuning_result = pred_to_result_dataframe(preds)

    wmape = tuning_result.groupby("short_name").apply(lambda x: pd.Series({
        "wmape": abs(x["prediction_price"] - x["price"]).sum() / x["price"].sum()
        })).wmape.mean()
    
    return wmape

study = optuna.create_study(direction="minimize") # timeout=5*3600
study.optimize(objective, timeout=3*3600)
study.trials_dataframe().to_csv("./study_dataframe_optuna_lgbm.csv", index=False)

[32m[I 2023-12-31 20:39:29,019][0m A new study created in memory with name: no-name-56f1a27b-caca-4ad2-88f2-10bf355090d2[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 20:43:47,431][0m Trial 0 finished with value: 0.016091326353850507 and parameters: {'objective': 'mape', 'boosting_type': 'dart', 'lambda_l1': 8.116760233134114, 'lambda_l2': 2.1596402188879506e-08, 'num_leaves': 222, 'feature_fraction': 0.9235832706586538, 'bagging_fraction': 0.7661505048284494, 'min_child_samples': 63, 'max_depth': 8, 'num_iterations': 239, 'boost_from_average': True, 'learning_rate': 0.017451165783280074}. Best is trial 0 with value: 0.016091326353850507.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 20:46:37,696][0m Trial 1 finished with value: 0.016830721250473118 and parameters: {'objective': 'regression', 'boosting_type': 'gbdt', 'lambda_l1': 0.0004027326420035557, 'lambda_l2': 0.4287647758582228, 'num_leaves': 52, 'feature_fraction': 0.8109262163052546, 'bagging_fraction': 0.7933144813085115, 'min_child_samples': 44, 'max_depth': 5, 'num_iterations': 474, 'boost_from_average': True, 'learning_rate': 0.07927335733064181}. Best is trial 0 with value: 0.016091326353850507.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 20:55:18,758][0m Trial 2 finished with value: 0.01635095099469314 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 2.6565238898279947e-05, 'lambda_l2': 0.0009131313907297755, 'num_leaves': 33, 'feature_fraction': 0.7098008921689998, 'bagging_fraction': 0.9906358184525541, 'min_child_samples': 58, 'max_depth': 7, 'num_iterations': 734, 'boost_from_average': True, 'learning_rate': 0.10322765155074966}. Best is trial 0 with value: 0.016091326353850507.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 20:57:40,409][0m Trial 3 finished with value: 0.01610891282964772 and parameters: {'objective': 'mape', 'boosting_type': 'gbdt', 'lambda_l1': 1.5427236064447507e-08, 'lambda_l2': 1.875270069649761, 'num_leaves': 23, 'feature_fraction': 0.43252474740015795, 'bagging_fraction': 0.9789933671799598, 'min_child_samples': 14, 'max_depth': 8, 'num_iterations': 293, 'boost_from_average': False, 'learning_rate': 0.022219822810082425}. Best is trial 0 with value: 0.016091326353850507.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:02:11,921][0m Trial 4 finished with value: 0.018184530655980573 and parameters: {'objective': 'regression', 'boosting_type': 'gbdt', 'lambda_l1': 5.652835617546432e-06, 'lambda_l2': 1.5489618206917007e-07, 'num_leaves': 199, 'feature_fraction': 0.640446379996461, 'bagging_fraction': 0.7700336390492442, 'min_child_samples': 19, 'max_depth': 6, 'num_iterations': 717, 'boost_from_average': True, 'learning_rate': 0.10520096251886198}. Best is trial 0 with value: 0.016091326353850507.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:02:52,341][0m Trial 5 finished with value: 0.01607770956897154 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 3.4065623028418623, 'lambda_l2': 0.10784644109043155, 'num_leaves': 148, 'feature_fraction': 0.977888157809208, 'bagging_fraction': 0.6230356566731139, 'min_child_samples': 15, 'max_depth': 7, 'num_iterations': 190, 'boost_from_average': True, 'learning_rate': 0.05076044961651507}. Best is trial 5 with value: 0.01607770956897154.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:08:06,298][0m Trial 6 finished with value: 0.016292543933134555 and parameters: {'objective': 'regression_l1', 'boosting_type': 'dart', 'lambda_l1': 4.698605814119956e-05, 'lambda_l2': 0.015324106175279463, 'num_leaves': 193, 'feature_fraction': 0.7841372527866477, 'bagging_fraction': 0.5742326212369602, 'min_child_samples': 9, 'max_depth': 6, 'num_iterations': 420, 'boost_from_average': True, 'learning_rate': 0.13266012342402617}. Best is trial 5 with value: 0.01607770956897154.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:10:54,278][0m Trial 7 finished with value: 0.016144018175047962 and parameters: {'objective': 'mape', 'boosting_type': 'gbdt', 'lambda_l1': 0.03398133214713593, 'lambda_l2': 0.0005963336866860556, 'num_leaves': 101, 'feature_fraction': 0.7831585226503079, 'bagging_fraction': 0.5144403062859584, 'min_child_samples': 59, 'max_depth': 7, 'num_iterations': 250, 'boost_from_average': False, 'learning_rate': 0.08488867057268393}. Best is trial 5 with value: 0.01607770956897154.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:16:37,156][0m Trial 8 finished with value: 0.017109982943955368 and parameters: {'objective': 'regression', 'boosting_type': 'gbdt', 'lambda_l1': 2.1359133735995914e-05, 'lambda_l2': 0.02841612995108218, 'num_leaves': 227, 'feature_fraction': 0.7353469798236931, 'bagging_fraction': 0.6479827423450937, 'min_child_samples': 59, 'max_depth': 8, 'num_iterations': 682, 'boost_from_average': True, 'learning_rate': 0.0813786552027005}. Best is trial 5 with value: 0.01607770956897154.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:21:43,637][0m Trial 9 finished with value: 0.016079152367606864 and parameters: {'objective': 'regression_l1', 'boosting_type': 'gbdt', 'lambda_l1': 3.7836218260151544e-05, 'lambda_l2': 0.0011538144265954068, 'num_leaves': 42, 'feature_fraction': 0.635640715497272, 'bagging_fraction': 0.5274326584198947, 'min_child_samples': 64, 'max_depth': 8, 'num_iterations': 521, 'boost_from_average': False, 'learning_rate': 0.1307238988888538}. Best is trial 5 with value: 0.01607770956897154.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:22:13,553][0m Trial 10 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 8.816979942542755, 'lambda_l2': 3.281542231961584e-06, 'num_leaves': 138, 'feature_fraction': 0.9948710619824198, 'bagging_fraction': 0.43847967094014917, 'min_child_samples': 92, 'max_depth': 5, 'num_iterations': 114, 'boost_from_average': True, 'learning_rate': 0.04548612752166902}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:22:45,502][0m Trial 11 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 8.892975695439382, 'lambda_l2': 3.1436316133735467e-06, 'num_leaves': 146, 'feature_fraction': 0.9950883454433153, 'bagging_fraction': 0.45643394002855225, 'min_child_samples': 100, 'max_depth': 5, 'num_iterations': 120, 'boost_from_average': True, 'learning_rate': 0.04539136877337642}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:23:48,698][0m Trial 12 finished with value: 0.016234324972126405 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.15801031435140075, 'lambda_l2': 1.7660890431029762e-06, 'num_leaves': 131, 'feature_fraction': 0.9999529960285118, 'bagging_fraction': 0.40928604329600665, 'min_child_samples': 100, 'max_depth': 5, 'num_iterations': 119, 'boost_from_average': True, 'learning_rate': 0.04349369423496817}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:24:40,076][0m Trial 13 finished with value: 0.016176464662102454 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.36944424271376497, 'lambda_l2': 7.912311973411316e-06, 'num_leaves': 92, 'feature_fraction': 0.9092682698881823, 'bagging_fraction': 0.4039923582354183, 'min_child_samples': 97, 'max_depth': 5, 'num_iterations': 105, 'boost_from_average': True, 'learning_rate': 0.04698311689722487}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:26:31,083][0m Trial 14 finished with value: 0.01625017550037489 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.005850200080861768, 'lambda_l2': 1.781325106863955e-05, 'num_leaves': 163, 'feature_fraction': 0.46898942785234304, 'bagging_fraction': 0.4524334908180157, 'min_child_samples': 84, 'max_depth': 6, 'num_iterations': 374, 'boost_from_average': True, 'learning_rate': 0.03267233470912242}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:27:33,312][0m Trial 15 finished with value: 0.016148354415679828 and parameters: {'objective': 'regression_l1', 'boosting_type': 'rf', 'lambda_l1': 8.380288705100494, 'lambda_l2': 2.787134743310249e-07, 'num_leaves': 97, 'feature_fraction': 0.8944475367796985, 'bagging_fraction': 0.47180629433112353, 'min_child_samples': 83, 'max_depth': 5, 'num_iterations': 102, 'boost_from_average': True, 'learning_rate': 0.00754673897979094}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:29:06,642][0m Trial 16 finished with value: 0.01616441231835364 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.7195380021808762, 'lambda_l2': 4.469309465879468e-05, 'num_leaves': 173, 'feature_fraction': 0.9929101357901622, 'bagging_fraction': 0.8727164458945099, 'min_child_samples': 88, 'max_depth': 5, 'num_iterations': 170, 'boost_from_average': False, 'learning_rate': 0.058803193118811325}. Best is trial 10 with value: 0.01607264025061446.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:34:02,837][0m Trial 17 finished with value: 0.016294169353900358 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.004814967468032291, 'lambda_l2': 3.9200225590073455e-08, 'num_leaves': 254, 'feature_fraction': 0.8584047248596287, 'bagging_fraction': 0.4110219404911255, 'min_child_samples': 75, 'max_depth': 6, 'num_iterations': 608, 'boost_from_average': True, 'learning_rate': 0.06283284383665648}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:35:45,746][0m Trial 18 finished with value: 0.016297639508891723 and parameters: {'objective': 'regression', 'boosting_type': 'dart', 'lambda_l1': 4.3507619758008225e-08, 'lambda_l2': 2.214833796820516e-06, 'num_leaves': 127, 'feature_fraction': 0.5198440580930446, 'bagging_fraction': 0.5395244611192753, 'min_child_samples': 37, 'max_depth': 5, 'num_iterations': 338, 'boost_from_average': True, 'learning_rate': 0.0305209692149035}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:37:49,306][0m Trial 19 finished with value: 0.016339606031040045 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 3.0468530585634716e-07, 'lambda_l2': 5.585599709173301e-07, 'num_leaves': 76, 'feature_fraction': 0.9555801256746177, 'bagging_fraction': 0.67024546943277, 'min_child_samples': 100, 'max_depth': 6, 'num_iterations': 176, 'boost_from_average': True, 'learning_rate': 0.007193656550823602}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:40:13,710][0m Trial 20 finished with value: 0.016158417750288905 and parameters: {'objective': 'regression_l1', 'boosting_type': 'rf', 'lambda_l1': 1.6763244573311569, 'lambda_l2': 9.373394815470964e-05, 'num_leaves': 126, 'feature_fraction': 0.8493134400900342, 'bagging_fraction': 0.5932494824988022, 'min_child_samples': 74, 'max_depth': 5, 'num_iterations': 248, 'boost_from_average': False, 'learning_rate': 0.06667210309742833}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:40:51,050][0m Trial 21 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 6.958074838463137, 'lambda_l2': 7.860601725539502, 'num_leaves': 154, 'feature_fraction': 0.9916411474866202, 'bagging_fraction': 0.6102675000995725, 'min_child_samples': 26, 'max_depth': 7, 'num_iterations': 168, 'boost_from_average': True, 'learning_rate': 0.046846095994660164}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:41:21,246][0m Trial 22 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.928759173704425, 'lambda_l2': 5.730250803440724e-06, 'num_leaves': 156, 'feature_fraction': 0.9956496568754386, 'bagging_fraction': 0.47047091037062094, 'min_child_samples': 27, 'max_depth': 7, 'num_iterations': 102, 'boost_from_average': True, 'learning_rate': 0.0377705194481454}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:43:55,586][0m Trial 23 finished with value: 0.016301004586019397 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.06992377892022, 'lambda_l2': 0.00022704330996375832, 'num_leaves': 180, 'feature_fraction': 0.9424743581710003, 'bagging_fraction': 0.7115905273907902, 'min_child_samples': 42, 'max_depth': 7, 'num_iterations': 153, 'boost_from_average': True, 'learning_rate': 0.055214876042638084}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:44:55,568][0m Trial 24 finished with value: 0.016136527026852924 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 1.0234070983753556, 'lambda_l2': 5.475365574453536, 'num_leaves': 155, 'feature_fraction': 0.9963300485420081, 'bagging_fraction': 0.4930052996600365, 'min_child_samples': 26, 'max_depth': 7, 'num_iterations': 214, 'boost_from_average': True, 'learning_rate': 0.03535788834724573}. Best is trial 10 with value: 0.01607264025061446.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:49:51,538][0m Trial 25 finished with value: 0.01608592173482805 and parameters: {'objective': 'mape', 'boosting_type': 'rf', 'lambda_l1': 9.979392362020029, 'lambda_l2': 2.2071947461640315e-05, 'num_leaves': 112, 'feature_fraction': 0.8699123409032417, 'bagging_fraction': 0.5672760648580193, 'min_child_samples': 24, 'max_depth': 7, 'num_iterations': 304, 'boost_from_average': True, 'learning_rate': 0.021369889494444134}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:52:28,122][0m Trial 26 finished with value: 0.016380468858941358 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.010105268966161554, 'lambda_l2': 9.341694939266175e-08, 'num_leaves': 208, 'feature_fraction': 0.9333427497658033, 'bagging_fraction': 0.7086853147365711, 'min_child_samples': 34, 'max_depth': 7, 'num_iterations': 147, 'boost_from_average': True, 'learning_rate': 0.06879010243535844}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:53:43,566][0m Trial 27 finished with value: 0.016250612561767356 and parameters: {'objective': 'regression', 'boosting_type': 'dart', 'lambda_l1': 0.001026934106772135, 'lambda_l2': 0.0029486510768916693, 'num_leaves': 72, 'feature_fraction': 0.8236693550551071, 'bagging_fraction': 0.44077252397942257, 'min_child_samples': 31, 'max_depth': 7, 'num_iterations': 103, 'boost_from_average': True, 'learning_rate': 0.03482468330784878}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 21:54:34,652][0m Trial 28 finished with value: 0.01609358372941637 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 2.733176224136925, 'lambda_l2': 8.634491912132643e-07, 'num_leaves': 2, 'feature_fraction': 0.9654879131522343, 'bagging_fraction': 0.6091435430324098, 'min_child_samples': 50, 'max_depth': 6, 'num_iterations': 296, 'boost_from_average': True, 'learning_rate': 0.09069547767980814}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:00:02,240][0m Trial 29 finished with value: 0.0160742923887184 and parameters: {'objective': 'mape', 'boosting_type': 'dart', 'lambda_l1': 0.46955243944538777, 'lambda_l2': 7.117766683721133e-06, 'num_leaves': 230, 'feature_fraction': 0.9126903371307336, 'bagging_fraction': 0.4926399281543174, 'min_child_samples': 23, 'max_depth': 8, 'num_iterations': 214, 'boost_from_average': True, 'learning_rate': 0.009752397649324632}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:02:43,083][0m Trial 30 finished with value: 0.01624627584191953 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.043520850689497334, 'lambda_l2': 3.536277092609508e-08, 'num_leaves': 180, 'feature_fraction': 0.5472410547663767, 'bagging_fraction': 0.5542623155653806, 'min_child_samples': 7, 'max_depth': 7, 'num_iterations': 246, 'boost_from_average': True, 'learning_rate': 0.02567999294949044}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:03:13,303][0m Trial 31 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.804584264350748, 'lambda_l2': 1.2521643005240283e-08, 'num_leaves': 144, 'feature_fraction': 0.9893315875245017, 'bagging_fraction': 0.4405751797843141, 'min_child_samples': 93, 'max_depth': 6, 'num_iterations': 123, 'boost_from_average': True, 'learning_rate': 0.04620984936384507}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:03:44,785][0m Trial 32 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.118098317597836, 'lambda_l2': 1.4100596196473399e-08, 'num_leaves': 147, 'feature_fraction': 0.9491135737116478, 'bagging_fraction': 0.48623221158150554, 'min_child_samples': 29, 'max_depth': 6, 'num_iterations': 140, 'boost_from_average': True, 'learning_rate': 0.04114257377720054}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:04:18,481][0m Trial 33 finished with value: 0.01608247818045044 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 2.67543472470834, 'lambda_l2': 3.443217725063247e-08, 'num_leaves': 160, 'feature_fraction': 0.9484102581836079, 'bagging_fraction': 0.5002242391992497, 'min_child_samples': 43, 'max_depth': 6, 'num_iterations': 151, 'boost_from_average': True, 'learning_rate': 0.07217402099034514}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:06:18,962][0m Trial 34 finished with value: 0.016182896320160806 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.25240168162140614, 'lambda_l2': 1.0548934708358337e-07, 'num_leaves': 171, 'feature_fraction': 0.8940147924406671, 'bagging_fraction': 0.4039965173297984, 'min_child_samples': 30, 'max_depth': 6, 'num_iterations': 206, 'boost_from_average': True, 'learning_rate': 0.018454241578093094}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:06:48,858][0m Trial 35 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 6.61124324932296, 'lambda_l2': 2.2573173621191446e-08, 'num_leaves': 114, 'feature_fraction': 0.9386293048184005, 'bagging_fraction': 0.4632909782183281, 'min_child_samples': 52, 'max_depth': 6, 'num_iterations': 136, 'boost_from_average': True, 'learning_rate': 0.03976425238523925}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:07:32,435][0m Trial 36 finished with value: 0.016134115876842513 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 1.453421298386772, 'lambda_l2': 1.692688985225802e-08, 'num_leaves': 115, 'feature_fraction': 0.92892748863813, 'bagging_fraction': 0.8308975231184826, 'min_child_samples': 48, 'max_depth': 6, 'num_iterations': 142, 'boost_from_average': True, 'learning_rate': 0.03938556077028335}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:08:40,042][0m Trial 37 finished with value: 0.016157890068405997 and parameters: {'objective': 'mape', 'boosting_type': 'rf', 'lambda_l1': 0.10079572902956939, 'lambda_l2': 7.122964093714981e-07, 'num_leaves': 142, 'feature_fraction': 0.9655641264559388, 'bagging_fraction': 0.4322989161280062, 'min_child_samples': 93, 'max_depth': 5, 'num_iterations': 100, 'boost_from_average': True, 'learning_rate': 0.05400320254309543}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:09:21,263][0m Trial 38 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.966676770516832, 'lambda_l2': 1.2385563835476138e-08, 'num_leaves': 115, 'feature_fraction': 0.8187749052791243, 'bagging_fraction': 0.4776983264071843, 'min_child_samples': 15, 'max_depth': 6, 'num_iterations': 280, 'boost_from_average': True, 'learning_rate': 0.013448814384372614}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:10:27,361][0m Trial 39 finished with value: 0.016076783137010064 and parameters: {'objective': 'regression', 'boosting_type': 'gbdt', 'lambda_l1': 3.564834785470862, 'lambda_l2': 1.4809009698365518e-08, 'num_leaves': 78, 'feature_fraction': 0.7605898571481504, 'bagging_fraction': 0.525542021084203, 'min_child_samples': 15, 'max_depth': 6, 'num_iterations': 399, 'boost_from_average': False, 'learning_rate': 0.01484881522330653}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:16:20,293][0m Trial 40 finished with value: 0.01612215749272046 and parameters: {'objective': 'regression_l1', 'boosting_type': 'dart', 'lambda_l1': 0.018055835152765553, 'lambda_l2': 1.0268225189947218e-08, 'num_leaves': 197, 'feature_fraction': 0.6410198473006294, 'bagging_fraction': 0.4731454223536469, 'min_child_samples': 39, 'max_depth': 6, 'num_iterations': 505, 'boost_from_average': True, 'learning_rate': 0.025974290858361067}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:17:13,815][0m Trial 41 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 4.667671801510246, 'lambda_l2': 2.7850104858856067e-06, 'num_leaves': 108, 'feature_fraction': 0.8341061247232264, 'bagging_fraction': 0.4590079375551821, 'min_child_samples': 66, 'max_depth': 5, 'num_iterations': 284, 'boost_from_average': True, 'learning_rate': 0.01451153770728806}. Best is trial 10 with value: 0.01607264025061446.[0m







  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:18:36,367][0m Trial 42 finished with value: 0.016139173666217192 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.8525440489595506, 'lambda_l2': 1.6344057188783735e-07, 'num_leaves': 111, 'feature_fraction': 0.6942423749431873, 'bagging_fraction': 0.42336795120431503, 'min_child_samples': 65, 'max_depth': 6, 'num_iterations': 349, 'boost_from_average': True, 'learning_rate': 0.013619926362784932}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:19:25,108][0m Trial 43 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.103711577358892, 'lambda_l2': 1.3736656607938827e-08, 'num_leaves': 124, 'feature_fraction': 0.8021296444388702, 'bagging_fraction': 0.633804288932094, 'min_child_samples': 12, 'max_depth': 6, 'num_iterations': 190, 'boost_from_average': True, 'learning_rate': 0.05264294680106124}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:20:22,464][0m Trial 44 finished with value: 0.01607491985476659 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 3.430571269442224, 'lambda_l2': 6.049790413761331e-08, 'num_leaves': 87, 'feature_fraction': 0.8286609090531236, 'bagging_fraction': 0.5826435124028675, 'min_child_samples': 55, 'max_depth': 6, 'num_iterations': 271, 'boost_from_average': True, 'learning_rate': 0.022932974870997217}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:26:57,929][0m Trial 45 finished with value: 0.0161774242688168 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.31563116000712566, 'lambda_l2': 2.9219418905115473e-07, 'num_leaves': 52, 'feature_fraction': 0.8758273781038518, 'bagging_fraction': 0.5158958934536991, 'min_child_samples': 19, 'max_depth': 7, 'num_iterations': 458, 'boost_from_average': True, 'learning_rate': 0.03846624819245628}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:28:16,412][0m Trial 46 finished with value: 0.016279849518423054 and parameters: {'objective': 'regression', 'boosting_type': 'gbdt', 'lambda_l1': 3.072463480497187e-06, 'lambda_l2': 5.9491541864358994e-06, 'num_leaves': 137, 'feature_fraction': 0.9981050500568469, 'bagging_fraction': 0.4454526596350558, 'min_child_samples': 93, 'max_depth': 5, 'num_iterations': 128, 'boost_from_average': True, 'learning_rate': 0.06079421552631228}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:29:09,865][0m Trial 47 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 5.000958138272397, 'lambda_l2': 1.0495448759641942e-08, 'num_leaves': 104, 'feature_fraction': 0.7955209104616386, 'bagging_fraction': 0.7446224939711248, 'min_child_samples': 71, 'max_depth': 6, 'num_iterations': 199, 'boost_from_average': True, 'learning_rate': 0.07537250903964685}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:31:43,901][0m Trial 48 finished with value: 0.016130361006898207 and parameters: {'objective': 'mape', 'boosting_type': 'rf', 'lambda_l1': 0.0002116851169194595, 'lambda_l2': 0.00012574092022583054, 'num_leaves': 141, 'feature_fraction': 0.6911297074345849, 'bagging_fraction': 0.7528787260888481, 'min_child_samples': 77, 'max_depth': 5, 'num_iterations': 219, 'boost_from_average': False, 'learning_rate': 0.09120418725569285}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:32:53,212][0m Trial 49 finished with value: 0.016146858630676005 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 1.154274230746191, 'lambda_l2': 2.279868148883052e-07, 'num_leaves': 101, 'feature_fraction': 0.8922768201693475, 'bagging_fraction': 0.9253914680159322, 'min_child_samples': 69, 'max_depth': 6, 'num_iterations': 128, 'boost_from_average': True, 'learning_rate': 0.11633017104880158}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:36:39,168][0m Trial 50 finished with value: 0.016203548183941877 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.1269401412492045, 'lambda_l2': 4.494975235592565e-05, 'num_leaves': 120, 'feature_fraction': 0.7247614543369668, 'bagging_fraction': 0.5499154625302766, 'min_child_samples': 5, 'max_depth': 5, 'num_iterations': 324, 'boost_from_average': True, 'learning_rate': 0.03026169591619036}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:37:15,341][0m Trial 51 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.442949681556366, 'lambda_l2': 2.0803026291200177e-06, 'num_leaves': 148, 'feature_fraction': 0.9725525917222845, 'bagging_fraction': 0.4281346778549934, 'min_child_samples': 88, 'max_depth': 5, 'num_iterations': 101, 'boost_from_average': True, 'learning_rate': 0.04435195999655131}. Best is trial 10 with value: 0.01607264025061446.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:37:53,058][0m Trial 52 finished with value: 0.016088393707848 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 2.151333406704147, 'lambda_l2': 3.7442865349806367e-06, 'num_leaves': 149, 'feature_fraction': 0.9757834204787403, 'bagging_fraction': 0.4215169267874499, 'min_child_samples': 89, 'max_depth': 5, 'num_iterations': 114, 'boost_from_average': True, 'learning_rate': 0.0456286746449298}. Best is trial 10 with value: 0.01607264025061446.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:38:52,178][0m Trial 53 finished with value: 0.01607328105581404 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 3.9973363657436645, 'lambda_l2': 0.09030801933836087, 'num_leaves': 61, 'feature_fraction': 0.7959381895349823, 'bagging_fraction': 0.6463712635103532, 'min_child_samples': 10, 'max_depth': 6, 'num_iterations': 280, 'boost_from_average': True, 'learning_rate': 0.07614960741532881}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:41:18,814][0m Trial 54 finished with value: 0.016153481335653648 and parameters: {'objective': 'regression_l1', 'boosting_type': 'rf', 'lambda_l1': 9.531995128086802, 'lambda_l2': 4.942726772598983e-07, 'num_leaves': 136, 'feature_fraction': 0.923137151389696, 'bagging_fraction': 0.40069752920339907, 'min_child_samples': 81, 'max_depth': 7, 'num_iterations': 167, 'boost_from_average': True, 'learning_rate': 0.04037643653050225}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:42:41,666][0m Trial 55 finished with value: 0.016157334419220824 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.6044602401178285, 'lambda_l2': 3.108244940866811e-08, 'num_leaves': 128, 'feature_fraction': 0.7602340891619954, 'bagging_fraction': 0.4625756536164519, 'min_child_samples': 57, 'max_depth': 6, 'num_iterations': 182, 'boost_from_average': True, 'learning_rate': 0.05071829852402995}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:43:40,883][0m Trial 56 finished with value: 0.016106049530317492 and parameters: {'objective': 'regression', 'boosting_type': 'gbdt', 'lambda_l1': 5.268336971044081, 'lambda_l2': 0.010547907491667018, 'num_leaves': 166, 'feature_fraction': 0.8456754551779586, 'bagging_fraction': 0.6668016092404272, 'min_child_samples': 21, 'max_depth': 8, 'num_iterations': 236, 'boost_from_average': True, 'learning_rate': 0.05210987595126101}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:44:23,701][0m Trial 57 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.831144644047546, 'lambda_l2': 9.923215437460008e-08, 'num_leaves': 184, 'feature_fraction': 0.9497511369150706, 'bagging_fraction': 0.4970023975667285, 'min_child_samples': 97, 'max_depth': 6, 'num_iterations': 132, 'boost_from_average': True, 'learning_rate': 0.05991259307610238}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:45:06,607][0m Trial 58 finished with value: 0.016112738363021104 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 1.6406536972955383, 'lambda_l2': 6.551213949590376e-08, 'num_leaves': 188, 'feature_fraction': 0.9482960955538117, 'bagging_fraction': 0.5009276610990594, 'min_child_samples': 96, 'max_depth': 6, 'num_iterations': 136, 'boost_from_average': True, 'learning_rate': 0.06629228972450696}. Best is trial 10 with value: 0.01607264025061446.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:47:45,781][0m Trial 59 finished with value: 0.016171074341269114 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.221130351340727, 'lambda_l2': 1.0012167934490263e-08, 'num_leaves': 208, 'feature_fraction': 0.9048744669993711, 'bagging_fraction': 0.48559028156082595, 'min_child_samples': 16, 'max_depth': 6, 'num_iterations': 157, 'boost_from_average': False, 'learning_rate': 0.05834831442617293}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:50:09,877][0m Trial 60 finished with value: 0.016151387234827397 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.6843403607812717, 'lambda_l2': 1.1190307637181477e-06, 'num_leaves': 89, 'feature_fraction': 0.8779632694288817, 'bagging_fraction': 0.45576396905136163, 'min_child_samples': 80, 'max_depth': 5, 'num_iterations': 408, 'boost_from_average': True, 'learning_rate': 0.005148943964837775}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:51:00,001][0m Trial 61 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 7.9734754559022, 'lambda_l2': 2.348686258594514, 'num_leaves': 156, 'feature_fraction': 0.9859161214214205, 'bagging_fraction': 0.5366929813963937, 'min_child_samples': 29, 'max_depth': 7, 'num_iterations': 174, 'boost_from_average': True, 'learning_rate': 0.029500691055936237}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:51:57,776][0m Trial 62 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 5.031602176654024, 'lambda_l2': 9.529490908157062, 'num_leaves': 152, 'feature_fraction': 0.980150605108869, 'bagging_fraction': 0.5311846803965454, 'min_child_samples': 35, 'max_depth': 7, 'num_iterations': 233, 'boost_from_average': True, 'learning_rate': 0.029027132793318595}. Best is trial 10 with value: 0.01607264025061446.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:52:51,008][0m Trial 63 finished with value: 0.016117107453629445 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 1.990875534337436, 'lambda_l2': 6.023572562528975e-08, 'num_leaves': 102, 'feature_fraction': 0.7995081581270949, 'bagging_fraction': 0.7488789285638856, 'min_child_samples': 85, 'max_depth': 6, 'num_iterations': 194, 'boost_from_average': True, 'learning_rate': 0.0753026947435233}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:53:48,402][0m Trial 64 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 5.02761580478116, 'lambda_l2': 5.566802083737463, 'num_leaves': 134, 'feature_fraction': 0.9232794001046287, 'bagging_fraction': 0.5164857554700496, 'min_child_samples': 34, 'max_depth': 7, 'num_iterations': 254, 'boost_from_average': True, 'learning_rate': 0.012354048770827545}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:54:51,156][0m Trial 65 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.225036911648195, 'lambda_l2': 1.335372338720251, 'num_leaves': 181, 'feature_fraction': 0.9733240901389347, 'bagging_fraction': 0.8100931212902016, 'min_child_samples': 70, 'max_depth': 7, 'num_iterations': 199, 'boost_from_average': True, 'learning_rate': 0.029824339823959455}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:55:49,401][0m Trial 66 finished with value: 0.016086925111567722 and parameters: {'objective': 'regression', 'boosting_type': 'dart', 'lambda_l1': 1.630368690164675, 'lambda_l2': 0.3071853591238795, 'num_leaves': 165, 'feature_fraction': 0.6655021018345362, 'bagging_fraction': 0.6872492074312514, 'min_child_samples': 37, 'max_depth': 7, 'num_iterations': 234, 'boost_from_average': True, 'learning_rate': 0.08505888030620981}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 22:57:51,552][0m Trial 67 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.95579112304622, 'lambda_l2': 1.2175144531462403e-05, 'num_leaves': 118, 'feature_fraction': 0.8331613407919352, 'bagging_fraction': 0.8399543155941831, 'min_child_samples': 64, 'max_depth': 5, 'num_iterations': 620, 'boost_from_average': True, 'learning_rate': 0.019531487901173924}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:07:00,049][0m Trial 68 finished with value: 0.01617612432719664 and parameters: {'objective': 'regression_l1', 'boosting_type': 'rf', 'lambda_l1': 0.4860559915286378, 'lambda_l2': 0.0018174978862306851, 'num_leaves': 118, 'feature_fraction': 0.8345863528809943, 'bagging_fraction': 0.9503832088695852, 'min_child_samples': 60, 'max_depth': 5, 'num_iterations': 599, 'boost_from_average': True, 'learning_rate': 0.016341359977493114}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:13:56,216][0m Trial 69 finished with value: 0.0163444205346699 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 1.373456369772064e-08, 'lambda_l2': 2.4846529558431766e-05, 'num_leaves': 124, 'feature_fraction': 0.7621108448935039, 'bagging_fraction': 0.886922158525238, 'min_child_samples': 48, 'max_depth': 5, 'num_iterations': 645, 'boost_from_average': True, 'learning_rate': 0.020515929870296677}. Best is trial 10 with value: 0.01607264025061446.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:17:23,396][0m Trial 70 finished with value: 0.016113931705181758 and parameters: {'objective': 'mape', 'boosting_type': 'rf', 'lambda_l1': 1.0439208233134136, 'lambda_l2': 0.0007144285396888228, 'num_leaves': 172, 'feature_fraction': 0.9617980888813499, 'bagging_fraction': 0.4736767746530387, 'min_child_samples': 25, 'max_depth': 7, 'num_iterations': 120, 'boost_from_average': True, 'learning_rate': 0.04272721706419994}. Best is trial 10 with value: 0.01607264025061446.[0m







  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:17:59,324][0m Trial 71 finished with value: 0.01607765973512611 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 2.5420454714804332, 'lambda_l2': 1.5170973266592634e-08, 'num_leaves': 143, 'feature_fraction': 0.7775010890807653, 'bagging_fraction': 0.43522948762829683, 'min_child_samples': 90, 'max_depth': 6, 'num_iterations': 110, 'boost_from_average': True, 'learning_rate': 0.04812184383292133}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:19:01,995][0m Trial 72 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 5.4289570846739625, 'lambda_l2': 2.641061969262375e-08, 'num_leaves': 108, 'feature_fraction': 0.8129009935858102, 'bagging_fraction': 0.4683814136613441, 'min_child_samples': 10, 'max_depth': 6, 'num_iterations': 374, 'boost_from_average': True, 'learning_rate': 0.03407977572436317}. Best is trial 10 with value: 0.01607264025061446.[0m






  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:20:15,166][0m Trial 73 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 4.98501513449816, 'lambda_l2': 2.458919923931329e-08, 'num_leaves': 112, 'feature_fraction': 0.8627352165143103, 'bagging_fraction': 0.6202139354963095, 'min_child_samples': 9, 'max_depth': 6, 'num_iterations': 366, 'boost_from_average': True, 'learning_rate': 0.03601243749157836}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:21:36,856][0m Trial 74 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 5.230337301778429, 'lambda_l2': 2.468237123333518e-08, 'num_leaves': 93, 'feature_fraction': 0.8593305888620668, 'bagging_fraction': 0.6015620069290092, 'min_child_samples': 17, 'max_depth': 6, 'num_iterations': 446, 'boost_from_average': True, 'learning_rate': 0.03614836116752286}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:23:01,878][0m Trial 75 finished with value: 0.016086184689426557 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 3.048521432624978, 'lambda_l2': 0.000329316832934709, 'num_leaves': 95, 'feature_fraction': 0.8881640548719084, 'bagging_fraction': 0.6366909470210182, 'min_child_samples': 13, 'max_depth': 6, 'num_iterations': 440, 'boost_from_average': True, 'learning_rate': 0.03686750737878453}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:24:59,925][0m Trial 76 finished with value: 0.01613366614067254 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 1.1603912174788447, 'lambda_l2': 1.0490066220567571e-08, 'num_leaves': 84, 'feature_fraction': 0.74140030791931, 'bagging_fraction': 0.5923510589442691, 'min_child_samples': 18, 'max_depth': 6, 'num_iterations': 503, 'boost_from_average': True, 'learning_rate': 0.056634333864849395}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:25:46,078][0m Trial 77 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 7.427230563414996, 'lambda_l2': 2.338755727251785e-06, 'num_leaves': 103, 'feature_fraction': 0.7165631328384987, 'bagging_fraction': 0.725062145670266, 'min_child_samples': 68, 'max_depth': 7, 'num_iterations': 163, 'boost_from_average': True, 'learning_rate': 0.1454080925012105}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:26:29,780][0m Trial 78 finished with value: 0.016100232685770422 and parameters: {'objective': 'regression', 'boosting_type': 'gbdt', 'lambda_l1': 2.6025614326408397, 'lambda_l2': 1.083254106979381e-08, 'num_leaves': 129, 'feature_fraction': 0.8111831467358704, 'bagging_fraction': 0.4481120298583166, 'min_child_samples': 74, 'max_depth': 6, 'num_iterations': 190, 'boost_from_average': True, 'learning_rate': 0.10243888673201446}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:28:16,532][0m Trial 79 finished with value: 0.01616290791540994 and parameters: {'objective': 'regression', 'boosting_type': 'dart', 'lambda_l1': 0.3883454875489108, 'lambda_l2': 1.8833299013995632, 'num_leaves': 158, 'feature_fraction': 0.9977610206707981, 'bagging_fraction': 0.5704015313031978, 'min_child_samples': 30, 'max_depth': 7, 'num_iterations': 100, 'boost_from_average': True, 'learning_rate': 0.06281789319516312}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:32:56,100][0m Trial 80 finished with value: 0.016254762139070452 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 6.480756161858034e-08, 'lambda_l2': 9.69502300740494, 'num_leaves': 154, 'feature_fraction': 0.9499661385814498, 'bagging_fraction': 0.535220801599735, 'min_child_samples': 26, 'max_depth': 8, 'num_iterations': 177, 'boost_from_average': False, 'learning_rate': 0.026992288686690874}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:33:38,735][0m Trial 81 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.850046013054431, 'lambda_l2': 1.296726072085771e-07, 'num_leaves': 146, 'feature_fraction': 0.9356641442652007, 'bagging_fraction': 0.48292592266174394, 'min_child_samples': 97, 'max_depth': 6, 'num_iterations': 138, 'boost_from_average': True, 'learning_rate': 0.041117922869812404}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:34:49,164][0m Trial 82 finished with value: 0.01609360198832522 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 3.0333725817979933, 'lambda_l2': 1.4359039172244503e-07, 'num_leaves': 123, 'feature_fraction': 0.786284318989218, 'bagging_fraction': 0.7845925617213201, 'min_child_samples': 21, 'max_depth': 6, 'num_iterations': 318, 'boost_from_average': True, 'learning_rate': 0.047905263649830436}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:35:56,548][0m Trial 83 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 6.874511937470793, 'lambda_l2': 3.4467962431046606e-07, 'num_leaves': 132, 'feature_fraction': 0.9082832027167113, 'bagging_fraction': 0.7350772752602812, 'min_child_samples': 13, 'max_depth': 6, 'num_iterations': 273, 'boost_from_average': True, 'learning_rate': 0.04996908657788224}. Best is trial 10 with value: 0.01607264025061446.[0m





  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:38:39,021][0m Trial 84 finished with value: 0.016315187597222867 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 0.0012935549284730666, 'lambda_l2': 5.447923993284767e-08, 'num_leaves': 108, 'feature_fraction': 0.9142396682228053, 'bagging_fraction': 0.5066265234369278, 'min_child_samples': 96, 'max_depth': 6, 'num_iterations': 216, 'boost_from_average': True, 'learning_rate': 0.05593973410660888}. Best is trial 10 with value: 0.01607264025061446.[0m




  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
[32m[I 2023-12-31 23:39:34,339][0m Trial 85 finished with value: 0.01607264025061446 and parameters: {'objective': 'regression', 'boosting_type': 'rf', 'lambda_l1': 9.041712133611075, 'lambda_l2': 4.076317018467136e-07, 'num_leaves': 150, 'feature_fraction': 0.9797519233897504, 'bagging_fraction': 0.4200272318501896, 'min_child_samples': 28, 'max_depth': 7, 'num_iterations': 257, 'boost_from_average': True, 'learning_rate': 0.024549571773505325}. Best is trial 10 with value: 0.01607264025061446.[0m


In [202]:
best_param

{'objective': 'regression',
 'boosting_type': 'rf',
 'lambda_l1': 8.816979942542755,
 'lambda_l2': 3.281542231961584e-06,
 'num_leaves': 138,
 'feature_fraction': 0.9948710619824198,
 'bagging_fraction': 0.43847967094014917,
 'min_child_samples': 92,
 'max_depth': 5,
 'num_iterations': 114,
 'boost_from_average': True,
 'learning_rate': 0.04548612752166902,
 'bagging_freq': 1}

In [197]:
best_param = study.best_params
best_param["bagging_freq"] = 1
# best_param["feature_pre_filter"] = False

lgb_tuned = MultiOutputRegressor(lgb.LGBMRegressor(**best_param), n_jobs=-1)
lgb_tuned.fit(X_train, y_train)
lgb_preds_tuned = lgb_tuned.predict(X_test)

















In [180]:
def raw_result_to_daily_errors(result_df, live_start_date = "2023-12-25"):
    
    errors = result_df.groupby(["date", "short_name"]).apply(
        lambda x: pd.Series({
            "wmape": abs(x["prediction_price"] - x["price"]).sum() / x["price"].sum()
            })
            ).reset_index()
    errors = errors[errors.date >= live_start_date].reset_index(drop=True)

    return errors.groupby(["date"]).wmape.mean()

def raw_result_to_ensemble_result(ensemble_dictionary):

    ensemble_df = pd.concat(ensemble_dictionary).reset_index().drop(columns=["level_1"])
    ensemble_df.rename(columns={"level_0": "type"}, inplace=True)
    ensemble_df = ensemble_df.groupby(
        ["date", "hour", "short_name", "yesterday_close"]
        )[["price", "prediction_return", "prediction_price"]].mean().reset_index()
    
    ensemble_dictionary["ensemble"] = ensemble_df
    
    return ensemble_df[list(catboost_result)]

catboost_result = pred_to_result_dataframe(cb_preds)
lightgbm_result = pred_to_result_dataframe(lgb_preds)
lightgbm_tuned = pred_to_result_dataframe(lgb_preds_tuned)

ensemble_dict = {
    "catboost-default": catboost_result,
    "lightgbm-default": lightgbm_result,
    "lightgbm-tuned": lightgbm_tuned
}
ensemble_result = raw_result_to_ensemble_result(ensemble_dict)

print(raw_result_to_daily_errors(lightgbm_result))
print(raw_result_to_daily_errors(lightgbm_tuned))
print(raw_result_to_daily_errors(catboost_result))
print(raw_result_to_daily_errors(ensemble_result))

date
2023-12-25    0.027892
2023-12-26    0.014122
2023-12-27    0.016455
2023-12-28    0.021119
Name: wmape, dtype: float64
date
2023-12-25    0.028508
2023-12-26    0.013953
2023-12-27    0.016339
2023-12-28    0.020509
Name: wmape, dtype: float64
date
2023-12-25    0.031589
2023-12-26    0.013977
2023-12-27    0.016765
2023-12-28    0.019673
Name: wmape, dtype: float64
date
2023-12-25    0.028224
2023-12-26    0.014143
2023-12-27    0.016364
2023-12-28    0.021351
Name: wmape, dtype: float64
