In [6]:
import sys
import pandas as pd

In [7]:
data_A = pd.read_csv("current_csv_files/data_A.csv", index_col='date_forecast').astype('float')
data_B = pd.read_csv("current_csv_files/data_B.csv", index_col='date_forecast').astype('float')
data_C = pd.read_csv("current_csv_files/data_C.csv", index_col='date_forecast').astype('float')

data_A_train = data_A[data_A['est'] == 0]
data_B_train = data_B[data_B['est'] == 0]
data_C_train = data_C[data_C['est'] == 0]

data_A_val = data_A[data_A['est'] == 1]
data_B_val = data_B[data_B['est'] == 1]
data_C_val = data_C[data_C['est'] == 1]



In [8]:
X_data_A_train = data_A_train
y_data_A_train = X_data_A_train.pop('target')

X_data_A_val = data_A_val
y_data_A_val = X_data_A_val.pop('target')

In [18]:
import catboost as cb
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import optuna

In [26]:
def objective(trial):
    params = {
        "iterations": 10000,
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 10),
        "subsample": trial.suggest_float("subsample", 0.05, 1.0),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.05, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
        
    }

    model_A = cb.CatBoostRegressor(**params, silent=True)
    model_A.fit(X_data_A_train, y_data_A_train)
    predictions = model_A.predict(X_data_A_val)
    rmae = mean_absolute_error(y_data_A_val, predictions)
    return rmae

In [27]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

print('Best hyperparameters:', study.best_params)
print('Best RMAE:', study.best_value)

[I 2023-11-06 15:07:35,367] A new study created in memory with name: no-name-7804ee6b-8f58-4dd5-80aa-d6e59cd8286e
[I 2023-11-06 15:08:02,151] Trial 0 finished with value: 282.52011254832394 and parameters: {'learning_rate': 0.01556872594431643, 'depth': 5, 'subsample': 0.7684325712298643, 'colsample_bylevel': 0.5328314697008247, 'min_data_in_leaf': 66}. Best is trial 0 with value: 282.52011254832394.
[I 2023-11-06 15:08:21,039] Trial 1 finished with value: 282.06727018338086 and parameters: {'learning_rate': 0.010459878732403616, 'depth': 5, 'subsample': 0.20970068454187274, 'colsample_bylevel': 0.46499900254158805, 'min_data_in_leaf': 46}. Best is trial 1 with value: 282.06727018338086.
[I 2023-11-06 15:08:29,059] Trial 2 finished with value: 350.3994169808716 and parameters: {'learning_rate': 0.00994129114710742, 'depth': 1, 'subsample': 0.439334681160036, 'colsample_bylevel': 0.32081467101054695, 'min_data_in_leaf': 43}. Best is trial 1 with value: 282.06727018338086.
[I 2023-11-06 

Best hyperparameters: {'learning_rate': 0.0035170308367742866, 'depth': 9, 'subsample': 0.6201041487044322, 'colsample_bylevel': 0.7600364600270351, 'min_data_in_leaf': 92}
Best RMAE: 274.55185863206276


On A:
Best hyperparameters: {'learning_rate': 0.0035170308367742866, 'depth': 9, 'subsample': 0.6201041487044322, 'colsample_bylevel': 0.7600364600270351, 'min_data_in_leaf': 92}
Best RMAE: 274.55185863206276

In [28]:
def objective2(trial):
    params = {
        "iterations": trial.suggest_int("iterations", 1000, 100000, log=True),

        # From last:
        'learning_rate': 0.0035170308367742866, 'depth': 9, 'subsample': 0.6201041487044322, 'colsample_bylevel': 0.7600364600270351, 'min_data_in_leaf': 92
    }

    model_A = cb.CatBoostRegressor(**params, silent=True)
    model_A.fit(X_data_A_train, y_data_A_train)
    predictions = model_A.predict(X_data_A_val)
    rmae = mean_absolute_error(y_data_A_val, predictions)
    return rmae

In [None]:
study2 = optuna.create_study(direction='minimize')
study2.optimize(objective2, n_trials=30)

print('Best hyperparameters:', study2.best_params)
print('Best RMAE:', study2.best_value)

[I 2023-11-06 16:25:36,272] A new study created in memory with name: no-name-272331c3-3eeb-49dc-b815-304e5e9889ce

[I 2023-11-06 16:39:35,508] Trial 0 finished with value: 277.79948878329134 and parameters: {'iterations': 70179}. Best is trial 0 with value: 277.79948878329134.

[I 2023-11-06 16:46:44,461] Trial 1 finished with value: 277.3848520837519 and parameters: {'iterations': 35587}. Best is trial 1 with value: 277.3848520837519.

In [32]:
def objective3(trial):
    params = {
        "has_time": True,
        # From last:
        "iterations": 10000,
        'learning_rate': 0.0035170308367742866, 'depth': 9, 'subsample': 0.6201041487044322, 'colsample_bylevel': 0.7600364600270351, 'min_data_in_leaf': 92
    }

    model_A = cb.CatBoostRegressor(**params, silent=True)
    model_A.fit(X_data_A_train, y_data_A_train)
    predictions = model_A.predict(X_data_A_val)
    rmae = mean_absolute_error(y_data_A_val, predictions)
    return rmae

In [33]:
study3 = optuna.create_study(direction='minimize')
study3.optimize(objective3, n_trials=1)

print('Best hyperparameters:', study3.best_params)
print('Best RMAE:', study3.best_value)

[I 2023-11-06 16:54:39,817] A new study created in memory with name: no-name-2728d784-bc3e-4079-8bb7-ea78b1749def
[I 2023-11-06 16:56:37,547] Trial 0 finished with value: 274.55185863206276 and parameters: {}. Best is trial 0 with value: 274.55185863206276.


Best hyperparameters: {}
Best RMAE: 274.55185863206276


[I 2023-11-06 16:54:39,817] A new study created in memory with name: no-name-2728d784-bc3e-4079-8bb7-ea78b1749def

[I 2023-11-06 16:56:37,547] Trial 0 finished with value: 274.55185863206276 and parameters: {}. Best is trial 0 with value: 274.55185863206276.

Best hyperparameters: {}

Best RMAE: 274.55185863206276