In [1]:
import sys
import os
from pathlib import Path
from dotenv import load_dotenv
import cudf
import optuna
import importlib

# Put the parent of the current working directory on the import path so the
# `src.*` imports below can be resolved (presumably the project root — this
# depends on the kernel being started from the notebook's own directory).
sys.path.append(os.path.abspath(".."))

import src.models.rfc.rfc_cv_trainer as cv
import src.models.rfc.rfc_optuna_optimizer as op
import src.utils.telegram as te

In [2]:
# Load configuration and training data.
# Environment variables are read from the `.env` file located one directory
# above the current working directory.
env_path = Path.cwd().parent / ".env"
load_dotenv(dotenv_path=env_path)

# Storage URL that the tuning cell passes on as `storage=url`.
# Fail fast when it is missing or empty: os.environ.get() would otherwise
# return None silently, and a None storage would presumably make Optuna fall
# back to a non-persistent in-memory study (confirm against
# run_optuna_search) instead of resuming the stored one.
url = os.environ.get("OPTUNA_STORAGE_URL")
if not url:
    raise RuntimeError(
        f"OPTUNA_STORAGE_URL is not set (looked for a .env file at {env_path}); "
        "refusing to continue without a persistent Optuna storage."
    )

# Training features, read on the GPU with cuDF; every column is cast to float32.
tr_df1 = cudf.read_parquet("../artifacts/features/base/tr_df1.parquet").astype("float32")

In [6]:
# Hyperparameter search
# Reload the project modules (trainer first, then optimizer) so that edits
# made on disk are picked up without restarting the kernel.
for project_module in (cv, op):
    importlib.reload(project_module)

# Objective built from the training frame loaded earlier in the notebook.
objective = op.create_objective(tr_df1)

# Seeded TPE sampler with 20 start-up trials.
tpe_sampler = optuna.samplers.TPESampler(seed=42, n_startup_trials=20)

# Settings for the search: 30 trials, minimizing the objective, under the
# study name "rfc_v1" in the storage given by `url`.
search_options = dict(
    n_trials=30,
    direction="minimize",
    study_name="rfc_v1",
    storage=url,
    sampler=tpe_sampler,
)
op.run_optuna_search(objective, **search_options)

# Notify via Telegram once the search call has returned.
te.send_telegram_message("RFC Training Complete!")

[I 2025-08-01 21:31:44,182] Using an existing study with name 'rfc_v1' instead of creating a new one.


  0%|          | 0/30 [00:00<?, ?it/s]

Training time: 00:00:04
Valid Logloss: 0.15790
[I 2025-08-01 21:31:50,200] Trial 90 finished with value: 0.1579044052283344 and parameters: {'n_estimators': 112, 'max_depth': 43}. Best is trial 51 with value: 0.15767291749877244.
Training time: 00:00:04
Valid Logloss: 0.16003
[I 2025-08-01 21:31:55,002] Trial 91 finished with value: 0.16002891161315924 and parameters: {'n_estimators': 112, 'max_depth': 51}. Best is trial 51 with value: 0.15767291749877244.
Training time: 00:00:03
Valid Logloss: 0.15748
[I 2025-08-01 21:31:58,597] Trial 92 finished with value: 0.15747756387478812 and parameters: {'n_estimators': 107, 'max_depth': 22}. Best is trial 92 with value: 0.15747756387478812.
Training time: 00:00:03
Valid Logloss: 0.15784
[I 2025-08-01 21:32:02,309] Trial 93 finished with value: 0.1578412680839151 and parameters: {'n_estimators': 105, 'max_depth': 30}. Best is trial 92 with value: 0.15747756387478812.
Training time: 00:00:03
Valid Logloss: 0.15855
[I 2025-08-01 21:32:05,646] Tri