In [14]:
import sys
import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import optuna
import importlib

sys.path.append(os.path.abspath(".."))

import src.models.xgb.xgb_optuna_optimizer as op
import src.models.xgb.xgb_cv_trainer as cv
import src.utils.telegram as te

In [15]:
# Load data
# Read environment variables from the .env file located in the parent of the
# current working directory.
env_path = Path.cwd().parent.joinpath(".env")
load_dotenv(dotenv_path=env_path)
# Storage URL handed to the Optuna search; None if OPTUNA_STORAGE_URL is unset.
url = os.getenv("OPTUNA_STORAGE_URL")

# Feature tables stored as parquet, read in the order the names are listed.
tr_df1, l1_tr_df3, l1_tr_df3_1, l1_tr_df4 = (
    pd.read_parquet(parquet_file)
    for parquet_file in (
        "../artifacts/features/base/tr_df1.parquet",
        "../artifacts/features/l1/l1_tr_df3.parquet",
        "../artifacts/features/l1/l1_tr_df3_1.parquet",
        "../artifacts/features/l1/l1_tr_df4.parquet",
    )
)

In [16]:
# Hyperparameter tuning on the l1_tr_df4 features.

# Re-import the project modules (cv first, then op) so that source edits are
# picked up without restarting the kernel.
for stale_module in (cv, op):
    importlib.reload(stale_module)

# Settings forwarded to the objective factory.
# NOTE(review): tree_method="gpu_hist" is deprecated in XGBoost 2.x in favour of
# tree_method="hist" with device="cuda" -- confirm against the installed version
# and the signature of op.create_objective before changing it.
objective_options = {
    "early_stopping_rounds": 500,
    "tree_method": "gpu_hist",
    "n_jobs": 10,
}
objective = op.create_objective(l1_tr_df4, **objective_options)

# NOTE(review): random_sampler is not referenced anywhere in this cell; the
# search below runs with the TPE sampler. Kept because other cells may use it.
random_sampler = optuna.samplers.RandomSampler(seed=42)

# Seeded TPE sampler with 10 startup trials.
tpe_sampler = optuna.samplers.TPESampler(seed=42, n_startup_trials=10)

# Settings forwarded to the search runner; `url` comes from the data-loading cell.
search_options = {
    "n_trials": 30,
    "n_jobs": 1,
    "study_name": "l1_xgb_v3",
    "storage": url,
    "sampler": tpe_sampler,
}
op.run_optuna_search(objective, **search_options)

# Notify via Telegram once the search call has returned.
te.send_telegram_message("Training complete!")

[I 2025-07-26 16:49:23,542] A new study created in RDB with name: l1_xgb_v3


  0%|          | 0/30 [00:00<?, ?it/s]

[0]	train-rmse:3.70060	eval-rmse:3.70032
[100]	train-rmse:0.63648	eval-rmse:0.63765
[200]	train-rmse:0.12349	eval-rmse:0.12530
[300]	train-rmse:0.06149	eval-rmse:0.06352
[400]	train-rmse:0.05852	eval-rmse:0.06051
[500]	train-rmse:0.05832	eval-rmse:0.06039
[600]	train-rmse:0.05823	eval-rmse:0.06041
[700]	train-rmse:0.05815	eval-rmse:0.06043
[800]	train-rmse:0.05807	eval-rmse:0.06045
[900]	train-rmse:0.05800	eval-rmse:0.06048
[993]	train-rmse:0.05794	eval-rmse:0.06051
Training time: 00:00:21
Train rmse: 0.05833
Valid rmse: 0.06039
[I 2025-07-26 16:49:45,569] Trial 0 finished with value: 0.06039081062151969 and parameters: {'learning_rate': 0.01749080237694725, 'max_depth': 15, 'min_child_weight': 74.53942447208348, 'colsample_bytree': 1.0, 'subsample': 0.7394633936788146, 'reg_alpha': 0.0006715208365130138, 'reg_lambda': 0.000602521573620386}. Best is trial 0 with value: 0.06039081062151969.
[0]	train-rmse:3.72445	eval-rmse:3.72416
[100]	train-rmse:1.21422	eval-rmse:1.21504
[200]	train-r