In [1]:
import sys
import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import optuna
import importlib

# Make the parent of the current working directory importable. Presumably that
# directory is the project root holding the `src` package — TODO confirm.
# This has to run before the `src.*` imports below.
sys.path.append(os.path.abspath(".."))

# Project-local LightGBM helper modules, aliased as `cv` and `op` for later cells.
import src.models.lgbm.lgbm_cv_trainer as cv
import src.models.lgbm.lgbm_optuna_optimizer as op

In [2]:
# Load configuration and data.

# Read the `.env` file that sits one level above the working directory, then
# pick up the Optuna storage URL from the environment.
# NOTE(review): `url` is None when OPTUNA_STORAGE_URL is unset; it is later
# passed as `storage=url` — confirm how the search helper handles None.
env_path = Path.cwd().parent.joinpath(".env")
load_dotenv(dotenv_path=env_path)
url = os.getenv("OPTUNA_STORAGE_URL")

# Feature tables (train and test) for feature-set 1.
tr_df1, test_df1 = (
    pd.read_csv(csv_file)
    for csv_file in (
        "../artifacts/features/tr_df1.csv",
        "../artifacts/features/test_df1.csv",
    )
)

# Columns passed to the LightGBM helpers as `cat_cols`.
cat_cols = ["Sex"]

In [5]:
# Hyperparameter tuning on tr_df1.

# Re-import the helper modules so edits made to them since the first import
# take effect without restarting the kernel.
importlib.reload(cv)
importlib.reload(op)

# Random search with a fixed seed.
# NOTE(review): when the named study already exists in storage and is resumed,
# a fixed seed may make a re-run propose the same parameter sets again —
# confirm this is intended.
random_sampler = optuna.samplers.RandomSampler(seed=42)

# Objective built from the training table; `n_jobs=25` here is forwarded to
# the helper (presumably LightGBM's thread count — TODO confirm).
objective = op.create_objective(
    tr_df1, cat_cols=cat_cols, early_stopping_rounds=500, n_jobs=25
)

# Run 10 trials, one at a time, recording them under study "lgb_v1.1".
study = op.run_optuna_search(
    objective,
    study_name="lgb_v1.1",
    storage=url,
    sampler=random_sampler,
    n_trials=10,
    n_jobs=1,
)

[I 2025-07-17 04:18:12,816] Using an existing study with name 'lgb_v1.1' instead of creating a new one.


  0%|          | 0/10 [00:00<?, ?it/s]

Training until validation scores don't improve for 500 rounds
[100]	train's rmse: 0.108151	eval's rmse: 0.106974
[200]	train's rmse: 0.0948969	eval's rmse: 0.0938244
[300]	train's rmse: 0.0909264	eval's rmse: 0.0899019
[400]	train's rmse: 0.0892709	eval's rmse: 0.088285
[500]	train's rmse: 0.0885346	eval's rmse: 0.0875684
[600]	train's rmse: 0.0883313	eval's rmse: 0.0873808
[700]	train's rmse: 0.0881251	eval's rmse: 0.0871906
[800]	train's rmse: 0.0880492	eval's rmse: 0.0871209
[900]	train's rmse: 0.087992	eval's rmse: 0.087069
[1000]	train's rmse: 0.0879148	eval's rmse: 0.0870015
[1100]	train's rmse: 0.0878489	eval's rmse: 0.0869393
[1200]	train's rmse: 0.0878252	eval's rmse: 0.0869172
[1300]	train's rmse: 0.0878134	eval's rmse: 0.0869062
[1400]	train's rmse: 0.0877965	eval's rmse: 0.0868924
[1500]	train's rmse: 0.0877821	eval's rmse: 0.0868795
[1600]	train's rmse: 0.0877718	eval's rmse: 0.0868699
[1700]	train's rmse: 0.0877646	eval's rmse: 0.0868649
[1800]	train's rmse: 0.0877559	eva

KeyboardInterrupt: 

In [5]:
# Full training run for #16.

# LightGBM hyperparameters for this run (presumably taken from a tuning
# trial — TODO confirm the source).
params = dict(
    max_depth=8,
    num_leaves=621,
    min_child_samples=9729,
    min_split_gain=0.0008793022734111571,
    feature_fraction=0.3514478163144178,
    bagging_fraction=0.8842473071543802,
    bagging_freq=2,
    lambda_l1=0.00026003894788305436,
    lambda_l2=2.112199050376843,
)

trainer = cv.LGBCVTrainer(params=params, cat_cols=cat_cols)

# NOTE(review): `tr_df4` / `test_df4` are not defined in any visible cell —
# confirm they are loaded before this cell runs on a fresh kernel.
# 851 is presumably the number of boosting rounds — TODO confirm.
# 16 is the run id; it appears in the saved file name (test_full_16.npy).
trainer.full_train(tr_df4, test_df4, 851, 16)

Training time: 00:18:46
Successfully saved test predictions to ../artifacts/test_preds/full/test_full_16.npy
