In [5]:
import sys
import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import optuna

# Put the parent of the current working directory on the import path so the
# `src.*` modules imported below can be resolved.
parent_dir = os.path.abspath("..")
sys.path.append(parent_dir)

import src.models.xgb.xgb_optuna_optimizer as op
import src.models.xgb.xgb_cv_trainer as cv

In [6]:
# Load data.
# Variables are read from the .env file located in the parent of the current
# working directory, then the Optuna storage URL is taken from the environment.
env_path = Path.cwd().parent.joinpath(".env")
load_dotenv(dotenv_path=env_path)
# NOTE(review): `url` is None when OPTUNA_STORAGE_URL is not set — confirm how
# the downstream search helper behaves in that case.
url = os.getenv("OPTUNA_STORAGE_URL")

# Train / test feature tables.
tr_df4 = pd.read_csv(
    "../artifacts/features/tr_df4.csv"
)
test_df4 = pd.read_csv(
    "../artifacts/features/test_df4.csv"
)

# Column names handed to the objective / trainer through their `cat_cols` argument.
cat_cols = [
    "Tem",
    "Hum",
    "Moi",
    "N",
    "K",
    "P",
    "Soil",
    "Crop",
]

In [4]:
# tr_df4: random search -> TPE search at eta 0.02.
# Best params obtained at eta 0.1.
best_params = dict(
    max_depth=8,
    min_child_weight=29.803558296988122,
    colsample_bytree=0.3970689908418626,
    subsample=0.856532598287343,
    reg_alpha=9.002010481036006,
    reg_lambda=0.004363285541408288,
)

# NOTE(review): "gpu_hist" is deprecated in XGBoost >= 2.0 in favour of
# tree_method="hist" with device="cuda" — confirm the installed version and how
# create_objective forwards this value.
objective = op.create_objective(
    tr_df4,
    early_stopping_rounds=500, tree_method="gpu_hist",
    n_jobs=10, cat_cols=cat_cols,
)

# NOTE(review): this sampler is created but not passed to run_optuna_search in
# this cell — confirm whether the search is supposed to receive it.
random_sampler = optuna.samplers.RandomSampler(seed=42)

# Run one trial against the "xgb_v4.1" study, handing over best_params as
# `initial_params`. The call is kept as the cell's final expression so the
# returned object is displayed.
search_options = {
    "n_trials": 1,
    "n_jobs": 1,
    "study_name": "xgb_v4.1",
    "storage": url,
    "initial_params": best_params,
}
op.run_optuna_search(objective, **search_options)

[I 2025-07-12 14:46:26,772] Using an existing study with name 'xgb_v4.1' instead of creating a new one.


  0%|          | 0/1 [00:00<?, ?it/s]

[0]	train-map@3:0.30832	eval-map@3:0.29452
[100]	train-map@3:0.35338	eval-map@3:0.32591
[200]	train-map@3:0.36443	eval-map@3:0.33122
[300]	train-map@3:0.37450	eval-map@3:0.33485
[400]	train-map@3:0.38393	eval-map@3:0.33801
[500]	train-map@3:0.39284	eval-map@3:0.34143
[600]	train-map@3:0.40138	eval-map@3:0.34408
[700]	train-map@3:0.40998	eval-map@3:0.34669
[800]	train-map@3:0.41757	eval-map@3:0.34941
[900]	train-map@3:0.42496	eval-map@3:0.35173
[1000]	train-map@3:0.43178	eval-map@3:0.35385
[1100]	train-map@3:0.43788	eval-map@3:0.35578
[1200]	train-map@3:0.44386	eval-map@3:0.35717
[1300]	train-map@3:0.44927	eval-map@3:0.35839
[1400]	train-map@3:0.45420	eval-map@3:0.35971
[1500]	train-map@3:0.45874	eval-map@3:0.36083
[1600]	train-map@3:0.46290	eval-map@3:0.36188
[1700]	train-map@3:0.46688	eval-map@3:0.36273
[1800]	train-map@3:0.47038	eval-map@3:0.36334
[1900]	train-map@3:0.47386	eval-map@3:0.36418
[2000]	train-map@3:0.47700	eval-map@3:0.36457
[2100]	train-map@3:0.48011	eval-map@3:0.36532


<optuna.study.study.Study at 0x176478f02f0>

In [10]:
# Full train for ID 19.
params = dict(
    max_depth=8,
    min_child_weight=29.803558296988122,
    colsample_bytree=0.3970689908418626,
    subsample=0.856532598287343,
    reg_alpha=9.002010481036006,
    reg_lambda=0.004363285541408288,
    learning_rate=0.02,
)

trainer = cv.XGBCVTrainer(
    params=params,
    cat_cols=cat_cols,
)
# 19 matches the "ID19" label of this run; presumably it is the run/model id
# used for the saved prediction file — TODO confirm.
run_id = 19
# NOTE(review): 4613 is presumably the number of boosting rounds — TODO confirm
# against XGBCVTrainer.full_train.
trainer.full_train(tr_df4, test_df4, 4613, run_id)

Training time: 00:06:44
Successfully saved test predictions to ../artifacts/test_preds/full/test_full_19.npy


In [11]:
# Rebuild the trainer with the same params and run its `fit` on the train/test
# frames, keeping both returned prediction objects.
trainer = cv.XGBCVTrainer(
    params=params,
    cat_cols=cat_cols,
)
oof_preds, test_preds = trainer.fit(
    tr_df4,
    test_df4,
)


Fold 1
[0]	train-map@3:0.30832	eval-map@3:0.29452
[100]	train-map@3:0.35338	eval-map@3:0.32591
[200]	train-map@3:0.36443	eval-map@3:0.33122
[300]	train-map@3:0.37450	eval-map@3:0.33485
[400]	train-map@3:0.38393	eval-map@3:0.33801
[500]	train-map@3:0.39284	eval-map@3:0.34143
[600]	train-map@3:0.40138	eval-map@3:0.34408
[700]	train-map@3:0.40998	eval-map@3:0.34669
[800]	train-map@3:0.41757	eval-map@3:0.34941
[900]	train-map@3:0.42496	eval-map@3:0.35173
[1000]	train-map@3:0.43178	eval-map@3:0.35385
[1100]	train-map@3:0.43788	eval-map@3:0.35578
[1200]	train-map@3:0.44386	eval-map@3:0.35717
[1300]	train-map@3:0.44927	eval-map@3:0.35839
[1400]	train-map@3:0.45420	eval-map@3:0.35971
[1500]	train-map@3:0.45874	eval-map@3:0.36083
[1600]	train-map@3:0.46290	eval-map@3:0.36188
[1700]	train-map@3:0.46688	eval-map@3:0.36273
[1800]	train-map@3:0.47038	eval-map@3:0.36334
[1900]	train-map@3:0.47386	eval-map@3:0.36418
[2000]	train-map@3:0.47700	eval-map@3:0.36457
[2100]	train-map@3:0.48011	eval-map@3: