In [1]:
import sys
import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import optuna

# Put the parent of the current working directory on the import path
# (presumably so the `src.*` imports that follow can be resolved -- confirm
# against the project layout).
# The membership guard keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path on every execution.
project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)

import src.models.lgbm.lgbm_cv_trainer as cv
import src.models.lgbm.lgbm_optuna_optimizer as op

In [3]:
# Load data

# Column names handed to the LightGBM helpers through their `cat_cols` argument.
cat_cols = [
    "Tem",
    "Hum",
    "Moi",
    "N",
    "K",
    "P",
    "Soil",
    "Crop",
]

# Read the .env file that sits one directory above the current working
# directory, then look up the Optuna storage URL.
# `url` is None when OPTUNA_STORAGE_URL is not defined.
env_path = Path.cwd().parent.joinpath(".env")
load_dotenv(dotenv_path=env_path)
url = os.getenv("OPTUNA_STORAGE_URL")

# Train / test feature tables (paths are relative to the working directory).
tr_df4 = pd.read_csv("../artifacts/features/tr_df4.csv")
test_df4 = pd.read_csv("../artifacts/features/test_df4.csv")

In [7]:
# Hyperparameter tuning on tr_df4

# Build the Optuna objective from the training frame.
objective = op.create_objective(
    tr_df4, early_stopping_rounds=100, cat_cols=cat_cols, n_jobs=25
)

# NOTE(review): this sampler is created but never passed to
# op.run_optuna_search below, so it has no effect on the search as written.
# Confirm whether it was meant to be supplied to the study.
random_sampler = optuna.samplers.RandomSampler(seed=42)

# Keyword options for the search; the study name and storage URL identify
# the study that the run attaches to.
search_options = dict(
    n_trials=30,
    n_jobs=1,
    study_name="lgb_v4",
    storage=url,
)
# Disabled option kept from the original cell:
# search_options["initial_params"] = params

study = op.run_optuna_search(objective, **search_options)

[I 2025-07-09 17:57:01,864] Using an existing study with name 'lgb_v4' instead of creating a new one.


  0%|          | 0/30 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds
[100]	train's multi_logloss: 1.92115	train's map@3: 0.329293	eval's multi_logloss: 1.92748	eval's map@3: 0.318653
[200]	train's multi_logloss: 1.91989	train's map@3: 0.3296	eval's multi_logloss: 1.92693	eval's map@3: 0.319139
Early stopping, best iteration is:
[187]	train's multi_logloss: 1.91994	train's map@3: 0.329685	eval's multi_logloss: 1.92693	eval's map@3: 0.319275
Training time: 00:03:25
Train map@3: 0.32969
Valid map@3: 0.31927
[I 2025-07-09 18:00:27,498] Trial 3 finished with value: 0.3192745098039216 and parameters: {'max_depth': 7, 'num_leaves': 525, 'min_child_samples': 5692, 'min_split_gain': 1.7962148524739476, 'feature_fraction': 0.2648088714001867, 'bagging_fraction': 0.9244412457992262, 'bagging_freq': 10, 'lambda_l1': 2.43976509540032, 'lambda_l2': 0.10367666255746932}. Best is trial 3 with value: 0.3192745098039216.
Training until validation scores don't improve for 100 rounds
[100]	train's multi_logloss:

KeyboardInterrupt: 

In [5]:
# Full train for run 16

# LightGBM hyperparameters used for the full-data fit.
params = dict(
    max_depth=8,
    num_leaves=621,
    min_child_samples=9729,
    min_split_gain=0.0008793022734111571,
    feature_fraction=0.3514478163144178,
    bagging_fraction=0.8842473071543802,
    bagging_freq=2,
    lambda_l1=0.00026003894788305436,
    lambda_l2=2.112199050376843,
)

trainer = cv.LGBCVTrainer(params=params, cat_cols=cat_cols)

# NOTE(review): 851 and 16 are passed positionally. 16 presumably is the run
# id that ends up in the saved prediction file name, and 851 presumably the
# number of boosting rounds -- confirm against LGBCVTrainer.full_train.
trainer.full_train(tr_df4, test_df4, 851, 16)

Training time: 00:18:46
Successfully saved test predictions to ../artifacts/test_preds/full/test_full_16.npy
