- -> exp048
- optuna でハイパラ調整

In [37]:
# %load_ext autotime
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [38]:
import copy
import json
import os
import sys
import warnings
from pathlib import Path

import wandb
import optuna

import lightgbm as lgb
import numpy as np
import pandas as pd
import torch
from rdkit import rdBase
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GroupKFold, KFold, ShuffleSplit
from tqdm.auto import tqdm

from sklearn.decomposition import TruncatedSVD
from scipy import sparse

rdBase.DisableLog("rdApp.warning")
warnings.filterwarnings("ignore")

wandb.login()



True

In [46]:
pr_number = 1
debug = False

In [40]:
%autoreload 2

src_path = "../"

sys.path.append(src_path)

from src.data import load_data, add_descriptors, add_descriptors_mordred, add_external_data, make_smile_canonical, add_maccs, add_augumented_gmm, add_graph_features, add_count_atoms
from src.model import train_lgb_for_target, save_lgb_model
from src.utils import NULL_FOR_SUBMISSION, generate_scaffold, score, add_scaffold_kfold, scaffold_cv_split, get_useless_cols


In [47]:
exp = "exp049"
notes = "optuna 探索"
model_name = "lgb"

config = {
    "debug": debug,
    "n_splits": 3,
    "num_epochs": 1000,
    "batch_size": 128,
    "drop_ratio": 0.5,
    "force_update_train": False,
    "augumented_gmm": False,
    "is_complement": True,
    "remove_external_cv": True,
}

dataset_title = f"model-{exp}"
dataset_id = f"koya346/{dataset_title}"


if config["debug"]:
    config["num_epochs"] = 10

targets = ["Tg", "FFV", "Tc", "Density", "Rg"]
org_target_cols = [f"org_{target}" for target in targets]        

# TODO: 学習パラメータ定義
params = {
    "objective": "regression",
    "metric": "mae",
    "verbosity": -1,
    "learning_rate": 0.01,
    "max_depth": 7,
    "seed": 42,
    "subsample": 0.7,
    "colsample_bytree": 0.6,
    "num_boost_round": 20000,
}

config.update(params)
pre_params = copy.deepcopy(params)
pre_params["num_boost_round"] = 300


In [58]:
# --------------------------------------------------
# 必須: org-only の fold 評価を返す関数
# --------------------------------------------------
def cv_mae_lgbm(params, X_df, y_series, target_col, target_features, early_stopping_rounds=100):
    """
    params: dict (LightGBM のパラメータ)
    X_df:   学習用の特徴 DataFrame（外部補完を含んでもOK）
    y_series: 目的変数（学習に使う列。外部補完された値も含む想定）
    target_col: 目的変数名（例: 'Tg'）
    target_features: 使用する特徴量列名のリストを要素に持つリスト（前処理済）
    return: (fold_mae_list, fold_best_iterations)
    """

    fold_mae_list = []
    fold_best_iters = []

    # X_df には org の列（例: f"org_{target_col}"）も入っている前提
    # 評価は org-only
    org_target_col = f"org_{target_col}"
    for fold, tr_idx, val_idx in scaffold_cv_split(X_df, target=target_col, n_splits=config["n_splits"], remove_external=config["remove_external_cv"]):
        features = target_features[fold]
        # 学習データ（外部補完を含む）
        X_tr, y_tr = X_df.loc[tr_idx, features], y_series.loc[tr_idx]
        X_va, y_va = X_df.loc[val_idx, features], y_series.loc[val_idx]

        lgb_trn = lgb.Dataset(X_tr, label=y_tr)
        lgb_val = lgb.Dataset(X_va, label=y_va, reference=lgb_trn)

        # 目的：MAE（Huberの場合も metric は l1 を見る）
        run_params = {
            "boosting_type": params.get("boosting", "gbdt"),
            "objective": params.get("objective", "mae"),
            "metric": "l1",
            "learning_rate": params.get("learning_rate", 0.03),
            "num_leaves": params.get("num_leaves", 63),
            "max_depth": params.get("max_depth", -1),
            "min_data_in_leaf": params.get("min_data_in_leaf", 120),
            "feature_fraction": params.get("feature_fraction", 0.75),
            "bagging_fraction": params.get("bagging_fraction", 0.8),
            "bagging_freq": params.get("bagging_freq", 1),
            "lambda_l1": params.get("lambda_l1", 0.0),
            "lambda_l2": params.get("lambda_l2", 0.0),
            "min_split_gain": params.get("min_split_gain", 0.0),
            "max_bin": params.get("max_bin", 255),
            "verbose": -1,
            "seed": params.get("seed", 42),
            "feature_fraction_seed": params.get("feature_fraction_seed", 42),
            "bagging_seed": params.get("bagging_seed", 42),
        }
        if run_params["objective"] == "huber":
            run_params["alpha"] = 0.9  # デフォに近い値
            run_params["huber_delta"] = params.get("huber_delta", 2.0)
        if run_params["boosting_type"] == "dart":
            run_params["drop_rate"] = params.get("drop_rate", 0.1)
            run_params["skip_drop"] = params.get("skip_drop", 0.5)

        model = lgb.train(
            run_params,
            lgb_trn,
            num_boost_round=params.get("num_boost_round", 5000),
            valid_sets=[lgb_trn, lgb_val],
            valid_names=["train", "valid"],
            callbacks=[lgb.early_stopping(early_stopping_rounds, verbose=False)],
        )

        pred = model.predict(X_va, num_iteration=model.best_iteration)
        fold_mae = mean_absolute_error(y_va, pred)

        fold_mae_list.append(fold_mae)
        fold_best_iters.append(model.best_iteration)

    return fold_mae_list, fold_best_iters

# --------------------------------------------------
# Optuna の objective
# --------------------------------------------------
def objective_lgbm(trial, X_df, y_series, target_col, target_features):
    # 探索空間
    boosting = trial.suggest_categorical("boosting", ["gbdt", "dart"])
    objective = trial.suggest_categorical("objective", ["mae", "huber"])

    params = {
        "boosting": boosting,
        "objective": objective,
        "learning_rate": trial.suggest_float("learning_rate", 0.02, 0.05),
        "num_leaves": trial.suggest_categorical("num_leaves", [31, 63, 127, 255]),
        "max_depth": trial.suggest_categorical("max_depth", [-1, 8, 10, 12]),
        "min_data_in_leaf": trial.suggest_categorical("min_data_in_leaf", [50, 100, 150, 200]),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 0.9),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.7, 0.9),
        "bagging_freq": trial.suggest_categorical("bagging_freq", [1, 2]),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-3, 1.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-3, 1.0, log=True),
        "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 0.1),
        "max_bin": 255,
        "num_boost_round": 5000,
        "seed": 42,
        "feature_fraction_seed": trial.suggest_int("feature_fraction_seed", 1, 9999),
        "bagging_seed": trial.suggest_int("bagging_seed", 1, 9999),
    }
    if boosting == "dart":
        params["drop_rate"] = trial.suggest_float("drop_rate", 0.05, 0.2)
        params["skip_drop"] = trial.suggest_float("skip_drop", 0.2, 0.8)
    if objective == "huber":
        params["huber_delta"] = trial.suggest_float("huber_delta", 1.0, 5.0)

    # CV 実行（org-only評価）
    fold_mae, fold_best = cv_mae_lgbm(params, X_df, y_series, target_col, target_features)

    # 途中経過を記録（プルーニングしたければここで trial.report/should_prune）
    if len(fold_mae) > 0:
        trial.report(np.mean(fold_mae), step=0)
        # if trial.should_prune():
        #     raise optuna.TrialPruned()

    # 目的は MAE を最小化
    score = np.mean(fold_mae) if len(fold_mae) else 1e9
    # best_iteration の中央値を属性として持たせておく
    trial.set_user_attr("best_iterations", fold_best)

    return score

def tune_lgbm_for_target(X_df, y_series, target_col, target_features, n_trials=500):
    """
    X_df:  特徴＋ fold + org_{target} を含む学習テーブル
    y_series: 学習に使う目的列（= 外部補完含む値）
    target_col: 目的名（例: 'Tg'）
    target_features: 使用列リストを要素とするリスト
    """
    study = optuna.create_study(direction="minimize", study_name=f"lgbm_{target_col}")
    study.optimize(lambda tr: objective_lgbm(tr, X_df, y_series, target_col, target_features),
                   n_trials=n_trials, gc_after_trial=True)

    best_params = study.best_trial.params
    best_iters = study.best_trial.user_attrs.get("best_iterations", [])
    best_iter_med = int(np.median(best_iters)) if len(best_iters) else 1000

    print(f"[{target_col}] best MAE (OOF): {study.best_value:.5f}")
    print(f"[{target_col}] best params:", best_params)
    print(f"[{target_col}] median(best_iteration across folds) = {best_iter_med}")

    return best_params, best_iter_med, study

In [59]:
wandb_name = f"{exp}_{model_name}" if not config["debug"] else f"{exp}_{model_name}_debug"
wandb.init(project="opp2025", name=wandb_name, config=config)
wandb.log({"Notes": notes})

# ---------------------------
# メイン処理
# ---------------------------
if config["debug"]:
    output_path = Path("/home/kouya-takahashi/kaggle/opp2025/outputs") / exp / "debug"
else:
    output_path = Path("/home/kouya-takahashi/kaggle/opp2025/outputs") / exp

model_output_path = output_path / "model_cv"
if not os.path.exists(model_output_path):
    os.makedirs(model_output_path)

data_dir = Path("/home/kouya-takahashi/kaggle/opp2025/data/raw")

# 学習データ用意

if os.path.exists(output_path / "train.csv") and not config["force_update_train"]:
    train = pd.read_csv(output_path / "train.csv")
else:
    train, _ = load_data(data_dir)
    # 外部データ判定用に元の目的変数を保持しておく
    # 外部データ判定は scaffold_cv_split 内で行う
    for target in targets:
        train[f"org_{target}"] = train[target]
    

    train["org_SMILES"] = train["SMILES"]
    train["SMILES"] = train["SMILES"].apply(make_smile_canonical)
    if config["debug"]:
        # 各ターゲットが欠損していないデータを30 件ずつ取り出す
        tmp_dfs = []
        for target in targets:
            cond = train[target].notnull()
            tmp_dfs.append(train[cond].iloc[:30])
        train = pd.concat(tmp_dfs).reset_index(drop=True)
    else:
        print(train.shape)
        external_data_dict = [
            {
                "ex_path": data_dir / "neurips-open-polymer-prediction-2025/train_supplement/dataset3.csv",
                "col": "Tg",
            },
            {
                "ex_path": data_dir / "neurips-open-polymer-prediction-2025/train_supplement/dataset1.csv",
                "col": "Tc",
                "rename_d": {"TC_mean": "Tc"},
            },
            {
                "ex_path": data_dir / "neurips-open-polymer-prediction-2025/train_supplement/dataset4.csv",
                "col": "FFV",
            },
            {
                "ex_path": data_dir / "tg-smiles-pid-polymer-class/TgSS_enriched_cleaned.csv",
                "col": "Tg",
            },
            {
                "ex_path": data_dir / "smiles-extra-data/data_dnst1.xlsx",
                "col": "Density",
                "rename_d": {"density(g/cm3)": "Density"}, 
            },
            {
                "ex_path": data_dir / "smiles-extra-data/data_tg3.xlsx",
                "col": "Tg",
                "rename_d": {"Tg [K]": "Tg"}, 
            },
            {
                "ex_path": data_dir / "smiles-extra-data/JCIM_sup_bigsmiles.csv",
                "col": "Tg",
                "rename_d": {"Tg (C)": "Tg"}, 
            },
        ]
        for d in external_data_dict:
            print(f"ex_path: {str(d['ex_path'])}")
            train = add_external_data(
                df=train,
                ex_path=d.get("ex_path"),
                col=d.get("col"),
                rename_d=d.get("rename_d"),
                is_complement=config["is_complement"]
            )
            print("after train.shape: ", train.shape)

    train["is_external"] = train["id"].isnull()
    train = add_maccs(train)

    # rdkit の記述子, morgan finger print
    train = add_descriptors(train, radius=2, fp_size=1024)
    train = add_descriptors_mordred(train, num_confs=10, ignore_3D=False)

    new_cols = []
    seen = {}
    for col in train.columns:
        if col in seen:
            seen[col] += 1
            new_cols.append(f"{col}_{seen[col]}")
        else:
            seen[col] = 0
            new_cols.append(col)
    
    train.columns = new_cols
    
    # グラフ特徴量
    train = add_graph_features(train)
    train = add_count_atoms(train)
    
    train["id"] = np.arange(len(train))
    features = train.drop(targets + org_target_cols + ["id", "org_SMILES", "SMILES", "is_external"], axis=1).columns
    for col in features:
        if train[col].dtype == "object":
            train[col] = pd.to_numeric(train[col], errors="coerce")
    useless_cols = get_useless_cols(train.drop(targets + org_target_cols + ["id", "org_SMILES", "SMILES", "is_external"], axis=1))
    
    train = train.drop(useless_cols, axis=1)
    
    os.makedirs(output_path, exist_ok=True)
    train.to_csv(output_path / "train.csv", index=False)
    print("Saved train.csv")

# 不要なビット列を除去
bit_cols = []
remove_cols = []

for col in train.drop(targets + org_target_cols + ["id", "org_SMILES", "SMILES", "is_external"], axis=1).columns:
    if len(train[col].unique()) != 2:
        continue
    if np.all(train[col].unique() == np.array([0, 1])):
        bit_cols.append(col)
        p = train[col].mean()
        if p > 0.01 and p < 0.99:
            continue
        remove_cols.append(col)
print("bit cols: ", len(bit_cols))
print("remove cols: ", len(remove_cols))

train = train.drop(remove_cols, axis=1)

# 計算済の fold を突合
folds = pd.read_csv("/home/kouya-takahashi/kaggle/opp2025/data/preprocess/fold/folds.csv")
train = train.merge(folds[["SMILES", "fold"]], how="left", on="SMILES")

features = train.drop(targets + org_target_cols + ["id", "org_SMILES", "SMILES", "fold", "is_external"], axis=1).columns
print(len(features))
oof_dfs = []

loss_table_wandb = wandb.Table(["exp", "model_name", "fold", "target", "mae", "mse"])
all_loss_tables = []
mae_dict = {}
all_models = {}
all_features = {}

# 特徴量選定
for idx, target_col in enumerate(targets):
    df_train = train[train[target_col].notnull()].reset_index(drop=True)
    X = df_train[features]
    y = df_train[target_col]
    features_by_folds = []
    for fold, tr_idx, val_idx in scaffold_cv_split(df_train, target=target_col, n_splits=config["n_splits"], remove_external=config["remove_external_cv"]):
        if len(tr_idx) == 0 or len(val_idx) == 0:
            print(f"Skip fold... tr_idx: {len(tr_idx)}, val_idx: {len(val_idx)}")
            continue
        hits = np.zeros(X.shape[1], dtype=int)

        rs = ShuffleSplit(n_splits=3, train_size=0.9, random_state=42)
        for idx, (tr_in, va_in) in enumerate(rs.split(tr_idx)):
            X_tr_in, X_va_in = X.iloc[tr_in][features], X.iloc[va_in][features]
            y_tr_in, y_va_in = y.iloc[tr_in], y.iloc[va_in]
            dtrain_in = lgb.Dataset(X_tr_in, label=y_tr_in)
            dvalid_in = lgb.Dataset(X_va_in, label=y_va_in)
            model = lgb.train(
                params,
                dtrain_in,
                valid_sets=[dtrain_in, dvalid_in],
                callbacks=[
                    lgb.early_stopping(stopping_rounds=50),
                    lgb.log_evaluation(200)
                ]
            )
            rank = model.feature_importance("gain").argsort()[::-1]
            topk = set(rank[:int(0.3 * len(rank))])
            hits[list(topk)] += 1
        keep = hits >= 2
        use_features = features[keep]
        features_by_folds.append(use_features)
    all_features[target_col] = features_by_folds

# ハイパラ調整とスコア確認
for idx, target_col in enumerate(targets):
    target_features = all_features[target_col]
    loss_tables = []
    print(f"\n=== Training for target: {target_col} ===")
    
    df_train = train[train[target_col].notnull()].reset_index(drop=True)
    y = df_train[target_col]
    oof = np.full(len(df_train), np.nan, dtype=float)
    
    models = []
    print("n features: ", [len(feat) for feat in target_features])
    best_params, best_iter_med, study = tune_lgbm_for_target(df_train, y, target_col, target_features, n_trials=60)

    for fold, tr_idx, val_idx in scaffold_cv_split(df_train, target=target_col, n_splits=config["n_splits"], remove_external=config["remove_external_cv"]):
        features = target_features[fold]
        
        if len(tr_idx) == 0 or len(val_idx) == 0:
            print(f"Skip fold... tr_idx: {len(tr_idx)}, val_idx: {len(val_idx)}")
            continue
        loss_table = {}
        print(f"fold: {fold + 1}")
        use_features = features
        print(len(use_features))        

        # 特徴量選択して valid データとともに学習
        X_train, X_val = df_train.iloc[tr_idx][use_features], df_train.iloc[val_idx][use_features]
        y_train, y_val = y.iloc[tr_idx], y.iloc[val_idx]

        if config["augumented_gmm"]:
            X_train, y_train = add_augumented_gmm(X_train, y_train)    
        
        dtrain = lgb.Dataset(X_train, label=y_train)
        dval = lgb.Dataset(X_val, label=y_val, reference=dtrain)

        model = lgb.train(
            best_params,
            dtrain,
            valid_sets=[dtrain, dval],
            callbacks=[
                lgb.early_stopping(stopping_rounds=50),
                lgb.log_evaluation(200)
            ]
        )

        save_lgb_model(model, str(model_output_path / f"model_{target_col}_{fold}.txt"))

        pred = model.predict(X_val, num_iteration=model.best_iteration)
        oof[val_idx] = pred

        mse = mean_squared_error(y_val, pred)
        mae = mean_absolute_error(y_val, pred)
        print(f"fold: {fold}, target: {target_col}, mae: {mae}")
        loss_table["fold"] = fold
        loss_table["target"] = target_col
        loss_table["mae"] = mae
        loss_table["mse"] = mse

        loss_tables.append(loss_table)
        models.append(model)

    # 評価に使うのは元データのみ
    cond = (~np.isnan(oof)) & (df_train[f"org_{target_col}"].notnull())
    y_true = y[cond]
    y_pred = oof[cond]
    score_mse = mean_squared_error(y_true, y_pred)
    score_mae = mean_absolute_error(y_true, y_pred)
    print(f"RMSE for {target_col}: {score_mse:.4f}")
    print(f"MAE for {target_col}: {score_mae:.4f}")
    mae_dict[target_col] = score_mae

    for loss_table in loss_tables:
        loss_table_wandb.add_data(exp, model_name, loss_table["fold"], loss_table["target"], loss_table["mae"], loss_table["mse"])
    all_loss_tables += loss_tables

    oof_df = pd.DataFrame({
        "id": df_train["id"].values,
        target_col: oof
    })
    oof_dfs.append(oof_df)   

    all_models[target_col] = models

wandb.log({"fold_target_losses": loss_table_wandb})
# target 毎の 平均 mae を記録
for target in targets:
    key_name = f"{target}_mean_mae"
    mae_values = mae_dict[target]
    # mae_values = [d["mae"] for d in all_loss_tables if d["target"] == target]
    wandb.log({key_name: np.mean(mae_values)})

# CV 計算
cond = ~train["is_external"]
oof_df = pd.DataFrame()

# 元の目的変数も入れておく
for target in targets:
    oof_df[f"org_{target}"] = train.loc[cond, f"org_{target}"]

oof_df["id"] = train.loc[cond, "id"]

# 正規化前の SMILES
oof_df["SMILES"] = train.loc[cond, "org_SMILES"]

for i_oof in oof_dfs:
    oof_df = oof_df.merge(i_oof, on="id", how="left")

# # 予測しなかった部分は null にしておく
# for target in targets:
#     oof_df.loc[oof_df[target] == 0, target] = np.nan

oof_df.to_csv(output_path / "oof.csv", index=False)

solution = train.loc[cond, ["id"] + org_target_cols].copy()
solution.columns = ["id"] + targets

# solution = solution.fillna(NULL_FOR_SUBMISSION)

# oof_df = oof_df.fillna(NULL_FOR_SUBMISSION)

# 評価
final_score = score(
    solution=solution,
    submission=oof_df,
)
print(f"\n📊 Final OOF Score (wMAE): {final_score:.6f}")
wandb.log({"wMAE": final_score})

# target 毎の best_iteration を保存する。保存したモデルには記録されてなかった
best_iterations = {}
for target in targets:
    target_best_iterations = [model.best_iteration for model in all_models[target]]
    best_iterations[target] = target_best_iterations
print(best_iterations)

with open(output_path / "best_iterations.json", "w") as f:
    json.dump(best_iterations, f)

0,1
Notes,optuna 探索


bit cols:  480
remove cols:  47
581
train rows: 24, valid rows: 12, ignore rows: 0
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's l1: 61.2778	valid_1's l1: 103.248
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's l1: 71.4382	valid_1's l1: 58.7813
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's l1: 69.4325	valid_1's l1: 54.2674
train rows: 22, valid rows: 14, ignore rows: 0
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's l1: 62.7842	valid_1's l1: 112.407
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's l1: 68.0968	valid_1's l1: 94.3228
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's l1: 77.4697	valid_1's l1: 37.4573


[I 2025-08-30 13:27:43,292] A new study created in memory with name: lgbm_Tg
[I 2025-08-30 13:27:43,320] Trial 0 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.03489328487055608, 'num_leaves': 31, 'max_depth': -1, 'min_data_in_leaf': 50, 'feature_fraction': 0.88882487416645, 'bagging_fraction': 0.7483773947529123, 'bagging_freq': 2, 'lambda_l1': 0.003238729512274587, 'lambda_l2': 0.044908918376439434, 'min_split_gain': 0.06369468611196646, 'feature_fraction_seed': 7449, 'bagging_seed': 1138}. Best is trial 0 with value: 56.401018078755804.


[200]	training's l1: 3.0714	valid_1's l1: 3.32401
[400]	training's l1: 2.33358	valid_1's l1: 2.73981
Early stopping, best iteration is:
[477]	training's l1: 2.15441	valid_1's l1: 2.65707
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's l1: 5.03009	valid_1's l1: 5.55013
Training until validation scores don't improve for 50 rounds
[200]	training's l1: 3.03086	valid_1's l1: 3.43994
[400]	training's l1: 2.29296	valid_1's l1: 2.88789
Early stopping, best iteration is:
[459]	training's l1: 2.17478	valid_1's l1: 2.84422

=== Training for target: Tg ===
n features:  [174, 174, 174]
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0

[I 2025-08-30 13:27:43,490] Trial 1 finished with value: 61.223520324540345 and parameters: {'boosting': 'gbdt', 'objective': 'huber', 'learning_rate': 0.04434629708340425, 'num_leaves': 255, 'max_depth': 8, 'min_data_in_leaf': 100, 'feature_fraction': 0.6625030891714245, 'bagging_fraction': 0.761863370318359, 'bagging_freq': 1, 'lambda_l1': 0.015328341556948061, 'lambda_l2': 0.38458539701405997, 'min_split_gain': 0.03590948524898107, 'feature_fraction_seed': 1488, 'bagging_seed': 6183, 'huber_delta': 3.726207518776876}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:43,655] Trial 2 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.02431548277860637, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 50, 'feature_fraction': 0.6604371042140155, 'bagging_fraction': 0.8684830274417583, 'bagging_freq': 2, 'lambda_l1': 0.003155808957537449, 'lambda_l2': 0.0018748754692317646, 'min_split_gain': 0.02167


train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:43,825] Trial 3 finished with value: 61.223520324540345 and parameters: {'boosting': 'gbdt', 'objective': 'huber', 'learning_rate': 0.027728313681806626, 'num_leaves': 63, 'max_depth': 12, 'min_data_in_leaf': 50, 'feature_fraction': 0.6025478255126154, 'bagging_fraction': 0.7136300613769865, 'bagging_freq': 2, 'lambda_l1': 0.0032314612154270526, 'lambda_l2': 0.00880379269064843, 'min_split_gain': 0.04930676432147124, 'feature_fraction_seed': 7978, 'bagging_seed': 5005, 'huber_delta': 4.354576014237461}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:43,991] Trial 4 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.0336994493029655, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 100, 'feature_fraction': 0.6945143047570317, 'bagging_fraction': 0.7714004415492193, 'bagging_freq': 2, 'lambda_l1': 0.002877024868417138, 'lambda_l2': 0.00348944494724762, 'min_split_gain': 0.03581

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0

[I 2025-08-30 13:27:44,159] Trial 5 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.025074405347532688, 'num_leaves': 255, 'max_depth': -1, 'min_data_in_leaf': 150, 'feature_fraction': 0.8462190475830855, 'bagging_fraction': 0.719283636253547, 'bagging_freq': 2, 'lambda_l1': 0.002913834740397229, 'lambda_l2': 0.0015117784293307102, 'min_split_gain': 0.02302312476610301, 'feature_fraction_seed': 7531, 'bagging_seed': 6844}. Best is trial 0 with value: 56.401018078755804.



train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:44,618] Trial 6 finished with value: 61.223520324540345 and parameters: {'boosting': 'dart', 'objective': 'huber', 'learning_rate': 0.03904706603577948, 'num_leaves': 31, 'max_depth': 10, 'min_data_in_leaf': 150, 'feature_fraction': 0.768314632920018, 'bagging_fraction': 0.8687979635034636, 'bagging_freq': 2, 'lambda_l1': 0.08723651773117608, 'lambda_l2': 0.6147226022563352, 'min_split_gain': 0.015121407795850928, 'feature_fraction_seed': 8943, 'bagging_seed': 1783, 'drop_rate': 0.17860299914187344, 'skip_drop': 0.490234523562802, 'huber_delta': 2.860757350071148}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:45,075] Trial 7 finished with value: 61.223520324540345 and parameters: {'boosting': 'dart', 'objective': 'huber', 'learning_rate': 0.03132878760347431, 'num_leaves': 255, 'max_depth': -1, 'min_data_in_leaf': 150, 'feature_fraction': 0.8300485667625779, 'bagging_fraction': 0.838837363462439, 'bagging_freq': 1, 'lambda_l1': 0.05701713811202257, 'lambda_l2': 0.022840251758168027, 'min_split_gain': 0.037654434902135825, 'feature_fraction_seed': 4188, 'bagging_seed': 4161, 'drop_rate': 0.052344362847070995, 'skip_drop': 0.7052088906694918, 'huber_delta': 1.2751371822909117}. Best is trial 0 with value: 56.401018078755804.


train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0


[I 2025-08-30 13:27:45,242] Trial 8 finished with value: 61.223520324540345 and parameters: {'boosting': 'gbdt', 'objective': 'huber', 'learning_rate': 0.02521324685095369, 'num_leaves': 63, 'max_depth': -1, 'min_data_in_leaf': 150, 'feature_fraction': 0.7440921746722782, 'bagging_fraction': 0.8043233835803589, 'bagging_freq': 2, 'lambda_l1': 0.206376403043786, 'lambda_l2': 0.16314528984784918, 'min_split_gain': 0.03191154864264441, 'feature_fraction_seed': 4883, 'bagging_seed': 277, 'huber_delta': 3.6022732043912136}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:45,408] Trial 9 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.030834667233342113, 'num_leaves': 31, 'max_depth': 12, 'min_data_in_leaf': 200, 'feature_fraction': 0.8931454971523575, 'bagging_fraction': 0.8516892565653821, 'bagging_freq': 1, 'lambda_l1': 0.009356048130101819, 'lambda_l2': 0.18184352310019516, 'min_split_gain': 0.0617443

train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:45,892] Trial 10 finished with value: 56.401018078755804 and parameters: {'boosting': 'dart', 'objective': 'mae', 'learning_rate': 0.04759785427172606, 'num_leaves': 127, 'max_depth': 8, 'min_data_in_leaf': 50, 'feature_fraction': 0.8995627561738989, 'bagging_fraction': 0.7652953084144519, 'bagging_freq': 1, 'lambda_l1': 0.7666744451712899, 'lambda_l2': 0.06839064736218929, 'min_split_gain': 0.09483252741222606, 'feature_fraction_seed': 1659, 'bagging_seed': 2392, 'drop_rate': 0.08921954446381662, 'skip_drop': 0.21872892739348887}. Best is trial 0 with value: 56.401018078755804.


train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:46,068] Trial 11 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.02191503508311154, 'num_leaves': 31, 'max_depth': 10, 'min_data_in_leaf': 50, 'feature_fraction': 0.6087160244545885, 'bagging_fraction': 0.8866100549030769, 'bagging_freq': 2, 'lambda_l1': 0.0011693361137910213, 'lambda_l2': 0.021227575422749726, 'min_split_gain': 0.0034034325680436933, 'feature_fraction_seed': 3496, 'bagging_seed': 7987}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:46,244] Trial 12 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.03911481561755614, 'num_leaves': 127, 'max_depth': -1, 'min_data_in_leaf': 50, 'feature_fraction': 0.7548562766723572, 'bagging_fraction': 0.811443489196285, 'bagging_freq': 2, 'lambda_l1': 0.01106645512640969, 'lambda_l2': 0.00103080427168624, 'min_split_gain': 0.07663858375112109, 'feature_fraction_seed': 6226, 'bagging_seed': 3425}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:46,428] Trial 13 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.038546404167399975, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 50, 'feature_fraction': 0.6802790783475857, 'bagging_fraction': 0.7407586078702155, 'bagging_freq': 2, 'lambda_l1': 0.0010699883542652868, 'lambda_l2': 0.007090723248854958, 'min_split_gain': 0.06522220161412462, 'feature_fraction_se

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:46,908] Trial 14 finished with value: 56.401018078755804 and parameters: {'boosting': 'dart', 'objective': 'mae', 'learning_rate': 0.020150300512342595, 'num_leaves': 31, 'max_depth': -1, 'min_data_in_leaf': 200, 'feature_fraction': 0.8073126057455877, 'bagging_fraction': 0.8286674247155412, 'bagging_freq': 2, 'lambda_l1': 0.004216933056020791, 'lambda_l2': 0.04289292923979542, 'min_split_gain': 0.08439588582232968, 'feature_fraction_seed': 6247, 'bagging_seed': 310, 'drop_rate': 0.19520286220745317, 'skip_drop': 0.22545965203140217}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:47,091] Trial 15 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.0353447304152343, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 50, 'feature_fraction': 0.7119389427232592, 'bagging_fraction': 0.8994335363961165, 'bagging_freq': 2, 'lambda_l1': 0.024993941973253362, 'lambda_l2': 0.009568347196

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:47,275] Trial 16 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.04324411717592932, 'num_leaves': 31, 'max_depth': 12, 'min_data_in_leaf': 50, 'feature_fraction': 0.6436667550695033, 'bagging_fraction': 0.7875860522200752, 'bagging_freq': 2, 'lambda_l1': 0.006108667606512581, 'lambda_l2': 0.09051633352929418, 'min_split_gain': 0.0016400921815709936, 'feature_fraction_seed': 5340, 'bagging_seed': 7097}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:47,458] Trial 17 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.027865433369607832, 'num_leaves': 127, 'max_depth': 8, 'min_data_in_leaf': 50, 'feature_fraction': 0.7219568401700561, 'bagging_fraction': 0.7333683319784307, 'bagging_freq': 2, 'lambda_l1': 0.0018821960095139428, 'lambda_l2': 0.002790594504018383, 'min_split_gain': 0.05148350738380339, 'feature_fraction

train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:47,972] Trial 18 finished with value: 56.401018078755804 and parameters: {'boosting': 'dart', 'objective': 'mae', 'learning_rate': 0.03618957031663018, 'num_leaves': 31, 'max_depth': 10, 'min_data_in_leaf': 200, 'feature_fraction': 0.7918752012041548, 'bagging_fraction': 0.7006065892026406, 'bagging_freq': 1, 'lambda_l1': 0.029289462396192678, 'lambda_l2': 0.020088249400067426, 'min_split_gain': 0.07071973728364084, 'feature_fraction_seed': 2803, 'bagging_seed': 2997, 'drop_rate': 0.1301433097101564, 'skip_drop': 0.6347114593374317}. Best is trial 0 with value: 56.401018078755804.


train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:48,152] Trial 19 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.023363430716781274, 'num_leaves': 63, 'max_depth': -1, 'min_data_in_leaf': 100, 'feature_fraction': 0.8615088032921338, 'bagging_fraction': 0.7878131424161356, 'bagging_freq': 2, 'lambda_l1': 0.008112191471486495, 'lambda_l2': 0.0033904398474864325, 'min_split_gain': 0.017685009601474046, 'feature_fraction_seed': 5024, 'bagging_seed': 7975}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:48,332] Trial 20 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.049286713857965236, 'num_leaves': 63, 'max_depth': -1, 'min_data_in_leaf': 50, 'feature_fraction': 0.6477568941274532, 'bagging_fraction': 0.7457395317399954, 'bagging_freq': 2, 'lambda_l1': 0.10137438236412537, 'lambda_l2': 0.04128808718701541, 'min_split_gain': 0.08849054327777216, 'feature_fraction_seed': 8873, 'bagging_seed': 3880}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:48,511] Trial 21 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.03191370832205184, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 100, 'feature_fraction': 0.6944198590328576, 'bagging_fraction': 0.7704989247813039, 'bagging_freq': 2, 'lambda_l1': 0.0022255884244545146, 'lambda_l2': 0.003252532375035889, 'min_split_gain': 0.04448858655800373, 'feature_fraction_s

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:48,692] Trial 22 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.033883546392790315, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 100, 'feature_fraction': 0.6283051328337297, 'bagging_fraction': 0.7524399492390424, 'bagging_freq': 2, 'lambda_l1': 0.005229875337958661, 'lambda_l2': 0.005367518419003244, 'min_split_gain': 0.025662486468069318, 'feature_fraction_seed': 6994, 'bagging_seed': 1342}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:48,872] Trial 23 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.028789752604776803, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 100, 'feature_fraction': 0.674786890828087, 'bagging_fraction': 0.7813512174845083, 'bagging_freq': 2, 'lambda_l1': 0.0016245305795567794, 'lambda_l2': 0.0013997278587723715, 'min_split_gain': 0.011359976967826438, 'feature_frac

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:49,051] Trial 24 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.042532627677538966, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 100, 'feature_fraction': 0.716608841985608, 'bagging_fraction': 0.8209416836719305, 'bagging_freq': 2, 'lambda_l1': 0.016849631561729642, 'lambda_l2': 0.013708459848979917, 'min_split_gain': 0.04095709315126914, 'feature_fraction_seed': 4059, 'bagging_seed': 6563}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:49,231] Trial 25 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.03333082405892762, 'num_leaves': 31, 'max_depth': 10, 'min_data_in_leaf': 50, 'feature_fraction': 0.693670379008669, 'bagging_fraction': 0.8588231327079665, 'bagging_freq': 2, 'lambda_l1': 0.003959231395162879, 'lambda_l2': 0.0024253579967821494, 'min_split_gain': 0.05880405842991313, 'feature_fraction_s

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:49,732] Trial 26 finished with value: 61.223520324540345 and parameters: {'boosting': 'dart', 'objective': 'huber', 'learning_rate': 0.03575973602827245, 'num_leaves': 255, 'max_depth': 12, 'min_data_in_leaf': 100, 'feature_fraction': 0.7337813563166756, 'bagging_fraction': 0.7282792432047582, 'bagging_freq': 1, 'lambda_l1': 0.0018844428073801895, 'lambda_l2': 0.00496029341165544, 'min_split_gain': 0.02754738463384888, 'feature_fraction_seed': 8407, 'bagging_seed': 4428, 'drop_rate': 0.1407978067495215, 'skip_drop': 0.4455996957045537, 'huber_delta': 1.1470502615065898}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:49,913] Trial 27 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.02968996863086807, 'num_leaves': 127, 'max_depth': 8, 'min_data_in_leaf': 200, 'feature_fraction': 0.7853621321296673, 'bagging_fraction': 0.7930802969440864, 'bagging_freq': 2, 'lambda_l1': 0.00610124

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0

[I 2025-08-30 13:27:50,096] Trial 28 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.026956394864460297, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 50, 'feature_fraction': 0.6324656776938765, 'bagging_fraction': 0.7775954426167847, 'bagging_freq': 2, 'lambda_l1': 0.015340980114539729, 'lambda_l2': 0.07701809175804197, 'min_split_gain': 0.044835524297128955, 'feature_fraction_seed': 4387, 'bagging_seed': 5948}. Best is trial 0 with value: 56.401018078755804.



train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:50,298] Trial 29 finished with value: 61.223520324540345 and parameters: {'boosting': 'gbdt', 'objective': 'huber', 'learning_rate': 0.04048682986414519, 'num_leaves': 255, 'max_depth': 8, 'min_data_in_leaf': 100, 'feature_fraction': 0.6622768358050408, 'bagging_fraction': 0.7555519079897828, 'bagging_freq': 1, 'lambda_l1': 0.0010002834969889244, 'lambda_l2': 0.29209417178365954, 'min_split_gain': 0.031653984376159766, 'feature_fraction_seed': 9787, 'bagging_seed': 956, 'huber_delta': 2.30113513016141}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:50,478] Trial 30 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.045654361317404475, 'num_leaves': 31, 'max_depth': -1, 'min_data_in_leaf': 100, 'feature_fraction': 0.66225113904376, 'bagging_fraction': 0.8751281468836132, 'bagging_freq': 2, 'lambda_l1': 0.0026254153099848306, 'lambda_l2': 0.012891324093235177, 'min_split_gain': 0.00

train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:50,660] Trial 31 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.02465117441611721, 'num_leaves': 255, 'max_depth': -1, 'min_data_in_leaf': 150, 'feature_fraction': 0.8659363616158122, 'bagging_fraction': 0.7209658044474746, 'bagging_freq': 2, 'lambda_l1': 0.0032940401881121923, 'lambda_l2': 0.001511355809734751, 'min_split_gain': 0.020024063057327027, 'feature_fraction_seed': 7608, 'bagging_seed': 6804}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:50,841] Trial 32 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.02625089828513534, 'num_leaves': 255, 'max_depth': -1, 'min_data_in_leaf': 150, 'feature_fraction': 0.8514869828786173, 'bagging_fraction': 0.7042122570947442, 'bagging_freq': 2, 'lambda_l1': 0.0025825738871981603, 'lambda_l2': 0.001017573424773051, 'min_split_gain': 0.024538808741294757, 'feature_fraction_seed': 7917, 'bagging_seed': 7909}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:51,024] Trial 33 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.022283425319205928, 'num_leaves': 255, 'max_depth': -1, 'min_data_in_leaf': 150, 'feature_fraction': 0.884724044061549, 'bagging_fraction': 0.7378613374200254, 'bagging_freq': 2, 'lambda_l1': 0.003642070993319404, 'lambda_l2': 0.004057470183557076, 'min_split_gain': 0.034034271995362415, 'feature_frac

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:51,207] Trial 34 finished with value: 61.223520324540345 and parameters: {'boosting': 'gbdt', 'objective': 'huber', 'learning_rate': 0.03752968513975935, 'num_leaves': 255, 'max_depth': -1, 'min_data_in_leaf': 150, 'feature_fraction': 0.8349891572817032, 'bagging_fraction': 0.7186719345660847, 'bagging_freq': 2, 'lambda_l1': 0.0015340223511091673, 'lambda_l2': 0.00200163702646149, 'min_split_gain': 0.02139736227412708, 'feature_fraction_seed': 5665, 'bagging_seed': 5157, 'huber_delta': 4.975453298618525}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:51,697] Trial 35 finished with value: 61.223520324540345 and parameters: {'boosting': 'dart', 'objective': 'huber', 'learning_rate': 0.020072806214513295, 'num_leaves': 63, 'max_depth': 12, 'min_data_in_leaf': 150, 'feature_fraction': 0.8751327956391834, 'bagging_fraction': 0.7548335138088796, 'bagging_freq': 2, 'lambda_l1': 0.00684398429349337, 'lambda_l2': 0.8156767277804641, 'min_split_gain': 0.03864928961229842, 'feature_fraction_seed': 6652, 'bagging_seed': 8862, 'drop_rate': 0.05214704826551535, 'skip_drop': 0.7885708245625909, 'huber_delta': 2.044648702648245}. Best is trial 0 with value: 56.401018078755804.


train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:51,879] Trial 36 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.03310400408351548, 'num_leaves': 255, 'max_depth': -1, 'min_data_in_leaf': 150, 'feature_fraction': 0.8341393068249758, 'bagging_fraction': 0.7648695216011243, 'bagging_freq': 2, 'lambda_l1': 0.011519654621824442, 'lambda_l2': 0.007371086121427044, 'min_split_gain': 0.01363767898749556, 'feature_fraction_seed': 9369, 'bagging_seed': 5992}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:52,061] Trial 37 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.029647823695599825, 'num_leaves': 31, 'max_depth': 10, 'min_data_in_leaf': 50, 'feature_fraction': 0.81559977977787, 'bagging_fraction': 0.8419115915569413, 'bagging_freq': 1, 'lambda_l1': 0.004481245506746423, 'lambda_l2': 0.03208180177343517, 'min_split_gain': 0.0550991012377432, 'feature_fraction_seed': 8274, 'bagging_seed': 7522}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:52,614] Trial 38 finished with value: 61.223520324540345 and parameters: {'boosting': 'dart', 'objective': 'huber', 'learning_rate': 0.02485817420642478, 'num_leaves': 63, 'max_depth': -1, 'min_data_in_leaf': 50, 'feature_fraction': 0.7760985337778562, 'bagging_fraction': 0.7114879319113853, 'bagging_freq': 2, 'lambda_l1': 0.04430909965019808, 'lambda_l2': 0.15239019907292833, 'min_split_gain': 0.02915900543907153, 'feature_fraction_seed': 4431, 'bagging_seed': 4589, 'drop_rate': 0.09940533051337275, 'skip_drop': 0.3450252103300575, 'huber_delta': 2.0363434071432107}. Best is trial 0 with value: 56.401018078755804.


train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:52,807] Trial 39 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.04053409479963968, 'num_leaves': 127, 'max_depth': 10, 'min_data_in_leaf': 150, 'feature_fraction': 0.7575314112141264, 'bagging_fraction': 0.8085063717713831, 'bagging_freq': 1, 'lambda_l1': 0.6523505164539528, 'lambda_l2': 0.0015328981280595495, 'min_split_gain': 0.04385318151255024, 'feature_fraction_seed': 7302, 'bagging_seed': 8775}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:52,997] Trial 40 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.023030673922264138, 'num_leaves': 255, 'max_depth': 8, 'min_data_in_leaf': 200, 'feature_fraction': 0.8524896377477449, 'bagging_fraction': 0.7264160207143018, 'bagging_freq': 2, 'lambda_l1': 0.021035680381218013, 'lambda_l2': 0.0023148482407581374, 'min_split_gain': 0.03727496215726635, 'feature_fraction_seed': 5847, 'bagging_seed': 6419}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:53,182] Trial 41 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.03183878990270561, 'num_leaves': 31, 'max_depth': 12, 'min_data_in_leaf': 200, 'feature_fraction': 0.8922633978456361, 'bagging_fraction': 0.8604228451686189, 'bagging_freq': 1, 'lambda_l1': 0.010638928355284806, 'lambda_l2': 0.24020448851273127, 'min_split_gain': 0.06304085475742319, 'feature_fraction

train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:53,367] Trial 42 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.03054488986427222, 'num_leaves': 31, 'max_depth': 12, 'min_data_in_leaf': 200, 'feature_fraction': 0.8824074437958018, 'bagging_fraction': 0.8436359111240472, 'bagging_freq': 1, 'lambda_l1': 0.008700314957281085, 'lambda_l2': 0.478350887031162, 'min_split_gain': 0.06535999394009505, 'feature_fraction_seed': 9047, 'bagging_seed': 8709}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:53,552] Trial 43 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.034325034560457296, 'num_leaves': 31, 'max_depth': 12, 'min_data_in_leaf': 200, 'feature_fraction': 0.8706300711218563, 'bagging_fraction': 0.8864202592395227, 'bagging_freq': 1, 'lambda_l1': 0.0027628623030991895, 'lambda_l2': 0.10822590366491117, 'min_split_gain': 0.08111160538164172, 'feature_fraction_seed': 6451, 'bagging_seed': 9502}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:53,736] Trial 44 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.02819111316338914, 'num_leaves': 31, 'max_depth': 12, 'min_data_in_leaf': 50, 'feature_fraction': 0.8981013413817809, 'bagging_fraction': 0.8611075164074702, 'bagging_freq': 1, 'lambda_l1': 0.0013809839833121304, 'lambda_l2': 0.05039156088406088, 'min_split_gain': 0.007007295012203377, 'feature_fraction

train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:53,918] Trial 45 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.03672172656182888, 'num_leaves': 31, 'max_depth': 12, 'min_data_in_leaf': 200, 'feature_fraction': 0.6130717012403618, 'bagging_fraction': 0.8496844412976415, 'bagging_freq': 1, 'lambda_l1': 0.00466008856267366, 'lambda_l2': 0.14304553646354393, 'min_split_gain': 0.0572569647922319, 'feature_fraction_seed': 7836, 'bagging_seed': 7043}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:54,407] Trial 46 finished with value: 56.401018078755804 and parameters: {'boosting': 'dart', 'objective': 'mae', 'learning_rate': 0.02571211440064153, 'num_leaves': 63, 'max_depth': -1, 'min_data_in_leaf': 50, 'feature_fraction': 0.7002258157970873, 'bagging_fraction': 0.8284291419911127, 'bagging_freq': 2, 'lambda_l1': 0.0032692720676324592, 'lambda_l2': 0.06031070043632122, 'min_split_gain': 0.070968680739045, 'feature_fraction_seed': 5094, 'bagging_seed': 9194, 'drop_rate': 0.1595085386173467, 'skip_drop': 0.6018575742827967}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:54,590] Trial 47 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.032231458552558906, 'num_leaves': 31, 'max_depth': 10, 'min_data_in_leaf': 50, 'feature_fraction': 0.8468418178153929, 'bagging_fraction': 0.7704375492797216, 'bagging_freq': 2, 'lambda_l1': 0.002104084628243821, 'lambda_l2': 0.01742484571936

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:54,772] Trial 48 finished with value: 56.401018078755804 and parameters: {'boosting': 'gbdt', 'objective': 'mae', 'learning_rate': 0.023891059712739374, 'num_leaves': 127, 'max_depth': -1, 'min_data_in_leaf': 200, 'feature_fraction': 0.8144782476775309, 'bagging_fraction': 0.8834832738726343, 'bagging_freq': 2, 'lambda_l1': 0.008643215897150103, 'lambda_l2': 0.001268726045768507, 'min_split_gain': 0.04695816668551942, 'feature_fraction_seed': 3214, 'bagging_seed': 2234}. Best is trial 0 with value: 56.401018078755804.
[I 2025-08-30 13:27:54,957] Trial 49 finished with value: 61.223520324540345 and parameters: {'boosting': 'gbdt', 'objective': 'huber', 'learning_rate': 0.021813293016721583, 'num_leaves': 63, 'max_depth': 10, 'min_data_in_leaf': 50, 'feature_fraction': 0.6826145686018391, 'bagging_fraction': 0.8994867069397426, 'bagging_freq': 1, 'lambda_l1': 0.0013249637626132965, 'lambda_l2': 0.25168233160008885, 'min_split_gain': 0.04877085993034809, 'feature_fract

train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0
train rows: 26, valid rows: 10, ignore rows: 0


[I 2025-08-30 13:27:55,446] Trial 50 finished with value: 56.401018078755804 and parameters: {'boosting': 'dart', 'objective': 'mae', 'learning_rate': 0.030682449364789706, 'num_leaves': 31, 'max_depth': 12, 'min_data_in_leaf': 100, 'feature_fraction': 0.7402459653833511, 'bagging_fraction': 0.8685292262620746, 'bagging_freq': 2, 'lambda_l1': 0.00568649804471866, 'lambda_l2': 0.028983952601239274, 'min_split_gain': 0.09692844600483505, 'feature_fraction_seed': 2415, 'bagging_seed': 5576, 'drop_rate': 0.0972966056421343, 'skip_drop': 0.39357625129106943}. Best is trial 0 with value: 56.401018078755804.


train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:55,938] Trial 51 finished with value: 56.401018078755804 and parameters: {'boosting': 'dart', 'objective': 'mae', 'learning_rate': 0.04764230771535253, 'num_leaves': 127, 'max_depth': 8, 'min_data_in_leaf': 50, 'feature_fraction': 0.8949598840291854, 'bagging_fraction': 0.7499226157431097, 'bagging_freq': 1, 'lambda_l1': 0.21249772162984218, 'lambda_l2': 0.0739675412398254, 'min_split_gain': 0.09874205233912198, 'feature_fraction_seed': 1549, 'bagging_seed': 1100, 'drop_rate': 0.08991802114934919, 'skip_drop': 0.2156737195775213}. Best is trial 0 with value: 56.401018078755804.


train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:56,435] Trial 52 finished with value: 56.401018078755804 and parameters: {'boosting': 'dart', 'objective': 'mae', 'learning_rate': 0.03503013642075702, 'num_leaves': 127, 'max_depth': 8, 'min_data_in_leaf': 50, 'feature_fraction': 0.8807989913705692, 'bagging_fraction': 0.7615942298255493, 'bagging_freq': 1, 'lambda_l1': 0.8666352525833266, 'lambda_l2': 0.031848814288521834, 'min_split_gain': 0.0912431557147269, 'feature_fraction_seed': 13, 'bagging_seed': 775, 'drop_rate': 0.08204426942177519, 'skip_drop': 0.3433770245631179}. Best is trial 0 with value: 56.401018078755804.


train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:56,933] Trial 53 finished with value: 56.401018078755804 and parameters: {'boosting': 'dart', 'objective': 'mae', 'learning_rate': 0.038497177124768404, 'num_leaves': 127, 'max_depth': 8, 'min_data_in_leaf': 50, 'feature_fraction': 0.8608704091884578, 'bagging_fraction': 0.7993983531786465, 'bagging_freq': 1, 'lambda_l1': 0.2743261541893815, 'lambda_l2': 0.11949790428824002, 'min_split_gain': 0.08059659946326067, 'feature_fraction_seed': 2270, 'bagging_seed': 1568, 'drop_rate': 0.11331527260138162, 'skip_drop': 0.2896206387863573}. Best is trial 0 with value: 56.401018078755804.


train rows: 26, valid rows: 10, ignore rows: 0
train rows: 24, valid rows: 12, ignore rows: 0
train rows: 22, valid rows: 14, ignore rows: 0


[I 2025-08-30 13:27:57,429] Trial 54 finished with value: 56.401018078755804 and parameters: {'boosting': 'dart', 'objective': 'mae', 'learning_rate': 0.02699185874859829, 'num_leaves': 127, 'max_depth': 8, 'min_data_in_leaf': 50, 'feature_fraction': 0.8963994017259376, 'bagging_fraction': 0.7367692869690866, 'bagging_freq': 1, 'lambda_l1': 0.33612749639044776, 'lambda_l2': 0.0028914267553496477, 'min_split_gain': 0.016470862133522522, 'feature_fraction_seed': 784, 'bagging_seed': 2212, 'drop_rate': 0.07450851461694824, 'skip_drop': 0.5870269517913339}. Best is trial 0 with value: 56.401018078755804.


train rows: 26, valid rows: 10, ignore rows: 0


[W 2025-08-30 13:27:57,571] Trial 55 failed with parameters: {'boosting': 'gbdt', 'objective': 'mae'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/kouya-takahashi/kaggle/opp2025/.venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_877964/1556373603.py", line 131, in <lambda>
    study.optimize(lambda tr: objective_lgbm(tr, X_df, y_series, target_col, target_features),
  File "/tmp/ipykernel_877964/1556373603.py", line 85, in objective_lgbm
    "learning_rate": trial.suggest_float("learning_rate", 0.02, 0.05),
  File "/home/kouya-takahashi/kaggle/opp2025/.venv/lib/python3.10/site-packages/optuna/trial/_trial.py", line 161, in suggest_float
    suggested_value = self._suggest(name, distribution)
  File "/home/kouya-takahashi/kaggle/opp2025/.venv/lib/python3.10/site-packages/optuna/trial/_trial.py", line 635, in _suggest
    param_value = self

KeyboardInterrupt: 