In [1]:
import sys
import pandas as pd; pd.set_option('display.max_columns', 100)
import numpy as np

import lightgbm as lgb
import optuna
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error

import logging
logging.basicConfig(level=logging.ERROR)
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# 変数

In [2]:
preprocess_num = "P2"
ML = "ML1"

# csvをimportする

In [3]:
train = pd.read_csv(f"../preprocess_results/{preprocess_num}_train.csv", sep=',')
train = train.drop(columns = ["id"])
train.head()

Unnamed: 0,Brand,Material,Size,Compartments,Laptop Compartment,Waterproof,Style,Color,Weight Capacity (kg),Price
0,1,1,1,7.0,1,0,2,0,11.611723,112.15875
1,1,0,2,10.0,1,1,1,3,27.078537,68.88056
2,4,1,2,2.0,1,0,1,5,16.64376,39.1732
3,2,2,2,8.0,1,0,1,3,12.93722,80.60793
4,0,0,1,1.0,1,1,1,3,17.749338,86.02312


In [4]:
test = pd.read_csv(f"../preprocess_results/{preprocess_num}_test.csv", sep=',')
test = test.drop(columns = ["id","Price"])
test.head()

Unnamed: 0,Brand,Material,Size,Compartments,Laptop Compartment,Waterproof,Style,Color,Weight Capacity (kg)
0,3,1,2,2.0,0,0,2,3,20.671147
1,2,0,1,7.0,0,1,0,3,13.564105
2,0,0,0,9.0,0,1,1,1,11.809799
3,0,2,0,1.0,1,0,1,3,18.477036
4,5,2,0,2.0,1,1,2,0,9.907953


In [5]:
sample_submission = pd.read_csv(f"../data/sample_submission.csv", sep=',')
sample_submission.head()

Unnamed: 0,id,Price
0,300000,81.411
1,300001,81.411
2,300002,81.411
3,300003,81.411
4,300004,81.411


# 学習データとテストデータに分割

In [6]:
X = train.drop(columns=['Price'])
y = train["Price"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# パラメーターチューリング

In [7]:
def objective(trial):
    params = {
        "objective": "regression",
        "metric": "rmse",
        "boosting_type": "gbdt",
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
        "num_leaves": trial.suggest_int("num_leaves", 20, 300),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),
        "n_estimators": 1000,
        "early_stopping_rounds": 10,
        "verbose" : 0,
    }

    # 交差検証
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    rmse_list = []

    for train_idx, valid_idx in kf.split(X_train):
        X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[valid_idx]
        y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[valid_idx]

        # モデルの学習
        model = lgb.LGBMRegressor(**params)
        model.fit(
            X_tr, y_tr,
            eval_set=[(X_val, y_val)],
            eval_metric="rmse",
            callbacks=[optuna.integration.LightGBMPruningCallback(trial, "rmse")],
        )
        
        # 予測
        y_pred = model.predict(X_val)

        # RMSEの計算
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        rmse_list.append(rmse)

    return np.mean(rmse_list)

In [None]:
%%time
# Optunaで最適化
study = optuna.create_study(
    direction="minimize", 
    sampler=optuna.samplers.TPESampler(), 
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)
study.optimize(objective, n_trials=50)

# 最適なパラメータ
best_params = study.best_params

# 最適化履歴を可視化
optuna.visualization.matplotlib.plot_optimization_history(study)
plt.show()

[I 2025-02-19 02:29:03,065] A new study created in memory with name: no-name-81a434ac-04a1-4546-bfe4-d33bbccd4ece
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 02:32:45,801] Trial 0 finished with value: 38.8819182920637 and parameters: {'learning_rate': 0.1039522958368663, 'num_leaves': 140, 'max_depth': 5, 'min_child_samples': 46, 'subsample': 0.5823760376485578, 'colsample_bytree': 0.8396721622553416, 'reg_alpha': 2.7331712807145025e-05, 'reg_lambda': 0.0035114721408573877}. Best is trial 0 with value: 38.8819182920637.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 02:54:50,089] Trial 1 finished with value: 38.882287721086975 and parameters: {'learning_rate': 0.014412959528674503, 'num_leaves': 62, 'max_depth': 5, 'min_child_samples': 70, 'subsample': 0.6801320450917836, 'colsample_bytree': 0.8213644466288756, 'reg_alpha': 0.13584735835852157, 'reg_lambda': 0.0022123569795725025}. Best is trial 0 with value: 38.8819182920637.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 02:57:23,509] Trial 2 finished with value: 38.88143074606833 and parameters: {'learning_rate': 0.174434692142053, 'num_leaves': 83, 'max_depth': 6, 'min_child_samples': 56, 'subsample': 0.7391971041281364, 'colsample_bytree': 0.6288719232024425, 'reg_alpha': 5.213047543000211e-07, 'reg_lambda': 0.001988747129635646}. Best is trial 2 with value: 38.88143074606833.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:00:21,499] Trial 3 finished with value: 38.87960503820741 and parameters: {'learning_rate': 0.09564626581196874, 'num_leaves': 76, 'max_depth': 8, 'min_child_samples': 38, 'subsample': 0.8510687877428869, 'colsample_bytree': 0.8379736229098804, 'reg_alpha': 1.495898505927737e-05, 'reg_lambda': 0.00031498146916077}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:13:52,470] Trial 4 finished with value: 38.88148371611583 and parameters: {'learning_rate': 0.018717341422191325, 'num_leaves': 177, 'max_depth': 11, 'min_child_samples': 96, 'subsample': 0.8863471894281671, 'colsample_bytree': 0.6322768061441562, 'reg_alpha': 0.17367060639466897, 'reg_lambda': 3.21738838335877e-06}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:14:00,306] Trial 5 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:14:06,065] Trial 6 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:14:14,163] Trial 7 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:14:20,377] Trial 8 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:14:26,166] Trial 9 pruned. Trial was pruned at iteration 10.




  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:17:57,621] Trial 10 finished with value: 38.88244054229819 and parameters: {'learning_rate': 0.062357211334217004, 'num_leaves': 223, 'max_depth': 8, 'min_child_samples': 42, 'subsample': 0.9992825961632563, 'colsample_bytree': 0.9948737097080781, 'reg_alpha': 1.2894034472359126e-05, 'reg_lambda': 8.85307460097247}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:19:51,787] Trial 11 finished with value: 38.88192144675247 and parameters: {'learning_rate': 0.17869156664214167, 'num_leaves': 89, 'max_depth': 7, 'min_child_samples': 64, 'subsample': 0.5014658778556, 'colsample_bytree': 0.6675282553961218, 'reg_alpha': 1.2615788898442008e-07, 'reg_lambda': 9.764866458792149e-05}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:19:57,223] Trial 12 pruned. Trial was pruned at iteration 10.




  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:22:40,386] Trial 13 finished with value: 38.88197635104616 and parameters: {'learning_rate': 0.10296758023443656, 'num_leaves': 121, 'max_depth': 7, 'min_child_samples': 33, 'subsample': 0.7215643485911523, 'colsample_bytree': 0.9122723167162698, 'reg_alpha': 1.3045714037595517e-06, 'reg_lambda': 6.959660761838226e-06}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:23:03,509] Trial 14 pruned. Trial was pruned at iteration 55.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:23:09,022] Trial 15 pruned. Trial was pruned at iteration 10.




  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:23:14,808] Trial 16 pruned. Trial was pruned at iteration 10.




  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:26:20,381] Trial 17 finished with value: 38.88259583893469 and parameters: {'learning_rate': 0.06749530577736182, 'num_leaves': 249, 'max_depth': 9, 'min_child_samples': 37, 'subsample': 0.6663819398049681, 'colsample_bytree': 0.9798457073019526, 'reg_alpha': 4.2863662967296166e-05, 'reg_lambda': 2.947017928421819e-07}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:26:34,177] Trial 18 pruned. Trial was pruned at iteration 40.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:29:47,498] Trial 19 finished with value: 38.88037244965275 and parameters: {'learning_rate': 0.07518097347572719, 'num_leaves': 140, 'max_depth': 9, 'min_child_samples': 19, 'subsample': 0.7893038768624616, 'colsample_bytree': 0.8960877672820474, 'reg_alpha': 1.4058473984092774e-07, 'reg_lambda': 0.02547418899404368}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:29:55,080] Trial 20 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:30:02,364] Trial 21 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:32:36,854] Trial 22 finished with value: 38.881131769566366 and parameters: {'learning_rate': 0.13056210848726746, 'num_leaves': 125, 'max_depth': 7, 'min_child_samples': 19, 'subsample': 0.7144989867280096, 'colsample_bytree': 0.7961805236450206, 'reg_alpha': 1.258849218073644e-08, 'reg_lambda': 0.5159570768076565}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:32:44,796] Trial 23 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:34:42,580] Trial 24 finished with value: 38.88115168189382 and parameters: {'learning_rate': 0.1286059009123133, 'num_leaves': 134, 'max_depth': 10, 'min_child_samples': 14, 'subsample': 0.8677018093892457, 'colsample_bytree': 0.9510856815449363, 'reg_alpha': 9.050016251365918e-08, 'reg_lambda': 0.5675498167825058}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:34:49,712] Trial 25 pruned. Trial was pruned at iteration 10.




  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:34:57,054] Trial 26 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:37:46,466] Trial 27 finished with value: 38.88112788346062 and parameters: {'learning_rate': 0.08639223666462141, 'num_leaves': 183, 'max_depth': 12, 'min_child_samples': 39, 'subsample': 0.6369578040574169, 'colsample_bytree': 0.7814867027603007, 'reg_alpha': 3.1259122445543354e-07, 'reg_lambda': 0.0004679430277687184}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:38:12,230] Trial 28 pruned. Trial was pruned at iteration 57.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:38:21,088] Trial 29 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:38:28,968] Trial 30 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:38:50,494] Trial 31 pruned. Trial was pruned at iteration 53.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:38:57,322] Trial 32 pruned. Trial was pruned at iteration 10.




  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:39:03,241] Trial 33 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:39:13,347] Trial 34 pruned. Trial was pruned at iteration 23.




  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:39:20,775] Trial 35 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:39:28,267] Trial 36 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:42:11,708] Trial 37 finished with value: 38.88063241442069 and parameters: {'learning_rate': 0.09261297173707955, 'num_leaves': 175, 'max_depth': 9, 'min_child_samples': 17, 'subsample': 0.8302347158279197, 'colsample_bytree': 0.8135758058610206, 'reg_alpha': 7.172908166967846e-07, 'reg_lambda': 1.5765322729436968e-05}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:42:29,799] Trial 38 pruned. Trial was pruned at iteration 41.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:42:37,905] Trial 39 pruned. Trial was pruned at iteration 10.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




[I 2025-02-19 03:42:59,015] Trial 40 pruned. Trial was pruned at iteration 41.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















[I 2025-02-19 03:45:00,089] Trial 41 finished with value: 38.882040622997465 and parameters: {'learning_rate': 0.14042817782304862, 'num_leaves': 187, 'max_depth': 8, 'min_child_samples': 20, 'subsample': 0.7771192063129669, 'colsample_bytree': 0.7768884493487147, 'reg_alpha': 2.7757744698460897e-08, 'reg_lambda': 2.445054387155887e-05}. Best is trial 3 with value: 38.87960503820741.
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),








[I 2025-02-19 03:45:43,920] Trial 42 pruned. Trial was pruned at iteration 58.




  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.2),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),




















In [None]:
print("Best Parameters:", best_params)

# GBDT

In [None]:
# 最適なパラメータでモデルを学習
best_model = lgb.LGBMRegressor(**best_params)
print(f"best_model: {best_model}")
kf = KFold(n_splits=5, shuffle=True, random_state=42)
oof_preds = np.zeros(len(X_train))

for train_idx, valid_idx in kf.split(X_train):
    X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[valid_idx]  # ilocを使用
    y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[valid_idx]  # 同様にy_trainもilocでアクセス

    best_model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
        eval_metric="rmse",
    )

    oof_preds[valid_idx] = best_model.predict(X_val)

# OOFスコア
rmse_oof = mean_squared_error(y_train, oof_preds, squared=False)
print(f"OOF RMSE: {rmse_oof:.4f}")

In [None]:
# テストデータで評価
y_test_pred = best_model.predict(X_test)
rmse_test = mean_squared_error(y_test, y_test_pred, squared=False)
print(f"Test RMSE: {rmse_test:.4f}")

In [None]:
# 本番データで予測出し
y_prod_pred = best_model.predict(test)

# csvをmodel_resultsに作成

In [None]:
sample_submission["Price"] = y_prod_pred
display(sample_submission.head())

path = f"../model_results/{preprocess_num}_{ML}.csv"
sample_submission.to_csv(path, index=False)