# Optunaでハイパーパラメーターの調整

各モデルのパラメーターは回帰分析(回帰タスク)を前提として設定している。対象モデルは以下の3つ。

・LightGBM  
・CatBoost  
・RandomForest

In [1]:
import numpy as np
import pandas as pd

import joblib
import optuna

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.metrics import mean_squared_error, mean_squared_log_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
import lightgbm as lgb
from catboost import CatBoostRegressor
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the training data; "train.csv" is a relative path, so it is resolved
# against the current working directory.
train_df = pd.read_csv("train.csv")

In [None]:
# Feature matrix: the seven predictor columns used by the tuning code.
X = train_df.loc[
    :,
    [
        'Sex',
        'Age',
        'Height',
        'Weight',
        'Duration',
        'Heart_Rate',
        'Body_Temp',
    ],
]

# Regression target.
# NOTE(review): the column name suggests a log-transformed calorie value
# prepared upstream — confirm against the code that wrote train.csv.
y = train_df.loc[:, "Calories_log"]

# 1. LightGBM

In [None]:
# Fixed LightGBM settings shared by every trial; the tuned parameters are
# merged with these inside objective_LGBM.
params_base = dict(
    boosting_type="gbdt",
    objective="regression",
    metric="rmse",
    learning_rate=0.1,
    # Deliberately huge ceiling: objective_LGBM relies on early stopping to
    # pick the actual number of boosting rounds.
    n_estimators=100000,
    # Re-sample the bagging subset every iteration so that the tuned
    # "bagging_fraction" actually takes effect.
    bagging_freq=1,
    seed=42,
)

In [None]:
def objective_LGBM(trial):
    """Optuna objective: mean cross-validated RMSE of a LightGBM regressor.

    Samples tree-shape, subsampling and regularisation parameters from
    ``trial``, merges them with the fixed settings in ``params_base`` and
    fits one early-stopped ``LGBMRegressor`` per fold of a shuffled 5-fold
    split of the module-level ``X`` / ``y``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold validation RMSE values (lower is better).
    """
    # Single source of truth for the fold count (previously repeated in both
    # the splitter and the loop range).
    n_splits = 5

    params_tuning = {
        "num_leaves": trial.suggest_int("num_leaves", 16, 128),
        # Upper bound narrowed from 100 to 50.
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 5, 50),
        # Upper bound narrowed from 20.0 to 5.0.
        "min_sum_hessian_in_leaf": trial.suggest_float(
            "min_sum_hessian_in_leaf", 1e-3, 5.0, log=True
        ),
        # Lower bound raised from 0.5 to 0.6.
        "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-2, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-2, 10.0, log=True),
    }

    print(f"Trial {trial.number} parameters: {params_tuning}")

    # Fixed settings are unpacked last so they win over a sampled key of the
    # same name, exactly like dict.update(params_base) would.
    params = {**params_tuning, **params_base}

    splitter = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    list_metrics = []

    for idx_tr, idx_va in splitter.split(X, y):
        X_tr, y_tr = X.iloc[idx_tr], y.iloc[idx_tr]
        X_va, y_va = X.iloc[idx_va], y.iloc[idx_va]

        model = lgb.LGBMRegressor(**params)
        model.fit(
            X_tr,
            y_tr,
            # Only the validation fold is evaluated: LightGBM's early
            # stopping ignores training data, so also scoring the training
            # fold every boosting round would add cost without changing the
            # stopping point.
            eval_set=[(X_va, y_va)],
            eval_metric="rmse",
            callbacks=[
                lgb.early_stopping(stopping_rounds=50, verbose=False),
                lgb.log_evaluation(0),
            ],
        )

        # Score the fold at the iteration selected by early stopping.
        y_va_pred = model.predict(X_va, num_iteration=model.best_iteration_)
        list_metrics.append(np.sqrt(mean_squared_error(y_va, y_va_pred)))

    return np.mean(list_metrics)

In [None]:
# Only show Optuna warnings and errors; objective_LGBM prints the sampled
# parameters of each trial itself.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# TPE sampler with a fixed seed, minimising the value returned by
# objective_LGBM (mean validation RMSE) over 50 trials.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(func=objective_LGBM, n_trials=50)

In [None]:
# Report the best trial found by the study: its objective value (printed
# with five decimals) followed by the hyperparameters it sampled.
trial = study.best_trial
print(f"BestScore={trial.value:.5f}")
# `display` is not imported in this file — presumably the IPython/Jupyter
# built-in rich-output helper; confirm when running outside a notebook.
display(trial.params)

In [None]:
# Tuned LightGBM hyperparameters (hard-coded values, presumably copied from
# an earlier study run — TODO confirm) combined with the fixed settings.
# params_base is unpacked last, so its values take precedence on any shared
# key, matching dict.update(params_base).
# NOTE(review): params_base carries n_estimators=100000; confirm that the
# code consuming best_params trains with early stopping.
best_params = {
    'num_leaves': 79,
    'min_data_in_leaf': 7,
    'min_sum_hessian_in_leaf': 0.016726647861247466,
    'feature_fraction': 0.6197653331486828,
    'bagging_fraction': 0.968348369135033,
    'lambda_l1': 3.4435101234752388,
    'lambda_l2': 1.5321517484670413,
    **params_base,
}