In [70]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import optuna
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.seasonal import STL
from scipy.optimize import minimize_scalar

import pickle
import random
import importlib
import sys

import shap
from pdpbox import pdp
import matplotlib.pyplot as plt
import japanize_matplotlib

from pytorch_forecasting import TimeSeriesDataSet

ModuleNotFoundError: No module named 'packaging.requirements'

In [62]:
# Project-local settings module; it provides the Common class instantiated below.
import common

# Reload the module so that edits to common.py take effect without restarting the kernel.
importlib.reload(common)

# Shared settings object used by the rest of the notebook (paths, key/target column names).
# The bare last expression displays BASE_PATH as the cell output.
_common = common.Common()
_common.BASE_PATH

PosixPath('/Users/iwasakitakahiro/github')

In [63]:
# Make the shared-function directory importable.
# Note: every re-run of this cell appends the same entry to sys.path again.
common_func_path = _common.COMMON_FUNC_PATH
sys.path.append(str(common_func_path))

# Project-local helper module (its run_optuna is used by the tuning objective).
import func

# Reload so that edits to func.py take effect without restarting the kernel.
importlib.reload(func)

<module 'func' from '/Users/iwasakitakahiro/github/共通関数/func.py'>

In [64]:
# Load the preprocessed training table (path is relative to the notebook's working directory).
train_df = pd.read_csv('../output/中間データ/学習用データ/train_preprocessed.csv')

In [65]:
# Parse the key column to datetime, sort chronologically and make it the index.
# assumes UNIQUE_KEY_COLS names a single timestamp column — TODO confirm
# NOTE(review): this cell is not idempotent. After it runs, the key lives in the index
# rather than in a regular column, so re-running it (or any code that looks the key up
# as a column) needs reset_index() first.
train_df[_common.UNIQUE_KEY_COLS] = pd.to_datetime(train_df[_common.UNIQUE_KEY_COLS])
train_df = train_df.sort_values(_common.UNIQUE_KEY_COLS).set_index(_common.UNIQUE_KEY_COLS)

# Target series, used as the input of the STL decomposition.
price_series = train_df[_common.TARGET_COL]

In [66]:
# STL分解（1ヶ月周期＝24時間×30日）
# STL decomposition with period 24*30 (intended as a one-month cycle: 24 hours x 30 days).
# NOTE(review): the decomposition is fitted on the full series in one shot; if
# trend/seasonal/residual are later used as model features, check whether the value at
# time t depends on later observations.
stl_result = STL(price_series, period=24*30).fit()
train_df['trend'] = stl_result.trend
train_df['seasonal'] = stl_result.seasonal
train_df['residual'] = stl_result.resid
train_df['trend_seasonal'] = train_df['trend'] + train_df['seasonal']

# Aggregate for model 1 at a coarser granularity (here: monthly mean).
# Note: df_model1 is reassigned further down from build_model1_dataset(train_df).
# NOTE(review): recent pandas releases warn on the 'M' alias ('ME' is the replacement) —
# confirm against the pinned pandas version.
df_model1 = train_df.resample('M').mean().reset_index()

In [67]:
# Window lengths in rows: 24 * 30 is ~one month and 24 is one day if rows are hourly
# (row spacing is not established in this cell — TODO confirm).
window_month = 24 * 30
window_day = 24

# Trailing rolling means of the price_actual column. min_periods=1 lets the first rows
# average over whatever history is available instead of producing NaN.
# NOTE(review): the column name is hard-coded here while other cells use
# _common.TARGET_COL — presumably the same column; verify.
train_df['monthly_avg'] = train_df['price_actual'].rolling(window=window_month, min_periods=1).mean()
train_df['daily_avg'] = train_df['price_actual'].rolling(window=window_day, min_periods=1).mean()

In [68]:
# --- Build the reference series from the STL decomposition ---
# trend + seasonal is the series the blended averages are fitted against.
y_true = train_df['trend'] + train_df['seasonal']

# --- Rolling averages to be blended ---
x_monthly = train_df['monthly_avg']
x_daily = train_df['daily_avg']

# --- Objective: RMSE of the blend for a given monthly weight ---
def rmse_weight(alpha):
    """Return the RMSE between y_true and alpha * x_monthly + (1 - alpha) * x_daily."""
    y_pred = alpha * x_monthly + (1 - alpha) * x_daily
    return np.sqrt(mean_squared_error(y_true, y_pred))

# --- Run the optimisation: bounded scalar search for alpha in [0, 1] ---
result = minimize_scalar(rmse_weight, bounds=(0, 1), method='bounded')

# --- Best alpha and the RMSE it achieves ---
best_alpha = result.x
best_rmse = result.fun

print(f"✅ 最適なα（月次の重み）: {best_alpha:.4f}")
print(f"✅ 最小RMSE: {best_rmse:.4f}")

# --- Recompute the trend_seasonal column with the best blend ---
# Note: this overwrites the STL-based trend_seasonal column written by the decomposition cell.
train_df['trend_seasonal'] = best_alpha * x_monthly + (1 - best_alpha) * x_daily

✅ 最適なα（月次の重み）: 0.5700
✅ 最小RMSE: 7.4472


## モデル1（前日12時までの情報で暫定価格を予測）

In [5]:
def build_model1_dataset(df, is_test=False):
    """Build the hourly table for model 1 (provisional day-ahead price).

    For every calendar day D in the data, the features are the column-wise means of the
    24 hourly rows ending at 12:00 on day D-1 (i.e. D-2 13:00 .. D-1 12:00). That single
    feature vector is repeated for each of the 24 target hours of day D.

    Args:
        df: Frame holding the timestamp key (``_common.UNIQUE_KEY_COLS``) either as a
            column or as the index, plus the feature columns. When ``is_test`` is
            False it must also contain ``_common.TARGET_COL``.
        is_test: When True no target is read or emitted, and a day only needs a complete
            24-row feature window to be included.

    Returns:
        DataFrame with one row per (day, hour): the averaged feature columns, ``day``,
        ``forecast_hour``, ``target_time`` and, unless ``is_test``, ``target_price``.
        Days whose feature window (or target day) does not have exactly 24 rows are
        skipped. An empty frame is returned when there are no timestamps at all.
    """
    key_col = _common.UNIQUE_KEY_COLS
    # Only touch TARGET_COL when it is actually needed (mirrors the training/test split).
    target_col = None if is_test else _common.TARGET_COL

    df = df.copy()
    # Accept frames whose timestamp key was moved into the index (e.g. via set_index).
    if key_col not in df.columns and df.index.name == key_col:
        df = df.reset_index()
    df[key_col] = pd.to_datetime(df[key_col])
    timestamps = df[key_col]
    records = []

    # Nothing to build from: avoid passing NaT bounds to date_range.
    if timestamps.isna().all():
        return pd.DataFrame(records)

    # Anchor the daily grid at midnight. Without normalize(), a series that does not
    # start at 00:00 would shift `day` (and therefore the "previous day 12:00" feature
    # cut-off) by the start's time of day, letting later observations into the features.
    first_day = (timestamps.min() + pd.Timedelta(days=2)).normalize()

    for day in pd.date_range(first_day, timestamps.max(), freq='D'):
        day_times = pd.date_range(day, day.replace(hour=23), freq='h')

        # Feature window: the 24 hours ending at 12:00 on the previous day.
        feature_end = day - pd.Timedelta(hours=12)
        feature_start = feature_end - pd.Timedelta(hours=23)
        feature_times = pd.date_range(feature_start, feature_end, freq='h')

        x = df[timestamps.isin(feature_times)]
        if len(x) != 24:
            continue

        y = None
        if not is_test:
            y = df[timestamps.isin(day_times)]
            if len(y) != 24:
                continue

        # The key is never a feature; the target is excluded only when it is being predicted.
        drop_cols = [key_col]
        if not is_test:
            drop_cols.append(target_col)
        x_agg = x.drop(columns=drop_cols).mean().to_dict()
        x_agg['day'] = day.date()

        for t in day_times:
            row = x_agg.copy()
            row['forecast_hour'] = t.hour
            row['target_time'] = t
            if not is_test:
                row['target_price'] = y.loc[y[key_col] == t, target_col].values[0]
            records.append(row)

    return pd.DataFrame(records)

In [6]:
# Build the model-1 table from the training frame (replaces any df_model1 defined earlier).
df_model1 = build_model1_dataset(train_df)

In [7]:
# Features and target.
# Every column except the bookkeeping columns and the target itself is used as a feature.
features_model1 = [col for col in df_model1.columns if col not in ['day', 'forecast_hour', 'target_time', 'target_price']]
X_model1 = df_model1[features_model1]
y_model1 = df_model1['target_price']

In [8]:
# Fixed seed for reproducibility.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)

# Seeded Optuna sampler.
# NOTE(review): the study cell constructs its own TPESampler(seed=SEED) instead of
# passing this object, so `sampler` is currently unused there.
sampler = optuna.samplers.TPESampler(seed=SEED)

# Base LightGBM parameters (seeded as well); tuned values are merged on top of these later.
default_params = {
    'objective': 'regression_l2',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'random_state': SEED,
    'verbosity': -1
}

In [9]:
# Splitter handed to the tuning objective as its cross-validation strategy.
tscv = TimeSeriesSplit(n_splits=5)

In [10]:
# Objective function evaluated by Optuna
def objective(trial):
    """Score one Optuna trial by delegating to func.run_optuna.

    The notebook-level feature frame, target, CV splitter, base parameters and seed are
    forwarded unchanged; the value returned by func.run_optuna is returned as-is.
    """
    run_kwargs = {
        'trial': trial,
        'features': X_model1,              # feature DataFrame
        'target': y_model1,                # target series
        'cv_strategy': tscv,               # keyword was renamed from partition_col to cv_strategy
        'model_name': 'lgb',
        'default_columns': default_params,
        'metric': 'rmse',
        'random_seed': SEED,
    }
    return func.run_optuna(**run_kwargs)

# Create and run the Optuna study (minimising the objective's return value).
# The run stops after 50 trials or 1800 seconds, whichever comes first.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=SEED)
)
study.optimize(objective, n_trials=50, timeout=1800)

# Best trial; its tuned values override the base parameters.
trial = study.best_trial
lgb_params1 = default_params | trial.params  # dict union operator requires Python 3.9+

# Show the result.
print('Best trial:')
print(f'RMSE: {trial.value:.4f}')
print('Params:')
for key, value in trial.params.items():
    print(f'{key}: {value}')

[I 2025-06-12 09:20:13,932] A new study created in memory with name: no-name-c085d89c-cd24-4296-bbc3-0c8cd1592af8


Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[17]	valid_0's rmse: 11.5119
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[320]	valid_0's rmse: 20.4456
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[37]	valid_0's rmse: 10.479
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[18]	valid_0's rmse: 13.9024
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:21,507] Trial 0 finished with value: 13.066626105638274 and parameters: {'learning_rate': 0.015542448978618607, 'num_leaves': 74, 'feature_fraction': 0.8875455478014229, 'bagging_fraction': 0.9570717167427538, 'bagging_freq': 8}. Best is trial 0 with value: 13.066626105638274.


Early stopping, best iteration is:
[209]	valid_0's rmse: 8.99424
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[12]	valid_0's rmse: 11.5933
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[306]	valid_0's rmse: 20.5939
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[38]	valid_0's rmse: 10.326
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's rmse: 14.0885
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:26,702] Trial 1 finished with value: 13.09622846741662 and parameters: {'learning_rate': 0.01873236473539007, 'num_leaves': 50, 'feature_fraction': 0.9603744355070039, 'bagging_fraction': 0.991627870736741, 'bagging_freq': 9}. Best is trial 0 with value: 13.066626105638274.


Early stopping, best iteration is:
[208]	valid_0's rmse: 8.87951
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 11.5391
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[229]	valid_0's rmse: 20.6808
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[52]	valid_0's rmse: 10.3671
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's rmse: 13.9595
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:32,182] Trial 2 finished with value: 13.115393003341321 and parameters: {'learning_rate': 0.02279382816537334, 'num_leaves': 66, 'feature_fraction': 0.9366925870344273, 'bagging_fraction': 0.94254040539658, 'bagging_freq': 4}. Best is trial 0 with value: 13.066626105638274.


Early stopping, best iteration is:
[147]	valid_0's rmse: 9.03051
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's rmse: 11.3495
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[205]	valid_0's rmse: 20.3171
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[32]	valid_0's rmse: 10.4829
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's rmse: 13.885
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:36,858] Trial 3 finished with value: 13.01213319363288 and parameters: {'learning_rate': 0.036407946646282316, 'num_leaves': 66, 'feature_fraction': 0.8027536899181364, 'bagging_fraction': 0.9545653243224748, 'bagging_freq': 9}. Best is trial 3 with value: 13.01213319363288.


Early stopping, best iteration is:
[99]	valid_0's rmse: 9.02624
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[17]	valid_0's rmse: 11.3617
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[261]	valid_0's rmse: 20.5343
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[32]	valid_0's rmse: 10.5524
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[18]	valid_0's rmse: 13.8497
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:43,920] Trial 4 finished with value: 13.060202455702642 and parameters: {'learning_rate': 0.02316786340105235, 'num_leaves': 74, 'feature_fraction': 0.8150762483285954, 'bagging_fraction': 0.8737648012003949, 'bagging_freq': 10}. Best is trial 3 with value: 13.01213319363288.


Early stopping, best iteration is:
[219]	valid_0's rmse: 9.00286
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[6]	valid_0's rmse: 11.3725
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[144]	valid_0's rmse: 20.795
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[22]	valid_0's rmse: 10.4109
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's rmse: 13.9408
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:47,201] Trial 5 finished with value: 13.085224720266075 and parameters: {'learning_rate': 0.04481033010266881, 'num_leaves': 58, 'feature_fraction': 0.9577460285881492, 'bagging_fraction': 0.8633672244337742, 'bagging_freq': 6}. Best is trial 3 with value: 13.01213319363288.


Early stopping, best iteration is:
[65]	valid_0's rmse: 8.90692
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's rmse: 11.4039
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[95]	valid_0's rmse: 20.3133
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's rmse: 10.2007
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[3]	valid_0's rmse: 14.0338
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:50,067] Trial 6 finished with value: 12.995771466416079 and parameters: {'learning_rate': 0.07398222518217204, 'num_leaves': 61, 'feature_fraction': 0.9604295284160318, 'bagging_fraction': 0.828753364902913, 'bagging_freq': 8}. Best is trial 6 with value: 12.995771466416079.
[I 2025-06-12 09:20:50,108] Trial 7 pruned. Trial was pruned at iteration 5.
[I 2025-06-12 09:20:50,123] Trial 8 pruned. Trial was pruned at iteration 0.


Early stopping, best iteration is:
[64]	valid_0's rmse: 9.02717
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's rmse: 11.4237
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[158]	valid_0's rmse: 20.6674
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[26]	valid_0's rmse: 10.0995
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[12]	valid_0's rmse: 13.7435
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:52,384] Trial 9 finished with value: 12.959418669980156 and parameters: {'learning_rate': 0.034143667068185966, 'num_leaves': 34, 'feature_fraction': 0.9122866160126796, 'bagging_fraction': 0.865933689124183, 'bagging_freq': 6}. Best is trial 9 with value: 12.959418669980156.


Early stopping, best iteration is:
[79]	valid_0's rmse: 8.86294
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[5]	valid_0's rmse: 11.4214
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[76]	valid_0's rmse: 20.6106
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[11]	valid_0's rmse: 10.3469
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[5]	valid_0's rmse: 13.679
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:53,714] Trial 10 finished with value: 12.980715917285744 and parameters: {'learning_rate': 0.08314237192881471, 'num_leaves': 31, 'feature_fraction': 0.8818402735782713, 'bagging_fraction': 0.806695940147704, 'bagging_freq': 1}. Best is trial 9 with value: 12.959418669980156.


Early stopping, best iteration is:
[31]	valid_0's rmse: 8.84557
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[3]	valid_0's rmse: 11.4662
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[58]	valid_0's rmse: 20.6238
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[18]	valid_0's rmse: 10.3346
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[6]	valid_0's rmse: 13.6744
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:54,979] Trial 11 finished with value: 13.007579080402948 and parameters: {'learning_rate': 0.0938940405042952, 'num_leaves': 32, 'feature_fraction': 0.8836182576675167, 'bagging_fraction': 0.809312130090698, 'bagging_freq': 1}. Best is trial 9 with value: 12.959418669980156.


Early stopping, best iteration is:
[22]	valid_0's rmse: 8.93898
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's rmse: 11.3983
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[75]	valid_0's rmse: 20.4654
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's rmse: 10.707
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[5]	valid_0's rmse: 13.7174
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:20:58,611] Trial 12 finished with value: 13.05068960036501 and parameters: {'learning_rate': 0.06346216122533317, 'num_leaves': 95, 'feature_fraction': 0.8499069209571629, 'bagging_fraction': 0.8351205624045043, 'bagging_freq': 1}. Best is trial 9 with value: 12.959418669980156.
[I 2025-06-12 09:20:58,632] Trial 13 pruned. Trial was pruned at iteration 0.


Early stopping, best iteration is:
[45]	valid_0's rmse: 8.96526
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[5]	valid_0's rmse: 11.4248
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[66]	valid_0's rmse: 20.3123
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 10.491
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[6]	valid_0's rmse: 13.5263
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:00,217] Trial 14 finished with value: 12.956739716814784 and parameters: {'learning_rate': 0.09848298104468174, 'num_leaves': 38, 'feature_fraction': 0.8569576978946065, 'bagging_fraction': 0.8393830350982093, 'bagging_freq': 3}. Best is trial 14 with value: 12.956739716814784.


Early stopping, best iteration is:
[34]	valid_0's rmse: 9.02935
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's rmse: 11.3248
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[102]	valid_0's rmse: 20.5497
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[21]	valid_0's rmse: 10.3767
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[5]	valid_0's rmse: 13.818
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:02,474] Trial 15 finished with value: 12.966008767862022 and parameters: {'learning_rate': 0.055158473476608155, 'num_leaves': 41, 'feature_fraction': 0.854810890530028, 'bagging_fraction': 0.8474893770334594, 'bagging_freq': 3}. Best is trial 14 with value: 12.956739716814784.
[I 2025-06-12 09:21:02,497] Trial 16 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:02,536] Trial 17 pruned. Trial was pruned at iteration 1.
[I 2025-06-12 09:21:02,558] Trial 18 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:02,600] Trial 19 pruned. Trial was pruned at iteration 2.
[I 2025-06-12 09:21:02,622] Trial 20 pruned. Trial was pruned at iteration 0.


Early stopping, best iteration is:
[67]	valid_0's rmse: 8.76072
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's rmse: 11.2997
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[96]	valid_0's rmse: 20.5493
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's rmse: 10.365
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[5]	valid_0's rmse: 13.7515
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:05,028] Trial 21 finished with value: 12.955722912875505 and parameters: {'learning_rate': 0.05977839766920579, 'num_leaves': 44, 'feature_fraction': 0.8521842705483471, 'bagging_fraction': 0.8484816844633801, 'bagging_freq': 3}. Best is trial 21 with value: 12.955722912875505.


Early stopping, best iteration is:
[93]	valid_0's rmse: 8.81311
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's rmse: 11.3571
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[146]	valid_0's rmse: 20.564
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's rmse: 10.561
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[6]	valid_0's rmse: 13.5619
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:07,875] Trial 22 finished with value: 12.974779462837024 and parameters: {'learning_rate': 0.06970026424720498, 'num_leaves': 49, 'feature_fraction': 0.8322207738227406, 'bagging_fraction': 0.8426074837816652, 'bagging_freq': 2}. Best is trial 21 with value: 12.955722912875505.
[I 2025-06-12 09:21:07,902] Trial 23 pruned. Trial was pruned at iteration 1.
[I 2025-06-12 09:21:07,932] Trial 24 pruned. Trial was pruned at iteration 1.


Early stopping, best iteration is:
[73]	valid_0's rmse: 8.82987
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's rmse: 11.321
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[62]	valid_0's rmse: 20.5164
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[11]	valid_0's rmse: 10.2159
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[5]	valid_0's rmse: 13.6971
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:09,888] Trial 25 finished with value: 12.927258458610376 and parameters: {'learning_rate': 0.07303989435698531, 'num_leaves': 44, 'feature_fraction': 0.8948499975545757, 'bagging_fraction': 0.8506858808589413, 'bagging_freq': 5}. Best is trial 25 with value: 12.927258458610376.


Early stopping, best iteration is:
[59]	valid_0's rmse: 8.88594
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's rmse: 11.4166
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[66]	valid_0's rmse: 20.9599
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[11]	valid_0's rmse: 10.5178
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's rmse: 13.5962
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:11,803] Trial 26 finished with value: 13.060032251624605 and parameters: {'learning_rate': 0.07980750283959928, 'num_leaves': 46, 'feature_fraction': 0.8325319562181925, 'bagging_fraction': 0.8197179233062364, 'bagging_freq': 2}. Best is trial 25 with value: 12.927258458610376.
[I 2025-06-12 09:21:11,857] Trial 27 pruned. Trial was pruned at iteration 4.
[I 2025-06-12 09:21:11,878] Trial 28 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:11,903] Trial 29 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:11,950] Trial 30 pruned. Trial was pruned at iteration 2.
[I 2025-06-12 09:21:11,971] Trial 31 pruned. Trial was pruned at iteration 0.


Early stopping, best iteration is:
[38]	valid_0's rmse: 8.80968
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:12,007] Trial 32 pruned. Trial was pruned at iteration 2.
[I 2025-06-12 09:21:12,030] Trial 33 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:12,051] Trial 34 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:12,085] Trial 35 pruned. Trial was pruned at iteration 2.
[I 2025-06-12 09:21:12,107] Trial 36 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:12,128] Trial 37 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:12,151] Trial 38 pruned. Trial was pruned at iteration 0.


Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 11.2884
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[122]	valid_0's rmse: 20.6909
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's rmse: 10.2664
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[3]	valid_0's rmse: 13.8983
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:14,470] Trial 39 finished with value: 13.028140772749882 and parameters: {'learning_rate': 0.0727405368146837, 'num_leaves': 43, 'feature_fraction': 0.985051946437993, 'bagging_fraction': 0.8674803261540727, 'bagging_freq': 9}. Best is trial 25 with value: 12.927258458610376.
[I 2025-06-12 09:21:14,497] Trial 40 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:14,528] Trial 41 pruned. Trial was pruned at iteration 2.


Early stopping, best iteration is:
[65]	valid_0's rmse: 8.9967
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's rmse: 11.3811
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[59]	valid_0's rmse: 20.254
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's rmse: 10.2772
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[5]	valid_0's rmse: 13.4826
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:16,038] Trial 42 finished with value: 12.83131877773829 and parameters: {'learning_rate': 0.08657339830018494, 'num_leaves': 36, 'feature_fraction': 0.8830666366692606, 'bagging_fraction': 0.8429387571767321, 'bagging_freq': 2}. Best is trial 42 with value: 12.83131877773829.
[I 2025-06-12 09:21:16,060] Trial 43 pruned. Trial was pruned at iteration 0.
[I 2025-06-12 09:21:16,095] Trial 44 pruned. Trial was pruned at iteration 3.
[I 2025-06-12 09:21:16,133] Trial 45 pruned. Trial was pruned at iteration 2.
[I 2025-06-12 09:21:16,166] Trial 46 pruned. Trial was pruned at iteration 3.
[I 2025-06-12 09:21:16,200] Trial 47 pruned. Trial was pruned at iteration 2.


Early stopping, best iteration is:
[40]	valid_0's rmse: 8.76173
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's rmse: 11.421
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[139]	valid_0's rmse: 20.2276
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[17]	valid_0's rmse: 10.0532
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's rmse: 13.5722
Training until validation scores don't improve for 30 rounds


[I 2025-06-12 09:21:18,171] Trial 48 finished with value: 12.82649640460453 and parameters: {'learning_rate': 0.06953540158438691, 'num_leaves': 34, 'feature_fraction': 0.9058326266100843, 'bagging_fraction': 0.8439798053738218, 'bagging_freq': 2}. Best is trial 48 with value: 12.82649640460453.
[I 2025-06-12 09:21:18,196] Trial 49 pruned. Trial was pruned at iteration 0.


Early stopping, best iteration is:
[52]	valid_0's rmse: 8.85852
Best trial:
RMSE: 12.8265
Params:
learning_rate: 0.06953540158438691
num_leaves: 34
feature_fraction: 0.9058326266100843
bagging_fraction: 0.8439798053738218
bagging_freq: 2


In [11]:
def get_lgb_oof_predictions(X, y, lgb_params, n_splits=5, num_boost_round=1000, es=50):
    """
    Build out-of-fold (OOF) predictions with LightGBM under time-series cross-validation.

    Args:
        X: Feature frame, indexed positionally with ``.iloc``.
        y: Target series aligned with ``X``, indexed positionally with ``.iloc``.
        lgb_params: LightGBM parameter dict used to train every fold.
        n_splits: Number of ``TimeSeriesSplit`` folds.
        num_boost_round: Upper bound on boosting rounds per fold.
        es: Early-stopping patience (rounds without improvement on the fold's
            validation set).

    Returns:
        (oof_preds, models): ``oof_preds`` is a float array of ``len(X)``;
        ``models`` holds one trained booster per fold, in fold order.

    Note:
        ``oof_preds`` starts as zeros and is only written at validation indices, so any
        row that never lands in a validation fold keeps 0.0. With ``TimeSeriesSplit``
        that is expected to be the earliest block of rows — callers should not treat
        those zeros as predictions.
    """
    oof_preds = np.zeros(len(X))
    models = []
    tscv = TimeSeriesSplit(n_splits=n_splits)

    for fold, (train_idx, val_idx) in enumerate(tscv.split(X)):
        print(f"Fold {fold+1}/{n_splits}")

        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

        dtrain = lgb.Dataset(X_train, label=y_train)
        dval = lgb.Dataset(X_val, label=y_val)

        # Train with the parameters passed by the caller; the previous version read the
        # notebook-level global `lgb_params1` here and silently ignored the argument.
        model = lgb.train(
            lgb_params,
            dtrain,
            valid_sets=[dval],
            num_boost_round=num_boost_round,
            callbacks=[
                lgb.early_stopping(es),
                lgb.log_evaluation(period=0),
            ]
        )

        # Predict with the early-stopped iteration, not the last one trained.
        preds = model.predict(X_val, num_iteration=model.best_iteration)
        oof_preds[val_idx] = preds
        models.append(model)

    return oof_preds, models

In [12]:
# Out-of-fold predictions for model 1.
oof_preds_model1, model1_folds = get_lgb_oof_predictions(
    X_model1, y_model1,
    lgb_params=lgb_params1,  # tuned parameters (e.g. from the Optuna study)
    n_splits=5
)

# Attach the predictions to df_model1.
# NOTE(review): the OOF array is initialised with zeros, so rows that were never part of
# a validation fold carry 0.0 here rather than a real prediction — confirm how
# downstream code treats them.
df_model1['price_day_ahead_pred'] = oof_preds_model1

Fold 1/5
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[4]	valid_0's rmse: 11.421
Fold 2/5
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[139]	valid_0's rmse: 20.2276
Fold 3/5
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[17]	valid_0's rmse: 10.0532
Fold 4/5
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[7]	valid_0's rmse: 13.5722
Fold 5/5
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[52]	valid_0's rmse: 8.85852


#### 全データで学習

In [13]:
# Remove 'weight' from the inputs of the final fit. errors='ignore' keeps the cell
# idempotent: re-running it after the column is already gone no longer raises KeyError.
# NOTE(review): the tuning and OOF cells above ran on X_model1 before this drop, i.e.
# with 'weight' still among the features — confirm that mismatch is intended.
X_model1 = X_model1.drop(columns=['weight'], errors='ignore')

In [14]:
# Build the LightGBM Dataset from all model-1 rows.
lgb_train = lgb.Dataset(X_model1, label=y_model1)

# NOTE(review): no validation set is passed to lgb.train below, so this logging callback
# presumably has nothing to report — verify.
callbacks = [
    lgb.log_evaluation(period=50)
]

# Final fit on the full data with the tuned parameters.
# NOTE(review): num_boost_round is not passed, so the library default is used rather
# than the early-stopped iteration counts observed during CV — confirm this is intended.
final_model1 = lgb.train(
    lgb_params1,
    train_set=lgb_train,
    callbacks=callbacks,
)

# Predict on the training data itself.
y_train_pred = final_model1.predict(X_model1)

# In-sample RMSE (not a generalisation estimate: the model has seen these rows).
final_rmse = np.sqrt(mean_squared_error(y_model1, y_train_pred))

print(f"LightGBMのトレーニングRMSE: {final_rmse:.4f}")

LightGBMのトレーニングRMSE: 7.7668


In [15]:
# Bundle the trained booster together with the feature column order it was fitted on.
model_package1 = {
    'model': final_model1,
    'feature_name': X_model1.columns.tolist()
}

# Persist the bundle with pickle (only load such files from trusted sources).
with open('../output/モデル/sep_model1_lgb.pkl', 'wb') as f:
    pickle.dump(model_package1, f)

## モデル2（TFT）

In [None]:
def make_tft_dataframe(X_list, y_array, model1_pred_df, time_col='time', max_seq_len=36):
    """
    Flatten per-sample feature sequences into one long-format DataFrame for TFT.

    Args:
        X_list: Iterable of 2-D arrays, one per sample, indexed as ``x_seq[step, feature]``.
        y_array: Iterable of targets, one per sample; the same value is written on every
            step of that sample's sequence.
        model1_pred_df: One row per sample, containing the ``time_col`` column (base
            timestamp of the sample) and ``price_day_ahead_pred``.
        time_col: Name of the timestamp column in ``model1_pred_df``.
        max_seq_len: Step ``j`` is stamped ``base_time - (max_seq_len - j)`` hours.

    Returns:
        DataFrame with one row per (sample, step) and columns ``time_idx``, ``group_id``,
        ``actual_price``, ``price_day_ahead_pred``, ``time``, ``feature_0`` ...
        ``feature_{n-1}``. The three inputs are zipped, so iteration stops at the
        shortest of them.
    """
    rows = []
    # Row dicts are keyed by the real column names, so `time_col` is honoured (the
    # previous version always read the attribute `.time`, ignoring the parameter).
    meta_records = model1_pred_df.to_dict('records')

    for i, (x_seq, y, meta) in enumerate(zip(X_list, y_array, meta_records)):
        time_base = pd.to_datetime(meta[time_col])
        day_ahead_pred = meta['price_day_ahead_pred']
        n_steps, n_features = x_seq.shape[0], x_seq.shape[1]

        for j in range(n_steps):
            row = {
                'time_idx': j,    # position within the sequence
                'group_id': i,    # one series id per sample
                'actual_price': y,  # target
                'price_day_ahead_pred': day_ahead_pred,
                'time': time_base - pd.Timedelta(hours=max_seq_len - j),
            }
            # Append the feature columns for this step.
            row.update({f'feature_{k}': x_seq[j, k] for k in range(n_features)})
            rows.append(row)

    df_tft = pd.DataFrame(rows)
    return df_tft

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.5.1.post0-py3-none-any.whl.metadata (20 kB)
Collecting pytorch-forecasting
  Downloading pytorch_forecasting-1.4.0-py3-none-any.whl.metadata (14 kB)
Collecting torch>=2.1.0 (from pytorch-lightning)
  Downloading torch-2.7.1-cp311-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting fsspec>=2022.5.0 (from fsspec[http]>=2022.5.0->pytorch-lightning)
  Downloading fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.7.3-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting lightning<3.0.0,>=2.0.0 (from pytorch-forecasting)
  Downloading lightning-2.5.1.post0-py3-none-any.whl.metadata (39 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from fsspec[http]>=2022.5.0->pytorch-lightning)
  Downloading aiohttp-3.12.12-cp311-cp31

In [None]:
from pytorch_forecasting import TimeSeriesDataSet

def create_tft_dataset(df_tft, max_seq_len=36):
    """
    Wrap a long-format frame in a TimeSeriesDataSet.

    Args:
        df_tft: Long-format DataFrame with ``time_idx``, ``group_id``, ``actual_price``,
            ``price_day_ahead_pred`` and any number of ``feature_*`` columns.
        max_seq_len: Passed as ``max_encoder_length``.

    Returns:
        The constructed TimeSeriesDataSet (prediction length fixed to 1).
    """
    # Every column whose name starts with "feature_" is treated as an unknown real.
    unknown_reals = []
    for column in df_tft.columns:
        if column.startswith("feature_"):
            unknown_reals.append(column)

    dataset_options = {
        "time_idx": "time_idx",
        "target": "actual_price",
        "group_ids": ["group_id"],
        "max_encoder_length": max_seq_len,
        "max_prediction_length": 1,
        "static_categoricals": [],
        "static_reals": [],
        "time_varying_known_reals": ["time_idx", "price_day_ahead_pred"],
        "time_varying_unknown_reals": unknown_reals,
    }
    return TimeSeriesDataSet(df_tft, **dataset_options)