In [3]:
# 01 Import

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime
import holidays

import lightgbm as lgb

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

import warnings
warnings.simplefilter('ignore')

SEED = 86

In [4]:
df_2022 = pd.read_csv('../data/jepx/spot_summary_2022.csv', encoding='cp932')
df_2023 = pd.read_csv('../data/jepx/spot_summary_2022.csv', encoding='cp932')
df_2024 = pd.read_csv('../data/jepx/spot_summary_2022.csv', encoding='cp932')
df_2025 = pd.read_csv('../data/jepx/spot_summary_2022.csv', encoding='cp932')

In [5]:
dfs = [df_2022, df_2023, df_2024, df_2025]
df_all = pd.concat(dfs, ignore_index=True)
df_all['受渡日'] = pd.to_datetime(df_all['受渡日'])
df_all.head()

Unnamed: 0,受渡日,時刻コード,売り入札量(kWh),買い入札量(kWh),約定総量(kWh),システムプライス(円/kWh),エリアプライス北海道(円/kWh),エリアプライス東北(円/kWh),エリアプライス東京(円/kWh),エリアプライス中部(円/kWh),エリアプライス北陸(円/kWh),エリアプライス関西(円/kWh),エリアプライス中国(円/kWh),エリアプライス四国(円/kWh),エリアプライス九州(円/kWh),売りブロック入札総量(kWh),売りブロック約定総量(kWh),買いブロック入札総量(kWh),買いブロック約定総量(kWh)
0,2022-04-01,1,17007100,16995100,15959150,11.97,10.39,10.39,12.12,12.12,12.12,12.12,12.12,12.12,12.12,3454600,3260650,1329300,1036000
1,2022-04-01,2,17252550,17306150,15896600,14.47,14.47,14.47,14.47,14.47,14.47,14.47,14.47,14.47,14.47,3686600,3317300,1698000,1360900
2,2022-04-01,3,17496550,17531950,15988650,15.0,24.32,24.32,24.32,14.49,14.49,14.49,14.49,14.49,14.49,3867450,3318250,2071250,1662200
3,2022-04-01,4,17994700,17695250,16003200,16.55,16.55,16.55,16.55,16.55,16.55,16.55,16.55,16.55,16.55,4106800,3320550,2270650,1844100
4,2022-04-01,5,18144250,17845100,16073050,17.3,24.32,24.32,24.32,16.55,16.55,16.55,16.55,16.55,16.55,4297450,3321000,2246900,1773350


In [6]:
# 02 Feature Engineering

df_all['year'] = df_all['受渡日'].dt.year
df_all['quarter'] = df_all['受渡日'].dt.quarter
df_all['month'] = df_all['受渡日'].dt.month
df_all['day_of_week'] = df_all['受渡日'].dt.day_of_week
df_all['week_number'] = df_all['受渡日'].dt.isocalendar().week
df_all['day_of_year'] = df_all['受渡日'].dt.dayofyear


In [7]:
df_all.head(10)

Unnamed: 0,受渡日,時刻コード,売り入札量(kWh),買い入札量(kWh),約定総量(kWh),システムプライス(円/kWh),エリアプライス北海道(円/kWh),エリアプライス東北(円/kWh),エリアプライス東京(円/kWh),エリアプライス中部(円/kWh),...,売りブロック入札総量(kWh),売りブロック約定総量(kWh),買いブロック入札総量(kWh),買いブロック約定総量(kWh),year,quarter,month,day_of_week,week_number,day_of_year
0,2022-04-01,1,17007100,16995100,15959150,11.97,10.39,10.39,12.12,12.12,...,3454600,3260650,1329300,1036000,2022,2,4,4,13,91
1,2022-04-01,2,17252550,17306150,15896600,14.47,14.47,14.47,14.47,14.47,...,3686600,3317300,1698000,1360900,2022,2,4,4,13,91
2,2022-04-01,3,17496550,17531950,15988650,15.0,24.32,24.32,24.32,14.49,...,3867450,3318250,2071250,1662200,2022,2,4,4,13,91
3,2022-04-01,4,17994700,17695250,16003200,16.55,16.55,16.55,16.55,16.55,...,4106800,3320550,2270650,1844100,2022,2,4,4,13,91
4,2022-04-01,5,18144250,17845100,16073050,17.3,24.32,24.32,24.32,16.55,...,4297450,3321000,2246900,1773350,2022,2,4,4,13,91
5,2022-04-01,6,18607750,17605500,16139850,14.77,14.77,14.77,14.77,14.77,...,4471700,3321350,2164950,1694900,2022,2,4,4,13,91
6,2022-04-01,7,18704900,17833650,16201100,14.89,14.89,14.89,14.89,14.89,...,4637200,3323000,2104700,1650650,2022,2,4,4,13,91
7,2022-04-01,8,18884750,17974450,16244700,14.89,15.0,15.0,15.0,14.89,...,4738250,3323200,2085350,1622300,2022,2,4,4,13,91
8,2022-04-01,9,18972300,18076650,16322850,14.89,14.89,14.89,14.89,14.89,...,4740850,3325800,2142850,1653300,2022,2,4,4,13,91
9,2022-04-01,10,18883250,18244750,16271350,16.55,19.8,19.8,19.8,16.55,...,4745000,3327950,2136500,1646950,2022,2,4,4,13,91


# LightGBM

In [None]:
# 03 Training
target = 'エリアプライス東京(円/kWh)'

select_column = [
    'year', 'quarter', 'month', '時刻コード', 'day_of_week', 'week_number', 'day_of_year'
]

df_train = df_all[:-48*7]
df_val = df_train[-48*7:]
df_test = df_all[-48*7:]

X_train = df_train[select_column]
y_train = df_train[target]

X_val = df_val[select_column]
y_val = df_val[target]

X_test = df_test[select_column]
y_test = df_test[target]

params = {
    'objective': 'regression',
    'metric': 'mae',
    'num_iterations':100000,
    'learning_rate': 0.02,
    'num_leaves': 16,
    'max_depth': -1,
    'early_stopping_rounds': 1000,
    'min_data_in_leaf': 20,
    'min_sum_hessian_in_leaf': 1e-3,
    'bagging_fanction': 0.9,
    'bagging_freq': 1,
    'feature_fraction': 0.9,
    'lambda_l1': 0.0,
    'lambda_l2': 0.0,
    'random_state': SEED,
    'verbosity': -1,
}

model = lgb.LGBMRegressor(**params)

model.fit(
    X_train, y_train,
    eval_set = [(X_val, y_val)],
    eval_metric = 'mae', 
    callbacks = [
        lgb.log_evaluation(1000)
    ]
)

[1000]	valid_0's l1: 1.62837
[2000]	valid_0's l1: 1.43695
[3000]	valid_0's l1: 1.38435
[4000]	valid_0's l1: 1.27688
[5000]	valid_0's l1: 1.19356
[6000]	valid_0's l1: 1.1785
[7000]	valid_0's l1: 1.11038
[8000]	valid_0's l1: 1.09657
[9000]	valid_0's l1: 0.930185
[10000]	valid_0's l1: 0.897338
[11000]	valid_0's l1: 0.893748
[12000]	valid_0's l1: 0.879299
[13000]	valid_0's l1: 0.850031
[14000]	valid_0's l1: 0.792333
[15000]	valid_0's l1: 0.753078
[16000]	valid_0's l1: 0.742487
[17000]	valid_0's l1: 0.718358
[18000]	valid_0's l1: 0.705673
[19000]	valid_0's l1: 0.69425
[20000]	valid_0's l1: 0.674885
[21000]	valid_0's l1: 0.669473
[22000]	valid_0's l1: 0.665431
[23000]	valid_0's l1: 0.661694
[24000]	valid_0's l1: 0.648026
[25000]	valid_0's l1: 0.629884
[26000]	valid_0's l1: 0.619945
[27000]	valid_0's l1: 0.599505
[28000]	valid_0's l1: 0.591446
[29000]	valid_0's l1: 0.546439
[30000]	valid_0's l1: 0.5442
[31000]	valid_0's l1: 0.536993
[32000]	valid_0's l1: 0.53033
[33000]	valid_0's l1: 0.52457


0,1,2
,boosting_type,'gbdt'
,num_leaves,16
,max_depth,-1
,learning_rate,0.02
,n_estimators,100
,subsample_for_bin,200000
,objective,'regression'
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [None]:
# 04 Test - 再学習していない

y_pred = model.predict(X_test)

mae  = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f'Test MAE: {mae:.4f}')
print(f'Test RMSE: {rmse:.4f}')


Test RMSE: 0.8815
Test MAE: 0.5772


In [None]:
# Train-Test with all area

target_cols = [
    'エリアプライス北海道(円/kWh)',
    'エリアプライス東北(円/kWh)',
    'エリアプライス東京(円/kWh)',
    'エリアプライス中部(円/kWh)',
    'エリアプライス北陸(円/kWh)',
    'エリアプライス関西(円/kWh)',
    'エリアプライス中国(円/kWh)',
    'エリアプライス四国(円/kWh)',
    'エリアプライス九州(円/kWh)',
]

select_column = [
    'year', 'quarter', 'month', '時刻コード', 'day_of_week', 'week_number', 'day_of_year'
]

df_train = df_all[:-48*7]
df_val = df_train[-48*7:]
df_test = df_all[-48*7:]

X_train = df_train[select_column]
y_train = df_train[target]

X_val = df_val[select_column]
y_val = df_val[target]

X_test = df_test[select_column]
y_test = df_test[target]

params = {
    'objective': 'regression',
    'metric': 'mae',
    'num_iterations':100000,
    'learning_rate': 0.02,
    'num_leaves': 16,
    'max_depth': -1,
    'early_stopping_rounds': 1000,
    'min_data_in_leaf': 20,
    'min_sum_hessian_in_leaf': 1e-3,
    'bagging_fanction': 0.9,
    'bagging_freq': 1,
    'feature_fraction': 0.9,
    'lambda_l1': 0.0,
    'lambda_l2': 0.0,
    'random_state': SEED,
    'verbosity': -1,
}

mae_results = []
rmse_results = []

# 各地域ごとにループ
for target in target_cols:
    y_train, y_val, y_test = df_train[target], df_val[target], df_test[target]

    model = lgb.LGBMRegressor(**params)

    model.fit(
        X_train, y_train,
        eval_set = [(X_val, y_val)],
        eval_metric = 'mae', 
        callbacks = [
            lgb.log_evaluation(1000)
        ]
    )

    y_pred = model.predict(X_test)
    mae  = mean_absolute_error(y_test, y_pred)
    mse  = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    mae_results.append({"Region": target, "MAE": mae})
    rmse_results.append({"Region": target, "RMSE": rmse})

# 表形式にまとめる
df_results = pd.DataFrame(results)
print(df_results)


