In [None]:
# 改良版：月内の偏りの排除、0.01コマの連続性、決定的


import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import jpholiday

# --- Load the two yearly price files and stack them into one frame ---
df1 = pd.read_csv('price_2023.csv', parse_dates=['datetime'])
df2 = pd.read_csv('price_2024.csv', parse_dates=['datetime'])
df_all = pd.concat([df1, df2], ignore_index=True)

# --- Add calendar features derived from the timestamp ---
df_all['month'] = df_all['datetime'].dt.month
df_all['time'] = df_all['datetime'].dt.time
df_all['date'] = df_all['datetime'].dt.date
df_all['weekday'] = df_all['datetime'].dt.weekday
df_all['is_holiday'] = df_all['date'].apply(jpholiday.is_holiday)
# Saturdays/Sundays (weekday >= 5) and jpholiday holidays are labelled
# 'holiday'; everything else is 'weekday'.  Computed column-wise instead of
# with a per-row Python lambda.
df_all['day_type'] = np.where(
    (df_all['weekday'] >= 5) | df_all['is_holiday'].astype(bool),
    'holiday',
    'weekday',
)

# --- Flag the 0.01-yen slots ---
# Tolerant comparison: a parsed float that is not bit-identical to the
# literal 0.01 is still recognised as a 0.01-yen price.
df_all['is_001'] = np.isclose(df_all['price'], 0.01)

# --- How often 0.01 yen occurs per month x day type x time of day ---
# Within each month the most frequent slots come first; ties are ordered by
# time of day.
low_price_rows = df_all.loc[df_all['is_001']]
grouped = (
    low_price_rows
    .groupby(['month', 'day_type', 'time'])
    .size()
    .rename('count')
    .reset_index()
    .sort_values(by=['month', 'count', 'time'], ascending=[True, False, True])
)

# --- Average number of 0.01-yen slots per month ---
# The total is halved (two yearly input files are concatenated into df_all)
# and truncated to an integer.
count_per_month = df_all.groupby('month')['is_001'].sum()
monthly_001 = count_per_month.rename('count_001').reset_index()
monthly_001['avg_count'] = monthly_001['count_001'].div(2).astype(int)

# --- Mean price for every (month, time-of-day) pair ---
monthly_avg = (
    df_all[['month', 'time', 'price']]
    .groupby(['month', 'time'])
    .mean()
    .reset_index()
)

# --- Build the base of the standard model ---
# Every day of calendar year 2023 receives the mean price profile of its
# month, one row per time slot, tagged with the day's weekday/holiday type.
standard_dates = pd.date_range('2023-01-01', '2023-12-31', freq='D')

# Split the monthly profile once so each day only needs a dictionary lookup.
profile_by_month = {
    profile_month: list(zip(part['time'], part['price']))
    for profile_month, part in monthly_avg.groupby('month')
}

standard_rows = []
for day in standard_dates:
    # Weekends and jpholiday holidays share the 'holiday' label.
    off_day = jpholiday.is_holiday(day) or day.weekday() >= 5
    label = 'holiday' if off_day else 'weekday'
    standard_rows.extend(
        {
            'datetime': datetime.combine(day, slot_time),
            'month': day.month,
            'time': slot_time,
            'day_type': label,
            'price': slot_price,
        }
        for slot_time, slot_price in profile_by_month.get(day.month, [])
    )
df_standard = pd.DataFrame(standard_rows)

# --- Split each month into date blocks and place one run of 0.01-yen slots per block ---
# For every month:
#   1. derive the number of runs from the monthly average 0.01-yen count,
#   2. split the month's dates into that many contiguous blocks,
#   3. in each block, anchor a run on the first slot matching the most
#      frequent (day_type, time) combination observed for that month,
#   4. overwrite the anchor and its neighbouring slots with 0.01.
block_size = 3  # number of consecutive slots in each 0.01-yen run
# Offsets around the anchor slot.  Written so that a run has exactly
# block_size slots for even sizes as well; for block_size = 3 this is -1, 0, +1.
run_before = block_size // 2
run_after = block_size - run_before  # exclusive upper offset

# Calendar date of every slot, computed once instead of inside the loops.
slot_dates = df_standard['datetime'].dt.date

for month in range(1, 13):
    month_counts = monthly_001.loc[monthly_001['month'] == month, 'avg_count']
    if month_counts.empty:
        # Month absent from the statistics: nothing to allocate.
        continue
    avg_count = int(month_counts.iloc[0])
    num_blocks = avg_count // block_size
    if num_blocks < 1:
        # Fewer than block_size slots on average.  np.array_split raises
        # ValueError when asked for 0 sections, so skip the month.
        continue

    # Rows of this month; positions within it are used to pick neighbours.
    month_mask = df_standard['month'] == month
    all_times = df_standard[month_mask]
    if all_times.empty:
        continue
    month_index = all_times.index
    month_slot_dates = slot_dates[month_mask]

    # Split the month's dates into num_blocks contiguous groups.  When there
    # are more blocks than days, the surplus blocks are empty and select
    # nothing.
    month_dates = sorted(month_slot_dates.unique())
    blocks = np.array_split(month_dates, num_blocks)

    # Candidate (day_type, time) pairs, most frequent first.
    top_times = grouped[grouped['month'] == month]
    selected_indices = []

    for block in blocks:
        block_days = set(block)
        block_df = all_times[month_slot_dates.isin(block_days)]

        for cand_day_type, cand_time in zip(top_times['day_type'], top_times['time']):
            sub = block_df[
                (block_df['day_type'] == cand_day_type)
                & (block_df['time'] == cand_time)
                & (~block_df.index.isin(selected_indices))
            ]
            if sub.empty:
                continue
            # Anchor on the first matching slot and take its neighbours,
            # clipped to the boundaries of the month.
            center_loc = month_index.get_loc(sub.index[0])
            start = max(center_loc - run_before, 0)
            stop = min(center_loc + run_after, len(month_index))
            selected_indices.extend(month_index[start:stop])
            break  # one run per block; move on to the next block

    # Overwrite the chosen slots with 0.01 yen.
    if selected_indices:
        df_standard.loc[selected_indices, 'price'] = 0.01

# --- Output ---
# Put the slots in chronological order, renumber the rows, and write only
# the timestamp and price columns to CSV.
df_standard = df_standard.sort_values('datetime').reset_index(drop=True)
export_frame = df_standard[['datetime', 'price']]
export_frame.to_csv('jepx_standard_model_seasonal_blocked.csv', index=False)
print("標準モデルを出力しました（0.01円コマ分散＆連続性あり）")