In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tqdm import tqdm

# =========================
# Settings
# =========================
INPUT_FILE = "merged_output2.csv"
OUTPUT_FILE = "arima_rolling_3days_2024.csv"

DATE_COL = "DATE_MILADI"
TARGET_COL = "POWER"
EBRAZ_COL = "ebraz"

# (p, d, q) order handed to SARIMAX on every refit.
ARIMA_ORDER = (2, 0, 2)

# =========================
# Load data
# =========================
df = pd.read_csv(INPUT_FILE)
df[DATE_COL] = pd.to_datetime(df[DATE_COL])
df = df.sort_values(DATE_COL)

# Hourly time series of the target column; hours absent from the file
# become NaN. The lowercase "h" alias is used because the uppercase "H"
# alias is deprecated in recent pandas and emits a FutureWarning.
ts = df.set_index(DATE_COL)[TARGET_COL].asfreq("h")

# Per-day result frames, filled by the forecasting loop.
all_results = []

# =========================
# List of the days of 2024
# =========================
days_2024 = pd.date_range(
    start="2024-01-01",
    end="2024-12-31",
    freq="D",
)

# =========================
# Rolling forecast
# =========================
# For every day of 2024: fit SARIMAX on the data that ends three days before
# the target day, forecast the 24 hours of the target day, and attach the
# forecast to that day's rows of `df` as the DECLARED column.
for day in tqdm(days_2024, desc="Rolling ARIMA 3-days ahead"):

    forecast_day = day
    forecast_end = forecast_day + pd.Timedelta(hours=23)
    train_end = forecast_day - pd.Timedelta(days=3)

    # Label slicing is end-inclusive, so step back one hour to keep the
    # training window strictly before `train_end`.
    train_ts = ts.loc[:train_end - pd.Timedelta(hours=1)]
    if train_ts.count() < 500:
        # Too few non-missing observations to fit the model.
        continue

    # Rows of the input that belong to the forecast day. Checked before
    # fitting so that days with no rows do not pay for a model fit.
    day_df = df[
        (df[DATE_COL] >= forecast_day) &
        (df[DATE_COL] <= forecast_end)
    ].copy()

    if day_df.empty:
        continue

    # Fit the model
    model = SARIMAX(
        train_ts,
        order=ARIMA_ORDER,
        trend="c",
        enforce_stationarity=False,
        enforce_invertibility=False
    )

    model_fit = model.fit(disp=False)

    # Forecast only the target day (24 hours)
    forecast = model_fit.predict(
        start=forecast_day,
        end=forecast_end
    )

    # Negative forecasts are floored at zero.
    forecast = forecast.clip(lower=0)

    # Align by timestamp instead of by position: the day may hold fewer
    # (missing hours) or more than 24 rows, in which case a positional
    # assignment of the 24 forecast values would raise a length mismatch.
    day_df["DECLARED"] = forecast.reindex(day_df[DATE_COL]).to_numpy()

    # Calendar columns
    day_df["year"] = day_df[DATE_COL].dt.year
    day_df["month"] = day_df[DATE_COL].dt.month
    day_df["dayofweek"] = day_df[DATE_COL].dt.dayofweek

    # Electricity-market rule: rows whose ebraz value is 0 declare 0.
    day_df.loc[day_df[EBRAZ_COL] == 0, "DECLARED"] = 0

    all_results.append(day_df)

# =========================
# Final output
# =========================
# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with an actionable message when every day was skipped.
if not all_results:
    raise ValueError(
        "No forecast days were produced: every day was skipped because of "
        "too little training data or no matching rows in "
        f"{INPUT_FILE!r}."
    )

final_df = pd.concat(all_results)

# Columns written to the output file, in this order.
final_columns = [
    "HOUR", "DATE_MILADI", "DATE_SHAMSI", "POWER", "CODE", "UNIT_NO",
    "DAMA", "ROTOOBAT", "12209_G13", "12210_G13",
    "ebraz", "importance_factor",
    "year", "month", "dayofweek", "DECLARED"
]

final_df.to_csv(OUTPUT_FILE, columns=final_columns, index=False)

print("✅ پیش‌بینی ۳ روز جلوتر برای کل 2024 ساخته شد:", OUTPUT_FILE)


  ts = df.set_index(DATE_COL)[TARGET_COL].asfreq("H")
Rolling ARIMA 3-days ahead:   2%|▏         | 7/366 [24:53<23:35:25, 236.56s/it]