In [2]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor

# ======================================================
# 1. Load the data
# ======================================================
input_file = "merged_output2.csv"

# Parse the timestamp column and put the rows in chronological order;
# the shift/rolling features built further down operate on row order.
df = (
    pd.read_csv(input_file)
    .assign(DATE_MILADI=lambda frame: pd.to_datetime(frame['DATE_MILADI']))
    .sort_values('DATE_MILADI')
    .reset_index(drop=True)
)

# ======================================================
# 2. Lag features
# ======================================================
lags = [1, 24, 48, 72, 168]

# One column per lag, holding the POWER value `lag` rows earlier.
# NOTE(review): the shift is row-based, so these only correspond to
# 1 h / 1-3 days / 1 week back if the data is hourly with no gaps -- confirm.
lagged_power = {f'POWER_LAG_{lag}': df['POWER'].shift(lag) for lag in lags}
df = df.assign(**lagged_power)

# ======================================================
# 3. Rolling features
# ======================================================
# Trailing-window statistics of POWER over the last 24, 48 and 72 rows
# (current row included). With the default min_periods, the first
# window-1 rows of each column are NaN.
power = df['POWER']
roll_24, roll_48, roll_72 = (power.rolling(size) for size in (24, 48, 72))

df['POWER_ROLL_MEAN_24'] = roll_24.mean()
df['POWER_ROLL_MEAN_72'] = roll_72.mean()

df['POWER_ROLL_STD_24'] = roll_24.std()
df['POWER_ROLL_STD_48'] = roll_48.std()

df['POWER_ROLL_MIN_72'] = roll_72.min()
df['POWER_ROLL_MAX_72'] = roll_72.max()

# ======================================================
# 4. Calendar features
# ======================================================
timestamps = df['DATE_MILADI'].dt

df['HOUR'] = timestamps.hour
df['DAY_OF_WEEK'] = timestamps.dayofweek  # Monday=0 ... Sunday=6
df['MONTH'] = timestamps.month

# Flag weekdays 4 and 5 (Friday and Saturday in pandas' numbering).
# NOTE(review): the column is named IS_HOLIDAY but only looks at the day
# of the week -- confirm these are the intended days off and whether
# public holidays should be covered as well.
holiday_weekdays = [4, 5]
df['IS_HOLIDAY'] = df['DAY_OF_WEEK'].isin(holiday_weekdays).astype(int)

# ======================================================
# 5. Target (24 rows ahead -> tomorrow, for hourly data)
# ======================================================
HORIZON = 24

# A negative shift pulls future values back: each row's target is the
# POWER value HORIZON rows later, so the last HORIZON rows get NaN.
df['TARGET_POWER_24H'] = df['POWER'].shift(periods=-HORIZON)

# ======================================================
# 6. Feature list and target column
# ======================================================
feature_cols = [
    'POWER_LAG_1', 'POWER_LAG_24', 'POWER_LAG_48',
    'POWER_LAG_72', 'POWER_LAG_168',
    'POWER_ROLL_MEAN_24', 'POWER_ROLL_MEAN_72',
    'POWER_ROLL_STD_24', 'POWER_ROLL_STD_48',
    'POWER_ROLL_MIN_72', 'POWER_ROLL_MAX_72',
    'HOUR', 'DAY_OF_WEEK', 'MONTH', 'IS_HOLIDAY'
]
target_col = 'TARGET_POWER_24H'

# ======================================================
# 7. Drop incomplete rows (for the model)
# ======================================================
# Require only the columns this pipeline actually reads: the features, the
# target, and `ebraz` (used by the market rule further down). A bare
# dropna() would also throw away every row that has a missing value in any
# unrelated column of the input file, silently shrinking both the
# training set and the forecast output.
required_cols = feature_cols + [target_col, 'ebraz']
df_model = df.dropna(subset=required_cols).reset_index(drop=True)

X = df_model[feature_cols]
y = df_model[target_col]

# ======================================================
# 8. Train / test masks
# ======================================================
# Split on the calendar year of each row's own timestamp (DATE_MILADI):
# 2021-2023 inclusive for training, 2024 onwards for testing.
# NOTE(review): the split uses the feature timestamp, not the target's, so
# the last training rows presumably have targets that fall in 2024 --
# confirm this boundary overlap is acceptable.
row_year = df_model['DATE_MILADI'].dt.year

train_mask = row_year.between(2021, 2023)
test_mask = row_year.ge(2024)

# ======================================================
# 9. Train the model
# ======================================================
xgb_params = {
    'n_estimators': 600,
    'max_depth': 6,
    'learning_rate': 0.05,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'objective': 'reg:squarederror',
    'random_state': 42,  # fixed seed for the row/column subsampling
}
model = XGBRegressor(**xgb_params)

# Fit on the training-period rows only.
model.fit(X.loc[train_mask], y.loc[train_mask])

# ======================================================
# 10. Predict (test rows only)
# ======================================================
# Write the forecasts into DECLARED for the test rows; rows outside the
# test mask are not written.
test_predictions = model.predict(X.loc[test_mask])
df_model.loc[test_mask, 'DECLARED'] = test_predictions

# ======================================================
# 11. Electricity-market rule
# ebraz == 0 -> DECLARED = 0
# ======================================================
# NOTE(review): `ebraz` is read from the same row as the features, while
# DECLARED is the forecast for HORIZON rows later -- confirm that this is
# the intended alignment.
zero_declaration = test_mask & df_model['ebraz'].eq(0)
df_model.loc[zero_declaration, 'DECLARED'] = 0

# ======================================================
# 12. Timestamp the forecast refers to (one horizon ahead)
# ======================================================
# Derive the offset from HORIZON instead of repeating the literal 24, so
# this label cannot drift out of step with the target shift. This treats
# one row as one hour.
df_model['FORECAST_DATE'] = df_model['DATE_MILADI'] + pd.Timedelta(hours=HORIZON)

# ======================================================
# 13. Final output
# Only the rows that received a forecast, with every column kept
# ======================================================
# The former extra filter `FORECAST_DATE.dt.hour.between(0, 23)` has been
# dropped: an hour-of-day is always within 0-23, so it never excluded a
# row. NOTE(review): if the intent was to keep only forecasts for one
# particular day or issue time, that needs an explicit condition here.
final_output = df_model.loc[test_mask].copy()

# ======================================================
# 14. Save the output
# ======================================================
# The file name is deliberately left unchanged (it still says "72h").
output_file = "xgboost_forecast_72h_declared_power.csv"
final_output.to_csv(output_file, index=False)

# Report the path of the file that was just written.
print(
    "✅ فایل خروجی نهایی ساخته شد (پیش‌بینی فردا + همه ستون‌ها):",
    output_file,
    sep="\n",
)


✅ فایل خروجی نهایی ساخته شد (پیش‌بینی فردا + همه ستون‌ها):
xgboost_forecast_72h_declared_power.csv
