In [1]:
# =====================================================
# Anchored KNN + Percentile
# Walk-Forward Day-Ahead Forecast
# Electricity Market - Declared Power
# =====================================================

import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# =========================
# تنظیمات
# =========================
INPUT_FILE = "merged_output2.csv"
OUTPUT_FILE = "declared_knn_anchored.xlsx"

TARGET = "POWER"
DATE_COL = "DATE_MILADI"
HOUR_COL = "HOUR"
EBRAZ_COL = "ebraz"

K = 40                  # تعداد همسایه‌ها
PERCENTILE = 30         # محافظه‌کار ولی واقعی
LAMBDA = 0.6            # وزن الگو (0.5 تا 0.7 عالی)
MIN_RATIO = 0.75        # کف نهایی نسبت به lag_24

# =========================
# خواندن داده
# =========================
df = pd.read_csv(INPUT_FILE)
df[DATE_COL] = pd.to_datetime(df[DATE_COL])

df = df.sort_values([DATE_COL, HOUR_COL]).reset_index(drop=True)

# =========================
# ویژگی‌های زمانی
# =========================
df["hour"] = df[HOUR_COL]
df["dayofweek"] = df[DATE_COL].dt.dayofweek
df["month"] = df[DATE_COL].dt.month

# =========================
# Lag Features (فقط گذشته)
# =========================
df["lag_24"] = df[TARGET].shift(24)
df["lag_48"] = df[TARGET].shift(48)
df["lag_72"] = df[TARGET].shift(72)

df = df.dropna().reset_index(drop=True)

FEATURES = [
    "hour",
    "dayofweek",
    "month",
    "DAMA",
    "ROTOOBAT",
    "lag_24",
    "lag_48",
    "lag_72"
]

# =========================
# آماده‌سازی
# =========================
df["DECLARED"] = np.nan
unique_days = df[DATE_COL].dt.date.unique()

scaler = StandardScaler()

# =========================
# Walk-Forward Day-Ahead
# =========================
for i in range(3, len(unique_days) - 1):

    train_days = unique_days[:i]
    predict_day = unique_days[i]

    train_idx = df[DATE_COL].dt.date.isin(train_days)
    test_idx = df[DATE_COL].dt.date == predict_day

    X_train = df.loc[train_idx, FEATURES]
    y_train = df.loc[train_idx, TARGET]

    X_test = df.loc[test_idx, FEATURES]

    # ---------- Scaling ----------
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)

    # ---------- KNN ----------
    knn = NearestNeighbors(
        n_neighbors=K,
        metric="euclidean"
    )
    knn.fit(X_train_s)

    distances, indices = knn.kneighbors(X_test_s)

    ratios = []

    for idx_list in indices:
        neigh_vals = y_train.iloc[idx_list].values

        p_q = np.percentile(neigh_vals, PERCENTILE)
        mean_n = np.mean(neigh_vals)

        ratio = p_q / mean_n if mean_n > 0 else 1.0
        ratios.append(ratio)

    ratios = np.array(ratios)

    # ---------- Anchor به lag_24 ----------
    lag24 = df.loc[test_idx, "lag_24"].values

    declared = lag24 * ((1 - LAMBDA) + LAMBDA * ratios)

    # ---------- کف نهایی ----------
    declared = np.maximum(declared, lag24 * MIN_RATIO)

    df.loc[test_idx, "DECLARED"] = declared

# =========================
# منطق بازار برق
# =========================
df.loc[df[EBRAZ_COL] == 0, "DECLARED"] = 0
df["DECLARED"] = df["DECLARED"].clip(lower=0)

# =========================
# ارزیابی
# =========================
err = df["DECLARED"] - df[TARGET]

mae_pos = np.mean(np.abs(err[err > 0]))   # بیش‌ابرازی
mae_neg = np.mean(np.abs(err[err < 0]))   # کم‌ابرازی

print("===================================")
print("Anchored KNN + Percentile")
print("===================================")
print("MAE Positive (Over):", round(mae_pos, 2))
print("MAE Negative (Under):", round(mae_neg, 2))
print("Market Score:", round(5 * mae_pos + mae_neg, 2))

# =========================
# ذخیره خروجی
# =========================
df.to_excel(OUTPUT_FILE, index=False)
print("Saved:", OUTPUT_FILE)


Anchored KNN + Percentile
MAE Positive (Over): 11.65
MAE Negative (Under): 12.59
Market Score: 70.83
Saved: declared_knn_anchored.xlsx
