# In [None]:
import pandas as pd
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR

# =========================
# 1) Load and prepare data
# =========================
input_csv = "merged_output2.csv"
df = pd.read_csv(input_csv)

# The date column is mandatory: every later step selects rows by its year.
if 'DATE_MILADI' not in df.columns:
    raise ValueError("DATE_MILADI column not found in merged_output2.csv")

# Values that cannot be parsed become NaT (errors='coerce'); rows holding a
# NaT date are then discarded. The surviving rows keep their original index.
df['DATE_MILADI'] = pd.to_datetime(df['DATE_MILADI'], errors='coerce')
df = df.dropna(subset=['DATE_MILADI']).copy()

# =========================
# 2) Train/test split rules
# =========================
# Calendar years 2021-2023 (inclusive) form the training window; every row
# dated 2024 or later is held out for prediction. Earlier rows are unused.
date_years = df['DATE_MILADI'].dt.year
train_mask = (2021 <= date_years) & (date_years <= 2023)
test_mask = date_years >= 2024

train_df = df[train_mask].copy()
test_df = df[test_mask].copy()

# Both subsets are required downstream, so stop if either has no rows.
if any(subset.empty for subset in (train_df, test_df)):
    raise ValueError("Train or test subset is empty. Check date ranges and input data.")

# =========================
# 3) Remove POWER==0 and POWER<90 from training
# =========================
target_col = 'POWER'
if target_col not in df.columns:
    raise ValueError("POWER column not found in merged_output2.csv")

# A single ">= 90" filter covers both rules (zero is below 90). Rows whose
# POWER is missing also fail the comparison and are dropped. Only the
# training subset is filtered here; the test subset is left untouched.
train_df = train_df.loc[lambda frame: frame[target_col] >= 90].copy()
if not len(train_df):
    raise ValueError("Training data became empty after removing POWER<90. Check data quality.")

# =========================
# 4) Features & target (only selected ones)
# =========================
feature_cols = ['DAMA', 'ROTOOBAT', '12209_G13', '12210_G13', 'HOUR']

# Abort on the first selected feature that the loaded data does not provide.
missing_feature = next((name for name in feature_cols if name not in df.columns), None)
if missing_feature is not None:
    raise ValueError(f"Feature column {missing_feature} not found in merged_output2.csv")

# Feature matrices for both subsets; only the training subset supplies a target.
X_train, y_train = train_df[feature_cols].copy(), train_df[target_col].copy()
X_test = test_df[feature_cols].copy()

# =========================
# 5) Build preprocessing + SVM model
# =========================
# Feature preparation: fill missing values with the column median, then
# standardise each column.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),  # normalise the numeric data
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Only the selected feature columns are transformed; anything else passed in
# is dropped.
preprocess = ColumnTransformer(
    transformers=[('num', numeric_transformer, feature_cols)],
    remainder='drop',
)

# SVM regressor
svr_params = {
    'kernel': 'rbf',   # RBF kernel for non-linear relationships
    'C': 100,          # penalty parameter
    'epsilon': 0.1,    # error sensitivity
}
model = SVR(**svr_params)

# Preprocessing and the regressor are chained so both are fitted together.
pipeline = Pipeline(steps=[('preprocess', preprocess), ('model', model)])

# =========================
# 6) Fit and predict
# =========================
# Locate the ebraz column (case-insensitive match) BEFORE training. Fitting
# the SVR is the costly step of this script, so a missing column should abort
# the run up front instead of after the model has already been trained.
ebraz_col = next((c for c in test_df.columns if c.lower() == 'ebraz'), None)
if ebraz_col is None:
    raise ValueError("ebraz column not found for zero-forcing DECLARED.")

pipeline.fit(X_train, y_train)
declared_pred = pipeline.predict(X_test)

# =========================
# 7) Build output Excel for 2024+
#    If ebraz==0, force DECLARED=0
# =========================
output_df = test_df.copy()
# Note: this overwrites a DECLARED column if the input already carried one.
output_df['DECLARED'] = declared_pred

# Rows flagged with ebraz == 0 get a declared value of exactly zero,
# regardless of what the model predicted for them.
output_df.loc[output_df[ebraz_col] == 0, 'DECLARED'] = 0

# Keep the input column order and append DECLARED only when the input did not
# already contain it. Listing the label twice would make reindex emit the
# column twice in the exported sheet; dict.fromkeys de-duplicates while
# preserving first-seen order.
ordered_cols = list(dict.fromkeys([*df.columns, 'DECLARED']))
output_df = output_df.reindex(columns=ordered_cols)

output_excel = "merged_output2_svm_selected_2024.xlsx"
output_df.to_excel(output_excel, index=False)

print(f"✅ Done. Saved SVM predictions to {output_excel}")
