In [2]:
# pip install shap

In [3]:
import pandas as pd
import numpy as np
import shap
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os
from datetime import datetime

# ================================
# Paths
# ================================
# Excel workbook that receives the per-window SHAP feature ranking.
SHAP_OUTPUT_FILE = r"C:\BI\shap_feature_importance_windows_8341_g11.xlsx"
# Plain-text run log; log() appends one timestamped line per message.
LOG_FILE = r"C:\BI\regression_assetid_8341_log.txt"
# Excel workbook the input records are read from.
INPUT_FILE = r"C:\BI\lube_oil_system_data_g11.xlsx"

# Make sure the working directory exists before anything is written to it;
# exist_ok=True keeps reruns from failing when it is already there.
os.makedirs(r"C:\BI", exist_ok=True)

def log(msg):
    """Echo *msg* to stdout and append it to ``LOG_FILE`` with a timestamp.

    Each appended line has the form ``[YYYY-MM-DD HH:MM:SS] msg``. The log
    file is opened in append mode with UTF-8 encoding on every call.
    """
    print(msg)
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(LOG_FILE, mode="a", encoding="utf-8") as log_handle:
        log_handle.write(f"[{stamp}] {msg}\n")

log("شروع اجرای مدل و محاسبه اهمیت فیچرها (فقط خروجی SHAP)")

# ================================
# Load the data
# ================================
df = pd.read_excel(INPUT_FILE)
log(f"داده خوانده شد → {len(df):,} ردیف")

# ================================
# Build the datetime column
# ================================
# RecordDate and RecordTime are joined with a single space and parsed as one
# timestamp; anything unparseable becomes NaT (errors="coerce"). The frame is
# then ordered chronologically and re-indexed from 0.
df = (
    df.assign(
        datetime=pd.to_datetime(
            df["RecordDate"] + " " + df["RecordTime"], errors="coerce"
        )
    )
    .sort_values("datetime")
    .reset_index(drop=True)
)

# ================================
# Data preparation
# ================================
TARGET_COL = "AssetID_8341"

# Model columns are everything except the row id and the date/time columns;
# the target is then dropped to leave only the predictors.
feature_cols = [c for c in df.columns if c not in ['id', 'RecordDate', 'RecordTime', 'datetime']]
X = df[feature_cols].drop(columns=[TARGET_COL])
y = df[TARGET_COL]

# Split BEFORE scaling. With shuffle=False the first 80% of rows (in the
# frame's current order) are the training set and the last 20% the test set.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Fit the scaler on the training rows only, so the mean/variance used for
# standardisation never see the held-out rows (no preprocessing leakage).
scaler = StandardScaler()
scaler.fit(X_train_raw)

# The full scaled matrix is still needed later for SHAP over every row.
X_scaled = scaler.transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)

# Train/test arrays are the matching leading/trailing slices of X_scaled.
n_train = len(X_train_raw)
X_train = X_scaled[:n_train]
X_test = X_scaled[n_train:]

# ================================
# Model
# ================================
# fit() returns the estimator itself, so construction and training are
# chained into a single expression.
model = XGBRegressor(
    random_state=42,
    n_jobs=-1,
    n_estimators=800,
    learning_rate=0.03,
    max_depth=7,
    subsample=0.9,
    colsample_bytree=0.8,
).fit(X_train, y_train)
log("مدل آموزش دید")

# ================================
# SHAP computation
# ================================
explainer = shap.TreeExplainer(model)
# SHAP values are computed for every row of the scaled feature frame.
shap_values = explainer.shap_values(X_scaled_df)

# One SHAP column per feature, plus the row timestamp (aligned on the index)
# so the values can be sliced by time window afterwards.
shap_df = pd.DataFrame(shap_values, columns=X.columns).assign(
    datetime=df["datetime"]
)

# ================================
# Feature-importance helper
# ================================
def compute_importance(window_df):
    """Rank the columns of *window_df* by mean absolute value, largest first.

    Parameters
    ----------
    window_df : pandas.DataFrame
        One column per feature; the values are the per-row contributions
        to be aggregated.

    Returns
    -------
    list
        Column labels ordered from highest to lowest mean absolute value,
        or an empty list when *window_df* has no rows.
    """
    if not len(window_df):
        return []
    mean_abs = window_df.abs().mean()
    ranked = mean_abs.sort_values(ascending=False)
    return list(ranked.index)

# ================================
# Window definitions
# ================================
# All windows are anchored on the most recent timestamp in the data.
end_date = df["datetime"].max()

# Two families of back-to-back windows, each stored as (start, end):
# three consecutive 10-day slices and three consecutive calendar-month slices.
windows = {
    "10_days": (end_date - pd.Timedelta(days=10), end_date),
    "10_to_20_days": (end_date - pd.Timedelta(days=20), end_date - pd.Timedelta(days=10)),
    "20_to_30_days": (end_date - pd.Timedelta(days=30), end_date - pd.Timedelta(days=20)),
    "1_month": (end_date - pd.DateOffset(months=1), end_date),
    "2_month": (end_date - pd.DateOffset(months=2), end_date - pd.DateOffset(months=1)),
    "3_month": (end_date - pd.DateOffset(months=3), end_date - pd.DateOffset(months=2)),
}

# ================================
# Feature importance per window
# ================================
importance_table = {}

for key, (start, end) in windows.items():
    # Half-open interval (start, end]: adjacent windows share a boundary
    # timestamp, and an inclusive test on both sides would count a row that
    # falls exactly on it in two windows. The upper bound stays inclusive so
    # the latest row (== end_date) is kept in the most recent window.
    in_window = (shap_df["datetime"] > start) & (shap_df["datetime"] <= end)
    win = shap_df.loc[in_window].drop(columns=["datetime"])
    importance_table[key] = compute_importance(win)

# Longest ranking across windows; empty windows yield shorter (empty) lists
# that must be padded to this length when the table is assembled.
max_len = max(len(v) for v in importance_table.values())

# ================================
# Build the 6-column output table
# ================================
# Each output column holds one window's ranked feature names, right-padded
# with empty strings to max_len so all columns have equal length.
output_table = pd.DataFrame({
    column: importance_table[key] + [""] * (max_len - len(importance_table[key]))
    for column, key in (
        ("Top_Features_10_Days", "10_days"),
        ("Top_Features_10_to_20_Days", "10_to_20_days"),
        ("Top_Features_20_to_30_Days", "20_to_30_days"),
        ("Top_Features_1_Month", "1_month"),
        ("Top_Features_2_Month", "2_month"),
        ("Top_Features_3_Month", "3_month"),
    )
})

# ================================
# Save the output
# ================================
output_table.to_excel(SHAP_OUTPUT_FILE, index=False)

log("فایل SHAP با 6 ستون ساخته شد")
# Console banner: blank line, rule, two message lines, rule.
print(
    "\n" + "=" * 70,
    "SHAP 6-column feature importance file created!",
    f"Saved to: {SHAP_OUTPUT_FILE}",
    "=" * 70,
    sep="\n",
)


شروع اجرای مدل و محاسبه اهمیت فیچرها (فقط خروجی SHAP)
داده خوانده شد → 10,927 ردیف
مدل آموزش دید
فایل SHAP با 6 ستون ساخته شد

SHAP 6-column feature importance file created!
Saved to: C:\BI\shap_feature_importance_windows_8341_g11.xlsx
