In [None]:
# -----------------------------------------------------------
# IMPORTS
# -----------------------------------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor
from statsmodels.tsa.arima.model import ARIMA
import shap
import joblib
import warnings
warnings.filterwarnings("ignore")

# -----------------------------------------------------------
# LOAD DATA
# -----------------------------------------------------------
# Location of the raw dataset (absolute path on the author's machine).
CSV_PATH = r"C:/Users/hp/Desktop/Shalini - Project/Dataset.csv"

# Read the CSV, parse the "timestamp" column into datetimes, and order the
# rows chronologically so that later positional slicing follows time.
df = (
    pd.read_csv(CSV_PATH)
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .sort_values(by="timestamp")
)

# -----------------------------------------------------------
# FEATURE ENGINEERING
# -----------------------------------------------------------
# Calendar features derived from the parsed timestamp.
df["hour"] = df["timestamp"].dt.hour
df["dayofweek"] = df["timestamp"].dt.dayofweek
df["day"] = df["timestamp"].dt.day
df["month"] = df["timestamp"].dt.month

# Lagged copies of the target: its value 1, 24 and 168 rows earlier.
LAGS = [1, 24, 168]
for lag in LAGS:
    df[f"lag_{lag}"] = df["energy_consumption"].shift(lag)

# Rolling means are taken over the target shifted by one row, so each window
# covers only rows strictly before the current one. Without the shift the
# window would include the current row's own target value, leaking the
# quantity being predicted into its features.
past_consumption = df["energy_consumption"].shift(1)
df["roll_mean_24"] = past_consumption.rolling(24).mean()
df["roll_mean_168"] = past_consumption.rolling(168).mean()

# Drop every row that has a NaN in any column; this removes the warm-up rows
# at the start where the longest lag / rolling window is not yet filled.
df = df.dropna()

# -----------------------------------------------------------
# TRAIN / TEST SPLIT
# -----------------------------------------------------------
# Number of trailing rows held out for evaluation. Because df is sorted by
# timestamp, these are the most recent observations.
TEST_SIZE = 500

# Fail early with a clear message instead of producing an empty training set
# that only blows up later inside the scaler / model fit.
if len(df) <= TEST_SIZE:
    raise ValueError(
        f"Need more than {TEST_SIZE} rows after feature engineering, "
        f"got {len(df)}"
    )

# .copy() makes both splits independent frames, so later column assignments
# on `test` do not write into a slice of `df`.
train = df.iloc[:-TEST_SIZE].copy()
test = df.iloc[-TEST_SIZE:].copy()

# Model inputs: weather columns, calendar features, target lags and the
# rolling means.
feature_cols = [
    "temperature", "humidity", "solar_radiation", "wind_speed",
    "hour", "dayofweek", "day", "month",
    *(f"lag_{lag}" for lag in LAGS),
    "roll_mean_24", "roll_mean_168",
]

X_train = train[feature_cols]
y_train = train["energy_consumption"]

X_test = test[feature_cols]
y_test = test["energy_consumption"]

# -----------------------------------------------------------
# SCALE FEATURES
# -----------------------------------------------------------
# Fit the scaler on the training features only, then apply that same fitted
# transformation to both splits so no test statistics enter the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -----------------------------------------------------------
# BASELINE MODEL — ARIMA
# -----------------------------------------------------------
# Univariate baseline: ARIMA(5, 1, 3) fitted on the training target alone,
# then asked for one forecast value per held-out row.
arima_model = ARIMA(endog=y_train, order=(5, 1, 3))
arima_fit = arima_model.fit()
arima_forecast = arima_fit.forecast(steps=test.shape[0])

# Baseline error on the test rows; RMSE is the square root of the MSE.
arima_rmse = np.sqrt(mean_squared_error(y_test, arima_forecast))
arima_mae = mean_absolute_error(y_test, arima_forecast)

# -----------------------------------------------------------
# ADVANCED MODEL — GRADIENT BOOSTING (GBM)
# -----------------------------------------------------------
# Gradient boosting on the scaled feature matrix. With subsample < 1 each
# tree is fitted on a random fraction of the rows, so random_state is fixed
# to make the metrics, the saved model and the SHAP output reproducible
# from run to run.
gbm = GradientBoostingRegressor(
    n_estimators=400,
    learning_rate=0.03,
    max_depth=5,
    subsample=0.8,
    random_state=42,
)

gbm.fit(X_train_scaled, y_train)
gbm_pred = gbm.predict(X_test_scaled)

# Same two error metrics as the baseline, for a like-for-like comparison.
gbm_mae = mean_absolute_error(y_test, gbm_pred)
gbm_rmse = np.sqrt(mean_squared_error(y_test, gbm_pred))

# -----------------------------------------------------------
# SELECT BEST MODEL
# -----------------------------------------------------------
# Print MAE and RMSE for each model, baseline first.
for title, mae, rmse in (
    ("\nBaseline ARIMA", arima_mae, arima_rmse),
    ("\nAdvanced GBM", gbm_mae, gbm_rmse),
):
    print(title)
    print("MAE:", mae)
    print("RMSE:", rmse)

# The model with the strictly lower test RMSE wins; on a tie (or a NaN
# comparison) the ARIMA fit is kept.
if gbm_rmse < arima_rmse:
    best_model = gbm
else:
    best_model = arima_fit

# -----------------------------------------------------------
# SAVE MODELS
# -----------------------------------------------------------
# Persist the fitted GBM together with the scaler its inputs went through;
# both files are written relative to the current working directory.
for artifact, filename in (
    (gbm, "gbm_model.joblib"),
    (scaler, "feature_scaler.joblib"),
):
    joblib.dump(artifact, filename)

# -----------------------------------------------------------
# SHAP EXPLAINABILITY (FOR BEST MODEL = GBM)
# -----------------------------------------------------------
print("\nApplying SHAP explainability...")

# Tree explainer for the fitted GBM. SHAP values are computed on the scaled
# test matrix, i.e. the same representation the model was trained on.
explainer = shap.TreeExplainer(gbm)
shap_values = explainer.shap_values(X_test_scaled)

# Global view across all test rows. The unscaled X_test frame is passed so
# the plot carries the column names and the original feature values.
shap.summary_plot(shap_values, X_test, show=True)

# Local view: contribution breakdown for the final row of the test set.
last_shap_row = shap_values[-1]
last_feature_row = X_test.iloc[-1]
shap.force_plot(
    explainer.expected_value,
    last_shap_row,
    last_feature_row,
    matplotlib=True,
)

print("\nSHAP explanation complete.")

# -----------------------------------------------------------
# FINAL FORECASTS
# -----------------------------------------------------------
# Attach both forecasts to the held-out rows BY POSITION.
# The ARIMA forecast is a pandas Series whose index is produced by
# statsmodels and need not match the row labels of `test`. Assigning the
# Series directly would align on those labels and could shift values or
# fill them with NaN. np.asarray drops the index so row i of `test` gets
# forecast step i. assign() returns a new frame instead of mutating a slice
# of `df`; `test` is rebound so it still ends up carrying both columns.
test = test.assign(
    ARIMA_Forecast=np.asarray(arima_forecast),
    GBM_Forecast=gbm_pred,
)

test.to_csv("final_timeseries_forecasts_with_SHAP.csv", index=False)
print("\nForecast CSV saved: final_timeseries_forecasts_with_SHAP.csv")
