## Goal: Explore explainability and feature importance with an *EBM* (Explainable Boosting Machine) model

### 1. Data Import and Setup

Import the necessary libraries and set up the environment paths.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from interpret import show
from interpret.glassbox import ExplainableBoostingRegressor

Define constants:
* `INPUT_DIR` must be the same as the one defined in *00 Preprocessing/Feature Engineering*.
* `TO_DROP` lists the columns that are removed from the datasets to build the predictors.

In [None]:
# Relative path of the folder that holds the pre-processed CSV splits.
INPUT_DIR = "../../../../data/input/"

# Columns dropped from each dataset when building the predictor matrices:
# the `station_code` column and the four weekly `water_flow_week*` columns.
TO_DROP = [
    "water_flow_week1",
    "station_code",
    "water_flow_week2",
    "water_flow_week3",
    "water_flow_week4",
]

### 2. Load Your Pre-Processed Data

Same as in *02 - Training notebook* but only for the first week.

In [None]:
def _load_split(csv_path):
    """Read one CSV split, parse its ``ObsDate`` column to datetime and use it as the index."""
    frame = pd.read_csv(csv_path)
    frame["ObsDate"] = pd.to_datetime(frame["ObsDate"])
    return frame.set_index("ObsDate")


# The training set and the two evaluation splits go through the same loading steps.
ds_train = _load_split(f"{INPUT_DIR}ds_train.csv")
ds_test_spatio_temporal = _load_split(f"{INPUT_DIR}ds_test_spatio_temporal.csv")
ds_test_temporal = _load_split(f"{INPUT_DIR}ds_test_temporal.csv")


Create the predictor and target datasets for week 1.

In [None]:
# Name of the week-1 target column. It is also listed in TO_DROP, so it is
# removed from the predictors and cannot leak into them.
target_col = "water_flow_week1"

# Training set: predictors and week-1 target.
X_train = ds_train.drop(columns=TO_DROP)
y_train = ds_train[target_col]

# Spatio-temporal evaluation split.
X_test_spatio_temporal = ds_test_spatio_temporal.drop(columns=TO_DROP)
y_test_spatio_temporal = ds_test_spatio_temporal[target_col]

# Temporal evaluation split.
X_test_temporal = ds_test_temporal.drop(columns=TO_DROP)
y_test_temporal = ds_test_temporal[target_col]

### 3. Train an Explainable Boosting Model
EBM is an interpretable model that provides insights into feature importance and individual predictions.

In [None]:
# Fit an Explainable Boosting regressor, with the library's default settings,
# on the training predictors and target.
ebm = ExplainableBoostingRegressor().fit(X_train, y_train)
ebm  # show the fitted estimator as the cell output


### 4. Visualize Feature Importance
Once the model is trained, you can explore how different features contribute to the predictions.

In [None]:
# Build the model-wide explanation (feature importance and interactions),
# then render it with the interactive viewer.
global_explanation = ebm.explain_global()
show(global_explanation)


### 6. Explore Individual Predictions
Use local explanations to understand how the model arrived at its prediction for specific data points.

#### 6.a On the Spatio-temporal Split


In [None]:
# Explain the first 10 rows of the spatio-temporal evaluation split.
# The slice is kept as a DataFrame (no `.values`) so that column names and
# per-column dtypes reach the model in the same form as the training data,
# instead of relying on column order in a bare array.
instance = X_test_spatio_temporal.iloc[0:10]
show(ebm.explain_local(instance, y_test_spatio_temporal.iloc[0:10]))


#### 6.b On the Temporal Split


In [None]:
# Explain the first 10 rows of the temporal evaluation split.
# The slice is kept as a DataFrame (no `.values`) so that column names and
# per-column dtypes reach the model in the same form as the training data,
# instead of relying on column order in a bare array.
instance = X_test_temporal.iloc[0:10]
show(ebm.explain_local(instance, y_test_temporal.iloc[0:10]))