### Model testing script: SHAP explanations for the per-cluster EBM regressors

In [None]:
import os, pandas as pd, numpy as np, joblib, shap, matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest

# 1) Read the featured + clustered test set.
#    Column names and their order are taken as-is from the CSV, so the file
#    has to follow the schema the saved models were fitted on.
df = pd.read_csv('CSV_files/final_clustered_Test_set.csv')

# 2) Feature list = every CSV column that is not an identifier, a label or a
#    score/prediction column produced by a model.
# NOTE(review): 'kpl_mean' is not excluded here, yet a later step of this
# script compares it against y_pred -- confirm it really is a model input
# and not the regression target.
exclude = ['deviceID','tripID','cluster','label',
           'anomaly_score_ebm','alert_ebm',
           'anomaly_score_iforest','alert_iforest',
           'y_pred','residual']
feature_cols = df.columns[~df.columns.isin(exclude)].tolist()  # CSV column order is preserved

# 3) Back-fill predictions when the CSV does not already carry them.
if 'y_pred' not in df.columns:
    df['y_pred'] = np.nan
    for cluster_id in sorted(df['cluster'].unique()):
        # One saved EBM regressor per cluster; the file name carries the integer cluster id.
        ebm = joblib.load(f'models/ebm_regressor_cluster_{int(cluster_id)}.joblib')
        in_cluster = df['cluster'].eq(cluster_id)
        # Predict only this cluster's rows and write the values back in place.
        df.loc[in_cluster, 'y_pred'] = ebm.predict(df.loc[in_cluster, feature_cols])

# Output folders: one for per-row force plots, one for per-cluster summaries.
os.makedirs('explanations_new/per_row', exist_ok=True)
os.makedirs('explanations_new/summary', exist_ok=True)

MAX_ROWS = 20      # cap on explained rows per cluster
RANDOM_SEED = 42   # keeps the row sampling reproducible

# Decide ONCE, before the loop, whether 'residual' has to be derived here.
# Testing "'residual' not in df.columns" inside the loop is only true for the
# first cluster that needs it: filling that cluster creates the column, and
# every later cluster would then sort on NaN residuals.
derive_residual = (
    'residual' not in df.columns
    and {'kpl_mean', 'y_pred'}.issubset(df.columns)
)

# 4) Explain per cluster
for c in sorted(df['cluster'].unique()):
    m_cluster = df['cluster'] == c
    n_cluster = int(m_cluster.sum())
    if n_cluster == 0:
        # Reached e.g. for a NaN cluster id, which never equals itself.
        continue

    # Load the model that belongs to this cluster.
    model = joblib.load(f'models/ebm_regressor_cluster_{int(c)}.joblib')

    # Background sample for SHAP: at most 200 rows of this cluster, fixed seed.
    X_bg = df.loc[m_cluster, feature_cols].sample(
        n=min(200, n_cluster), random_state=42
    )

    # SHAP needs a callable. Wrap predict() so the array SHAP passes in gets
    # the feature names back. The model is bound as a default argument so the
    # function keeps pointing at THIS cluster's model.
    def predict_fn(X, _model=model):
        return _model.predict(pd.DataFrame(X, columns=feature_cols))

    explainer = shap.Explainer(predict_fn, X_bg)

    # 4a) Global summary over every row of the cluster
    Xc = df.loc[m_cluster, feature_cols]
    sv = explainer(Xc)
    shap.summary_plot(sv, Xc, show=False)
    plt.savefig(f'explanations_new/summary/shap_summary_cluster_{int(c)}.png', dpi=160, bbox_inches='tight')
    plt.close()

    # 4b) Per-row force plots: alerted rows if there are any, otherwise the
    #     cluster's rows ordered by residual.
    sub = df.loc[m_cluster]
    if 'alert_ebm' in sub.columns:
        sub = sub[sub['alert_ebm'] == True]  # noqa: E712 -- element-wise comparison

    if sub.empty:
        if derive_residual:
            df.loc[m_cluster, 'residual'] = (
                df.loc[m_cluster, 'kpl_mean'] - df.loc[m_cluster, 'y_pred']
            ).abs()
        sub = df.loc[m_cluster]
        # Sort only when a residual column is available; without one the
        # cluster rows are used in file order instead of raising KeyError.
        if 'residual' in sub.columns:
            sub = sub.sort_values('residual', ascending=False)

    # Random sample instead of always taking the first MAX_ROWS rows.
    # NOTE(review): this also applies to the residual fallback, so for clusters
    # larger than MAX_ROWS the residual ordering does not influence which rows
    # get explained -- confirm that is intended.
    if len(sub) > MAX_ROWS:
        sub = sub.sample(n=MAX_ROWS, random_state=RANDOM_SEED)

    # Feature matrix of the selected rows with the original column dtypes.
    X_sub = sub[feature_cols]

    for pos, (_, r) in enumerate(sub.iterrows()):
        # iterrows() does not preserve dtypes (a mixed-type row comes back as
        # an object Series), so the one-row frame for the model is sliced from
        # the typed frame. 'r' is only used for the output file name.
        x_row = X_sub.iloc[[pos]]

        sv_row = explainer(x_row)            # explain that single row
        base = float(sv_row.base_values[0])  # scalar baseline
        vals = sv_row.values[0]              # 1D SHAP vector
        feat = x_row.iloc[0]                 # the row as a Series

        name = f"force_cluster{int(c)}_dev{r.get('deviceID','NA')}_trip{r.get('tripID','NA')}"

        # Interactive HTML; the SHAP JS bundle is embedded so the file opens locally.
        html = shap.force_plot(base, vals, feat, matplotlib=False)
        with open(f'explanations_new/per_row/{name}.html', 'w', encoding='utf-8') as f:
            f.write(shap.getjs() + html.html())

        # Static PNG. SHAP opens its own figure for the matplotlib force plot,
        # so no plt.figure() beforehand: that extra empty figure was never
        # closed and accumulated with every explained row.
        fig = shap.force_plot(base, vals, feat, matplotlib=True, show=False)
        if fig is None:
            # Fallback for SHAP versions that do not return the figure.
            fig = plt.gcf()
        fig.savefig(f'explanations_new/per_row/{name}.png', dpi=200, bbox_inches='tight')
        plt.close(fig)


PermutationExplainer explainer: 540it [00:54,  8.12it/s]                         
  shap.summary_plot(sv, Xc, show=False)
  fig, ax = plt.subplots(figsize=figsize)
PermutationExplainer explainer: 4067it [02:43, 23.27it/s]                          
  shap.summary_plot(sv, Xc, show=False)
  fig, ax = plt.subplots(figsize=figsize)
  plt.figure()
