In [1]:
# Add the parent directory to the module search path so the `src` package can be imported
import sys
sys.path.append('..')
sys.dont_write_bytecode = True

# Import relevant packages
from src.eda import data_info
from src.models import AutoencoderTrainer, AnomalyDetector, AutoencoderTuner

# Import necessary libraries
import shap
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle

In [None]:
def explain_with_kernelshap(detector, X_test, background_size=100,
                            nsamples='auto', X_explain=None, random_state=42):
    """
    Applies SHAP KernelExplainer to anomaly scores from a detector.

    Parameters:
    - detector: object exposing a _compute_anomaly_scores(pd.DataFrame) method
    - X_test: pd.DataFrame, data the background sample is drawn from; it is
      also the data that gets explained unless X_explain is given
    - background_size: int (>= 1), maximum number of rows sampled from X_test
      to use as the KernelExplainer background
    - nsamples: forwarded unchanged to explainer.shap_values (default 'auto')
    - X_explain: optional pd.DataFrame with the same columns as X_test; when
      given, only these rows are explained. KernelSHAP is slow per row, so
      passing a subset here keeps runtimes manageable.
    - random_state: seed used when sampling the background rows

    Returns:
    - shap_values: whatever explainer.shap_values returns for the explained
      rows (NOTE(review): presumably one array of shape (n_rows, n_features)
      when the scorer returns one score per row -- confirm against the
      detector's output)
    - explainer: the shap.KernelExplainer instance built on the background

    Raises:
    - ValueError: if X_test is empty or background_size is smaller than 1
    """
    if len(X_test) == 0:
        raise ValueError("X_test is empty; cannot build a SHAP background set.")
    if background_size < 1:
        raise ValueError("background_size must be at least 1.")

    feature_names = X_test.columns

    # Use a subset of the test set as background, capped at the rows available
    background = X_test.sample(
        n=min(background_size, len(X_test)), random_state=random_state
    )

    # Rows to explain: everything by default (original behaviour), otherwise
    # the caller-supplied frame re-ordered to the column order the scorer uses
    if X_explain is None:
        rows_to_explain = X_test
    else:
        rows_to_explain = X_explain.loc[:, feature_names]

    # SHAP calls the model with plain arrays; rebuild a DataFrame with the
    # original column names before handing it to the detector
    def anomaly_scorer(X_input):
        X_df = pd.DataFrame(X_input, columns=feature_names)
        return detector._compute_anomaly_scores(X_df)

    # Instantiate the KernelExplainer
    explainer = shap.KernelExplainer(anomaly_scorer, background)

    # Compute SHAP values for the selected instances
    shap_values = explainer.shap_values(rows_to_explain, nsamples=nsamples)

    return shap_values, explainer

In [3]:
# Load the test features and labels from the processed-data directory
X_test = pd.read_feather("../data/processed/X_test.feather")
y_test = pd.read_feather("../data/processed/y_test.feather")

# --- Group feature names by the variable type reported by data_info ---
var_info = data_info(X_test)
all_cols = X_test.columns
real_cols = var_info.loc[var_info["var_type"].eq("numerical"), "var_name"].tolist()
binary_cols = var_info.loc[var_info["var_type"].eq("binary"), "var_name"].tolist()

# Identifier shared by the saved model directory and its hyperparameter file
version = "202504180259"

# Restore the saved Keras model and the parameters pickled under the same version.
# NOTE(review): pickle.load executes arbitrary code; only load trusted files.
model = tf.keras.models.load_model(f"../models/baseline/{version}")
with open(f"../hyperparams/baseline/{version}.pkl", "rb") as f:
    params = pickle.load(f)

# Build the detector around the restored model (nothing is trained in this cell)
detector = AnomalyDetector(
    model=model,
    real_cols=real_cols,
    binary_cols=binary_cols,
    all_cols=all_cols,
    lam=params['lam'],
    gamma=params['gamma'],
)



In [None]:
# Long-running: the recorded run estimated ~1.5 hours for the 11457 test rows.
# Bind the results to names so they stay available to later cells instead of
# being reachable only through the cell's raw output.
shap_values, explainer = explain_with_kernelshap(detector, X_test)

  0%|          | 26/11457 [00:12<1:34:44,  2.01it/s]