# Model Explainability with SHAP

This notebook provides interpretability analysis using SHAP for the trained models.

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# SHAP is treated as an optional dependency: try the import once and record
# the outcome in SHAP_AVAILABLE so the SHAP cell further down can branch on it.
try:
    import shap
except ImportError:
    SHAP_AVAILABLE = False
    print("SHAP not available")
else:
    # Reached only when the import above succeeded.
    SHAP_AVAILABLE = True

# Plot appearance: seaborn 'whitegrid' style and a default figure size of (12, 8)
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 8)})

SHAP not available


In [2]:
# Load models and data
# NOTE: every path in this cell is relative ('../...'), so it is resolved
# against the kernel's current working directory; a missing file raises
# FileNotFoundError on the first load that cannot find its artifact.
# joblib.load unpickles the file, which can execute arbitrary code -- only
# load artifacts that come from a trusted source.
rf_fraud_model = joblib.load('../models/fraud_rf_model.pkl')
rf_credit_model = joblib.load('../models/credit_rf_model.pkl')

# Load test data (original, not SMOTE)
# NOTE(review): each CSV is read in full; whether these files are a held-out
# test split cannot be confirmed from this cell -- verify against the
# preprocessing/training notebooks.
fraud_df = pd.read_csv('../data/processed/fraud_data_processed.csv')
credit_df = pd.read_csv('../data/processed/creditcard_processed.csv')

# Load preprocessors
# fraud_preprocessor is later used via .transform() on the fraud features;
# credit_scaler and fraud_feature_names are loaded here for later use.
fraud_preprocessor = joblib.load('../models/fraud_preprocessor.pkl')
credit_scaler = joblib.load('../models/credit_scaler.pkl')
fraud_feature_names = joblib.load('../data/processed/fraud_feature_names.pkl')

print("Models and data loaded successfully")

FileNotFoundError: [Errno 2] No such file or directory: '../models/fraud_rf_model.pkl'

In [None]:
# Prepare test data for fraud model.
# The listed columns and the 'class' label are removed so that only the
# remaining columns are handed to the fitted preprocessor.
cols_to_drop = ['user_id', 'signup_time', 'purchase_time', 'device_id', 'ip_address', 'ip_address_int']
fraud_test_features = fraud_df.drop(columns=cols_to_drop + ['class'])
fraud_test_target = fraud_df['class']

# Transform
X_fraud_test = fraud_preprocessor.transform(fraud_test_features)

# Take a sample for SHAP.
# Row count comes from .shape[0] rather than len(): len() raises TypeError on
# scipy sparse matrices, which the preprocessor may return (e.g. when it
# one-hot encodes) -- .shape[0] works for both dense and sparse output.
n_fraud_rows = X_fraud_test.shape[0]
sample_size = min(1000, n_fraud_rows)

# Seeded generator so the same rows are selected on every Restart & Run All;
# an unseeded draw would make the SHAP results differ between runs.
SHAP_SAMPLE_SEED = 42
shap_sample_rng = np.random.default_rng(SHAP_SAMPLE_SEED)
sample_indices = shap_sample_rng.choice(n_fraud_rows, size=sample_size, replace=False)

# The same positional indices select matching feature rows and labels.
X_fraud_sample = X_fraud_test[sample_indices]
y_fraud_sample = fraud_test_target.iloc[sample_indices]

print("Fraud test sample shape:", X_fraud_sample.shape)

In [None]:
if SHAP_AVAILABLE:
    # Create SHAP explainer for fraud model
    explainer_fraud = shap.TreeExplainer(rf_fraud_model)

    # Calculate SHAP values
    shap_values_fraud = explainer_fraud.shap_values(X_fraud_sample)

    # Reduce to a single 2-D (samples, features) array for class index 1
    # (assumed to be the positive/fraud class -- confirm against
    # rf_fraud_model.classes_). The container depends on the SHAP version:
    #   * older releases return a list with one array per class;
    #   * newer releases return one ndarray with a trailing class axis.
    # Without the ndarray branch the 3-D result would pass through unchanged.
    if isinstance(shap_values_fraud, list):
        shap_values_fraud = shap_values_fraud[1]
    elif isinstance(shap_values_fraud, np.ndarray) and shap_values_fraud.ndim == 3:
        shap_values_fraud = shap_values_fraud[:, :, 1]

    print("SHAP values calculated for fraud model")
else:
    print("SHAP not available, skipping analysis")