# Load the processed dataset

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib
import shap
import lime
import lime.lime_tabular
import matplotlib.pyplot as plt

# Read both preprocessed CSV exports in one pass: the fraud dataset first,
# then the credit card dataset.
fraud_data, credit_card_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/processed_fraud_data_with_country.csv',
        '../data/processed/processed_credit_card_data.csv',
    )
)

  from .autonotebook import tqdm as notebook_tqdm


## Split the data into features and target

In [2]:
# Fraud dataset: the label is stored in the lowercase 'class' column;
# every other column is treated as a feature.
y_fraud = fraud_data['class']
X_fraud = fraud_data.drop('class', axis=1)

# Credit card dataset: same idea, but the label column is capitalised ('Class').
y_credit = credit_card_data['Class']
X_credit = credit_card_data.drop('Class', axis=1)

## Split the data into training and test sets

In [3]:
# Hold out 20% of each dataset as a test set. The fixed random_state makes
# the partition repeatable across kernel restarts.
(X_train_fraud, X_test_fraud,
 y_train_fraud, y_test_fraud) = train_test_split(
    X_fraud,
    y_fraud,
    test_size=0.2,
    random_state=42,
)

(X_train_credit, X_test_credit,
 y_train_credit, y_test_credit) = train_test_split(
    X_credit,
    y_credit,
    test_size=0.2,
    random_state=42,
)

## Loading Pre-trained Models
This section assumes the models have already been trained and saved under `notebooks/models/fraud_data/` and `notebooks/models/credit_card_data/`; they are loaded below using paths relative to this notebook:

In [5]:
# Locations of the serialized random forest models, relative to this notebook.
fraud_model_path, credit_model_path = (
    './models/fraud_data/random_forest_model.joblib',
    './models/credit_card_data/random_forest_model.joblib',
)

# Deserialize both estimators. joblib files are pickle-based, so only load
# artifacts that come from a trusted source.
fraud_model = joblib.load(fraud_model_path)
credit_model = joblib.load(credit_model_path)

# Model Explainability with SHAP and LIME
This section uses the test sets defined above (`X_test_fraud`, `X_test_credit`) and the libraries imported in the first cell (`shap`, `lime`, `matplotlib.pyplot`):

## SHAP Explainability

In [6]:
# Keep only the numeric columns of each test frame for the explainers below.
# NOTE(review): this assumes the saved models were fit on exactly these
# numeric columns -- confirm against the training notebook.
X_test_fraud_numeric, X_test_credit_numeric = (
    test_frame.select_dtypes(include='number')
    for test_frame in (X_test_fraud, X_test_credit)
)

In [7]:
# SHAP explainability for fraud model
explainer_shap_fraud = shap.TreeExplainer(fraud_model)
shap_values_fraud = explainer_shap_fraud.shap_values(X_test_fraud_numeric)

# Depending on the installed SHAP version, a binary classifier yields either
# a list with one (n_samples, n_features) array per class, or a single
# (n_samples, n_features, n_classes) array. A 3-D array handed straight to
# summary_plot is interpreted as interaction values, so select the class-1
# attributions explicitly to always plot a 2-D matrix.
if isinstance(shap_values_fraud, list):
    shap_values_fraud_pos = shap_values_fraud[1]
elif getattr(shap_values_fraud, 'ndim', 2) == 3:
    shap_values_fraud_pos = shap_values_fraud[:, :, 1]
else:
    # Already a single 2-D matrix (one output); use it as-is.
    shap_values_fraud_pos = shap_values_fraud

# SHAP summary plot for fraud model (attributions toward class index 1)
shap.summary_plot(shap_values_fraud_pos, X_test_fraud_numeric)
plt.show()

In [None]:
# Normalise the SHAP output to a 2-D (n_samples, n_features) matrix for class
# index 1. Older SHAP versions return a per-class list, newer ones a 3-D
# array, so plain `shap_values_fraud[1]` is not safe across versions.
if isinstance(shap_values_fraud, list):
    fraud_shap_class1 = shap_values_fraud[1]
elif getattr(shap_values_fraud, 'ndim', 2) == 3:
    fraud_shap_class1 = shap_values_fraud[:, :, 1]
else:
    fraud_shap_class1 = shap_values_fraud

# expected_value holds one baseline per class for multi-output models and a
# plain scalar otherwise; take the class-1 baseline when there is a choice.
fraud_base_value = explainer_shap_fraud.expected_value
if hasattr(fraud_base_value, '__len__') and len(fraud_base_value) > 1:
    fraud_base_value = fraud_base_value[1]

# SHAP force plot for the first instance in the fraud test set.
# matplotlib=True renders the figure immediately; the default JavaScript
# widget would need shap.initjs() and would be silently dropped here because
# it is not the last expression of the cell.
shap.force_plot(
    fraud_base_value,
    fraud_shap_class1[0],
    X_test_fraud_numeric.iloc[0],
    matplotlib=True,
)

# SHAP dependence plot for the most influential feature. 'rank(0)' selects
# the feature with the largest mean |SHAP| value, which avoids hard-coding a
# column name that may not exist in the dataset.
shap.dependence_plot('rank(0)', fraud_shap_class1, X_test_fraud_numeric)
plt.show()

In [None]:
# SHAP explainability for credit card model
explainer_shap_credit = shap.TreeExplainer(credit_model)
shap_values_credit = explainer_shap_credit.shap_values(X_test_credit_numeric)

# Depending on the installed SHAP version, a binary classifier yields either
# a list with one (n_samples, n_features) array per class, or a single
# (n_samples, n_features, n_classes) array. A 3-D array handed straight to
# summary_plot is interpreted as interaction values, so select the class-1
# attributions explicitly to always plot a 2-D matrix.
if isinstance(shap_values_credit, list):
    shap_values_credit_pos = shap_values_credit[1]
elif getattr(shap_values_credit, 'ndim', 2) == 3:
    shap_values_credit_pos = shap_values_credit[:, :, 1]
else:
    # Already a single 2-D matrix (one output); use it as-is.
    shap_values_credit_pos = shap_values_credit

# SHAP summary plot for credit card model (attributions toward class index 1)
shap.summary_plot(shap_values_credit_pos, X_test_credit_numeric)
plt.show()

In [None]:
# Normalise the SHAP output to a 2-D (n_samples, n_features) matrix for class
# index 1. Older SHAP versions return a per-class list, newer ones a 3-D
# array, so plain `shap_values_credit[1]` is not safe across versions.
if isinstance(shap_values_credit, list):
    credit_shap_class1 = shap_values_credit[1]
elif getattr(shap_values_credit, 'ndim', 2) == 3:
    credit_shap_class1 = shap_values_credit[:, :, 1]
else:
    credit_shap_class1 = shap_values_credit

# expected_value holds one baseline per class for multi-output models and a
# plain scalar otherwise; take the class-1 baseline when there is a choice.
credit_base_value = explainer_shap_credit.expected_value
if hasattr(credit_base_value, '__len__') and len(credit_base_value) > 1:
    credit_base_value = credit_base_value[1]

# SHAP force plot for the first instance in the credit card test set.
# matplotlib=True renders the figure immediately; the default JavaScript
# widget would need shap.initjs() and would be silently dropped here because
# it is not the last expression of the cell.
shap.force_plot(
    credit_base_value,
    credit_shap_class1[0],
    X_test_credit_numeric.iloc[0],
    matplotlib=True,
)

# SHAP dependence plot for the most influential feature. 'rank(0)' selects
# the feature with the largest mean |SHAP| value, which avoids hard-coding a
# column name that may not exist in the dataset.
shap.dependence_plot('rank(0)', credit_shap_class1, X_test_credit_numeric)
plt.show()

## LIME Explainability

In [None]:
# LIME explainability for fraud model
# Fit the explainer on the same numeric-only feature set that the explained
# test rows come from; using the unfiltered training frame would give LIME a
# different (possibly non-numeric) set of columns than the instance it explains.
X_train_fraud_numeric = X_train_fraud.select_dtypes(include=['number'])
explainer_lime_fraud = lime.lime_tabular.LimeTabularExplainer(
    X_train_fraud_numeric.values,
    feature_names=X_train_fraud_numeric.columns.tolist(),
    class_names=['Not Fraud', 'Fraud'],
    discretize_continuous=True,
    random_state=42,  # LIME samples perturbations; seed it so reruns match
)

# Explain the prediction for the first instance in the fraud test set
i = 0
exp_fraud = explainer_lime_fraud.explain_instance(
    X_test_fraud_numeric.iloc[i].values,
    fraud_model.predict_proba,
    num_features=10,
)

# Show the explanation in a notebook
exp_fraud.show_in_notebook(show_all=False)

# LIME feature importance plot for fraud model
exp_fraud.as_pyplot_figure()
plt.show()

In [None]:
# LIME explainability for credit card model
# Fit the explainer on the same numeric-only feature set that the explained
# test rows come from; using the unfiltered training frame would give LIME a
# different (possibly non-numeric) set of columns than the instance it explains.
X_train_credit_numeric = X_train_credit.select_dtypes(include=['number'])
explainer_lime_credit = lime.lime_tabular.LimeTabularExplainer(
    X_train_credit_numeric.values,
    feature_names=X_train_credit_numeric.columns.tolist(),
    class_names=['Class 0', 'Class 1'],
    discretize_continuous=True,
    random_state=42,  # LIME samples perturbations; seed it so reruns match
)

# Explain the prediction for the first instance in the credit card test set
i = 0
exp_credit = explainer_lime_credit.explain_instance(
    X_test_credit_numeric.iloc[i].values,
    credit_model.predict_proba,
    num_features=10,
)

# Show the explanation in a notebook
exp_credit.show_in_notebook(show_all=False)

# LIME feature importance plot for credit card model
exp_credit.as_pyplot_figure()
plt.show()