In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import shap

from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import xgboost as xgb

import warnings
# Notebook-wide setup.
# Silence every Python warning for the rest of the session (blanket filter:
# this also hides deprecation notices from the libraries imported above).
warnings.simplefilter('ignore')

# Load SHAP's JavaScript so its interactive plots can render in the notebook.
shap.initjs()

# Use seaborn's white-grid theme for all matplotlib figures below.
sns.set_style('whitegrid')

In [None]:
def _stratified_split(features, target):
    """Hold out 30% of the rows as a test set, stratified on `target`.

    Returns the 4-tuple produced by `train_test_split`:
    (features_train, features_test, target_train, target_test).
    """
    return train_test_split(
        features, target, test_size=0.3, random_state=42, stratify=target
    )


# A single resampler instance is shared by both datasets. It is only ever
# fitted on training rows, so the test splits stay untouched.
smote = SMOTE(random_state=42)

# --- E-commerce dataset (target column: 'class') ---
ecom_df = pd.read_csv('../data/processed/ecommerce_processed.csv')
y_ecom = ecom_df['class']
X_ecom = ecom_df.drop(columns='class')
X_ecom_train, X_ecom_test, y_ecom_train, y_ecom_test = _stratified_split(X_ecom, y_ecom)
X_ecom_train_smote, y_ecom_train_smote = smote.fit_resample(X_ecom_train, y_ecom_train)

# --- Credit-card dataset (target column: 'Class') ---
cc_df = pd.read_csv('../data/processed/creditcard_processed.csv')
y_cc = cc_df['Class']
X_cc = cc_df.drop(columns='Class')
X_cc_train, X_cc_test, y_cc_train, y_cc_test = _stratified_split(X_cc, y_cc)
X_cc_train_smote, y_cc_train_smote = smote.fit_resample(X_cc_train, y_cc_train)

print("Data loading and preparation steps reproduced.")

In [None]:
# --- E-commerce model ---
# Fitted on the SMOTE-resampled training split; no extra class weighting.
xgb_ecom = xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss')
xgb_ecom.fit(X_ecom_train_smote, y_ecom_train_smote)

# --- Credit-card model ---
# scale_pos_weight has to describe the class ratio of the labels that are
# actually passed to fit(). The model is trained on the SMOTE-resampled split,
# so the ratio is taken from y_cc_train_smote. Deriving it from the pre-SMOTE
# labels (y_cc_train) would correct for the imbalance twice - once through
# oversampling and again through a very large positive-class weight - and
# would skew both the predictions and the SHAP attributions computed later.
# NOTE(review): if class weighting *instead of* SMOTE was the intent, fit on
# (X_cc_train, y_cc_train) and compute the ratio from y_cc_train instead.
neg_count = (y_cc_train_smote == 0).sum()
pos_count = (y_cc_train_smote == 1).sum()
scale_pos_weight_value = neg_count / pos_count
xgb_cc = xgb.XGBClassifier(
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss',
    scale_pos_weight=scale_pos_weight_value,
)
xgb_cc.fit(X_cc_train_smote, y_cc_train_smote)
print("Best-performing XGBoost models are trained and ready.")

In [None]:
# Wrap the trained e-commerce classifier in a SHAP explainer, then compute
# explanations for every row of the held-out test features. The result keeps
# the row order of X_ecom_test, so positional indices can be used on both.
print("\n--- Generating SHAP explanations for the E-commerce Model ---")
explainer_ecom = shap.Explainer(xgb_ecom)
shap_values_ecom = explainer_ecom(X_ecom_test)

In [None]:
# Global view: SHAP summary plot over the whole e-commerce test set.
print("\n--- SHAP Summary Plot for E-commerce Model ---")
# show=False leaves the figure open so a title can be attached before rendering.
shap.summary_plot(shap_values_ecom, X_ecom_test, show=False)
plt.gca().set_title('Feature Importance for E-commerce Fraud Detection', size=14)
plt.show()

In [None]:
# Local view: explain one e-commerce transaction whose true label is fraud.
# np.flatnonzero yields positional indices, which is what both .iloc and
# row-indexing of shap_values_ecom expect.
true_fraud_indices = np.flatnonzero(np.asarray(y_ecom_test) == 1)
if true_fraud_indices.size == 0:
    # Fail with a clear message instead of an opaque IndexError below.
    raise ValueError("No fraudulent transactions found in the e-commerce test set.")
fraud_idx = true_fraud_indices[0]

print("\n--- Local Explanation for a Single Fraudulent E-commerce Transaction ---")
# The legacy force_plot(base_value, shap_values, features) signature expects the
# contributions as a plain NumPy array, so the Explanation row is unwrapped
# with .values rather than passed as an Explanation object.
shap.force_plot(
    explainer_ecom.expected_value,
    shap_values_ecom[fraud_idx].values,
    X_ecom_test.iloc[fraud_idx],
    matplotlib=True,
    show=False,
)
plt.show()

In [None]:
# Visual separator followed by the section banner (one call, same output as
# two consecutive prints).
print(
    "\n" + "=" * 50,
    "\n--- Generating SHAP explanations for the Credit Card Model ---",
    sep="\n",
)

# Wrap the trained credit-card classifier in a SHAP explainer and compute
# explanations for every row of the held-out test features.
explainer_cc = shap.Explainer(xgb_cc)
shap_values_cc = explainer_cc(X_cc_test)

In [None]:
# Global view: SHAP summary plot over the whole credit-card test set.
print("\n--- SHAP Summary Plot for Credit Card Model ---")
# show=False leaves the figure open so a title can be attached before rendering.
shap.summary_plot(shap_values_cc, X_cc_test, show=False)
plt.gca().set_title('Feature Importance for Credit Card Fraud Detection', size=14)
plt.show()

In [None]:
# Local view: explain one credit-card transaction whose true label is fraud.
# np.flatnonzero yields positional indices, which is what both .iloc and
# row-indexing of shap_values_cc expect.
true_fraud_indices_cc = np.flatnonzero(np.asarray(y_cc_test) == 1)
if true_fraud_indices_cc.size == 0:
    # Fail with a clear message instead of an opaque IndexError below.
    raise ValueError("No fraudulent transactions found in the credit card test set.")
fraud_idx_cc = true_fraud_indices_cc[0]

print("\n--- Local Explanation for a Single Fraudulent Credit Card Transaction ---")
# The legacy force_plot(base_value, shap_values, features) signature expects the
# contributions as a plain NumPy array, so the Explanation row is unwrapped
# with .values rather than passed as an Explanation object.
shap.force_plot(
    explainer_cc.expected_value,
    shap_values_cc[fraud_idx_cc].values,
    X_cc_test.iloc[fraud_idx_cc],
    matplotlib=True,
    show=False,
)
plt.show()