In [None]:
import pandas as pd
import shap
import matplotlib.pyplot as plt
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load processed data
df = pd.read_csv('../data/processed/creditcard_processed.csv')

# Prepare features: 'Class' is the target; 'Time' is dropped, and 'Amount_bin'
# is dropped here only so it can be re-added below as integer codes.
y = df['Class']
X = df.drop(columns=['Class', 'Time', 'Amount_bin'])

# Encode categorical bin column as integer labels (appended as the last column)
X['Amount_bin'] = LabelEncoder().fit_transform(df['Amount_bin'])

# Train/test split BEFORE scaling, so the scaler never sees test rows.
# With the same random_state and stratify target this selects the same rows
# as splitting the already-scaled matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.3, random_state=42
)

# Scale data: fit on the training rows only, then apply the same transform to
# the test rows (avoids leaking test-set mean/variance into preprocessing).
# The results are NumPy arrays, so column names live on `X.columns`.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix, kept under its original name in case other cells
# reference it; it now uses the training-set statistics.
X_scaled = scaler.transform(X)


In [None]:
# Random forest classifier; class_weight='balanced' asks scikit-learn to
# reweight the classes, and the fixed seed keeps the fitted forest reproducible.
model = RandomForestClassifier(
    random_state=42,
    class_weight='balanced',
)
model.fit(X=X_train, y=y_train)


In [None]:
# Tree-specific SHAP explainer for the fitted forest.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# Older shap releases return a list with one (n_samples, n_features) array per
# class; newer releases return a single (n_samples, n_features, n_classes)
# array. Normalise to the per-class list so that `shap_values[1]` always means
# "SHAP values for class index 1" rather than "row 1" on newer versions.
if isinstance(shap_values, np.ndarray) and shap_values.ndim == 3:
    shap_values = [shap_values[:, :, k] for k in range(shap_values.shape[2])]


In [None]:
# Global SHAP summaries for class index 1 (presumably the fraud class, given
# the 0/1 'Class' target). X_test is a plain NumPy array after scaling, so the
# column names are passed explicitly; otherwise the plots are labelled
# "Feature 0", "Feature 1", ...
feature_names = X.columns.tolist()

# Mean |SHAP| per feature (bar chart), then the per-sample distribution plot.
shap.summary_plot(shap_values[1], X_test, feature_names=feature_names, plot_type="bar")
shap.summary_plot(shap_values[1], X_test, feature_names=feature_names)


In [None]:
# Select one actual fraud case from the test set (true label == 1).
# Note: this picks by ground truth, not by what the model predicted.
fraud_positions = np.flatnonzero(y_test.values == 1)
if fraud_positions.size == 0:
    raise ValueError("No fraud cases (Class == 1) found in the test split")
fraud_index = fraud_positions[0]

# initjs() is only needed by shap's JavaScript renderer; it is kept here so the
# cell's side effects are unchanged even though matplotlib=True is used.
shap.initjs()

# Per-feature contributions pushing this sample away from the class-1 base value.
# Feature names come from X.columns because X_test is a plain NumPy array.
shap.force_plot(
    explainer.expected_value[1],
    shap_values[1][fraud_index],
    X_test[fraud_index],
    feature_names=X.columns.tolist(),
    matplotlib=True,
)
