# Load model & data


In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import joblib
import shap

# Read the preprocessed fraud dataset from disk
processed = pd.read_csv("../data/processed/fraud_processed.csv")

# Keep the label column aside before it is removed from the features
y = processed['class']

# Columns that must not reach the model: the label itself, identifiers,
# time columns and IP-range bounds. errors='ignore' skips any name that is
# not present in the file.
non_feature_cols = [
    'class',
    'user_id', 'device_id', 'ip_address',
    'signup_time', 'purchase_time',
    'lower_bound_ip_address', 'upper_bound_ip_address',
]
X_test = processed.drop(columns=non_feature_cols, errors='ignore')

# Expand the low-cardinality categoricals into dummy columns, dropping the
# first level of each
low_card_cols = ['sex', 'browser']
X_test = pd.get_dummies(X_test, columns=low_card_cols, drop_first=True)

# Map the remaining categoricals to integer codes. One encoder object is
# refit for every column, so after the loop it holds the classes of the
# last column only.
# NOTE(review): the codes are learned from this file rather than reused from
# training -- confirm they match the encoding the saved model was fit on.
le = LabelEncoder()
for col in ['source', 'country']:
    as_text = X_test[col].astype(str)
    X_test[col] = le.fit_transform(as_text)

# Report the final feature-matrix size as a sanity check
print("X_test shape after encoding:", X_test.shape)

# Restore the previously trained Random Forest from its pickle
rf = joblib.load("../models/random_forest.pkl")

X_test shape after encoding: (151112, 14)


# Predict labels and probabilities

In [None]:
# Per-class probabilities from the forest; keep column 1 as the score
# (assumes column 1 corresponds to class 1 -- TODO confirm via rf.classes_)
class_probabilities = rf.predict_proba(X_test)
y_prob = class_probabilities[:, 1]

# Hard class labels for the same rows
y_pred = rf.predict(X_test)

# SHAP Explainer

In [None]:
# Build a tree-specific SHAP explainer around the loaded forest
explainer = shap.TreeExplainer(model=rf)

# Attribute every row of the feature matrix.
# NOTE(review): this runs over all rows of X_test, which may be slow --
# consider whether a sample would be enough.
shap_values = explainer.shap_values(X=X_test)


# Global SHAP summary plot

In [None]:
# Select the SHAP values for class 1 (the positive class).
# Older SHAP releases return a list with one (n_samples, n_features) array
# per class; newer releases return a single array shaped
# (n_samples, n_features, n_classes). Indexing the latter with [1] would
# pick the second *sample*, not the second class, so handle both layouts.
if isinstance(shap_values, list):
    positive_class_shap = shap_values[1]
else:
    positive_class_shap = shap_values[:, :, 1]

# Global summary of how each feature pushes predictions towards class 1
shap.summary_plot(positive_class_shap, X_test)


# Built-in feature importance (for comparison with SHAP)

In [None]:
# Pair the model's built-in importance scores with their feature names,
# then rank them from most to least important
feature_names = X_test.columns
unsorted_importances = pd.Series(rf.feature_importances_, index=feature_names)
importances = unsorted_importances.sort_values(ascending=False)

# Horizontal bar chart of the ten highest-ranked features
top_ten = importances.head(10)
top_ten.plot(kind="barh", title="Top 10 Feature Importances")


# SHAP force plots

In [None]:
# True Positive
tp_index = y[(y == 1)].index[0]

# False Positive
fp_index = y[(y == 0)].index[0]

# False Negative
fn_index = y[(y == 1)].index[1]


In [None]:
# Load SHAP's JavaScript so the interactive force plot renders in the notebook
shap.initjs()

# Select the SHAP values for class 1 (the positive class).
# Older SHAP releases return a list with one (n_samples, n_features) array
# per class; newer releases return a single array shaped
# (n_samples, n_features, n_classes). Indexing the latter with [1] would
# pick the second *sample*, not the second class, so handle both layouts.
if isinstance(shap_values, list):
    positive_class_shap = shap_values[1]
else:
    positive_class_shap = shap_values[:, :, 1]

# Explain the single row chosen as the true-positive example: start from the
# class-1 base value and show how each feature moves the prediction
shap.force_plot(
    explainer.expected_value[1],
    positive_class_shap[tp_index],
    X_test.iloc[tp_index]
)


# Business recommendations

High-risk early transactions

Transactions occurring shortly after signup should trigger additional verification.

Geolocation-based risk

Transactions from high-fraud countries should be subjected to stricter controls.

Behavioral velocity checks

Users with unusually high transaction frequency should be temporarily limited.