# Init

In [58]:
!pip install shap



In [59]:
import pickle
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
import shap
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [60]:
def plot_confusion_matrix(y_true, y_pred, title='Confusion Matrix'):
    """Render the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, in the same order as ``y_true``.
        title: Text shown above the figure.

    The tick labels are hard-coded to 'Not Fraud' / 'Fraud'.
    NOTE(review): this presumes a binary target whose sorted labels are
    (not fraud, fraud) -- confirm against the dataset encoding.
    """
    class_names = ['Not Fraud', 'Fraud']
    counts = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(10, 7))
    sns.heatmap(
        counts,
        annot=True,      # write the count inside every cell
        fmt='d',         # counts are integers
        cmap='Blues',
        cbar=False,
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title(title)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.show()

In [61]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load files that were
    # produced by a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Previously persisted estimators that are reused later in the notebook.
dt_final = _load_pickle('pickle/dt_final.p')
svm_linear = _load_pickle('pickle/svm_linear.p')
knn_grid = _load_pickle('pickle/knn_grid.p')

In [62]:
# Load the pickled DataFrame that the features and the 'Class' target are
# taken from below.
with open('pickle/df_reduced.p', mode='rb') as reduced_file:
    df_reduced = pickle.load(reduced_file)

In [63]:
# Read the credit-card CSV into a DataFrame.
# NOTE(review): `data` is not referenced by any of the cells shown in this
# notebook (modelling uses `df_reduced`); confirm it is still needed.
data = pd.read_csv('files/creditcard.csv')

In [64]:
# Separate the target column from the feature columns.
target_column = 'Class'
y = df_reduced[target_column]
X = df_reduced.drop(columns=target_column)

In [65]:
# Hold out 20% of the rows for testing; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [66]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so no test-set statistics
# leak into training.
scaler = StandardScaler()
# fit_transform/transform return plain arrays; rebuild DataFrames and keep
# both the column names and the original row index so X_train/X_test stay
# label-aligned with y_train/y_test (which retain the index of df_reduced).
X_train = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
X_test = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

# Voting

In [67]:
# Ensemble of the three previously loaded estimators.
# With voting='soft' the ensemble combines predicted class probabilities, so
# every member has to provide predict_proba.
# NOTE(review): confirm svm_linear was created with probability estimates
# enabled; this cannot be verified from this notebook.
base_estimators = [
    ('decision_tree', dt_final),
    ('svm', svm_linear),
    ('knn', knn_grid),
]
voting_clf = VotingClassifier(estimators=base_estimators, voting='soft')

In [None]:
# Fit the voting ensemble on the standardised training split.
voting_clf.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out test split with the ensemble.
y_pred_voting = voting_clf.predict(X_test)

In [None]:
# Per-class precision / recall / F1 of the voting ensemble on the test split.
voting_report = classification_report(y_test, y_pred_voting)
print("Classification Report:\n", voting_report)

In [None]:
# Visualise the ensemble's test-set errors as a confusion-matrix heatmap.
plot_confusion_matrix(y_test, y_pred_voting, title='Confusion Matrix for Voting Classifier')

# Random Forest

## Model

In [None]:
# Limit every tree in the forest to the depth of the loaded decision tree.
# NOTE(review): get_depth() requires dt_final to be a fitted tree estimator --
# confirm against how the pickle was produced.
tree_depth = dt_final.get_depth()
rf = RandomForestClassifier(
    max_depth=tree_depth,
    random_state=42,  # reproducible forest
    n_jobs=-1,        # use all available cores
)
rf.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out test split with the random forest.
y_pred_rf = rf.predict(X_test)

In [None]:
# Per-class precision / recall / F1 of the random forest on the test split.
rf_report = classification_report(y_test, y_pred_rf)
print("Classification Report:\n", rf_report)

In [None]:
# Visualise the random forest's test-set errors. The title names the model
# actually being plotted (it previously said "Voting Classifier", a leftover
# from the cell it was copied from).
plot_confusion_matrix(y_test, y_pred_rf, title='Confusion Matrix for Random Forest')

## SHAP

In [None]:
# Build a tree-specific SHAP explainer for the fitted random forest.
# The model variable defined above is `rf`; `rf_model` was never defined in
# this notebook and raised a NameError.
explainer = shap.TreeExplainer(rf)

In [None]:
# Compute SHAP values for every row of the test split.
# NOTE(review): the container type returned here (list per class vs. a single
# 3-D array) depends on the installed SHAP version -- confirm before indexing.
shap_values = explainer.shap_values(X_test)

In [None]:
# Depending on the installed SHAP version, shap_values for a classifier is
# either a list holding one (n_samples, n_features) array per class or a
# single (n_samples, n_features, n_classes) array. Select the same class in
# both layouts so the plot does not break on the list form.
# NOTE(review): index 0 is presumably the first entry of rf.classes_
# (the "Not Fraud" class) -- confirm this is the class you want to explain.
class_index = 0
if isinstance(shap_values, list):
    class_shap_values = shap_values[class_index]
else:
    class_shap_values = shap_values[:, :, class_index]

# Bar plot of the mean absolute SHAP value per feature.
shap.summary_plot(
    class_shap_values,
    X_test,
    plot_type="bar",
)