<a href="https://colab.research.google.com/github/shiffa-04/FraudDetectionModel/blob/main/credit_ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, precision_recall_curve, auc, accuracy_score, roc_auc_score, confusion_matrix, f1_score, recall_score, precision_score
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier

# Seed NumPy's global random number generator so NumPy-based randomness repeats across runs.
np.random.seed(seed=4)

In [62]:
# Read the transactions file from the working directory and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="creditcard.csv")
df.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [63]:
# (rows, columns) of the frame as loaded, before any cleaning.
df.shape

(284807, 31)

In [64]:
# Keep the first occurrence of every fully repeated row, then show the new shape.
df = df[~df.duplicated(keep="first")]
df.shape

(283726, 31)

In [65]:
# Drop 'Time' column and separate features and target.
# errors="ignore" keeps this cell re-runnable: once 'Time' has been removed,
# executing the cell again is a no-op instead of raising KeyError.
df = df.drop(columns="Time", errors="ignore")

# Everything except the label is a feature; 'Class' is the target.
X = df.drop(columns="Class")
y = df["Class"]

In [66]:
# Hold out 10% of the rows for testing; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.1,
)

In [67]:
# Scale the 'Amount' column.
# The split frames are derived from X, and pandas may flag column assignment on
# them with SettingWithCopyWarning. Work on explicit copies so the writes below
# are unambiguous and never touch X.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the test rows enters the fit.
scaler = StandardScaler()
# The scaler takes a 2-D input and returns a 2-D (n, 1) result; ravel() flattens
# it back to one value per row before assigning to the single column.
X_train['Amount'] = scaler.fit_transform(X_train['Amount'].values.reshape(-1, 1)).ravel()
X_test['Amount'] = scaler.transform(X_test['Amount'].values.reshape(-1, 1)).ravel()

In [68]:
# Oversample the training split with SMOTE to reduce the class imbalance.
# Only the training data is resampled; the test split is left as-is.
smote = SMOTE(
    sampling_strategy=0.15,
    k_neighbors=2,
    random_state=42,
)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)


In [69]:
# Base learners that are evaluated individually and later combined in the ensemble.
# Every estimator is given the same random_state so results are repeatable.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    alpha=0.0001,
    learning_rate='constant',
    learning_rate_init=0.001,
    batch_size=64,
    max_iter=100,
    early_stopping=True,
    random_state=42,
)
# random_state added for consistency with the other estimators, which all pin 42.
# NOTE(review): use_label_encoder is kept as-is; whether it is still accepted
# depends on the installed xgboost version - confirm before removing.
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
extra_trees = ExtraTreesClassifier(random_state=42)
rf = RandomForestClassifier(random_state=42)

In [70]:
# Display name -> estimator, in the order the models are trained and reported.
classifiers = dict([
    ('MLP', mlp),
    ('XGB', xgb),
    ('Extra Trees', extra_trees),
    ('Random Forest', rf),
])

In [71]:
# Train each base model on the resampled training data and report how it does
# on the untouched test split.
for name, clf in classifiers.items():
    clf.fit(X_train_res, y_train_res)

    # Hard labels feed the threshold-based metrics; the positive-class
    # probabilities (column 1) feed the precision-recall curve.
    y_pred = clf.predict(X_test)
    y_proba = clf.predict_proba(X_test)[:, 1]

    # Threshold-based metrics computed from the predicted labels.
    recall_acc = recall_score(y_test, y_pred)
    precision_acc = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Area under the precision-recall curve (recall on the x-axis).
    precision, recall, _ = precision_recall_curve(y_test, y_proba)
    pr_auc = auc(recall, precision)

    # Confusion matrix of true labels against predicted labels.
    cm = confusion_matrix(y_test, y_pred)

    # Emit the report in one go; the trailing empty entry yields the blank
    # separator line between models.
    report_lines = [
        f"Performance of {name}:",
        f'Precision: {precision_acc:.2f}',
        f'Recall: {recall_acc:.2f}',
        f"F1 Score: {f1:.2f}",
        f"Precision-Recall AUC: {pr_auc:.6f}",
        "Confusion Matrix:",
        str(cm),
        "",
    ]
    print("\n".join(report_lines))

Performance of MLP:
Precision: 0.74
Recall: 0.83
F1 Score: 0.78
Precision-Recall AUC: 0.836579
Confusion Matrix:
[[28312    14]
 [    8    39]]

Performance of XGB:
Precision: 0.87
Recall: 0.87
F1 Score: 0.87
Precision-Recall AUC: 0.887402
Confusion Matrix:
[[28320     6]
 [    6    41]]

Performance of Extra Trees:
Precision: 0.95
Recall: 0.87
F1 Score: 0.91
Precision-Recall AUC: 0.889326
Confusion Matrix:
[[28324     2]
 [    6    41]]

Performance of Random Forest:
Precision: 0.95
Recall: 0.83
F1 Score: 0.89
Precision-Recall AUC: 0.881792
Confusion Matrix:
[[28324     2]
 [    8    39]]



In [72]:
# Combine the four base models into one soft-voting ensemble.
base_estimators = [
    ('mlp', mlp),
    ('xgb', xgb),
    ('extra_trees', extra_trees),
    ('rf', rf),
]
ensemble_model = VotingClassifier(
    estimators=base_estimators,
    voting='soft',
    n_jobs=-1,
)

In [73]:
# Train the ensemble on the SMOTE-resampled training data.
ensemble_model.fit(X=X_train_res, y=y_train_res)

In [74]:
# Score the test split: positive-class probabilities (column 1) and hard labels.
proba_matrix_vc = ensemble_model.predict_proba(X_test)
y_proba_vc = proba_matrix_vc[:, 1]
y_pred_vc = ensemble_model.predict(X_test)

In [75]:
# Summarise the ensemble on the test split: per-class report, then overall accuracy.
vc_report = classification_report(y_test, y_pred_vc)
vc_accuracy = accuracy_score(y_test, y_pred_vc)

print("Performance of VotingClassifier:")
print(vc_report)
print(f"Accuracy Score: {vc_accuracy:.2f}")

Performance of VotingClassifier:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28326
           1       0.93      0.87      0.90        47

    accuracy                           1.00     28373
   macro avg       0.97      0.94      0.95     28373
weighted avg       1.00      1.00      1.00     28373

Accuracy Score: 1.00


In [76]:
# Ensemble metrics on the test split. Everything is computed first and then
# printed in the same order as before.

# Metrics based on the predicted labels.
recall_acc = recall_score(y_test, y_pred_vc)
precision_acc = precision_score(y_test, y_pred_vc)
f1_vc = f1_score(y_test, y_pred_vc)
cm_vc = confusion_matrix(y_test, y_pred_vc)

# Metrics based on the positive-class probabilities.
roc_auc_vc = roc_auc_score(y_test, y_proba_vc)
precision_vc, recall_vc, _ = precision_recall_curve(y_test, y_proba_vc)
pr_auc_vc = auc(recall_vc, precision_vc)

print(f'Recall Score: {recall_acc}')
print(f'Precision Score: {precision_acc}')
print(f"F1 Score: {f1_vc:.2f}")
print(f"ROC AUC Score: {roc_auc_vc:.6f}")
print(f"Precision-Recall AUC: {pr_auc_vc:.6f}")
print("Confusion Matrix:")
print(cm_vc)

Recall Score: 0.8723404255319149
Precision Score: 0.9318181818181818
F1 Score: 0.90
ROC AUC Score: 0.990462
Precision-Recall AUC: 0.889042
Confusion Matrix:
[[28323     3]
 [    6    41]]
