# Section 4: Performance Metrics for Fraud Detection
### Lecture 16: Implementing Performance Metrics in scikit-learn

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import xgboost as xgb

df = pd.read_csv('credit_card.csv')
y = df['Class']
X = df.drop(['Class','Amount','Time'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)
model_xgb = xgb.XGBClassifier(max_depth=5, scale_pos_weight=100)
model_xgb.fit(X_train, y_train)
y_pred = model_xgb.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[28429,     3],
       [    6,    43]], dtype=int64)

In [2]:
from sklearn.metrics import (classification_report, precision_score, recall_score,
                             average_precision_score, roc_auc_score,
                             f1_score, matthews_corrcoef)                        

In [3]:
# Precision is the proportion of correctly predicted fraudulent instances among all instances predicted as fraud
# TP / TP + FP
# 43 / 3 + 43 = 0.934

precision_score(y_test, y_pred)

0.9347826086956522

In [4]:
# Recall is the proportion of the fraudulent instances that are successfully predicted
# TP / TP + FN
# 43 / 6 + 43 = 0.877

recall_score(y_test, y_pred)

0.8775510204081632

In [5]:
# F1-score is the harmonic balance of precision and recall (can be weighted more towards P or R if need be)
# F = 2 * (Precision * Recall)/(Precision + Recall)
# F = 2 * (0.934 * 0.877)/(0.934 + 0.877)
# F = 0.905

f1_score(y_test, y_pred)

0.9052631578947369

In [6]:
# AUROC/AUC = Area under the Receiver Operating Characteristic curve
# plot the TPR (Recall) and FPR at various classification thresholds
# FPR = FP / FP + TN
# Good measure of overall performance

roc_auc_score(y_test, y_pred)

0.9387227527476945

In [7]:
# AUPRC = Area under the Precision-Recall curve
# Better alternative to AUC as doesn't include TN which influences the scores significantly in highly imbalanced data
# calculates the area under the curve at various classification thresholds

average_precision_score(y_test, y_pred)

0.8205300988809707

In [8]:
# Classification report summarizes the classification metrics at the class and overall level

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28432
           1       0.93      0.88      0.91        49

    accuracy                           1.00     28481
   macro avg       0.97      0.94      0.95     28481
weighted avg       1.00      1.00      1.00     28481

