In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

base = Path().resolve().parent
file_path = f'{base}/data/creditcard.parquet'
df = pd.read_parquet(file_path)

#### IsolationForest

In [4]:
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df[['scaled_amount', 'scaled_time']] = scaler.fit_transform(df[['Amount', 'Time']])
df = df.drop(['Amount', 'Time'], axis=1)


X = df.drop(columns=['Class'], axis=1)
y = df['Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = IsolationForest(n_estimators=100, random_state=42)
clf.fit(X_train)
y_pred = clf.predict(X_test)

# Convert: 1 -> 0 (normal), -1 -> 1 (anomaly)
y_pred = np.where(y_pred == 1, 0, 1)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits=4))

print("ROC-AUC Score:", roc_auc_score(y_test, y_pred))

Confusion Matrix:
[[54843  2021]
 [   17    81]]

Classification Report:
              precision    recall  f1-score   support

           0     0.9997    0.9645    0.9818     56864
           1     0.0385    0.8265    0.0736        98

    accuracy                         0.9642     56962
   macro avg     0.5191    0.8955    0.5277     56962
weighted avg     0.9980    0.9642    0.9802     56962

ROC-AUC Score: 0.8954948362293706


#### One-Class-SVM

In [9]:
df_sampled = df.sample(frac=0.05, random_state=42)  

X_svm = df_sampled.drop(columns=['Class'], axis=1)
y_svm = df_sampled['Class']

X_train_svm, X_test_svm, y_train_svm, y_test_svm = train_test_split(
    X_svm, y_svm, test_size=0.2, random_state=42)

In [11]:
from sklearn.svm import OneClassSVM

svm = OneClassSVM()
svm.fit(X_train_svm)
y_pred_svm = svm.predict(X_test_svm)
y_pred_svm = np.where(y_pred_svm == 1, 0, 1)

print("Confusion Matrix:")
print(confusion_matrix(y_test_svm, y_pred_svm))

print("\nClassification Report:")
print(classification_report(y_test_svm, y_pred_svm, digits=4))

print("ROC-AUC Score:", roc_auc_score(y_test_svm, y_pred_svm))

Confusion Matrix:
[[1367 1477]
 [   0    4]]

Classification Report:
              precision    recall  f1-score   support

           0     1.0000    0.4807    0.6493      2844
           1     0.0027    1.0000    0.0054         4

    accuracy                         0.4814      2848
   macro avg     0.5014    0.7403    0.3273      2848
weighted avg     0.9986    0.4814    0.6483      2848

ROC-AUC Score: 0.7403305203938115
