In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np

In [2]:
# Reading data from 'creditcard.csv'
data = pd.read_csv('creditcard.csv')
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [3]:
# Getting unique values and their counts in the 'Class' column
np.unique(data['Class'], return_counts=True)

(array([0, 1], dtype=int64), array([284315,    492], dtype=int64))

In [4]:
# Calculating the percentage of fraud transactions
492/284315

0.0017304750013189597

In [6]:
pip install pyod 

Collecting pyod
  Downloading pyod-1.1.3.tar.gz (160 kB)
     ---------------------------------------- 0.0/160.5 kB ? eta -:--:--
     ------- -------------------------------- 30.7/160.5 kB ? eta -:--:--
     ------- -------------------------------- 30.7/160.5 kB ? eta -:--:--
     -------------- ---------------------- 61.4/160.5 kB 326.1 kB/s eta 0:00:01
     --------------------- --------------- 92.2/160.5 kB 403.5 kB/s eta 0:00:01
     ------------------------- ---------- 112.6/160.5 kB 467.6 kB/s eta 0:00:01
     -------------------------------- --- 143.4/160.5 kB 405.9 kB/s eta 0:00:01
     ------------------------------------ 160.5/160.5 kB 400.7 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pyod
  Building wheel for pyod (setup.py): started
  Building wheel for pyod (setup.py): still running...
  Building wheel for pyod (setup.py): finished with status 'done'
  Create

In [7]:
# Importing ABOD model from pyod
from pyod.models import abod

In [8]:
# Creating an ABOD outlier detector
abod_od = abod.ABOD(n_neighbors=20, contamination=0.002)

In [9]:
# Removing 'Class' column from the dataset
X = data.drop('Class', axis=1)
# Fitting the ABOD model and predicting outlier labels
y_pred = abod_od.fit_predict(X)



In [10]:
# Counting unique values and their counts in the predicted labels
np.unique(y_pred, return_counts=True)

(array([0, 1]), array([284237,    570], dtype=int64))

In [11]:
# Counting unique values and their counts in the predicted labels
np.unique(y_pred, return_counts=True)

(array([0, 1]), array([284237,    570], dtype=int64))

In [12]:
# Importing metrics from scikit-learn
from sklearn import metrics

In [13]:
# Assigning true labels
y_true = data['Class']

In [14]:
# Printing classification report
print(metrics.classification_report(y_true=y_true, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    284315
           1       0.00      0.00      0.00       492

    accuracy                           1.00    284807
   macro avg       0.50      0.50      0.50    284807
weighted avg       1.00      1.00      1.00    284807



In [15]:
# Importing IsolationForest from scikit-learn
from sklearn.ensemble import IsolationForest

In [16]:
# Creating an Isolation Forest model
IF = IsolationForest(contamination=0.005)

In [17]:
# Fitting the Isolation Forest model and predicting outlier labels
y_pred = IF.fit_predict(X)



In [18]:
# Counting unique values and their counts in the predicted labels
np.unique(y_pred, return_counts=True)

(array([-1,  1]), array([  1425, 283382], dtype=int64))

In [19]:
# Assigning 0 to inliers and 1 to outliers in the predicted labels
y_pred[y_pred == 1] = 0
y_pred[y_pred == -1] = 1

In [20]:
# Printing classification report
print(metrics.classification_report(y_true=y_true, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    284315
           1       0.13      0.38      0.20       492

    accuracy                           0.99    284807
   macro avg       0.57      0.69      0.60    284807
weighted avg       1.00      0.99      1.00    284807

