In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv('Credit_card.csv')

In [3]:
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [4]:
print(data.isnull().sum())

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64


In [5]:
# Exploring transaction type
print(data.type.value_counts())

CASH_OUT    2237500
PAYMENT     2151495
CASH_IN     1399284
TRANSFER     532909
DEBIT         41432
Name: type, dtype: int64


In [6]:
# Checking correlation
correlation = data.corr()

In [7]:
correlation['isFraud'].sort_values(ascending=False)

isFraud           1.000000
amount            0.076688
isFlaggedFraud    0.044109
step              0.031578
oldbalanceOrg     0.010154
newbalanceDest    0.000535
oldbalanceDest   -0.005885
newbalanceOrig   -0.008148
Name: isFraud, dtype: float64

In [8]:
data['type'] = data['type'].map({"CASH_OUT" : 1, "PAYMENT" : 2, "CASH IN": 3,"TRANSFER":4,"DEBIT":5})

In [9]:
data["isFraud"] = data["isFraud"].map({0:"No Fraud",1:"Fraud"})

In [10]:
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,2.0,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,No Fraud,0
1,1,2.0,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,No Fraud,0
2,1,4.0,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,Fraud,0
3,1,1.0,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,Fraud,0
4,1,2.0,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,No Fraud,0


In [11]:
from sklearn.model_selection import train_test_split

In [12]:
data.dropna(inplace=True)

In [13]:
X = data[["type", "amount", "oldbalanceOrg", "newbalanceOrig"]]
y = data[["isFraud"]]

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [15]:
from sklearn.tree import DecisionTreeClassifier

In [16]:
model = DecisionTreeClassifier()

In [17]:
model.fit(X_train,y_train)

DecisionTreeClassifier()

In [18]:
y_pred = model.predict(X_test)

In [19]:
from sklearn.metrics import confusion_matrix

In [20]:
confusion_matrix(y_test, y_pred)

array([[  1453,    161],
       [   196, 990858]], dtype=int64)

In [21]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [26]:
precision = precision_score(y_test, y_pred, pos_label="No Fraud")
print(precision)

0.9998375409553197


In [29]:
f1score = f1_score(y_test, y_pred,pos_label="No Fraud")
print(f1score)

0.9998198855440743


In [30]:
recall = recall_score(y_test, y_pred, pos_label="No Fraud")
print(recall)

0.9998022307563462
