In [28]:
import pandas as pd
import numpy as np

In [29]:
# Path of the transaction log CSV, held in a variable so the load call stays short.
csv_path = "/content/PS_20174392719_1491204439457_log.csv"
data = pd.read_csv(csv_path)
# Bare trailing expression so the notebook renders the loaded frame.
data

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.00,160296.36,M1979787155,0.0,0.0,0.0,0.0
1,1,PAYMENT,1864.28,C1666544295,21249.00,19384.72,M2044282225,0.0,0.0,0.0,0.0
2,1,TRANSFER,181.00,C1305486145,181.00,0.00,C553264065,0.0,0.0,1.0,0.0
3,1,CASH_OUT,181.00,C840083671,181.00,0.00,C38997010,21182.0,0.0,1.0,0.0
4,1,PAYMENT,11668.14,C2048537720,41554.00,29885.86,M1230701703,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
3546649,260,PAYMENT,7829.76,C28616592,22116.00,14286.24,M1321576916,0.0,0.0,0.0,0.0
3546650,260,PAYMENT,10792.26,C697109667,14286.24,3493.98,M1786562305,0.0,0.0,0.0,0.0
3546651,260,PAYMENT,6796.17,C1313895929,824941.00,818144.83,M758709998,0.0,0.0,0.0,0.0
3546652,260,CASH_IN,131307.53,C1821448190,11383.00,142690.53,C1998046670,0.0,0.0,0.0,0.0


## Checking for null values


In [30]:
# Number of missing entries in each column (isna is pandas' alias for isnull).
data.isna().sum()

step              0
type              0
amount            1
nameOrig          1
oldbalanceOrg     1
newbalanceOrig    1
nameDest          1
oldbalanceDest    1
newbalanceDest    1
isFraud           1
isFlaggedFraud    1
dtype: int64

In [31]:
# Drop every row that contains at least one missing value. The result is
# rebound to `data` instead of using inplace=True, so the statement reads as a
# plain transformation and the previously displayed frame object is not
# mutated behind the reader's back.
data = data.dropna(axis=0)

In [32]:
# Re-count missing entries per column to confirm the frame no longer has any.
data.isna().sum()

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64

In [33]:
## Exploring transaction type

# How many rows fall under each value of the "type" column.
data["type"].value_counts()

CASH_OUT    1265140
PAYMENT     1188984
CASH_IN      777607
TRANSFER     293097
DEBIT         21825
Name: type, dtype: int64

In [34]:
# Per-type transaction counts used by the pie chart. The intermediate is called
# `type_counts` rather than `type` so the Python builtin `type` is not shadowed
# for the rest of the kernel session.
type_counts = data["type"].value_counts()
transactions = type_counts.index   # transaction-type labels
quantity = type_counts.values      # matching row counts

In [35]:
import plotly.express as px

# Donut chart (hole=0.5): one slice per transaction type, sized by its row count.
figure = px.pie(
    data_frame=data,
    names=transactions,
    values=quantity,
    hole=0.5,
    title="Distribution of Transaction Type",
)
figure.show()

## Checking correlation between features

In [36]:
# Correlate numeric columns only. The frame still holds string columns here
# ("type", "nameOrig", "nameDest"); calling corr() on them raises in pandas
# versions where numeric_only defaults to False, so they are filtered out
# explicitly in a way that works on every pandas version.
correlation = data.select_dtypes(include="number").corr()
# Correlation of every numeric column with the fraud flag, strongest first.
correlation["isFraud"].sort_values(ascending=False)

isFraud           1.000000
amount            0.126923
isFlaggedFraud    0.026165
oldbalanceOrg     0.005606
newbalanceDest    0.000654
step             -0.005341
oldbalanceDest   -0.006479
newbalanceOrig   -0.007617
Name: isFraud, dtype: float64

In [37]:
# Integer code assigned to each transaction type.
type_codes = {"CASH_OUT": 1,
              "PAYMENT": 2,
              "CASH_IN": 3, "TRANSFER": 4,
              "DEBIT": 5}
# Human-readable label for each value of the fraud flag.
fraud_labels = {0: "No Fraud",
                1: "Fraud"}

# Series.map turns any value missing from the dict into NaN, so re-running this
# cell on already-converted columns would wipe them out. Each mapping is
# therefore applied only while the column is still in its original form:
# "type" not yet numeric, "isFraud" still numeric.
if not pd.api.types.is_numeric_dtype(data["type"]):
    data["type"] = data["type"].map(type_codes)
if pd.api.types.is_numeric_dtype(data["isFraud"]):
    data["isFraud"] = data["isFraud"].map(fraud_labels)
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,2,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,No Fraud,0.0
1,1,2,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,No Fraud,0.0
2,1,4,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,Fraud,0.0
3,1,1,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,Fraud,0.0
4,1,2,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,No Fraud,0.0


## Splitting data for training and testing

In [38]:
from sklearn.model_selection import train_test_split

# Columns fed to the model, in the order the classifier will expect them.
feature_columns = ["type", "amount", "oldbalanceOrg", "newbalanceOrig"]
# The target is selected with a one-element list, so y is a single-column 2-D array.
target_columns = ["isFraud"]

x = np.array(data[feature_columns])
y = np.array(data[target_columns])

## Training the machine learning model

In [39]:
from sklearn.tree import DecisionTreeClassifier

# Hold out 10% of the rows for evaluation; random_state fixes the shuffle so
# the same split is produced on every run.
split_parts = train_test_split(x, y, test_size=0.10, random_state=42)
xtrain, xtest, ytrain, ytest = split_parts

In [41]:
# Seed the tree as well as the split: without random_state the fitted tree
# (and therefore the reported score) can differ from one run to the next.
model = DecisionTreeClassifier(random_state=42)
model.fit(xtrain, ytrain)
# Mean accuracy on the held-out rows.
model.score(xtest, ytest)

0.9996926685952418

In [42]:
# Classify one hand-written transaction.
# Value order must match the training columns:
#   [type, amount, oldbalanceOrg, newbalanceOrig]
# Type code 4 is TRANSFER in the encoding applied to the "type" column.
sample_row = [4, 9000.60, 9000.60, 0.0]
features = np.array([sample_row])
predicted_label = model.predict(features)
print(predicted_label)

['Fraud']
