### Import the core Python libraries (NumPy, pandas, Plotly Express)

In [51]:
import numpy as np
import pandas as pd
import plotly.express as px

### Import the data

In [52]:
# Load the transactions file into a DataFrame; the path is relative to the
# notebook's working directory.
csv_path = "Fraud.csv"
data = pd.read_csv(csv_path)

# Show the first five rows as a quick sanity check of columns and values.
first_rows = data.head(n=5)
print(first_rows)

   step      type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1   PAYMENT   9839.64  C1231006815       170136.0       160296.36   
1     1   PAYMENT   1864.28  C1666544295        21249.0        19384.72   
2     1  TRANSFER    181.00  C1305486145          181.0            0.00   
3     1  CASH_OUT    181.00   C840083671          181.0            0.00   
4     1   PAYMENT  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0        0               0  
1  M2044282225             0.0             0.0        0               0  
2   C553264065             0.0             0.0        1               0  
3    C38997010         21182.0             0.0        1               0  
4  M1230701703             0.0             0.0        0               0  


### Check whether the dataset has any null values or not

In [53]:
# Count missing values per column (`isna` is the same check as `isnull`).
null_counts = data.isna().sum()
print(null_counts)

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64


### Look at the distribution of the transaction types mentioned in the dataset

In [54]:
# Count how many transactions fall under each transaction type.
# The result is named `type_counts` rather than `type` so that Python's
# built-in `type()` is not shadowed for the rest of the kernel session.
type_counts = data["type"].value_counts()
transactions = type_counts.index   # category labels for the pie slices
quantity = type_counts.values      # number of transactions per label

# Donut chart (hole = 0.5) of the share of each transaction type.
plot = px.pie(data, values=quantity, names = transactions, hole = 0.5, title = "Distribution of Transaction Types")
plot.show()

### Look at the correlation between each numeric feature and the 'isFraud' column

In [55]:
# Correlate only the numeric columns. The frame also holds string columns
# (`type`, `nameOrig`, `nameDest`); calling `.corr()` on them raises in
# pandas >= 2.0, where non-numeric columns are no longer dropped silently.
# Selecting numeric dtypes explicitly behaves the same on every pandas version.
numeric_data = data.select_dtypes(include="number")
correlation = numeric_data.corr()

# Rank the features by their correlation with the fraud flag, strongest first.
print(correlation["isFraud"].sort_values(ascending = False))

isFraud           1.000000
amount            0.076688
isFlaggedFraud    0.044109
step              0.031578
oldbalanceOrg     0.010154
newbalanceDest    0.000535
oldbalanceDest   -0.005885
newbalanceOrig   -0.008148
Name: isFraud, dtype: float64


### Encode the categorical 'type' column as integers and relabel the values of the 'isFraud' column as 'No Fraud' and 'Fraud'

In [56]:
# Integer code assigned to each transaction type.
type_codes = {"CASH_OUT" : 1, "CASH_IN" : 2, "PAYMENT" : 3, "DEBIT" : 4, "TRANSFER" : 5}
# Readable class labels for the 0/1 fraud flag.
fraud_labels = {0 : "No Fraud", 1 : "Fraud"}

# `.map` turns every value that is not a key of the mapping into NaN, so
# running this cell a second time on already-converted columns would wipe
# them out. The dtype guards make the cell safe to re-run: each column is
# converted only while it is still in its original form.
if not pd.api.types.is_numeric_dtype(data["type"]):
    data["type"] = data["type"].map(type_codes)
if pd.api.types.is_numeric_dtype(data["isFraud"]):
    data["isFraud"] = data["isFraud"].map(fraud_labels)
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,3,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,No Fraud,0
1,1,3,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,No Fraud,0
2,1,5,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,Fraud,0
3,1,1,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,Fraud,0
4,1,3,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,No Fraud,0


### Build the feature matrix and target array used for the train/test split

In [57]:
from sklearn.model_selection import train_test_split

# Columns fed to the model, in the order the model expects them at predict time.
feature_columns = ["type", "amount", "oldbalanceOrg", "newbalanceOrig"]
# Kept as a one-element list so `y` remains a 2-D, single-column array.
target_columns = ["isFraud"]

x = np.array(data.loc[:, feature_columns])
y = np.array(data.loc[:, target_columns])

### Split the data into training and test sets and train the online payments fraud detection model to classify fraud and non-fraud transactions

In [58]:
from sklearn.tree import DecisionTreeClassifier

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.1, random_state = 22)

# Seed the tree as well: scikit-learn permutes the candidate features at each
# split, so without `random_state` the fitted tree (and the score below) can
# differ from one run to the next.
model = DecisionTreeClassifier(random_state = 22)
model.fit(xtrain, ytrain)

# Mean accuracy on the held-out rows.
# NOTE(review): if fraud cases are rare, accuracy alone can look high even for
# a weak model -- confirm the class balance and consider precision/recall.
print(model.score(xtest, ytest))

0.9997296711103287


### Classify a transaction as fraud or not by feeding its details into the model

In [59]:
# One transaction in feature order: type, amount, oldbalanceOrg, newbalanceOrig.
# type 3 is the PAYMENT code; 5000 is paid out of a balance of 10000, leaving 5000.
payment_sample = [3, 5000, 10000, 5000]
features = np.array([payment_sample])  # 2-D: one row per transaction

predicted_label = model.predict(features)
print(predicted_label)

['No Fraud']


In [60]:
# One transaction in feature order: type, amount, oldbalanceOrg, newbalanceOrig.
# type 5 is the TRANSFER code; the full balance of 10000 is moved, leaving 0.
transfer_sample = [5, 10000, 10000, 0]
features = np.array([transfer_sample])  # 2-D: one row per transaction

predicted_label = model.predict(features)
print(predicted_label)

['Fraud']
