# Online Payments Fraud Detection    

In [1]:
#Libs import
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split


In [2]:
df = pd.read_csv('Credit_card.csv',delimiter=',')
df.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [3]:
# Now, let’s have a look at whether this dataset has any null values or not:
df.isnull().sum()

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64

In [4]:
# let’s have a look at the type of transaction mentioned in the dataset:
# Exploring transaction type in the column type.
print(df.type.value_counts())

type
CASH_OUT    186013
PAYMENT     167814
CASH_IN     111762
TRANSFER     41511
DEBIT         3683
Name: count, dtype: int64


### I needed to transform all string values ​​into numbers to be able to use the pandas curr() function later on.

In [5]:
df["type"] = df["type"].map({"CASH_OUT": 1, "PAYMENT": 2, 
                                 "CASH_IN": 3, "TRANSFER": 4,
                                 "DEBIT": 5})
print(df.head())

   step  type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1     2   9839.64  C1231006815       170136.0       160296.36   
1     1     2   1864.28  C1666544295        21249.0        19384.72   
2     1     4    181.00  C1305486145          181.0            0.00   
3     1     1    181.00   C840083671          181.0            0.00   
4     1     2  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0        0               0  
1  M2044282225             0.0             0.0        0               0  
2   C553264065             0.0             0.0        1               0  
3    C38997010         21182.0             0.0        1               0  
4  M1230701703             0.0             0.0        0               0  


### I had to delete the letters from the values ​​in the names Orig and Dest columns because the corr() function was unable to work with string data.

In [6]:
df['nameOrig'] = df['nameOrig'].str.replace(r'^C', '', regex=True)
df['nameDest'] = df['nameDest'].str.replace(r'^C', '', regex=True)
df['nameDest'] = df['nameDest'].str.replace(r'^M', '', regex=True)

In [7]:
# Checking correlation
correlation = df.corr()
print(correlation["isFraud"].sort_values(ascending=False))

isFraud           1.000000
amount            0.052577
type              0.008740
oldbalanceOrg    -0.000546
nameOrig         -0.001305
nameDest         -0.001519
newbalanceDest   -0.001821
oldbalanceDest   -0.005242
newbalanceOrig   -0.006341
step             -0.020591
isFlaggedFraud         NaN
Name: isFraud, dtype: float64


In [8]:
# Here I will also transform the values of the isFraud column into No Fraud and Fraud 
# labels to have a better understanding of the output:
df["isFraud"] = df["isFraud"].map({0: "No Fraud", 1: "Fraud"})

# Online Payments Fraud Detection Model

### Now let’s train a classification model to classify fraud and non-fraud transactions. Before training the model, I will split the data into training and test sets:

In [9]:

x = np.array(df[["type","amount", "oldbalanceOrg", "newbalanceOrig"]])
y = np.array(df[["isFraud"]])

In [10]:
# training a machine learning model

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.10, random_state=42)
model = DecisionTreeClassifier()
model.fit(xtrain, ytrain)
print(model.score(xtest, ytest))

0.9995301395876975


### Now let’s classify whether a transaction is a fraud or not by feeding about a transaction into the model:

In [11]:
# prediction
# features = [type, amount, oldbalanceOrg, newbalanceOrig]
#features = np.array([[4, 19000.60, 10000.60, 0.0]]) # No Fraud
#features = np.array([[4, 19000.60, 18000.60, 1000.0]]) # No Fraud
features = np.array([[4, 19000.60, 20000.60, 0.0]]) # Fraud
print(model.predict(features))

['Fraud']
