## Algoritmo de detecção de fraudes em pagamentos online utilizando Árvore de Decisão

Utilizando a linguagem Python, desenvolver um programa para detectar possíveis fraudes em pagamentos online. Para esse projeto, foi utilizado um *dataset* disponível em https://www.kaggle.com/datasets/ealaxi/paysim1.

In [1]:
#Carregando bibliotecas necessárias
import pandas as pd
import numpy as np

In [4]:
#Importando os dados do dataset
data = pd.read_csv("dataset.csv")
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [5]:
#Vericando se o conjunto de dados possui algum valor nulo
data.isnull().sum()

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64

In [7]:
#Explorando os tipos de transações existentes no dataset
data.type.value_counts()

CASH_OUT    2237500
PAYMENT     2151495
CASH_IN     1399284
TRANSFER     532909
DEBIT         41432
Name: type, dtype: int64

In [46]:
#Visualizando as transações por meio de um gráfico de pizza
type = data["type"].value_counts();
transactions = type.index
quantity = type.values

import plotly.express as px
figure = px.pie(data, values=quantity, names=transactions, hole=0.5, 
                title="Distribuição de Transações por Tipo")
figure.show("png")

ValueError: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido


In [12]:
#Checagem de correlação
correlation = data.corr()
correlation["isFraud"].sort_values(ascending=False)

isFraud           1.000000
amount            0.076688
isFlaggedFraud    0.044109
step              0.031578
oldbalanceOrg     0.010154
newbalanceDest    0.000535
oldbalanceDest   -0.005885
newbalanceOrig   -0.008148
Name: isFraud, dtype: float64

In [23]:
#Transformando as variáveis categóricas em numéricas para aplicar o modelo
data["type"] = data["type"].map({"CASH_OUT": 1, "PAYMENT": 2, 
                       "CASH_IN": 3, "TRANSFER": 4,
                       "DEBIT": 5})
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,2,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,2,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,4,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,1,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,2,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


### Árvore de Decisão

In [42]:
#Separando o conjunto de dados em treinamento e testes
from sklearn.model_selection import train_test_split
x = np.array(data[["type", "amount", "oldbalanceOrg", "newbalanceOrig"]])
y = np.array(data[["isFraud"]])

In [43]:
#Treinando o modelo de Machine Learning
from sklearn.tree import DecisionTreeClassifier
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.10, random_state=42)
model = DecisionTreeClassifier()
model.fit(xtrain, ytrain)
model.score(xtest,ytest)

0.999732814469511