# This project is about the use of Isolation Forest (a random-forest-based anomaly detector) and One-Class SVM machine learning algorithms for the detection and prevention of financial fraud

In [None]:
# Importing the libraries
import os # used to read the optional dataset-path override from the environment

import pandas as pd # used to manipule DataFrames
import numpy as np # used to do computational mathematics and manipule data
import matplotlib.pyplot as plt # used to plot graphs
import seaborn as sb # another option to graphs plotting
import sklearn # a famous library to train machine learning models
from sklearn.metrics import classification_report, accuracy_score # metrics to mensure the models performance
from sklearn.metrics import confusion_matrix # confusion matrix used in the evaluation cells
import scipy # used to do science with python :D

# Libraries to Detect Outliers
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM

# Libraries to Scale the Features before the SVM
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Libraries to Split the Data
from sklearn.model_selection import train_test_split

In [None]:
# Reading the Credit Card File (containing credit card transactions)
# The location can be overridden through the CREDITCARD_CSV environment variable so the
# notebook also runs on machines where the default folder below does not exist.
DATA_PATH = os.environ.get(
    'CREDITCARD_CSV',
    r'C:\Users\pedro\OneDrive\Área de Trabalho\Geral\Portfolio_Dados\Estudos\Credit Card Fraud Detection\creditcard.csv',
)
df = pd.read_csv(DATA_PATH, sep = ',')
df.head() # showing the first five lines of the data

In [None]:
# Data sanity check

# Count the missing values found in every column of the frame
missing_per_column = df.isna().sum()
missing_per_column # (the author observed no NAs in this dataset)

In [None]:
# Verifying the Classes
# Series.value_counts replaces the deprecated top-level pd.value_counts helper
fraud_class = df['Class'].value_counts(sort = True)
print(fraud_class) # a bare expression in the middle of a cell is not rendered, so print it

# Plotting the Classes Distribution on an explicit Axes (no reliance on pyplot's implicit state)
fig, ax = plt.subplots()
fraud_class.plot(kind = 'bar', rot = 0, color = 'black', ax = ax) # plotting the classes dist
# Labels are set after plotting so pandas cannot overwrite them
ax.set_title('Classes Distribution') # defining the plot title
ax.set_xlabel('Class') # defining the X axis name
ax.set_ylabel('Count') # defining the Y axis name
plt.show()

In [None]:
# Splitting the frame by class label
fraudulent_transaction = df[df['Class'] == 1] # defining the fraudulent transactions
legitimate_transaction = df[df['Class'] == 0] # defining the legitimate transactions

# One item per output line; using sep avoids the stray leading space that print's
# default ' ' separator used to insert before "Legitimate".
print(
    'Shape:',
    f'Fraudulent: {fraudulent_transaction.shape}',
    f'Legitimate: {legitimate_transaction.shape}',
    sep = '\n',
)

In [None]:
# Viewing the Statistical Metrics of the Data
# Only the last expression of a cell is rendered, so both summaries are combined into a
# single side-by-side table instead of silently discarding the fraudulent one.
amount_stats = pd.concat(
    [
        fraudulent_transaction['Amount'].describe(),
        legitimate_transaction['Amount'].describe(),
    ],
    axis = 1,
    keys = ['Fraudulent', 'Legitimate'],
)
amount_stats

In [None]:
# Defining the Dependent and Independent Vars
# Columns are selected by name instead of by hard-coded position, so the split does not
# depend on 'Class' sitting at index 30.
# NOTE(review): assumes 'Class' is the only non-feature column in the file - confirm.
X = df.drop(columns = 'Class').values # Independent Vars (every column except the label)
y = df['Class'].values # Dependent Var (the label)

In [None]:
# Splitting Training and Test Data
# stratify = y keeps the share of each class the same in both partitions; with a heavily
# imbalanced target an unstratified split can leave the test set with a distorted number
# of minority-class samples.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 0, stratify = y
)

In [None]:
# Detecting Anomalies in the Data Using Isolation Forest (RFT for Outliers)

# Expected share of outliers, taken from the training labels.
# NOTE(review): assumes y_train holds 0/1 labels with 1 = fraud - confirm.
fraud_rate = float(np.mean(y_train))
# contamination must lie in (0, 0.5]; fall back to the library default otherwise
contamination = fraud_rate if 0 < fraud_rate <= 0.5 else 'auto'

# NOTE(review): max_samples = len(X_train) makes every tree use the full training set;
# the library default ('auto') subsamples instead - consider it if training is too slow.
iso_forest = IsolationForest(
    n_estimators = 100,
    max_samples = len(X_train),
    contamination = contamination,
    random_state = 0,
    verbose = 0,
) # Defining the Algorithm

# The model is unsupervised: fit() ignores labels, so only the features are passed
iso_forest.fit(X_train) # Training the Forest Model

# Making prediction (raw output: +1 = inlier, -1 = outlier)
ypred = iso_forest.predict(X_test)
ypred

In [None]:
# Mapping the Output Values in Boolean
# The detector returns +1 for inliers and -1 for outliers; the dataset labels use
# 0 = legitimate and 1 = fraudulent.
# The mapping is rebuilt from a fresh prediction instead of rewriting ypred in place:
# an in-place remap is not idempotent (re-running the cell would turn every 1 into 0).
ypred = np.where(iso_forest.predict(X_test) == -1, 1, 0)

In [None]:
# Viewing the Model Performance
print('Accuracy: \n', accuracy_score(y_test, ypred)) # Model Accuracy Score
print('\n Group of Classification Metrics\n', classification_report(y_test, ypred)) # Model Score Metric

# Confusion matrix (confusion_matrix is imported in the import cell at the top).
# labels = [0, 1] fixes the row/column order (legitimate, fraudulent) and keeps the matrix
# 2x2 even if one class never shows up in the predictions.
print('\nConfusion Matrix Result: \n', confusion_matrix(y_test, ypred, labels = [0, 1]))

In [None]:
# Reporting how many test samples were misclassified
# Element-wise comparison of predictions and true labels, then count the mismatches
n_errors = np.not_equal(ypred, y_test).sum()
print(f'The model have {n_errors} errors...')

# Now, we will train the model using Support Vector Machines (OneClassSVM)

In [None]:
# Training the Model Using Support Vector Machines (OneClassSVM)
# SVMs are not scale invariant: the RBF kernel works on raw distances, so the features are
# standardized first. The scaler is fitted inside the pipeline on the training data only
# and re-applied automatically at prediction time.
# The former degree = 1 argument was dropped: degree only affects the 'poly' kernel.
svm = make_pipeline(
    StandardScaler(),
    OneClassSVM(kernel = 'rbf', gamma = 0.1, nu = 0.05),
)

# The model is unsupervised: labels are ignored, so only the features are passed
svm.fit(X_train) # Fitting the Model

ypredSVM = svm.predict(X_test) # Making a prediction (raw output: +1 = inlier, -1 = outlier)

In [None]:
# Mapping the Output Values in Boolean
# The detector returns +1 for inliers and -1 for outliers; the dataset labels use
# 0 = legitimate and 1 = fraudulent.
# The mapping is rebuilt from a fresh prediction instead of rewriting ypredSVM in place:
# an in-place remap is not idempotent (re-running the cell would turn every 1 into 0).
ypredSVM = np.where(svm.predict(X_test) == -1, 1, 0)

In [None]:
# Viewing the Model Performance
print('Accuracy: \n', accuracy_score(y_test, ypredSVM)) # Model Accuracy Score
print('\n Group of Classification Metrics\n', classification_report(y_test, ypredSVM)) # Model Score Metric

# Confusion matrix (confusion_matrix is imported in the import cell at the top; the
# repeated mid-notebook import was removed).
# labels = [0, 1] fixes the row/column order (legitimate, fraudulent) and keeps the matrix
# 2x2 even if one class never shows up in the predictions.
print('\nConfusion Matrix Result: \n', confusion_matrix(y_test, ypredSVM, labels = [0, 1]))

In [None]:
# Showing the Amount of Errors
n_errorsSVM = (ypredSVM != y_test).sum() # Variable Containing the Number of Errors of the SVM model
# Report the SVM count; the previous version printed n_errors, i.e. the Isolation Forest result
print(f'The model have {n_errorsSVM} errors...')