# Credit Card Fraud Detection

Dataset Link: https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud?select=creditcard.csv

In [None]:
# Prompt for a file upload inside Google Colab and keep whatever
# files.upload() returns in `uploaded`.
from google.colab import files
uploaded = files.upload()

In [None]:
#Packages related to general operating system & warnings
import os
import warnings
warnings.filterwarnings('ignore')
# Packages related to data importing, manipulation, exploratory data analysis, and data understanding
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from termcolor import colored as cl # text customization
# Packages related to data visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Default plot settings for the notebook.
# The defaults are written to rcParams directly: plt.figure(figsize=...) only
# opens one new, empty figure and does not change the size of later plots,
# and plt.gray() is a pyplot-level call whose lasting effect is the default
# image colormap, which is set explicitly below.
plt.rc("font", size=14)
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.figsize'] = (6, 3)  # (width, height) in inches
plt.rcParams['image.cmap'] = 'gray'
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, LabelBinarizer, OrdinalEncoder
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from xgboost import XGBClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [None]:
# Reading the data.
# The first cell uploads the CSV through google.colab, but the original code
# only looked at a fixed path on a local F: drive. Try that original location
# first, then a file in the current working directory (assumes the uploaded
# file keeps the name creditcard.csv -- TODO confirm). If neither exists, fall
# back to the original path so pandas raises the same FileNotFoundError as
# before.
DATA_CANDIDATES = (r"F:\creditcard.csv", "creditcard.csv")
data_path = next(
    (path for path in DATA_CANDIDATES if os.path.exists(path)),
    DATA_CANDIDATES[0],
)
data = pd.read_csv(data_path)

In [None]:
# Preview the first 10 rows of the dataset.
data.head(10)

In [None]:
# Preview the last 10 rows of the dataset.
data.tail(10)

In [None]:
# Dimensions of the dataset as (number of rows, number of columns).
data.shape

In [None]:
# Summary statistics for the columns of the dataset.
data.describe()

In [None]:
# Pairwise correlation between the columns of the dataset.
data.corr()

In [None]:
# Check each column for missing values: True means the column contains at
# least one null entry. Use .sum() instead of .any() for per-column counts.
missing_by_column = data.isnull().any()
print(missing_by_column)

In [None]:
# Split the transactions by label and report how many fall into each class.
fraud_mask = data['Class'] == 1
valid_mask = data['Class'] == 0
fraud = data[fraud_mask]
valid = data[valid_mask]

fraud_count = len(fraud)
valid_count = len(valid)

# Number of fraud rows divided by the number of valid rows.
outlierFraction = fraud_count / float(valid_count)
print(outlierFraction)
print('Fraud Cases: {}'.format(fraud_count))
print('Valid Transactions: {}'.format(valid_count))

In [None]:
# Summary statistics of the Amount column for the fraudulent rows only.
print("Amount details of the fraudulent transaction")
fraud['Amount'].describe()

In [None]:
# Summary statistics of the Amount column for the valid rows only.
print("details of valid transaction")
valid['Amount'].describe()

In [None]:
# Heatmap of the pairwise correlations between all columns.
corrmat = data.corr()
fig = plt.figure(figsize=(12, 9))
# Square cells, with the colour scale capped at 0.8.
sns.heatmap(data=corrmat, square=True, vmax=0.8)
plt.show()

In [None]:
# Separate the target label ('Class') from the feature columns (everything
# else), keeping both as plain NumPy arrays.
y = data['Class'].values
X = data.drop(columns=['Class']).values

In [None]:
# Show the feature array.
print(X)

In [None]:
# Show the label array.
print(y)

In [None]:
# Split the data into training and testing sets (75% / 25%).
# Fraud labels are usually a very small share of the rows (see the class
# counts printed earlier), so stratify on y to keep the same fraud/valid
# proportion in both the training and the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=15, stratify=y
)

# Support Vector Machines

In [None]:
from sklearn.svm import LinearSVC, LinearSVR, SVC, SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# SVC's default RBF kernel is distance based, so features measured on large
# scales would dominate the decision function. Standardise every feature
# before the classifier: the pipeline fits the scaler on the training data
# only and re-applies the same scaling inside predict().
# `svm` keeps the same fit/predict interface; the fitted SVC itself is
# available as svm.named_steps['svc'].
svm = make_pipeline(StandardScaler(), SVC())
svm.fit(X_train, y_train)
svm_pred = svm.predict(X_test)

In [None]:
# Evaluating the classifier:
# print accuracy, precision, recall and F1 of the SVM predictions on the
# test set.
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score, matthews_corrcoef
from sklearn.metrics import confusion_matrix

n_outliers = len(fraud)
# Number of test rows whose predicted label differs from the true label.
n_errors = (svm_pred != y_test).sum()
# The predictions being scored come from the SVM (`svm_pred`), so name that
# model in the report (the label previously said "Random Forest").
print("The model used is Support Vector Machine classifier")

accuracy = accuracy_score(y_test, svm_pred)
print("The accuracy is {}".format(accuracy))

Precision = precision_score(y_test, svm_pred)
print("The precision is {}".format(Precision))

recall = recall_score(y_test, svm_pred)
print("The recall is {}".format(recall))

f1_Score = f1_score(y_test, svm_pred)
print("The F1-Score is {}".format(f1_Score))



In [None]:
# Draw the confusion matrix of the SVM predictions as an annotated heatmap.
LABELS = ['Normal', 'Fraud']
conf_matrix = confusion_matrix(y_test, svm_pred)

plt.figure(figsize=(12, 12))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="g",
    xticklabels=LABELS,
    yticklabels=LABELS,
)
plt.xlabel('Predicted class')
plt.ylabel('True class')
plt.title("Confusion matrix")
plt.show()