# Credit Card Fraud Detection

### Importing Libraries

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, accuracy_score, precision_recall_curve, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from imblearn.over_sampling import SMOTE
from pycaret.classification import *

### Importing Libraries for Outlier Detection

In [None]:
from sklearn.ensemble import IsolationForest

from sklearn.svm import OneClassSVM

### Reading our Dataset

In [8]:
from google.colab import drive
drive.mount('/content/drive')

In [9]:
# Load the credit-card transactions CSV from the mounted Google Drive.
# NOTE(review): a mounted Drive normally exposes user files under
# /content/drive/MyDrive/... — confirm that this path actually resolves.
df= pd.read_csv("/content/drive/Zahid/creditcard.csv")

In [10]:
df.head()

### Data Analysis

In [11]:
df.shape

#### Checking Null Values

In [12]:
df.isnull().sum()

### Checking the distribution of Normal and Fraud cases in our Data Set

In [14]:
# Count how many transactions fall into each class (0 = normal, 1 = fraud).
# Series.value_counts replaces the deprecated top-level pd.value_counts.
fraud_check = df['Class'].value_counts(sort=True)
fraud_check.plot(kind='bar', rot=0, color='r')
plt.title("Normal and Fraud Distribution")
plt.xlabel("Class")
plt.ylabel("Frequency")
# Readable tick labels replacing the raw 0 and 1 class values. This
# assignment had been swallowed into a comment, leaving `labels` undefined.
# NOTE(review): bars are ordered by frequency, so this labelling assumes the
# normal class is the more frequent one — confirm against the counts.
labels = ['Normal', 'Fraud']
# Map those labels onto the two bars.
plt.xticks(range(2), labels)
plt.show()


#### Let us look at the shapes of the Normal and Fraud subsets

In [15]:
# Partition the transactions by label: Class == 1 is fraud, Class == 0 is normal.
fraud_people = df.loc[df['Class'] == 1]
normal_people = df.loc[df['Class'] == 0]

In [16]:
fraud_people.shape

In [17]:
normal_people.shape

#### Finding the average amount (and other summary statistics) in both data sets

In [18]:
fraud_people['Amount'].describe()

In [19]:
normal_people['Amount'].describe()

#### Let us analyse it visually

In [21]:
# Two stacked histograms of the transaction amount sharing one x-axis:
# fraud transactions on top, normal transactions underneath.
bins = 70
graph, (plot1, plot2) = plt.subplots(nrows=2, ncols=1, sharex=True)
graph.suptitle('Average amount per class')

plot1.hist(fraud_people['Amount'], bins=bins)
plot1.set_title('Fraud Amount')

plot2.hist(normal_people['Amount'], bins=bins)
plot2.set_title('Normal Amount')

# The axis labels and the log y-scale are applied to the lower (normal)
# axes only, which is the axes the pyplot-level calls would act on.
plot2.set_xlabel('Amount ($)')
plot2.set_ylabel('Number of Transactions')
plot2.set_yscale('log')
plt.show()

#### Plotting a correlation heatmap

In [22]:
# Compute the pairwise correlation matrix once and reuse it; the original
# evaluated df.corr() twice and threw the first result away.
corr_matrix = df.corr()
plt.figure(figsize=(30, 30))
# annot=True writes each correlation value inside its heatmap cell.
g = sns.heatmap(corr_matrix, annot=True)

### Creating our Dependent and Independent Features

In [23]:
# Name of the dependent variable.
target = "Class"
# Every other column of the frame is an independent feature.
columns = [name for name in df.columns if name != target]
x = df[columns]
y = df[target]

In [24]:
x.shape

In [25]:
y.shape

In [26]:
x.head() ## Independent Variable

In [27]:
y.head() ## Dependent Variable

## Model building

### Splitting the data

In [28]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
# NOTE(review): the split is not stratified on y — if the fraud class is rare,
# consider passing stratify=y so both splits keep the same class ratio.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=42)

In [29]:
# Isolation Forest with 100 trees and a fixed seed; max_samples is set to the
# number of training rows.
iso_forest = IsolationForest(
    n_estimators=100,
    max_samples=len(x_train),
    random_state=0,
    verbose=0,
)

In [30]:
iso_forest.fit(x_train,y_train)

In [31]:
ypred= iso_forest.predict(x_test)

In [32]:
ypred

#### Mapping the values as we want to have an output in 0 and 1

In [33]:
# Build both masks before writing anything so the two assignments cannot
# interfere with each other: 1 becomes 0 and -1 becomes 1.
inlier_mask = ypred == 1
outlier_mask = ypred == -1
ypred[inlier_mask] = 0
ypred[outlier_mask] = 1


### Accuracy score and Matrix

In [34]:
print(accuracy_score(y_test,ypred))

In [35]:
print(classification_report(y_test,ypred))

In [36]:
from sklearn.metrics import confusion_matrix

In [37]:
confusion_matrix(y_test, ypred)

### We can also print how many errors our model makes

In [38]:
# A prediction counts as an error whenever it differs from the true label.
iso_mismatches = ypred != y_test
n_errors = iso_mismatches.sum()
print("Isolation Forest have {} errors.".format(n_errors))

## OneClassSVM

In [40]:
# One-class SVM with an RBF kernel. The closing parenthesis used to sit inside
# the commented-out `max_iter=-1)` fragment, which made this cell a
# SyntaxError. max_iter is therefore not passed and the library default applies.
svm = OneClassSVM(kernel='rbf', degree=3, gamma=0.1, nu=0.05)

In [41]:
svm.fit(x_train,y_train)

In [42]:
ypred1= svm.predict(x_test)

#### Here also we do the same thing as above, mapping our results in 0 and 1

In [43]:
# Build both masks before writing anything so the two assignments cannot
# interfere with each other: 1 becomes 0 and -1 becomes 1.
svm_inlier_mask = ypred1 == 1
svm_outlier_mask = ypred1 == -1
ypred1[svm_inlier_mask] = 0
ypred1[svm_outlier_mask] = 1

### Accuracy score and Matrix

In [44]:
# Score the One-Class SVM predictions (ypred1); the original passed ypred,
# which holds the Isolation Forest output, so it re-reported that model.
print(accuracy_score(y_test, ypred1))

In [45]:
# Report per-class metrics for the One-Class SVM predictions (ypred1); the
# original passed ypred, which holds the Isolation Forest output.
print(classification_report(y_test, ypred1))

In [46]:
from sklearn.metrics import confusion_matrix

In [47]:
# Confusion matrix for the One-Class SVM predictions (ypred1); the original
# passed ypred, which holds the Isolation Forest output.
confusion_matrix(y_test, ypred1)

In [49]:
# A prediction counts as an error whenever it differs from the true label.
svm_mismatches = ypred1 != y_test
n_errors = svm_mismatches.sum()
print("SVM have {} errors.".format(n_errors))

## Solving the Problem Statement using the PyCaret Library (AutoML)

### Installing Pycaret

In [50]:
!pip install pycaret

In [51]:
# Reload the dataset for the PyCaret section.
# NOTE(review): this relative path differs from the mounted-Drive path used
# for the first load; it only resolves if a copy of creditcard.csv sits in the
# working directory — confirm, or reuse the DataFrame that is already loaded.
df= pd.read_csv("creditcard.csv")

In [52]:
df.head()

In [53]:
from pycaret.classification import *

In [54]:
model= setup(data= df, target= 'Class')

In [55]:
compare_models()

In [56]:
random_forest= create_model('rf')

In [57]:
random_forest

### We can also tune the hyperparameters of our model

In [58]:
# Tune the random forest built by create_model('rf'). The original passed the
# string 'random_forest', which is neither the trained estimator object nor
# the 'rf' model ID used above; pass the estimator itself.
# NOTE(review): assumes a PyCaret version whose tune_model takes a trained
# estimator — confirm against the installed version.
tuned_model = tune_model(random_forest)

## Predictions

In [59]:
# Generate predictions from the (untuned) random forest on x_test.
# NOTE(review): x_test was split from the same df that setup() received, so
# some of these rows may have been part of PyCaret's own training split —
# confirm this is not leaking training data into the evaluation.
# NOTE(review): tuned_model is created above but not used here — confirm that
# predicting with the untuned random_forest is intended.
pred_holdout = predict_model(random_forest,data= x_test)

In [60]:
pred_holdout