<a href="https://colab.research.google.com/github/sahug/ds-tensorflow-colab/blob/master/Tensorflow%20-%20Credit%20Card%20Fraud%20Detection%20Using%20CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**TensorFlow - Credit Card Fraud Detection Using CNN**

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv1D,
    Dense,
    Dropout,
    Flatten,
    MaxPool1D,
)
from tensorflow.keras.optimizers import Adam

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Record the installed TensorFlow version for reproducibility.
print(tf.__version__)

In [None]:
# Load the credit card transactions CSV into a DataFrame and preview the
# first rows.
data = pd.read_csv("data/creditcard.csv")
data.head()

In [None]:
# (number of rows, number of columns) of the raw dataset.
data.shape

In [None]:
# Count the missing values in every column.
data.isna().sum()

In [None]:
# Print column dtypes, non-null counts and memory usage.
data.info()

In [None]:
# The data is heavily imbalanced: 284,315 non-fraud rows (Class 0) versus
# only 492 fraud rows (Class 1).
data["Class"].value_counts()

**Balance Data**

In [None]:
# Separate the transactions by label: Class 1 is fraud, Class 0 is not.
class_label = data["Class"]
fraud = data[class_label.eq(1)]
non_fraud = data[class_label.eq(0)]

In [None]:
# Display both subsets to eyeball their contents and sizes.
non_fraud, fraud

In [None]:
# Undersample the majority class: draw exactly as many non-fraud rows as
# there are fraud rows so that both classes are equally represented.
# A fixed seed keeps the sampled subset (and therefore every result below)
# reproducible, in line with the seeded train/test split.
non_fraud = non_fraud.sample(n=len(fraud), random_state=0)
non_fraud.shape

In [None]:
# Stack the fraud and the sampled non-fraud rows into one balanced dataset
# with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4
# and removed in pandas 2.0.
data = pd.concat([fraud, non_fraud], ignore_index=True)
data

In [None]:
# Confirm that both classes now have the same number of rows.
data["Class"].value_counts()

In [None]:
# Target vector is the "Class" label; the feature matrix is every other column.
y = data["Class"]
x = data.drop(columns="Class")

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# ratio the same in both splits; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    stratify=y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Shapes of the training and test feature sets after the split.
x_train.shape, x_test.shape

In [None]:
# Standardize every feature to zero mean and unit variance.
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test data leak into training.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Convert the label Series to plain NumPy arrays for Keras.
y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

In [None]:
# Full shape, followed by its two components (rows, columns) on their own.
x_train.shape, x_train.shape[0], x_train.shape[1]

In [None]:
# Conv1D expects 3D input (samples, steps, channels), so append a trailing
# channel axis of size 1: (rows, columns) -> (rows, columns, 1).
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

In [None]:
# Verify that both arrays now carry the trailing channel dimension.
x_train.shape, x_test.shape

**Build CNN**

In [None]:
epochs = 20

# Baseline 1D CNN: two convolutional stages followed by a dense classifier
# head that ends in a single sigmoid unit for binary (fraud / non-fraud) output.
model = Sequential(
    [
        # Stage 1: 32 filters, kernel size 2; input is one sample of x_train.
        Conv1D(32, 2, activation="relu", input_shape=x_train.shape[1:]),
        BatchNormalization(),
        Dropout(0.2),
        # Stage 2: 64 filters, kernel size 2, heavier dropout.
        Conv1D(64, 2, activation="relu"),
        BatchNormalization(),
        Dropout(0.5),
        # Stage 3: flatten the feature maps and feed a fully connected layer.
        Flatten(),
        Dense(64, activation="relu"),
        Dropout(0.5),
        # Stage 4: sigmoid output unit.
        Dense(1, activation="sigmoid"),
    ]
)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

In [None]:
# Compile for binary classification: binary cross-entropy loss, accuracy
# metric, and Adam with a small step size.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and is rejected by newer Keras releases.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the model. The held-out test split doubles as the validation set,
# so the val_* curves are evaluated on it after every epoch.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    verbose=1,
)

In [None]:
# 1-based epoch numbers, used as the x-axis of the training curves below.
epoch_range = range(1, epochs + 1)

In [None]:
# Training vs. validation accuracy per epoch
# (history keys "accuracy" and "val_accuracy").
for curve in ("accuracy", "val_accuracy"):
    plt.plot(epoch_range, history.history[curve])
plt.title("Model Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(["Train", "Val"], loc="upper left")
plt.show()

In [None]:
# Training vs. validation loss per epoch
# (history keys "loss" and "val_loss").
for curve in ("loss", "val_loss"):
    plt.plot(epoch_range, history.history[curve])
plt.title("Model Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Train", "Val"], loc="upper left")
plt.show()

#### The model above reaches a higher training accuracy than validation accuracy, which indicates overfitting. Next we add MaxPool layers and increase the number of epochs so that the two accuracies end up closer together.

**Adding Maxpool and Increasing Epochs**

In [None]:
epochs = 50

# Same architecture as before, but each convolutional stage is followed by
# a MaxPool1D(2) layer, and training runs for more epochs.
model = Sequential(
    [
        # Stage 1: 32 filters, kernel size 2, then pooling.
        Conv1D(32, 2, activation="relu", input_shape=x_train.shape[1:]),
        BatchNormalization(),
        MaxPool1D(2),
        Dropout(0.2),
        # Stage 2: 64 filters, kernel size 2, then pooling.
        Conv1D(64, 2, activation="relu"),
        BatchNormalization(),
        MaxPool1D(2),
        Dropout(0.5),
        # Stage 3: flatten the feature maps and feed a fully connected layer.
        Flatten(),
        Dense(64, activation="relu"),
        Dropout(0.5),
        # Stage 4: sigmoid output unit.
        Dense(1, activation="sigmoid"),
    ]
)

In [None]:
# Compile the pooled model with the same settings: binary cross-entropy
# loss, accuracy metric, and Adam with a small step size.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and is rejected by newer Keras releases.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the pooled model. The held-out test split is again used as the
# validation set evaluated after every epoch.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    verbose=1,
)

In [None]:
# 1-based epoch numbers for the longer run, used as the x-axis below.
epoch_range = range(1, epochs + 1)

In [None]:
# Training vs. validation accuracy per epoch for the pooled model
# (history keys "accuracy" and "val_accuracy").
for curve in ("accuracy", "val_accuracy"):
    plt.plot(epoch_range, history.history[curve])
plt.title("Model Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(["Train", "Val"], loc="upper left")
plt.show()

In [None]:
# Training vs. validation loss per epoch for the pooled model
# (history keys "loss" and "val_loss").
for curve in ("loss", "val_loss"):
    plt.plot(epoch_range, history.history[curve])
plt.title("Model Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Train", "Val"], loc="upper left")
plt.show()