In [1]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

2023-03-21 19:22:20.380107: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Read the credit_card_transactions.csv file from the Resources folder into a Pandas DataFrame
cc_transactions_df = pd.read_csv(
    Path("credit_card_transactions.csv")
)

# Review the DataFrame
cc_transactions_df.head()

Unnamed: 0,dateMonth,dateWeekday,dateQuarter,operationType,amount,oldbalanceOrig,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud
0,1,1,1,4,736.76,1120.0,383.24,0.0,0.0,0
1,1,1,1,2,256226.2,0.0,0.0,518041.64,1305828.01,0
2,1,1,1,5,310027.49,61594.0,0.0,10089.0,1157398.83,0
3,1,1,1,1,76695.35,9064440.33,9141135.68,880045.33,206147.08,0
4,1,1,1,4,4761.57,0.0,0.0,0.0,0.0,0


In [19]:
# we know the this is a binary problem because y (isfraud col) is either 0 or 1
cc_transactions_df["isFraud"].value_counts()
#cc_transactions_df["isFraud"].unique()

0    99884
1      116
Name: isFraud, dtype: int64

In [3]:
# Define the target set by selecting the isFraud column
y = cc_transactions_df["isFraud"]

# Display a sample of y
y[:5]

0    0
1    0
2    0
3    0
4    0
Name: isFraud, dtype: int64

In [5]:
# Define features set X by selecting all columns but the isFraud
X = cc_transactions_df.drop(columns=["isFraud"])

# Display the features DataFrame
X

Unnamed: 0,dateMonth,dateWeekday,dateQuarter,operationType,amount,oldbalanceOrig,newbalanceOrig,oldbalanceDest,newbalanceDest
0,1,1,1,4,736.76,1120.00,383.24,0.00,0.00
1,1,1,1,2,256226.20,0.00,0.00,518041.64,1305828.01
2,1,1,1,5,310027.49,61594.00,0.00,10089.00,1157398.83
3,1,1,1,1,76695.35,9064440.33,9141135.68,880045.33,206147.08
4,1,1,1,4,4761.57,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...
99995,12,1,4,1,109534.19,18292168.76,18401702.96,403737.01,294202.82
99996,12,1,4,1,103920.74,5486901.65,5590822.39,1267095.24,2239899.53
99997,12,1,4,5,1298677.95,0.00,0.00,280579.46,1579257.41
99998,12,1,4,1,82450.63,4135263.42,4217714.05,2289239.02,2436585.65


In [7]:
# Create training and testing datasets using train_teat_split
# Assign the function a random_state equal to 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [8]:
# Create the StandardScaler instance
X_scaler = StandardScaler()

In [9]:
# Fit the scaler to the features training dataset
X_scaler.fit(X_train)

StandardScaler()

In [10]:
# Scale both the training and testing data from the features dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
# Create and Evaluate a Deep Neural Network Model
# Step 1: Create a deep neural network model with the following structure:
# * 9 inputs
# * First hidden layer with 18 neurons
# * Second hidden layer with 8 neurons
# * Output layer with a single output
# * Hidden layers use the ReLU activation function, and output layer uses the sigmoid activation function

In [13]:
# Define the the number of inputs to the model
number_inputs = 9

# Define the number of hidden nodes for the first hidden layer
hidden_nodes_layer1 = 18

# Define the number of hidden nodes for the second hidden layer
hidden_nodes_layer2 = 8

# Create the Sequential model instance
nn = Sequential()

# Add the first Dense layer specifying the number of inputs, the number of hidden nodes, and the activation function
nn.add(Dense(units=18, input_dim=9, activation="relu"))
#units - number of hidden nodes in layer 1
#input dim is the number of inputs or number of columns is X df

# Add the second Dense layer specifying the number of hidden nodes and the activation function
nn.add(Dense(units=8, activation="relu"))
#units - number of hidden nodes in layer 2

# Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=1, activation="sigmoid"))
#units is number of output units
#here we use sigmoid because its a 0 and 1 problem and we want prob between the two numbers

In [14]:
# Display the Sequential model summary
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 18)                180       
                                                                 
 dense_4 (Dense)             (None, 8)                 152       
                                                                 
 dense_5 (Dense)             (None, 1)                 9         
                                                                 
Total params: 341
Trainable params: 341
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Compile the Sequential model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
#since its a 0 and 1 classification problem, we will look at loss as binary_crossentropy and the metric we want is accuracy

In [16]:
# Fit the model using 100 epochs and the training data
model = nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [17]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)

# Display the evaluation results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

782/782 - 1s - loss: 0.0056 - accuracy: 0.9989 - 599ms/epoch - 766us/step
Loss: 0.005637311842292547, Accuracy: 0.9988800287246704
