# Import Dependencies

In [1]:
# load dependencies
import pickle 
import pandas as pd 
from tensorflow.keras.models import load_model

2023-07-20 01:13:51.283531: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Load Dataset

In [2]:
# read CSV dataset
data = pd.read_csv('data/CreditCardFraudsTest.csv')
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,30448.0,-0.317055,1.187052,-0.331484,0.831914,0.092808,-0.516894,0.904622,0.250776,-1.095719,...,0.266655,0.71418,0.20815,0.025838,-0.608524,-0.415467,0.285716,0.16301,89.99,0
1,27392.0,-0.304378,0.734368,1.295802,2.462909,0.51043,0.156927,0.546101,-0.209375,-0.232114,...,0.040153,0.477963,0.097424,-0.114353,-1.019841,-0.076944,-0.16506,0.05123,7.56,0
2,30386.0,-2.336843,-2.243744,2.029424,-2.898203,0.856754,-0.742993,-0.763299,0.568315,1.752367,...,0.522294,0.941205,0.240249,-0.290065,0.663043,-0.733687,0.02507,0.096691,144.98,0
3,26966.0,1.214522,0.007067,-0.330466,0.890385,0.669037,1.120938,-0.105341,0.283448,0.266645,...,-0.163848,-0.334281,-0.291694,-1.710519,0.831878,-0.222015,0.02588,-0.010782,23.15,0
4,26409.0,1.181833,0.160211,0.634221,0.924468,-0.399348,-0.253618,-0.224235,0.095715,0.043724,...,-0.166071,-0.559428,0.062116,-0.055607,0.291259,-0.618746,0.027904,0.020755,9.99,0


In [3]:
# drop actual results from the dataset
data_test = data.drop('Class', axis = 1)
data_test.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,30448.0,-0.317055,1.187052,-0.331484,0.831914,0.092808,-0.516894,0.904622,0.250776,-1.095719,...,0.202401,0.266655,0.71418,0.20815,0.025838,-0.608524,-0.415467,0.285716,0.16301,89.99
1,27392.0,-0.304378,0.734368,1.295802,2.462909,0.51043,0.156927,0.546101,-0.209375,-0.232114,...,-0.119363,0.040153,0.477963,0.097424,-0.114353,-1.019841,-0.076944,-0.16506,0.05123,7.56
2,30386.0,-2.336843,-2.243744,2.029424,-2.898203,0.856754,-0.742993,-0.763299,0.568315,1.752367,...,0.542567,0.522294,0.941205,0.240249,-0.290065,0.663043,-0.733687,0.02507,0.096691,144.98
3,26966.0,1.214522,0.007067,-0.330466,0.890385,0.669037,1.120938,-0.105341,0.283448,0.266645,...,-0.14332,-0.163848,-0.334281,-0.291694,-1.710519,0.831878,-0.222015,0.02588,-0.010782,23.15
4,26409.0,1.181833,0.160211,0.634221,0.924468,-0.399348,-0.253618,-0.224235,0.095715,0.043724,...,-0.137687,-0.166071,-0.559428,0.062116,-0.055607,0.291259,-0.618746,0.027904,0.020755,9.99


# Scaling Data

In [4]:
# load standard scaler from pickle
with open('data/StandardScaler.pkl','rb') as f:
    scaler = pickle.load(f)

In [5]:
# apply standard scaling on the data
data_test = scaler.fit_transform(data_test)

# Convert Data to 3-D for Neural Network

In [6]:
# reshape data to 3-D for prediction
data_test = data_test.reshape(data_test.shape[0], data_test.shape[1], 1)
data_test.shape

(5092, 30, 1)

# Load Model

In [7]:
# load model 
model = load_model('data/fraud_detection_model.h5')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 29, 32)            96        
                                                                 
 batch_normalization (BatchN  (None, 29, 32)           128       
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 29, 32)            0         
                                                                 
 conv1d_1 (Conv1D)           (None, 28, 64)            4160      
                                                                 
 batch_normalization_1 (Batc  (None, 28, 64)           256       
 hNormalization)                                                 
                                                                 
 dropout_1 (Dropout)         (None, 28, 64)            0

# Make Predictions

In [8]:
# make predictions
predictions = model.predict(data_test)
# convert predictions to binary
predictions = predictions.astype(int)



In [9]:
# check model performance
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
print('Classification Report:\n')
print(classification_report(data['Class'], predictions), '\n')
print('\nConfusion Matrix:\n')
print(confusion_matrix(data['Class'], predictions))
print('\nAccuracy:\n')
accuracy_score(data['Class'], predictions)

Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.85      0.92      5000
           1       0.11      1.00      0.19        92

    accuracy                           0.85      5092
   macro avg       0.55      0.92      0.55      5092
weighted avg       0.98      0.85      0.90      5092
 


Confusion Matrix:

[[4229  771]
 [   0   92]]

Accuracy:



0.848586017282011

# Save Predictions in CSV 

In [10]:
# create predictions column in original dataframe
data['Class'] = predictions

# check first few rows 
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,30448.0,-0.317055,1.187052,-0.331484,0.831914,0.092808,-0.516894,0.904622,0.250776,-1.095719,...,0.266655,0.71418,0.20815,0.025838,-0.608524,-0.415467,0.285716,0.16301,89.99,0
1,27392.0,-0.304378,0.734368,1.295802,2.462909,0.51043,0.156927,0.546101,-0.209375,-0.232114,...,0.040153,0.477963,0.097424,-0.114353,-1.019841,-0.076944,-0.16506,0.05123,7.56,1
2,30386.0,-2.336843,-2.243744,2.029424,-2.898203,0.856754,-0.742993,-0.763299,0.568315,1.752367,...,0.522294,0.941205,0.240249,-0.290065,0.663043,-0.733687,0.02507,0.096691,144.98,0
3,26966.0,1.214522,0.007067,-0.330466,0.890385,0.669037,1.120938,-0.105341,0.283448,0.266645,...,-0.163848,-0.334281,-0.291694,-1.710519,0.831878,-0.222015,0.02588,-0.010782,23.15,0
4,26409.0,1.181833,0.160211,0.634221,0.924468,-0.399348,-0.253618,-0.224235,0.095715,0.043724,...,-0.166071,-0.559428,0.062116,-0.055607,0.291259,-0.618746,0.027904,0.020755,9.99,0


In [12]:
# convert data to dict
data_to_dict = data.to_dict(orient = 'records')

In [13]:
# json filepath
jsonFilePath = 'path-to/fraud_detection_predictions.json'

# store data in JSON
from csv_jsonl import JSONLinesDictWriter
with open(jsonFilePath, "w", encoding="utf-8") as _fh:
    writer = JSONLinesDictWriter(_fh)
    writer.writerows(data_to_dict)