In [1]:
import numpy as np

import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default='notebook'

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from model import Sigmoid, Linear, SequentialNN
from utils import binary_cross_entropy

In [2]:
# Read the raw fraud-detection table and discard the 'Unnamed: 0' column in a
# single expression (presumably a row index written out with the CSV rather
# than a feature -- verify against the data source).
df = (
    pd.read_csv(r"./data/fraud_detection_bank_dataset.csv")
    .drop(columns=['Unnamed: 0'])
)

In [3]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,col_0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,...,col_103,col_104,col_105,col_106,col_107,col_108,col_109,col_110,col_111,targets
0,9,1354,0,18,0,1,7,9,0,0,...,0,0,0,1,1,0,0,0,49,1
1,0,239,0,1,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,55,1
2,0,260,0,4,0,3,6,0,0,0,...,0,0,0,1,1,0,0,0,56,1
3,17,682,0,1,0,0,8,17,0,0,...,0,1,0,1,1,0,0,0,65,1
4,1,540,0,2,0,1,7,1,0,0,...,0,0,0,1,1,0,0,0,175,1


In [4]:
# Every column except the last is a feature; the last column is the label.
X = df.iloc[:, :-1].values
# Keep the labels as an explicit column vector of shape (n_samples, 1).
y = df.iloc[:, -1].values.reshape(-1, 1)

In [5]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Report the shape of each piece, one per line.
for label, split in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{label} shape: {split.shape}")

X_train shape: (16374, 112)
X_test shape: (4094, 112)
y_train shape: (16374, 1)
y_test shape: (4094, 1)


In [6]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test-set information leaks in.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [7]:
# Peek at the first five training labels (y_train is a column vector here).
y_train[:5, :]

array([[0],
       [1],
       [1],
       [0],
       [0]], dtype=int64)

In [8]:
# Switch every split to a column-per-sample layout: (n_features, n_samples)
# for the inputs and (1, n_samples) for the labels.
# NOTE: this cell rebinds the same names, so running it a second time flips the
# arrays back to their original orientation.
X_train, X_test = X_train.T, X_test.T
y_train, y_test = y_train.T, y_test.T

In [9]:
# Confirm the transposed layout: expected (n_features, n_samples).
np.shape(X_train)

(112, 16374)

In [10]:
# Seed NumPy's global RNG before anything is built so runs are repeatable
# (presumably this is what the layers' weight initialisers draw from -- confirm
# in model.py).
np.random.seed(100)

# Training hyperparameters, consumed later by model.compile.
learning_rate = 1
number_of_epochs = 10000

# Empty network container; layers are appended in the following cells.
model = SequentialNN()

In [11]:
# Hidden layer 1: linear map from the input features to 5 units (Xavier
# initialisation), followed by a sigmoid sized from the linear layer's `z`.
Z1 = Linear(input_shape=X_train.shape, n_out=5, method='xavier')
# Show the weight and bias shapes as a quick sanity check.
print(*(Z1.parameters[key].shape for key in ("w", "b")))
S1 = Sigmoid(Z1.z.shape)
# Register the pair with the network in forward order.
model.add(Z1)
model.add(S1)

(5, 112) (5, 1)


In [12]:
# Hidden layer 2: 3 units fed by the first sigmoid's output (input shape is
# taken from S1.sig), using the 'norm_xavier' initialisation.
Z2 = Linear(input_shape=S1.sig.shape, n_out=3, method='norm_xavier')
S2 = Sigmoid(Z2.z.shape)
# Register the pair with the network in forward order.
model.add(Z2)
model.add(S2)

In [13]:
# Output layer: a single unit fed by the second sigmoid's output (input shape
# is taken from S2.sig), passed through a final sigmoid.
Z3 = Linear(input_shape=S2.sig.shape, n_out=1, method='norm_xavier')
S3 = Sigmoid(Z3.z.shape)
# Register the pair with the network in forward order.
model.add(Z3)
model.add(S3)

In [14]:
# Attach the cost function and the training schedule to the network.
model.compile(
    cost_func=binary_cross_entropy,
    epochs=number_of_epochs,
    learning_rate=learning_rate,
)

In [15]:
# Train the network on the transposed training split. Per the recorded output,
# the cost is reported every 100 epochs.
model.fit(X_train, y_train)

epoch 0: cost = 1.1351353195619127
epoch 100: cost = 0.28166114526606906
epoch 200: cost = 0.2521446244351183
epoch 300: cost = 0.23852494128799703
epoch 400: cost = 0.22919050724304516
epoch 500: cost = 0.22262486130171227
epoch 600: cost = 0.21824354827535875
epoch 700: cost = 0.21466186769841428
epoch 800: cost = 0.21153067279987042
epoch 900: cost = 0.20831073135334927
epoch 1000: cost = 0.20539570944739058
epoch 1100: cost = 0.20239172698130525
epoch 1200: cost = 0.19937545837357298
epoch 1300: cost = 0.19642975857560455
epoch 1400: cost = 0.1936198840693295
epoch 1500: cost = 0.191311119984988
epoch 1600: cost = 0.1891420581160831
epoch 1700: cost = 0.18755400153330049
epoch 1800: cost = 0.18624709373939194
epoch 1900: cost = 0.18511275507358924
epoch 2000: cost = 0.18409962469050853
epoch 2100: cost = 0.1831662676397242
epoch 2200: cost = 0.18228202509712893
epoch 2300: cost = 0.1814339101193856
epoch 2400: cost = 0.1806183136326572
epoch 2500: cost = 0.179832767496695
epoch 260

In [23]:
# Evaluate on the held-out split: percentage of test samples whose predicted
# label equals the true label.
# NOTE(review): the equality check assumes model.predict returns hard 0/1
# labels rather than probabilities -- confirm against model.py.
p = model.predict(X_test).flatten()
# Rebinding y_test to its 1-D form is kept on purpose: later cells may rely on it.
y_test = y_test.flatten()

# Fail loudly on a prediction/label length mismatch. Without this guard, older
# NumPy versions collapse `p == y_test` to a single False and the cell would
# silently report 0% accuracy.
assert p.shape == y_test.shape, (
    f"prediction shape {p.shape} does not match label shape {y_test.shape}"
)

# Mean of the boolean match mask is the fraction correct; scale to percent.
accuracy = np.mean(p == y_test) * 100
accuracy

91.40205178309722