In [1]:
import pandas as pd
import numpy as np
from keras import layers
from keras.layers import Input, Dense, Activation
from keras.models import Sequential
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
import keras.backend as K
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
dataset = pd.read_csv("creditcard.csv", header = 0)

In [3]:
Vfeatures = dataset.iloc[:,1:29].columns

In [4]:
model_features = dataset.iloc[:,1:30].columns

In [5]:
print(model_features)

Index(['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11',
       'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21',
       'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount'],
      dtype='object')


In [6]:
dataset["Amount"] = (dataset["Amount"] - dataset["Amount"].min()) / (dataset["Amount"].max() - dataset["Amount"].min())

In [7]:
dataset["Amount"].head(n=20)

0     0.005824
1     0.000105
2     0.014739
3     0.004807
4     0.002724
5     0.000143
6     0.000194
7     0.001588
8     0.003628
9     0.000143
10    0.000304
11    0.000389
12    0.004729
13    0.001070
14    0.002289
15    0.000622
16    0.000506
17    0.000035
18    0.001822
19    0.000195
Name: Amount, dtype: float64

In [8]:
dataset = dataset.sample(frac=1).reset_index(drop=True)
split = np.random.rand(len(dataset)) < 0.95
dataset_train = dataset[split]
dataset_test = dataset[~split]

In [9]:
train_x = dataset_train.as_matrix(columns = model_features)
train_y = dataset_train["Class"]
test_x = dataset_test.as_matrix(columns = model_features)
test_y = dataset_test["Class"]

In [10]:
print(dataset["Amount"].sum())
print(train_y.mean()*100)
print(test_y.mean()*100)

979.4259975023741
0.17261964167563032
0.17519271198318148


In [11]:
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

(270537, 29)
(270537,)
(14270, 29)
(14270,)


In [17]:
train_x = train_x.T
train_y = np.reshape(train_y, (1,len(dataset_train)))
test_x = test_x.T
test_y = np.reshape(test_y, (1,len(dataset_test)))

print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

(270537, 29)
(1, 270537)
(14270, 29)
(1, 14270)


  return getattr(obj, method)(*args, **kwds)


In [20]:
train_x = train_x.T
test_x = test_x.T

In [21]:
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

(270537, 29)
(270537, 1)
(14270, 29)
(14270, 1)


In [22]:
model = Sequential()
model.add(Dense(14, activation="relu", input_shape=(29,)))
model.add(Dense(7, activation="relu"))
model.add(Dense(1, activation="sigmoid"))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [23]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 14)                420       
_________________________________________________________________
dense_5 (Dense)              (None, 7)                 105       
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 8         
Total params: 533
Trainable params: 533
Non-trainable params: 0
_________________________________________________________________


In [24]:
model.fit(x=train_x, y=train_y, epochs = 1)

Epoch 1/1


<keras.callbacks.History at 0x5998d05c50>

In [29]:
score = model.evaluate(x = test_x, y = test_y)
print()
print ("Loss = " + str(score[0]))
print ("Test Accuracy = " + str(score[1]))

Loss = 0.00321691018017
Test Accuracy = 0.999369306237


In [30]:
type(split)

numpy.ndarray

In [33]:
test_frauds = pd.DataFrame({'Fraud':test_y[:,0]})

In [34]:
test_frauds.head()

Unnamed: 0,Fraud
0,0
1,0
2,0
3,0
4,0


In [36]:
idx = test_frauds.index[test_frauds['Fraud'] == 1]

In [45]:
test_x_frauds = test_x[idx]
test_y_frauds = test_y[idx]

In [46]:
score_frauds = model.evaluate(x = test_x_frauds, y = test_y_frauds)
print()
print ("Loss = " + str(score[0]))
print ("Test Accuracy = " + str(score[1]))


Loss = 0.00321691018017
Test Accuracy = 0.999369306237


In [49]:
idx2 = test_frauds.index[test_frauds['Fraud'] == 0]
idx2

Int64Index([    0,     1,     2,     3,     4,     5,     6,     7,     8,
                9,
            ...
            14260, 14261, 14262, 14263, 14264, 14265, 14266, 14267, 14268,
            14269],
           dtype='int64', length=14245)

In [50]:
test_x_notfrauds = test_x[idx2]
test_y_notfrauds = test_y[idx2]

In [51]:
score_frauds = model.evaluate(x = test_x_notfrauds, y = test_y_notfrauds)
print()
print ("Loss = " + str(score[0]))
print ("Test Accuracy = " + str(score[1]))

Loss = 0.00321691018017
Test Accuracy = 0.999369306237


1.0014466853498198