## Credit Card Fraud Detection using TensorFlow
The dataset contains anonymized credit card transactions, each labeled as fraudulent or genuine.<br>
A Keras (TensorFlow) neural network is trained on this dataset to classify transactions:<br>
Link: https://www.kaggle.com/mlg-ulb/creditcardfraud

In [139]:
# Master switch for diagnostic output in this notebook.
debug = True
# Verbosity level handed to Keras calls: 1 while debugging, otherwise 0.
_verbose = 1 if debug else 0


def printd(input):
    """Print ``input`` only while the module-level ``debug`` flag is truthy.

    When ``debug`` is falsy the call does nothing. Always returns ``None``.
    """
    if not debug:
        return
    print(input)

In [140]:
# Deep-learning stack: TensorFlow and the Keras API bundled with it.
import tensorflow as tf
from tensorflow import keras

# numpy is used for the random split mask and for inspecting predictions.
# NOTE(review): matplotlib is imported but not used in the cells visible here.
import numpy as np
import matplotlib.pyplot as plt

# Record the TensorFlow version the notebook was executed with. The model cell
# uses tf.train.AdamOptimizer, which is a TF 1.x-style API.
print(tf.__version__)

1.9.0


In [141]:
import pandas as pd 
# Read the comma-separated transaction file from a path relative to the
# notebook's working directory into the DataFrame `data`.
data = pd.read_csv('inputs/creditcard.csv', sep=',')

In [142]:
# Sanity-check the load: (rows, columns) followed by the first five rows.
# Output appears only while the `debug` flag is on (see printd).
printd(data.shape)
printd(data.head())

(284807, 31)
   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...         V21       V22       V23       V24  \
0  0.098698  0.363787  ...   -0.018307  0.277838 -0.110474  0.066928   
1  0.085102 -0.255425  ...   -0.225775 -0.638672  0.101288 -0.339846   
2  0.247676 -1.514654  ...    0.247998  0.771679  0.909412 -0.689281   
3  0.377436 -1.387024  ...   -0.108300  0.005274 -0.190321 -1.175575   
4 -0.270533  0.817739  ...   -0.009431  0.798278 -0.137458  0.141267   

        V25       V26       V27       V28  Amount  Class  
0  0.128

In [143]:
# Fraction of rows assigned to the training set; the remainder is the test set.
split = 0.8

# Fixed seed so that the shuffle and the train/test partition are identical on
# every fresh "Restart & Run All". Change SEED to draw a different split.
SEED = 42
rng = np.random.RandomState(SEED)

# Boolean row mask: True -> training row, False -> test row. Each row is drawn
# independently, so the training share is only approximately `split`.
msk = rng.rand(len(data)) < split

# Shuffle the entire data set (applies to both train & test)
data = data.sample(frac=1, random_state=rng).reset_index(drop=True)

# Column masks (rather than data['Class']) keep the labels as a one-column
# DataFrame instead of a Series.
train_labels = data.loc[msk, data.columns =='Class']
train_data  = data.loc[msk, data.columns !='Class']

test_labels = data.loc[~msk, data.columns =='Class']
test_data  = data.loc[~msk, data.columns !='Class']

# The two subsets must partition the data set exactly.
assert len(train_data) + len(test_data) == len(data)
assert len(train_labels) == len(train_data) and len(test_labels) == len(test_data)

printd(test_data.head(1))
printd(test_labels.head(1))
printd(train_data.head(1))
printd(train_labels.head(1))

printd(msk[0:5])
printd(train_data.shape)
printd(test_data.shape)
printd(test_data.shape[0]+train_data.shape[0])

       Time        V1        V2        V3        V4        V5        V6  \
3  147373.0  2.102372 -1.197421 -0.632337 -0.932112 -1.143515 -0.349231   

        V7        V8       V9   ...         V20       V21       V22       V23  \
3 -1.15769  0.085457  0.11723   ...   -0.055009  0.346282  0.936178  0.072403   

        V24       V25       V26       V27       V28  Amount  
3 -0.406423 -0.185114 -0.080764 -0.007377 -0.061077   34.99  

[1 rows x 30 columns]
   Class
3      0
      Time        V1       V2        V3        V4        V5        V6  \
0  76676.0  1.262706  0.25327  0.512494  0.619236 -0.478039 -0.991197   

         V7        V8        V9   ...         V20       V21       V22  \
0  0.059961 -0.204712  0.046427   ...   -0.095786 -0.258461 -0.745136   

        V23       V24       V25       V26       V27      V28  Amount  
0  0.137002  0.374501  0.218209  0.095001 -0.028896  0.01877    0.69  

[1 rows x 30 columns]
   Class
0      0
[ True  True  True False  True]
(228257, 30)

In [144]:
# Standardise every feature column: subtract the column mean and divide by the
# column standard deviation. Both statistics come from the training rows only
# and are then reused unchanged for the test rows.
mean = train_data.mean(axis='index')
std = train_data.std(axis='index')

# (printd(mean) / printd(std) can be enabled here to inspect the statistics.)

train_data = train_data.sub(mean, axis='columns').div(std, axis='columns')
test_data = test_data.sub(mean, axis='columns').div(std, axis='columns')

# Show the first scaled row of each subset (only when debug output is on).
for frame in (train_data, test_data):
    printd(frame.head(1))

       Time        V1        V2        V3        V4        V5       V6  \
0 -0.380496  0.647223  0.153251  0.338101  0.437869 -0.347107 -0.74156   

         V7        V8        V9    ...          V20      V21       V22  \
0  0.047963 -0.169279  0.042692    ...    -0.126433 -0.34791 -1.024272   

        V23       V24       V25       V26       V27       V28    Amount  
0  0.220635  0.618038  0.419251  0.196289 -0.072088  0.057351 -0.346073  

[1 rows x 30 columns]
       Time        V1        V2        V3        V4        V5        V6  \
3  1.108026  1.076935 -0.723627 -0.420055 -0.657492 -0.828461 -0.260247   

         V7        V8        V9   ...          V20       V21       V22  \
3 -0.936188  0.071321  0.107276   ...    -0.073396  0.466349  1.289885   

        V23       V24       V25       V26       V27       V28   Amount  
3  0.116524 -0.671827 -0.354752 -0.168053 -0.018537 -0.186097 -0.21079  

[1 rows x 30 columns]


In [145]:
def build_model():
    """Create and compile the two-class transaction classifier.

    Architecture: one hidden ``Dense`` layer with 60 ReLU units whose input
    width equals the number of columns of the module-level ``train_data``
    frame, followed by a 2-unit softmax output layer.

    The model is compiled with ``tf.train.AdamOptimizer`` (default settings),
    sparse categorical cross-entropy loss, and accuracy as the only metric.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    n_features = train_data.shape[1]

    hidden = keras.layers.Dense(60, activation=tf.nn.relu,
                                input_shape=(n_features,))
    # Optional second hidden layer, disabled:
    # keras.layers.Dense(60, activation=tf.nn.relu)
    output = keras.layers.Dense(2, activation=tf.nn.softmax)

    model = keras.Sequential([hidden, output])
    model.compile(
        optimizer=tf.train.AdamOptimizer(),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [146]:
# Build a fresh, compiled model and print its layer/parameter summary.
model = build_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_34 (Dense)             (None, 60)                1860      
_________________________________________________________________
dense_35 (Dense)             (None, 2)                 122       
Total params: 1,982
Trainable params: 1,982
Non-trainable params: 0
_________________________________________________________________


In [147]:
# Halt training once the validation loss has gone 2 epochs without improving.
early_stop = keras.callbacks.EarlyStopping(patience=2, monitor='val_loss')

# epochs=500 is only an upper bound; early stopping normally ends the run far
# sooner. 20% of the training rows are held out for validation.
history = model.fit(
    train_data,
    train_labels,
    epochs=500,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=_verbose,
)

Train on 182605 samples, validate on 45652 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500


In [148]:
# model.evaluate returns the loss followed by the compiled metrics. The model
# is compiled with metrics=['accuracy'], so the second value is accuracy, not
# a mean absolute error.
[loss, accuracy] = model.evaluate(test_data, test_labels, verbose=_verbose)

print("Accuracy: {:f}".format(accuracy))
print("Loss: {:f}".format(loss))
# NOTE(review): if fraudulent rows are rare in this data set (presumably they
# are - confirm), accuracy alone says little; consider also reporting
# precision/recall for the fraud class.


# Per-row class probabilities: column 0 = class 0, column 1 = class 1.
test_predictions = model.predict(test_data)

# Positional row indices of the test rows whose label is 1.
frauds = np.where(test_labels[:]==1)[0]
print(frauds[0:5])
# Compare the true labels of the first five such rows with the rounded
# softmax outputs for the same rows.
print(test_labels.values[frauds[0:5]])
print(np.around(test_predictions[frauds[0:5]]))

Mean Abs Error: 0.999346
Loss: 0.003919
[ 693 1777 2748 2750 2761]
[[1]
 [1]
 [1]
 [1]
 [1]]
[[0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]]


In [149]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#@title MIT License
#
# Copyright (c) 2017 François Chollet
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.