## Credit Card Fraud Detection using TensorFlow
Anonymized credit card transactions labeled as fraudulent or genuine<br>
Link: https://www.kaggle.com/mlg-ulb/creditcardfraud

In [None]:
debug = True


def printd(input):
    """Print ``input`` only while the module-level ``debug`` flag is truthy."""
    if not debug:
        return
    print(input)

In [67]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import matplotlib.pyplot as plt

# Show which TensorFlow version the kernel picked up for this run.
print(tf.__version__)

1.9.0


In [104]:
import pandas as pd 
# Read the transactions CSV; a comma is already read_csv's default separator.
data = pd.read_csv('inputs/creditcard.csv')

(284807, 31)


In [134]:
# Sanity-check the loaded frame: its dimensions first, then the leading rows.
for summary in (data.shape, data.head()):
    printd(summary)

(284807, 31)
   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...         V21       V22       V23       V24  \
0  0.098698  0.363787  ...   -0.018307  0.277838 -0.110474  0.066928   
1  0.085102 -0.255425  ...   -0.225775 -0.638672  0.101288 -0.339846   
2  0.247676 -1.514654  ...    0.247998  0.771679  0.909412 -0.689281   
3  0.377436 -1.387024  ...   -0.108300  0.005274 -0.190321 -1.175575   
4 -0.270533  0.817739  ...   -0.009431  0.798278 -0.137458  0.141267   

        V25       V26       V27       V28  Amount  Class  
0  0.128

In [163]:
# Fraction of rows assigned to the training split.
split = 0.8

# Fixed seed so the shuffle and the train/test mask are identical on every
# Restart & Run All; without it each run trains and evaluates on different rows.
SEED = 42
rng = np.random.RandomState(SEED)

# Boolean mask, one entry per row: True -> train, False -> test.
msk = rng.rand(len(data)) < split

# Shuffle the entire data set (applies to both train & test)
data = data.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Column selectors: the 'Class' column is the label, everything else is a feature.
is_label = data.columns == 'Class'

train_labels = data.loc[msk, is_label]
train_data = data.loc[msk, ~is_label]

test_labels = data.loc[~msk, is_label]
test_data = data.loc[~msk, ~is_label]

# The two splits must partition the data set exactly.
assert len(train_data) + len(test_data) == len(data)
assert len(train_labels) == len(train_data)
assert len(test_labels) == len(test_data)

printd(test_data.head(1))
printd(test_labels.head(1))
printd(train_data.head(1))
printd(train_labels.head(1))

printd(msk[0:5])
printd(train_data.shape)
printd(test_data.shape)
printd(test_data.shape[0]+train_data.shape[0])

       Time        V1        V2        V3        V4        V5        V6  \
15  60636.0 -0.882404  0.718426  0.620632 -1.588153 -0.130549 -0.139939   

          V7        V8       V9   ...        V20       V21       V22     V23  \
15  0.228585  0.476849  0.36932   ...    0.03754 -0.194147 -0.524842 -0.1263   

         V24       V25       V26       V27       V28  Amount  
15 -0.861773 -0.141352  0.763137 -0.002341 -0.204782    40.0  

[1 rows x 30 columns]
    Class
15      0
      Time        V1        V2        V3        V4        V5       V6  \
0  17666.0  0.844047 -1.388519  1.716528 -0.665041 -2.061028  0.27159   

         V7        V8        V9   ...         V20       V21       V22  \
0 -1.330486  0.256855  4.133627   ...    0.164628  0.083829  0.769868   

       V23       V24       V25       V26       V27       V28  Amount  
0 -0.28961  0.606923  0.461643  0.072626  0.052173  0.041751  154.19  

[1 rows x 30 columns]
   Class
0      0
[ True  True  True  True  True]
(227893, 3

In [168]:
# Per-column statistics are taken from the training split only and then applied
# to both splits, so the test rows never influence the scaling.
mean, std = train_data.mean(axis=0), train_data.std(axis=0)

train_data, test_data = [(frame - mean) / std
                         for frame in (train_data, test_data)]

# Show the first standardised row of each split.
for frame in (train_data, test_data):
    printd(frame.head(1))

       Time        V1        V2        V3       V4        V5        V6  \
0 -1.622871  0.431068 -0.839931  1.136544 -0.46998 -1.506897  0.205155   

         V7        V8        V9    ...          V20       V21      V22  \
0 -1.095893  0.215119  3.766789    ...     0.215974  0.115893  1.06321   

        V23       V24       V25       V26       V27       V28    Amount  
0 -0.460617  1.000676  0.887967  0.149242  0.130989  0.128675  0.268037  

[1 rows x 30 columns]
        Time        V1        V2        V3        V4        V5       V6  \
15 -0.718516 -0.452667  0.437227  0.409463 -1.123281 -0.095639 -0.10478   

         V7        V8        V9    ...          V20     V21       V22  \
15  0.18768  0.400569  0.335393    ...     0.049714 -0.2675 -0.724246   

         V23       V24       V25       V26       V27       V28    Amount  
15 -0.201209 -1.422116 -0.268948  1.580045 -0.005139 -0.630277 -0.196879  

[1 rows x 30 columns]


In [169]:
def build_model(input_dim=None):
  """Build and compile the fully connected fraud classifier.

  Args:
    input_dim: Number of input features. When omitted, the column count of
      the notebook-level ``train_data`` frame is used, which is what the
      zero-argument call has always done.

  Returns:
    A compiled ``keras.Sequential`` model with two 64-unit ReLU layers and a
    single sigmoid output unit.
  """
  if input_dim is None:
    input_dim = train_data.shape[1]

  model = keras.Sequential([
    keras.layers.Dense(64, activation=tf.nn.relu,
                       input_shape=(input_dim,)),
    keras.layers.Dense(64, activation=tf.nn.relu),
    # The target is the 0/1 'Class' label, so the output is squashed to a
    # probability instead of being left as an unbounded regression value.
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
  ])

  optimizer = tf.train.RMSPropOptimizer(0.001)

  # Cross-entropy is the matching loss for a sigmoid output on binary labels.
  # 'mae' stays as the tracked metric so the history keys and the
  # [loss, mae] unpacking of model.evaluate() are unchanged for callers.
  model.compile(loss='binary_crossentropy',
                optimizer=optimizer,
                metrics=['mae'])
  return model

In [170]:
def plot_history(history, ylim=None):
  """Plot training and validation mean absolute error against the epoch.

  Args:
    history: Object returned by ``model.fit``. Its ``epoch`` list and the
      ``'mean_absolute_error'`` / ``'val_mean_absolute_error'`` entries of
      its ``history`` dict are read.
    ylim: Optional ``[low, high]`` limits for the y-axis. With the default
      ``None`` matplotlib autoscales, so small errors are not flattened
      against a fixed range.
  """
  plt.figure()
  plt.xlabel('Epoch')
  # The target is a class label, not a dollar amount, so no currency unit.
  plt.ylabel('Mean Abs Error')
  plt.plot(history.epoch, np.array(history.history['mean_absolute_error']),
           label='Train MAE')
  plt.plot(history.epoch, np.array(history.history['val_mean_absolute_error']),
           label='Val MAE')
  plt.legend()
  if ylim is not None:
    plt.ylim(ylim)

In [None]:
# Upper bound on training epochs for both runs below.
EPOCHS = 500


class PrintDot(keras.callbacks.Callback):
  """Progress indicator: one dot per finished epoch, a line break every 100."""

  def on_epoch_end(self, epoch, logs=None):
    if epoch % 100 == 0:
      print('')
    print('.', end='')


# Run 1: train for the full EPOCHS budget with no early stopping.
model = build_model()
model.summary()

history = model.fit(train_data, train_labels, epochs=EPOCHS,
                    validation_split=0.2, verbose=0)

plot_history(history)

# Run 2: start again from freshly initialised weights. Reusing the model from
# run 1 would only continue training an already-fitted network, so early
# stopping would have nothing meaningful to monitor.
model = build_model()

# Stop once val_loss has not improved for 20 consecutive epochs.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

history = model.fit(train_data, train_labels, epochs=EPOCHS,
                    validation_split=0.2, verbose=0,
                    callbacks=[early_stop, PrintDot()])

plot_history(history)

# Evaluate the early-stopped model on the held-out split.
[loss, mae] = model.evaluate(test_data, test_labels, verbose=0)

# The labels are 0/1 classes, so the error is reported as-is rather than
# scaled by 1000 and formatted as dollars.
print("Testing set Mean Abs Error: {:7.4f}".format(mae))

test_predictions = model.predict(test_data).flatten()

print(test_predictions)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                1984      
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 6,209
Trainable params: 6,209
Non-trainable params: 0
_________________________________________________________________


In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#@title MIT License
#
# Copyright (c) 2017 François Chollet
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

In [66]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
