In [133]:
import pandas as pd

In [134]:
import numpy as np

In [135]:
credit_card_data = pd.read_csv('creditcard.csv') 

In [136]:
credit_card_data.shape

(284807, 31)

In [137]:
credit_card_data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [138]:
credit_card_data.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')

In [139]:
shuffled_data = credit_card_data.sample(frac=1)

In [140]:
one_hot_data = pd.get_dummies(shuffled_data, columns=['Class'])

In [142]:
normalized_data = (one_hot_data - one_hot_data.min()) / (one_hot_data.max()- one_hot_data.min())

In [143]:
df_X = normalized_data.drop(['Class_0', 'Class_1'], axis=1)
df_y = normalized_data[['Class_0', 'Class_1']]

In [144]:
ar_X, ar_y = np.asarray(df_X.values, dtype='float32'), np.asarray(df_y.values, dtype='float32')

In [145]:
train_size = int(0.8*len(ar_X))

In [146]:
raw_X_train, raw_y_train = ar_X[:train_size], ar_y[:train_size]

In [147]:
raw_X_test, raw_y_test = ar_X[train_size:], ar_y[train_size:]

In [148]:
count_legit, count_fraud = np.unique(credit_card_data['Class'], return_counts=True)[1]

In [149]:
np.unique(credit_card_data['Class'], return_counts=True)

(array([0, 1], dtype=int64), array([284315,    492], dtype=int64))

In [175]:
fraud_ratio = float(count_fraud / (count_legit + count_fraud))

In [176]:
fraud_ratio

0.001727485630620034

In [152]:
weighting = 1 / fraud_ratio

In [155]:
raw_y_train[:, 1] = raw_y_train[:, 1] * weighting

In [174]:
raw_y_train[:, 1].sum()

224025.06

In [156]:
import tensorflow as tf

In [157]:
input_dimensions = ar_X.shape[1]
output_dimensions = ar_y.shape[1]

In [158]:
num_layer_1_cells = 100
num_layer_2_cells = 150

In [159]:
X_train_node = tf.placeholder(tf.float32, [None, input_dimensions], name='X_train')

In [160]:
y_train_node = tf.placeholder(tf.float32, [None, output_dimensions], name='y_train')

In [161]:
X_test_node = tf.constant(raw_X_test, name = 'X_test')
y_test_node = tf.constant(raw_y_test, name = 'y_test')

In [162]:
weight_1_node = tf.Variable(tf.zeros([input_dimensions, num_layer_1_cells]), name = 'weight_1')
biases_1_node = tf.Variable(tf.zeros([num_layer_1_cells]), name= 'biases_1')

In [163]:
weight_2_node = tf.Variable(tf.zeros([num_layer_1_cells, num_layer_2_cells]), name = 'weight_2')
biases_2_node = tf.Variable(tf.zeros([num_layer_2_cells]), name= 'biases_2')

In [164]:
weight_3_node = tf.Variable(tf.zeros([num_layer_2_cells, output_dimensions]), name = 'weight_3')
biases_3_node = tf.Variable(tf.zeros([output_dimensions]), name= 'biases_3')

In [165]:
def network(input_tensor):
    layer1 = tf.nn.sigmoid(tf.matmul(input_tensor, weight_1_node) + biases_1_node)
    layer2 = tf.nn.dropout(tf.nn.sigmoid(tf.matmul(layer1, weight_2_node) + biases_2_node), 0.85)
    layer3 = tf.nn.softmax(tf.matmul(layer2, weight_3_node) + biases_3_node)
    return layer3

In [166]:
y_train_prediction = network(X_train_node)

In [167]:
y_test_prediction = network(X_test_node)

In [168]:
cross_entropy = tf.losses.softmax_cross_entropy(y_train_node, y_train_prediction)

In [169]:
optimizer = tf.train.AdamOptimizer(0.005).minimize(cross_entropy)

In [170]:
def accuracy(actual, predicted):
    actual = np.argmax(actual, 1)
    predicted = np.argmax(predicted, 1)
    return (100 * np.sum(np.equal(predicted, actual)) / predicted.shape[0])

In [171]:
num_epochs = 100

In [172]:
import time

In [173]:
with tf.Session() as session:
    tf.global_variables_initializer().run()
    for epoch in range(num_epochs):
        start_time = time.time()
        _ , cross_entropy_score = session.run([optimizer, cross_entropy], feed_dict = {X_train_node: raw_X_train, 
                                                                                      y_train_node: raw_y_train})
        
        if epoch % 10 ==0:
            timer = time.time() - start_time
            print('Epoch: {}'.format(epoch), 'Current loss:{0:.4f}'.format(cross_entropy_score),
                 'Elapsed time: {0:.2f} seconds'.format(timer))
            
            final_y_test = y_test_node.eval()
            final_y_test_prediction = y_test_prediction.eval()
            final_accuracy = accuracy(final_y_test, final_y_test_prediction)
            print('Current accuracy: {0:.2f}%'.format(final_accuracy))

Epoch: 0 Current loss:1.3735 Elapsed time: 0.21 seconds
Current accuracy: 99.82%
Epoch: 10 Current loss:1.3725 Elapsed time: 0.15 seconds
Current accuracy: 1.79%
Epoch: 20 Current loss:1.3388 Elapsed time: 0.15 seconds
Current accuracy: 65.12%
Epoch: 30 Current loss:1.1882 Elapsed time: 0.15 seconds
Current accuracy: 97.30%
Epoch: 40 Current loss:1.0002 Elapsed time: 0.15 seconds
Current accuracy: 98.28%
Epoch: 50 Current loss:0.8884 Elapsed time: 0.15 seconds
Current accuracy: 99.27%
Epoch: 60 Current loss:0.8361 Elapsed time: 0.15 seconds
Current accuracy: 99.78%
Epoch: 70 Current loss:0.8150 Elapsed time: 0.15 seconds
Current accuracy: 99.82%
Epoch: 80 Current loss:0.7965 Elapsed time: 0.15 seconds
Current accuracy: 99.77%
Epoch: 90 Current loss:0.7905 Elapsed time: 0.15 seconds
Current accuracy: 99.83%
