In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
credit_card_data = pd.read_csv('creditcard.csv')

In [3]:
credit_card_data.head(3)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0


In [4]:
shuffled_data = credit_card_data.sample(frac=1)

In [5]:
one_hot_data = pd.get_dummies(shuffled_data,columns = ['Class'])

In [6]:
one_hot_data.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V22,V23,V24,V25,V26,V27,V28,Amount,Class_0,Class_1
count,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.859575,1.176458e-15,3.3967e-16,-1.399545e-15,2.094553e-15,1.00686e-15,1.495995e-15,-5.619836e-16,1.150612e-16,-2.411694e-15,...,-3.50772e-16,2.628794e-16,4.473191e-15,5.146818e-16,1.6861e-15,-3.663771e-16,-1.221652e-16,88.349619,0.998273,0.001727
std,47488.145955,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0,0.0
25%,54201.5,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,1.0,0.0
50%,84692.0,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,1.0,0.0
75%,139320.5,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,1.0,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0,1.0


In [7]:
normalized_data = (one_hot_data-one_hot_data.min())/ (one_hot_data.max()) 

In [8]:
df_X = normalized_data.drop(['Class_0','Class_1'],axis =1)
df_y= normalized_data[['Class_0','Class_1']]

In [9]:
ar_X, ar_y = np.asarray(df_X.values, dtype = float), np.asarray(df_y.values,dtype = float)
train_size = int(0.8*len(ar_X))
raw_X_train, raw_y_train = (ar_X[:train_size],ar_y[:train_size])
raw_X_test,raw_y_test = (ar_X[train_size:],ar_y[train_size:])

In [10]:
count_legit, count_fraud = np.unique(credit_card_data['Class'], return_counts = True)[1]

In [11]:
count_fraud

492

In [12]:
fraud_ratio = count_fraud / (count_legit+count_fraud)

In [13]:
fraud_ratio

0.001727485630620034

In [14]:
weighting = 1/fraud_ratio

In [15]:
raw_y_train[:,1] = raw_y_train[:,1]*weighting #logic_weighting

In [16]:
input_dimensions = ar_X.shape[1]

In [17]:
input_dimensions

30

In [18]:
output_dimensions = ar_y.shape[1]

In [19]:
num_layer_1_cells = 100
num_layer_2_cells = 150

In [20]:
X_train_node = tf.placeholder(tf.float64,[None, input_dimensions],name='X_train')
y_train_node = tf.placeholder(tf.float64,[None,output_dimensions], name='y_train')

In [21]:
X_test_node = tf.constant(raw_X_test,name='X_test')
y_test_node = tf.constant(raw_y_test, name='y_test')

In [22]:
weight_1_node = tf.Variable(tf.zeros([input_dimensions,num_layer_1_cells],dtype=tf.float64),name = 'weight_1')
biases_1_node = tf.Variable(tf.zeros([num_layer_1_cells],dtype=tf.float64),name = 'biases_1')

In [23]:
weight_2_node = tf.Variable(tf.zeros([num_layer_1_cells,num_layer_2_cells],dtype=tf.float64),name = 'weight_2')
bias_2_node = tf.Variable(tf.zeros([num_layer_2_cells],dtype=tf.float64),name='biases_2')

In [24]:
weight_3_node = tf.Variable(tf.zeros([num_layer_2_cells,output_dimensions],dtype=tf.float64),name='weight_3')
bias_3_node = tf.Variable(tf.zeros([output_dimensions],dtype=tf.float64),name = 'biases_3')

In [25]:
def network(input_tensor):
    layer_1 = tf.nn.sigmoid(tf.matmul(input_tensor,weight_1_node) + biases_1_node)
    layer_2 = tf.nn.dropout(tf.nn.sigmoid(tf.matmul(layer_1, weight_2_node)+bias_2_node), 0.85)
    layer_3 = tf.nn.softmax(tf.matmul(layer_2,weight_3_node)+ bias_3_node)
    return layer_3

In [26]:
y_train_prediction = network(X_train_node)
y_test_prediction = network(X_test_node)

In [27]:
cross_entropy = tf.losses.softmax_cross_entropy(y_train_node, y_train_prediction)

In [28]:
optimizer = tf.train.AdamOptimizer(0.005).minimize(cross_entropy)

In [36]:
def calculate_accuracy(actual,predicted):
    actual = np.argmax(actual,1)
    predicted = np.argmax(predicted,1)
    return (100*np.sum(np.equal(predicted,actual))/ predicted.shape[0])

In [40]:
num_epochs = 100

import time

with tf.Session() as session:
    tf.global_variables_initializer().run()
    for epoch in range(num_epochs):
        
        start_time = time.time()
        _, cross_entropy_score = session.run([optimizer,cross_entropy],
                                            feed_dict = {X_train_node: raw_X_train, y_train_node:raw_y_train})
        if epoch % 10 == 0:
            timer = time.time() - start_time
            print("epoch: {}".format(epoch),'Current Loss: {0:.4f}'.format(cross_entropy_score),'Elapsed Time: {0:.2f} seconds'.format(timer))
            
            final_y_test = y_test_node.eval()
            final_y_test_prediction = y_test_prediction.eval()
            final_accuracy = calculate_accuracy(final_y_test, final_y_test_prediction)
            print("current accuracy: {:.2f}%".format(final_accuracy))
    final_y_test = y_test_node.eval()
    final_y_test_prediction = y_test_prediction.eval()
    final_accuracy = calculate_accuracy(final_y_test, final_y_test_prediction)
    print("Final accuracy: {:.2f}%".format(final_accuracy))

final_fraud_y_test = final_y_test[final_y_test[:, 1]==1]
final_fraud_y_test_prediction = final_y_test_prediction[final_y_test[:, 1]==1]
final_fraud_accuracy = calculate_accuracy(final_fraud_y_test, final_fraud_y_test_prediction)
print('Final fraud spcific accuracy: {0:.2f}%'.format(final_fraud_accuracy))
        

epoch: 0 Current Loss: 1.3840 Elapsed Time: 0.79 seconds
current accuracy: 0.22%
epoch: 10 Current Loss: 1.3720 Elapsed Time: 0.67 seconds
current accuracy: 83.45%
epoch: 20 Current Loss: 1.2374 Elapsed Time: 0.67 seconds
current accuracy: 95.61%
epoch: 30 Current Loss: 1.0084 Elapsed Time: 0.67 seconds
current accuracy: 99.83%
epoch: 40 Current Loss: 0.9061 Elapsed Time: 0.67 seconds
current accuracy: 99.87%
epoch: 50 Current Loss: 0.8641 Elapsed Time: 0.67 seconds
current accuracy: 99.91%
epoch: 60 Current Loss: 0.8465 Elapsed Time: 0.67 seconds
current accuracy: 99.93%
epoch: 70 Current Loss: 0.8378 Elapsed Time: 0.67 seconds
current accuracy: 99.91%
epoch: 80 Current Loss: 0.8287 Elapsed Time: 0.67 seconds
current accuracy: 99.89%
epoch: 90 Current Loss: 0.8169 Elapsed Time: 0.67 seconds
current accuracy: 99.88%
Final accuracy: 99.82%
Final fraud spcific accuracy: 84.85%
