In [1]:
import pandas as pd
import numpy as np 
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.gridspec as gridspec
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
# from show_confusion_matrix import show_confusion_matrix 
# the above is from http://notmatthancock.github.io/2015/10/28/confusion-matrix.html

In [2]:
data = pd.read_csv("creditcard.csv")

In [4]:
data['Normal']=1-data['Class']
data['Amount_max_fraud'] = 1
data.loc[data.Amount <= 2125.87, 'Amount_max_fraud'] = 0
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V23,V24,V25,V26,V27,V28,Amount,Class,Normal,Amount_max_fraud
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0,1,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0,1,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0,1,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0,1,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0,1,0


In [5]:
train,test=train_test_split(data,test_size=0.2,random_state=0,stratify=data['Class'])# stratify the Class

In [6]:
count_train = pd.value_counts(train['Class'], sort = True).sort_index()
count_test = pd.value_counts(test['Class'], sort = True).sort_index()
print (count_train) 
'\n'  
print(count_test)

0    227451
1       394
Name: Class, dtype: int64
0    56864
1       98
Name: Class, dtype: int64


In [7]:
X_train = train.drop(['Class', 'Normal'], axis = 1)
X_test = test.drop(['Class', 'Normal'], axis = 1)

In [8]:
Y_train = train.loc[:, ['Class','Normal']]
Y_test = test.loc[:, ['Class','Normal']]

In [9]:
print(np.shape(X_train))
print(np.shape(Y_train))
print(np.shape(X_test))
print(np.shape(Y_test))

(227845, 31)
(227845, 2)
(56962, 31)
(56962, 2)


In [10]:
#Names of all of the features in X_train.
features = X_train.columns.values

for feature in features:
    mean, std = data[feature].mean(), data[feature].std()
    X_train.loc[:, feature] = (X_train[feature] - mean) / std
    X_test.loc[:, feature] = (X_test[feature] - mean) / std

In [11]:
X_train.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Amount_max_fraud
36001,-1.188904,0.532981,0.192911,0.689733,1.981815,-0.406531,-0.276187,0.026462,-0.035444,-0.293705,...,-0.326886,-0.937458,0.136643,1.130711,0.611229,-0.425034,0.004117,0.114801,-0.154644,-0.046062
12844,-1.521619,-0.850136,0.489575,1.190847,1.344345,-0.595275,0.701651,-0.666725,0.817087,1.590586,...,-0.456529,-0.704138,0.057391,0.243649,-1.015501,-1.175692,-1.476587,-0.666758,-0.285501,-0.046062
2873,-1.945388,-0.165465,0.36446,0.570702,-1.510027,0.213485,-0.939413,0.866639,-0.280399,0.975093,...,0.016637,0.486228,-0.54688,-0.240719,0.180699,-1.667318,0.568409,-0.065507,-0.349231,-0.046062
145263,-0.169324,-0.131858,0.737295,-0.386049,-0.61824,0.885698,-0.233456,0.86805,-0.135143,0.18265,...,-0.578096,-1.076417,0.030932,0.294914,-0.605466,0.200456,0.668282,-0.062513,-0.310129,-0.046062
186658,0.682026,1.093668,-0.299755,-1.277167,-0.577941,-0.018267,-0.771048,-0.122567,-0.255996,-0.791423,...,0.013771,0.029932,0.127251,-0.794025,0.045745,-0.578722,-0.074626,-0.132961,-0.193466,-0.046062


In [12]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
# fix random seed for reproducibility
np.random.seed(7)

Using TensorFlow backend.


In [13]:
model = Sequential()
model.add(Dense(64, input_dim=31, activation='relu'))
model.add(Dropout(0.9))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.9))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.9))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.9))
model.add(Dense(2, activation='softmax'))

In [15]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [44]:
epoch = 6
batch_size = 2048

model.fit(X_train, Y_train, epochs=epoch, batch_size=batch_size)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [29]:
score, acc = model.evaluate(X_test, Y_test)
print('Test score:', score)
print('Test accuracy:', acc)

Test score: 0.0264474947947
Test accuracy: 0.998279554791


In [30]:
from sklearn.metrics import confusion_matrix,classification_report

In [37]:
predictions = model.predict_classes(X_test)

In [39]:
#predictions.head()

In [42]:
#print(classification_report(Y_test[:,['Class']], predictions))