In [1]:
# Import required libraries
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

# Import user-defined libraries
import dataUtils
import Model
import Interpretation


In [2]:
# Load data: prepare the (initially empty) results table, read the train/test
# CSV files, and run the project's preprocessing on them.
outputColumn = [
    'DatasetNameTrain',
    'DatasetNameTest',
    'Accuracy',
    'Cost',
    'ModelCreateRuntime',
    'PositiveDesc',
    'PositiveRuntime',
    'NegativeDesc',
    'NegativeRuntime',
]
output = pd.DataFrame(columns=outputColumn)

# load_data returns four objects; the two *_for_feature ones are used later by
# the interpretation cell to look up rows for the explanations.
(
    datasetTrain,
    x_train_for_feature,
    datasetTest,
    x_test_for_feature,
) = dataUtils.load_data('base_desenv_02.csv', 'base_validacao_02.csv')

# Model-ready inputs and labels for both splits.
(X_train, y_train, X_test, y_test) = dataUtils.preprocessing(datasetTrain, datasetTest)

In [3]:
# Build the three networks (base classifier, discriminator, generator) and
# compile each with its own Adam optimizer.
# NOTE(review): `input` shadows the Python builtin of the same name; it is
# kept here because other cells may read this variable.
input = X_train.shape[1]  # number of columns in the preprocessed training matrix

# Construction order is kept as base -> discriminator -> generator.
base, mid_base = Model.create_base_model(input)
discriminator = Model.create_discriminator_model(input)
generator = Model.create_generator(input, input)

# Both binary classifiers share the same loss/metric and learning rate;
# each gets a separate optimizer instance.
for binary_classifier in (base, discriminator):
    binary_classifier.compile(
        loss='binary_crossentropy',
        optimizer=Adam(0.01),
        metrics=['acc'],
    )

# The generator is trained with the project's custom loss and a smaller learning rate.
generator.compile(loss=Model.my_loss, optimizer=Adam(0.001))

In [4]:
# Train models, or restore previously saved ones from disk.
#   model_save: forwarded to Model.train as its last argument (the recorded run
#               with model_save=True wrote generator.ml / discriminator.ml / base.ml).
#   model_load: when True, skip training and load the three saved models instead.
# The wall-clock time of whichever branch runs is captured in
# start_time_fitModel / end_time_fitModel for the results table.
model_save=True
model_load=False
start_time_fitModel = time.time()
if model_load:
    # Uses load_model imported in the import cell; the previous spelling
    # `keras.models.load_model` raised NameError because no name `keras`
    # was ever imported in this notebook.
    # The generator was compiled with the custom loss Model.my_loss, so it is
    # handed to the loader via custom_objects.
    # NOTE(review): assumes the loss is serialized under the name 'my_loss' — confirm.
    generator = load_model('generator.ml', custom_objects={'my_loss': Model.my_loss})
    discriminator = load_model('discriminator.ml')
    base = load_model('base.ml')
    # NOTE(review): `mid_base` was created together with the *original* `base`
    # in the model-building cell and is not rebuilt here, so after loading it
    # presumably no longer reflects the restored weights — verify before
    # relying on the interpretation cell in load mode.
else:
    # Pre-train the base classifier; v_loss holds the object returned by fit().
    v_loss = base.fit(X_train, y_train, batch_size = 64,validation_data=(X_test, y_test), epochs=12)
    # Joint training of generator / discriminator / base.
    # NOTE(review): positional 64 and 100 are presumably batch size and number
    # of training iterations — confirm against Model.train.
    generator,discriminator,base=Model.train(X_train,y_train, 64, generator,discriminator,base,100,model_save)
end_time_fitModel = time.time()

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch: 0, d_loss (Acc)): 0.0625, g_loss: 7.7714
Epoch: 1, d_loss (Acc)): 1.0, g_loss: 9.7043
All models are saved!!!
INFO:tensorflow:Assets written to: generator.ml\assets
INFO:tensorflow:Assets written to: discriminator.ml\assets
INFO:tensorflow:Assets written to: base.ml\assets
End of training!!!


In [5]:
# Validation: threshold the base model's test predictions at 0.5, then derive
# the confusion matrix, accuracy and a weighted cost from them.
y_pred = base.predict(X_test)
y_pred = (y_pred > 0.5)
# labels=[0, 1] pins the matrix to 2x2. Without it, a test set and prediction
# that contain only one class produce a 1x1 matrix and the cm[0][1] / cm[1][0] /
# cm[1][1] lookups below raise IndexError.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print(cm)
# Rows are true classes, columns are predicted classes, so the diagonal holds
# the correctly classified samples.
acc=(cm[0][0]+cm[1][1])/len(y_pred)
# Weighted cost: 1 per true negative, 100 per false positive (cm[0][1]),
# 10 per false negative (cm[1][0]).
# NOTE(review): cm[0][0] counts *correct* negatives, so a perfect classifier
# still reports a non-zero cost (the recorded run prints 12076 with zero
# errors) — confirm this term is intended.
cost=cm[0][0]+100*cm[0][1]+10*cm[1][0]
print("Cost is: ",cost)

[[12076     0]
 [    0   467]]
Cost is:  12076


In [6]:
# Interpretation: cluster the base model's hidden-layer outputs per class on
# the training set, then produce a textual explanation for one random positive
# and one random negative test instance, and record everything in `output`.

# Thresholded training-set predictions. They are no longer used to build the
# masks below (see next comment); the variable is kept in case other cells
# read it.
y_med = base.predict(X_train)
y_med=(y_med > 0.5).reshape(-1)

# One boolean mask per class, derived from the true training labels and shared
# by BOTH the raw rows and the hidden-layer features. Previously the raw rows
# were selected with the predicted labels (y_med) while the features were
# selected with y_train, so the two arrays handed to create_clusters disagreed
# in row count/alignment whenever the model misclassified a training sample.
# NOTE(review): true labels were chosen so that the clustered feature sets are
# unchanged; if clusters are meant to follow predictions, use y_med for both.
train_labels = np.asarray(y_train).reshape(-1)
is_fraud = train_labels == 1
is_normal = train_labels == 0

X_Fraud = x_train_for_feature[is_fraud]
X_Normal = x_train_for_feature[is_normal]
output_lastLayer=mid_base(X_train).numpy()
X_fraud_feature = output_lastLayer[is_fraud]
X_normal_feature = output_lastLayer[is_normal]

# NOTE(review): 30 is presumably the number of clusters per class — confirm
# against Interpretation.create_clusters.
Cluster_Fraud = Interpretation.create_clusters(30,X_fraud_feature, X_Fraud)
Cluster_Normal = Interpretation.create_clusters(30,X_normal_feature, X_Normal)

# Test-set predictions, hidden-layer outputs and rounded generator outputs
# that describeInstance consumes.
y_pred = base.predict(X_test)
y_pred = (y_pred > 0.5)
output_layer_new = mid_base(X_test).numpy()
participating_features = generator(X_test)
participating_features = np.round(participating_features)

# Pick one random test index of each true class and explain it.
_1_y_test_idx = Interpretation.getRandIndex(y_test,1)
_0_y_test_idx = Interpretation.getRandIndex(y_test,0)
PositiveDescValue , PositiveRuntimeValue = Interpretation.describeInstance(_1_y_test_idx,y_pred,y_test,output_layer_new,participating_features,Cluster_Fraud,Cluster_Normal,x_test_for_feature)
NegativeDescValue , NegativeRuntimeValue = Interpretation.describeInstance(_0_y_test_idx,y_pred,y_test,output_layer_new,participating_features,Cluster_Fraud,Cluster_Normal,x_test_for_feature)

new_row = {'DatasetNameTrain':'base_desenv_02.csv','DatasetNameTest':'base_validacao_02.csv','Accuracy':acc, 'Cost':cost, 'ModelCreateRuntime':(end_time_fitModel - start_time_fitModel) ,'PositiveDesc':PositiveDescValue,'PositiveRuntime':PositiveRuntimeValue , 'NegativeDesc':NegativeDescValue,'NegativeRuntime':NegativeRuntimeValue}
# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# is the supported equivalent and still yields a fresh 0..n-1 index.
output = pd.concat([output, pd.DataFrame([new_row])], ignore_index=True)

In [7]:
# Print the stored explanations from the first results row, each preceded by
# its banner line.
for banner, column in (
    ('_________________________________________PositiveDesc_________________\n', 'PositiveDesc'),
    ('_________________________________________NegativeDesc_________________\n', 'NegativeDesc'),
):
    print(banner)
    print(output.loc[0, column])

_________________________________________PositiveDesc_________________

The model classified this transaction as positive.
While its true class was positive.
The model has made this decision, because the original patern:
[6 0 2 0 9 1 9 6 5 3 0 1 0 1 2 3 0]
Is mostly similar to the following: 
[6 9 2 2 8 9 9 6 5 3 2 1 0 1 5 4 0]
Which is a dominant pattern of the class positive.
On the other hand, these two pattern have 10 similar points as follow:
6 == 6
9 != 0
2 == 2
2 != 0
8 != 9
9 != 1
9 == 9
6 == 6
5 == 5
3 == 3
2 != 0
1 == 1
0 == 0
1 == 1
5 != 2
4 != 3
0 == 0
the most participating features are as follow: [1. 1. 1. 0. 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 1.
 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 1. 1. 1. 0.
 0. 1. 0. 0. 1. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
 0. 0. 1. 0. 0.]
_________________________________________NegativeDesc_________________

T