In [1]:
import pickle
import tensorflow as tf
import numpy as np
import math
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

import src.network as network

  from ._conv import register_converters as _register_converters


In [2]:
with open('./dumps/dump.pickle', 'rb') as handle:
    dumped = pickle.load(handle)
vocab = dumped["vocab"]
X_val, Y_val = dumped["X_val"], dumped["Y_val"]

In [3]:
current_model_name = "model-0.0001-0.0001-1.0-64-32-128"

splitted = current_model_name.split("-")
embedding_size = int(splitted[4])
hidden_cells = int(splitted[6])

# Reset TF graph
tf.reset_default_graph()

# Placeholders
tensor_X = tf.placeholder(tf.int32, (None, dumped["X_train"].shape[1]), 'inputs')
tensor_Y = tf.placeholder(tf.int32, (None, None), 'outputs')
keep_prob = tf.placeholder(tf.float32, (None), 'dropout_keep')

logits, mask, sequence_length = network.create_network(tensor_X, 
                                                       tensor_Y, 
                                                       keep_prob, 
                                                       vocab, 
                                                       embedding_size, 
                                                       hidden_cells, 
                                                       None)

scores = tf.nn.softmax(logits)
predictions = tf.to_int32(tf.argmax(scores, axis=2))
accuracy = tf.contrib.metrics.accuracy(predictions, tensor_Y, weights=mask)

In [4]:
with tf.Session() as sess:
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    saver.restore(sess, "./checkpoints/{}.ckpt".format(current_model_name)) 
    

    pred_scores, lengths, pred = sess.run([scores, sequence_length, predictions], feed_dict={
                                                                                tensor_X: X_val,
                                                                                tensor_Y: Y_val,
                                                                                keep_prob: 1.0  })

final_pred, final_true = np.array([]), np.array([])
for i in range(len(Y_val)):
    final_pred = np.append(final_pred, pred[i][:lengths[i]])
    final_true = np.append(final_true, Y_val[i][:lengths[i]])
    
print()
print(round(accuracy_score(final_true, final_pred), 3))
print(round(precision_score(final_true, final_pred), 3))
print(round(recall_score(final_true, final_pred), 3))
print(round(roc_auc_score(final_true, final_pred), 3))

INFO:tensorflow:Restoring parameters from ./checkpoints/model-0.0001-0.0001-1.0-64-32-128.ckpt

0.695
0.398
0.354
0.584


In [7]:
# BOOTSTRAPING
nSamples = 1000
accuracy_set, precision_set, recall_set, auc_set = [], [], [], []

with tf.Session() as sess:
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    saver.restore(sess, "./checkpoints/{}.ckpt".format(current_model_name)) 
    
    for i in range(nSamples):
        
        pos_indices = np.random.choice(len(X_val), len(X_val), replace=True)
        X_val_selected = X_val[pos_indices]
        Y_val_selected = Y_val[pos_indices]
        
        pred_scores, lengths, pred = sess.run([scores, sequence_length, predictions], feed_dict={
                                                                            tensor_X: X_val_selected,
                                                                            tensor_Y: Y_val_selected,
                                                                            keep_prob: 1.0  })
                
        final_pred, final_true = np.array([]), np.array([])
        for i in range(len(Y_val_selected)):
            final_pred = np.append(final_pred, pred[i][:lengths[i]])
            final_true = np.append(final_true, Y_val_selected[i][:lengths[i]])

        accuracy_set.append(accuracy_score(final_true, final_pred))
        precision_set.append(precision_score(final_true, final_pred))
        recall_set.append(recall_score(final_true, final_pred))
        auc_set.append(roc_auc_score(final_true, final_pred))


accuracy_mean = np.mean(np.array(accuracy_set))
precision_mean = np.mean(np.array(precision_set))
recall_mean = np.mean(np.array(recall_set))
auc_mean = np.mean(np.array(auc_set))

totAcc, totPre, totRec, totAUC = 0, 0, 0, 0
for i in range(len(accuracy_set)):
    totAcc += ((accuracy_set[i]-accuracy_mean)**2)
    totPre += ((precision_set[i]-precision_mean)**2)
    totRec += ((recall_set[i]-recall_mean)**2)
    totAUC += ((auc_set[i]-auc_mean)**2)

print("\nAccuracy :" + str(accuracy_mean) + " +- " + str(math.sqrt(totAcc/(nSamples))/math.sqrt((nSamples - 1))))
print("Precision :" + str(precision_mean) + " +- " + str(math.sqrt(totPre/(nSamples))/math.sqrt((nSamples - 1))))
print("Recall :" + str(recall_mean) + " +- " + str(math.sqrt(totRec/(nSamples))/math.sqrt((nSamples - 1))))
print("AUC :" + str(auc_mean) + " +- " + str(math.sqrt(totAUC/(nSamples))/math.sqrt((nSamples - 1))))

INFO:tensorflow:Restoring parameters from ./checkpoints/model-0.0001-0.0001-1.0-64-32-128.ckpt

Accuracy :0.6941333846042748 +- 0.0004966151316338885
Precision :0.4000419311266215 +- 0.0009087462057440703
Recall :0.3547208608826038 +- 0.0011087808929558251
AUC :0.583809876677336 +- 0.0004098295728779245
