In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp_api
import kerastuner
import numpy as np
import pandas as pd
import os
import json
import datetime
import dill

In [2]:
from tensorflow.keras.layers import (
    Dense, 
    Dropout,
    LSTMCell,
    RNN
)

In [3]:
# Turn on memory growth for every detected GPU.
# Looping over the device list (instead of indexing element 0) means a CPU-only
# machine simply skips the call rather than failing with an IndexError, and a
# multi-GPU machine gets the same setting on all devices.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [4]:
# Identifier of the exported model version; a later cell loads the SavedModel
# from the directory "version/<timestamp>".
timestamp = "20200918-103631"
timestamp

'20200918-103631'

In [5]:
# Name label for the dataset; no other cell in the visible part of this notebook reads it.
dataset_name = "SEG_AR"

In [6]:
# Fixed settings shared by the cells below. Only PAST_HISTORY (input window
# length) and FUTURE_TARGET (target length) are read in the visible cells.
static_params = dict(
    PAST_HISTORY=16,
    FUTURE_TARGET=8,
    BATCH_SIZE=512,
    BUFFER_SIZE=200000,
    EPOCHS=500,
    VOCAB_SIZE=16293,
)

In [7]:
# Restore the preprocessing pipeline object that was serialized with dill.
# NOTE(review): dill.load, like pickle, can execute arbitrary code while
# deserializing — only load "static/test_pipeline.pkl" from a trusted source.
with open("static/test_pipeline.pkl", "rb") as p:
    test_pipeline = dill.load(p)

In [8]:
# Read the raw test sequence as int64 values separated by newlines, then run it
# through the loaded pipeline. transform() receives a copy, not `test_set` itself.
test_set = np.genfromtxt("data/SEG_test_set_original.csv", delimiter="\n", dtype=np.int64)
processed_test_set = test_pipeline.transform(test_set.copy())
# Disabled alternative that built x/y via generate_timeseries (not defined in the visible cells):
#x_test, y_test = generate_timeseries(processed_test_set, 0, None, static_params["PAST_HISTORY"], static_params["FUTURE_TARGET"])

In [9]:
# Number of complete PAST_HISTORY-long windows in the processed data, and the
# number of samples those windows cover together.
history_len = static_params["PAST_HISTORY"]
test_set_size, leftover = divmod(len(processed_test_set), history_len)
chunk_size = len(processed_test_set) - leftover   # equals test_set_size * history_len
test_set_size, chunk_size

(2223, 35568)

In [12]:
# Independent copy of the leading samples that form whole windows.
processed_test_set_chunk = processed_test_set[:chunk_size].copy()

In [13]:
# One row per non-overlapping window of PAST_HISTORY samples; the final window
# is dropped because no following window exists to supply its target.
all_windows = processed_test_set_chunk.reshape(-1, static_params["PAST_HISTORY"])
x_test = all_windows[:-1]

In [14]:
# Sanity check: (number of input windows, PAST_HISTORY).
x_test.shape

(2222, 16)

In [15]:
# The target for input window i is the first FUTURE_TARGET samples of window i+1,
# so the slices start at PAST_HISTORY, 2*PAST_HISTORY, ... up to the last window.
past_len = static_params["PAST_HISTORY"]
future_len = static_params["FUTURE_TARGET"]
target_starts = range(past_len, past_len * test_set_size, past_len)
y_test = np.array([
    processed_test_set_chunk[start:start + future_len]
    for start in target_starts
])

In [16]:
# Inspect the target values paired with the first input window.
y_test[0]

array([  20,   32,   32, 4584,  210,  189,  306,  331])

In [17]:
# Inputs and targets should have the same number of rows (one target per window).
x_test.shape, y_test.shape

((2222, 16), (2222, 8))

In [197]:
# Load the exported SavedModel for the version selected by `timestamp`.
saved_model_dir = "version/" + timestamp
model = tf.saved_model.load(saved_model_dir)

In [198]:
# List the signatures exposed by the loaded SavedModel.
model.signatures.items()

ItemsView(_SignatureMap({'serving_default': <ConcreteFunction signature_wrapper(call) at 0x1F0244C2C08>}))

In [199]:
# Callable used for every prediction below, looked up under the "serving_default" key.
inference = model.signatures["serving_default"]

In [200]:
# Smoke test on the first 16 windows; the reshape appends a trailing axis of size 1.
sample_inputs = tf.constant(x_test[:16].reshape(16, -1, 1))
result = inference(sample_inputs)
result["output_0"].shape

TensorShape([16, 8, 16293])

In [201]:
# Reduce the last axis of the model output to the index of its largest entry.
sample_output = result["output_0"]
result_argmax = tf.argmax(sample_output, axis=-1).numpy()
result_argmax

array([[  32,   32,   32, 4584,  210,  189,  306,  331],
       [5096, 4239, 4645,  967,  298,  195, 3408, 4665],
       [3308, 1234, 3964, 1365,    2,  811,  607, 4042],
       [ 410,  339,   37, 5643, 4618, 4724,  190,  127],
       [4857,  170,  478, 6281,  457, 4975,  244, 5231],
       [ 874, 5487, 5487, 3413,  448,  893,  316, 5251],
       [4034,  101,  258, 1442, 5248, 4302, 1169, 3859],
       [ 205, 4237,  406, 4881, 3657,  433, 2609, 5183],
       [5015,  170,  507, 5007,  472, 3310,  428,  205],
       [5383, 3729, 4746, 4232, 3506, 5785, 3752, 3467],
       [4571, 3648, 5093,  457, 5610,  109,   37,   37],
       [4466, 3850,  435,  435, 3529, 3529,  184,  189],
       [5321, 3667, 4631, 5240, 5752,  341, 1487, 3561],
       [ 194, 3796,  109,   54, 2531, 1792, 6113, 6113],
       [ 206, 3060, 4158,  202, 4587, 3703, 4904, 5803],
       [4927, 3540, 3870, 7934, 4009, 3531,  250,  127]], dtype=int64)

In [202]:
# Flattened predictions for the 16 sample windows.
result_argmax.ravel()

array([  32,   32,   32, 4584,  210,  189,  306,  331, 5096, 4239, 4645,
        967,  298,  195, 3408, 4665, 3308, 1234, 3964, 1365,    2,  811,
        607, 4042,  410,  339,   37, 5643, 4618, 4724,  190,  127, 4857,
        170,  478, 6281,  457, 4975,  244, 5231,  874, 5487, 5487, 3413,
        448,  893,  316, 5251, 4034,  101,  258, 1442, 5248, 4302, 1169,
       3859,  205, 4237,  406, 4881, 3657,  433, 2609, 5183, 5015,  170,
        507, 5007,  472, 3310,  428,  205, 5383, 3729, 4746, 4232, 3506,
       5785, 3752, 3467, 4571, 3648, 5093,  457, 5610,  109,   37,   37,
       4466, 3850,  435,  435, 3529, 3529,  184,  189, 5321, 3667, 4631,
       5240, 5752,  341, 1487, 3561,  194, 3796,  109,   54, 2531, 1792,
       6113, 6113,  206, 3060, 4158,  202, 4587, 3703, 4904, 5803, 4927,
       3540, 3870, 7934, 4009, 3531,  250,  127], dtype=int64)

In [203]:
# Flattened ground-truth targets for the same 16 sample windows.
y_test[:16].ravel()

array([  20,   32,   32, 4584,  210,  189,  306,  331, 5096, 4239, 4645,
        967,  298,  195, 3408, 4665, 3308, 1234, 3964, 1365,    2,  811,
        607, 4042,  410,  339,   37, 5643, 4618, 4724,  190,  127, 4857,
        170,  478, 6281,  457, 4975,  244, 5231,  874,  228, 5487, 3413,
        448,  893,  316, 5251, 4034,  101,  258, 1442, 5248, 4302, 1169,
       3859,  205, 4237,  406, 4881, 3657,  433, 2609, 5183, 5015,  170,
        507, 5007,  472, 3310,  428,  205, 5383, 3729, 4746, 4232, 3506,
       5785, 3752, 3467, 4571, 3648, 5093,  457, 5610,  109,   37,   33,
       4466, 3850, 5246,  435,  291, 3529,  184,  182, 5321, 3667, 4631,
       5240, 5752,  341, 1487, 3561,  194, 3796,  109,   54, 2531, 1792,
       6113,  251,  206, 3060, 4158,  202, 4587, 3703, 4904, 5803, 4927,
       3540, 3870, 7934, 4009, 3531,  250,  127])

In [204]:
from sklearn.metrics import accuracy_score, classification_report

# Per-class precision/recall/F1 for the 16 sample windows, compared element-wise
# after flattening targets and predictions.
sample_true = np.ravel(y_test[:16])
sample_pred = np.ravel(result_argmax)
print(classification_report(sample_true, sample_pred))

              precision    recall  f1-score   support

           2       1.00      1.00      1.00         1
          20       0.00      0.00      0.00         1
          32       0.67      1.00      0.80         2
          33       0.00      0.00      0.00         1
          37       0.67      1.00      0.80         2
          54       1.00      1.00      1.00         1
         101       1.00      1.00      1.00         1
         109       1.00      1.00      1.00         2
         127       1.00      1.00      1.00         2
         170       1.00      1.00      1.00         2
         182       0.00      0.00      0.00         1
         184       1.00      1.00      1.00         1
         189       0.50      1.00      0.67         1
         190       1.00      1.00      1.00         1
         194       1.00      1.00      1.00         1
         195       1.00      1.00      1.00         1
         202       1.00      1.00      1.00         1
         205       1.00    

In [205]:
# Element-wise accuracy over the 16 sample windows.
accuracy_score(y_test[:16].ravel(), result_argmax.ravel())

0.9453125

In [18]:
# Split the test data into complete batches of `batch_size` windows plus the
# leftover rows that do not fill a whole batch.
batch_size = 16
full_batches = x_test.shape[0] // batch_size
batch_chunk = full_batches * batch_size   # rows covered by complete batches

x_batched_rows, x_test_remainder = x_test[:batch_chunk], x_test[batch_chunk:]
y_batched_rows, y_test_remainder = y_test[:batch_chunk], y_test[batch_chunk:]

# Complete batches get a leading batch-index axis; the remainders stay 2-D.
x_test_batch = x_batched_rows.reshape(-1, batch_size, static_params["PAST_HISTORY"])
y_test_batch = y_batched_rows.reshape(-1, batch_size, static_params["FUTURE_TARGET"])

In [269]:
# Run every complete batch through the model and keep the arg-max indices of
# the last output axis, one array per batch.
result = [
    tf.argmax(
        inference(tf.constant(batch_inputs.reshape(batch_size, -1, 1)))["output_0"],
        axis=-1,
    ).numpy()
    for batch_inputs in x_test_batch
]

In [270]:
# Combine the per-batch prediction arrays into a single array and check its shape.
result = np.array(result)
result.shape

(138, 16, 8)

In [271]:
# Collapse the batched predictions into one flat sequence.
result = result.reshape(-1)
result.shape

(17664,)

In [272]:
# Predict the rows left over after the complete batches and append them to `result`.
# When the number of windows is an exact multiple of `batch_size` there is no
# remainder; in that case the step is skipped, because reshaping a zero-size
# array with an inferred (-1) dimension raises a ValueError.
# NOTE: this cell reads and overwrites `result`, so re-running it without first
# re-running the cells that build `result` appends the remainder a second time.
remainder_rows = x_test_remainder.shape[0]
if remainder_rows > 0:
    remainder_inputs = tf.constant(x_test_remainder.reshape(remainder_rows, -1, 1))
    remainder_pred = tf.argmax(inference(remainder_inputs)["output_0"], axis=-1).numpy()
    result = np.r_[result, np.ravel(remainder_pred)]
result.shape

(17776,)

In [275]:
# Per-class metrics over the whole test set, returned as a nested dict.
y_test_flat = y_test.ravel()
report = classification_report(y_test_flat, result, output_dict=True)

In [276]:
# Persist the per-class report as pretty-printed JSON.
report_text = json.dumps(report, indent=4)
with open("report.json", "w") as report_file:
    report_file.write(report_text)

In [288]:
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import precision_recall_fscore_support

# Precision, recall and F1 over the whole test set for each averaging mode.
# p, r and f each hold one score per entry of `average_method`, in that order.
p, r, f = [], [], []
average_method = ["micro", "macro", "weighted"]

# Flatten the targets once instead of on every metric call.
y_true_flat = np.ravel(y_test)

for method in average_method:
    # A single call returns precision, recall and F-score (beta defaults to 1,
    # i.e. F1) together, replacing three separate scoring passes per mode.
    precision, recall, f1, _support = precision_recall_fscore_support(
        y_true_flat, result, average=method
    )

    p.append(precision)
    r.append(recall)
    f.append(f1)

In [303]:
# Write the precision scores as a two-line CSV: a header naming the averaging
# methods, then one row with the matching scores (p is in `average_method` order).
# Fields are joined with a bare "," and the row ends with a newline rather than a
# trailing comma, so header and data row have the same number of fields and the
# column names carry no leading spaces.
with open("precision.csv", "w") as c:
    c.write(",".join(average_method))
    c.write("\n")
    c.write(",".join(str(score) for score in p))
    c.write("\n")

In [304]:
# Write the recall scores as a two-line CSV: a header naming the averaging
# methods, then one row with the matching scores (r is in `average_method` order).
# Fields are joined with a bare "," and the row ends with a newline rather than a
# trailing comma, so header and data row have the same number of fields and the
# column names carry no leading spaces.
with open("recall.csv", "w") as c:
    c.write(",".join(average_method))
    c.write("\n")
    c.write(",".join(str(score) for score in r))
    c.write("\n")

In [305]:
# Write the F1 scores as a two-line CSV: a header naming the averaging
# methods, then one row with the matching scores (f is in `average_method` order).
# Fields are joined with a bare "," and the row ends with a newline rather than a
# trailing comma, so header and data row have the same number of fields and the
# column names carry no leading spaces.
with open("f1.csv", "w") as c:
    c.write(",".join(average_method))
    c.write("\n")
    c.write(",".join(str(score) for score in f))
    c.write("\n")