In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp_api
import kerastuner
import numpy as np
import pandas as pd
import os
import json
import datetime
import dill

In [2]:
from tensorflow.keras.layers import (
    Dense, 
    Dropout,
    LSTMCell,
    RNN
)

In [3]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [4]:
timestamp = "20200918-103631"
timestamp

'20200918-103631'

In [5]:
dataset_name = "SEG_AR"

In [6]:
static_params = {
    'PAST_HISTORY': 16,
    'FUTURE_TARGET': 8,
    'BATCH_SIZE': 512,
    'BUFFER_SIZE': 200000,
    'EPOCHS': 500,
    'VOCAB_SIZE': 16293
 }

In [7]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    data = []
    labels = []

    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        indices = range(i-history_size, i)
        # Reshape data from (history_size,) to (history_size, n_feature)
        data.append(np.reshape(dataset[indices], (history_size, 1)))
        #data.append(dataset[indices])
        labels.append(np.reshape(dataset[i:i+target_size], (target_size, 1)))
        #labels.append(dataset[i:i+target_size])
    return np.array(data), np.array(labels)

In [8]:
with open("static/test_pipeline.pkl", "rb") as p:
    test_pipeline = dill.load(p)

In [9]:
test_set = np.genfromtxt("data/SEG_test_set_original.csv", delimiter="\n", dtype=np.int64)
processed_test_set = test_pipeline.transform(test_set.copy())
x_test, y_test = generate_timeseries(processed_test_set, 0, None, static_params["PAST_HISTORY"], static_params["FUTURE_TARGET"])

In [10]:
model = tf.saved_model.load("version/" + timestamp)

In [11]:
model.signatures.items()

ItemsView(_SignatureMap({'serving_default': <ConcreteFunction signature_wrapper(call) at 0x151984ADFC8>}))

In [12]:
inference = model.signatures["serving_default"]

In [13]:
result = inference(tf.constant(x_test[:16].reshape(16, -1, 1)))
result

{'output_0': <tf.Tensor: shape=(16, 8, 16293), dtype=float32, numpy=
 array([[[1.50097057e-05, 5.48189291e-07, 3.89038703e-08, ...,
          2.30102135e-11, 3.40366602e-12, 1.55406863e-14],
         [1.06637381e-05, 4.69579561e-07, 2.40659492e-08, ...,
          2.27535317e-11, 1.81268531e-12, 7.57247624e-15],
         [1.73060835e-05, 6.67029838e-07, 1.95133971e-08, ...,
          1.75098033e-10, 8.74910127e-12, 7.91185016e-15],
         ...,
         [3.41588078e-04, 1.06824780e-06, 3.29334360e-09, ...,
          6.09716766e-09, 3.74622555e-12, 1.84857339e-15],
         [2.76026898e-04, 6.17002684e-07, 3.78019216e-09, ...,
          8.04716294e-09, 8.52313391e-13, 1.01211279e-15],
         [1.85614743e-03, 1.16095034e-05, 3.80971947e-08, ...,
          3.88608790e-09, 5.19941815e-12, 2.05931412e-15]],
 
        [[1.15620796e-05, 6.05554874e-07, 2.96360199e-08, ...,
          2.00855842e-11, 1.79739177e-12, 1.08461578e-14],
         [2.88992978e-05, 1.21338599e-06, 3.88748482e-08, ..

In [14]:
result_argmax = tf.argmax(result["output_0"], axis=-1).numpy()
result_argmax

array([[  32,   32,   32, 4584,  210,  189,  306,  331],
       [  32,   32, 4584,  210,  189,  306,  331, 4426],
       [  32, 4584,  210,  189,  306,  331, 4426, 6272],
       [4584,  210,  189,  306,  331, 4426, 6272,  566],
       [ 210,  189,  306,  331, 4426, 6272,  566,  193],
       [ 189,  306,  331, 4426, 6272,  566,  193, 5039],
       [ 306,  331, 4426, 6272,  566,  193, 5039,  439],
       [ 331, 4426, 6272,  566,  193, 5039,  439, 5525],
       [4426, 6272,  566,  193, 5039,  439, 5525, 5680],
       [6272,  566,  193, 5039,  439, 5525, 5680, 5096],
       [ 566,  193, 5039,  439, 5525, 5680, 5096, 4239],
       [ 193, 5039,  439, 5525, 5680, 5096, 4239, 4645],
       [5039,  439, 5525, 5680, 5096, 4239, 4645,  967],
       [ 439, 5525, 5680, 5096, 4239, 4645,  967,  298],
       [5525, 5680, 5096, 4239, 4645,  967,  298,  195],
       [5680, 5096, 4239, 4645,  967,  298,  195, 3408]], dtype=int64)

In [15]:
np.squeeze(y_test[:16], axis=-1)

array([[  20,   32,   32, 4584,  210,  189,  306,  331],
       [  32,   32, 4584,  210,  189,  306,  331, 4426],
       [  32, 4584,  210,  189,  306,  331, 4426, 6272],
       [4584,  210,  189,  306,  331, 4426, 6272,  566],
       [ 210,  189,  306,  331, 4426, 6272,  566,  193],
       [ 189,  306,  331, 4426, 6272,  566,  193, 5039],
       [ 306,  331, 4426, 6272,  566,  193, 5039,  439],
       [ 331, 4426, 6272,  566,  193, 5039,  439, 5525],
       [4426, 6272,  566,  193, 5039,  439, 5525, 5680],
       [6272,  566,  193, 5039,  439, 5525, 5680, 5096],
       [ 566,  193, 5039,  439, 5525, 5680, 5096, 4239],
       [ 193, 5039,  439, 5525, 5680, 5096, 4239, 4645],
       [5039,  439, 5525, 5680, 5096, 4239, 4645,  967],
       [ 439, 5525, 5680, 5096, 4239, 4645,  967,  298],
       [5525, 5680, 5096, 4239, 4645,  967,  298,  195],
       [5680, 5096, 4239, 4645,  967,  298,  195, 3408]])

In [17]:
from sklearn.metrics import accuracy_score, classification_report

print(classification_report(np.squeeze(y_test[:16], axis=-1).reshape(1, -1)[0], tf.argmax(result["output_0"], axis=-1).numpy().reshape(1, -1)[0]))

              precision    recall  f1-score   support

          20       0.00      0.00      0.00         1
          32       0.83      1.00      0.91         5
         189       1.00      1.00      1.00         6
         193       1.00      1.00      1.00         8
         195       1.00      1.00      1.00         2
         210       1.00      1.00      1.00         5
         298       1.00      1.00      1.00         3
         306       1.00      1.00      1.00         7
         331       1.00      1.00      1.00         8
         439       1.00      1.00      1.00         8
         566       1.00      1.00      1.00         8
         967       1.00      1.00      1.00         4
        3408       1.00      1.00      1.00         1
        4239       1.00      1.00      1.00         6
        4426       1.00      1.00      1.00         8
        4584       1.00      1.00      1.00         4
        4645       1.00      1.00      1.00         5
        5039       1.00    

In [18]:
accuracy_score(np.squeeze(y_test[:16], axis=-1), result_argmax)

ValueError: multiclass-multioutput is not supported

from sklearn.metrics import classification_report

report = pd.DataFrame(classification_report(y_true, y_pred, output_dict=True)).transpose()
report.to_csv("version/{}/report_2G1P.csv".format(timestamp))
print(report)

In [37]:
# x_true
test_set[:17]

array([93238620160, 93238624428, 93238645112, 93238649036, 93238653032,
       93238658844, 93236211716, 93238665264, 93238670056, 93238685276,
       93238686004, 93238825024, 93238829092, 93236215808, 93236219912,
       93236224004, 93236228096], dtype=int64)

In [32]:
# x_delta
test_pipeline["calculate_delta"].transform(test_set[:15].copy())

array([    4268,    20684,     3924,     3996,     5812, -2447128,
        2453548,     4792,    15220,      728,   139020,     4068,
       -2613284,     4104], dtype=int64)

In [33]:
# x
x_test[0].copy().reshape(1, -1)

array([[ 185, 2340,  338,  190, 3235, 3004, 6040,  352, 3072,  416, 3421,
        1558, 4131,   20,   32,   32]])

In [25]:
result_1 = inference(tf.constant(x_test[0].reshape(1, -1, 1)))
result_1

{'output_0': <tf.Tensor: shape=(1, 8, 16293), dtype=float32, numpy=
 array([[[1.5009706e-05, 5.4818929e-07, 3.8903945e-08, ...,
          2.3010214e-11, 3.4036660e-12, 1.5540686e-14],
         [1.0663727e-05, 4.6957956e-07, 2.4065949e-08, ...,
          2.2753532e-11, 1.8126853e-12, 7.5724762e-15],
         [1.7306060e-05, 6.6702898e-07, 1.9513333e-08, ...,
          1.7509778e-10, 8.7490891e-12, 7.9118544e-15],
         ...,
         [3.4158808e-04, 1.0682478e-06, 3.2933436e-09, ...,
          6.0971792e-09, 3.7462256e-12, 1.8485734e-15],
         [2.7602725e-04, 6.1700348e-07, 3.7801970e-09, ...,
          8.0471878e-09, 8.5231610e-13, 1.0121160e-15],
         [1.8561472e-03, 1.1609491e-05, 3.8097262e-08, ...,
          3.8860950e-09, 5.1994177e-12, 2.0593139e-15]]], dtype=float32)>}

In [26]:
result_1_argmax = tf.argmax(result_1["output_0"], axis=-1).numpy()
result_1_argmax

array([[  32,   32,   32, 4584,  210,  189,  306,  331]], dtype=int64)

In [27]:
test_pipeline["sparse_category_encoder"].inverse_transform(result_1_argmax)

array([[   4092,    4092,    4092, 2470348,    4016,    4056,    4060,
           4316]], dtype=int64)

In [34]:
test_set[16:24]

array([93236228096, 93236232200, 93236236292, 93236240384, 93238710732,
       93238714748, 93238718804, 93238722864], dtype=int64)

In [36]:
test_pipeline["calculate_delta"].transform(test_set[16:25].copy())

array([   4104,    4092,    4092, 2470348,    4016,    4056,    4060,
          4316], dtype=int64)

In [39]:
x_test.shape

(35555, 16, 1)

In [46]:
pd.Series(processed_test_set).value_counts()#.to_csv("frequency_table.csv")

0        7740
2        1556
1        1036
5         274
7         266
         ... 
16072       1
1711        1
4152        1
5805        1
14400       1
Length: 12695, dtype: int64