In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime

# Use a wide default figure size for every plot in this notebook.
plt.rcParams["figure.figsize"] = (20, 5)

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# Iterating over the list (rather than indexing [0]) keeps this cell runnable on
# CPU-only machines, where the list is empty, and applies the same setting to
# every GPU when more than one is present.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [2]:
# Name of the dataset under evaluation; used to build the test-set CSV path.
dataset_name = "SEG_CNNLSTM"

In [3]:
# Load the raw test series: the file is read as one int64 value per line
# (newline is the delimiter). The bare `data` below displays the loaded array.
data = np.genfromtxt("data/{}_test_set.csv".format(dataset_name), delimiter="\n", dtype=np.int64)
data

array([93617988376, 93536062752, 93747035368, ..., 92658792872,
       92658792864, 92654987192], dtype=int64)

In [4]:
# Convert the raw series into first-order differences between consecutive
# values: dataset[i] == data[i + 1] - data[i]. np.diff does this in one
# vectorised call and yields one element fewer than `data`.
dataset = np.diff(data)
dataset, len(dataset)

(array([ -81925624,  210972616, -189258952, ...,   36097352,         -8,
          -3805672], dtype=int64),
 59298)

In [5]:
# Load the vocabulary of known values (one int64 per line). The position of a
# value in this array is later used as its class index.
word_index = np.genfromtxt("data/word_index.csv", delimiter="\n", dtype=np.int64)
# Number of distinct classes = number of entries in the vocabulary file.
vocab_size = len(word_index)
vocab_size

14882

In [6]:
# Reverse lookup table: vocabulary value -> its position in `word_index`.
vocabulary = {value: position for position, value in enumerate(word_index)}
# Preview the first ten (value, index) pairs.
dict(list(vocabulary.items())[:10])

{-1: 0,
 0: 1,
 4096: 2,
 909517620: 3,
 -909517620: 4,
 8192: 5,
 -8: 6,
 -4096: 7,
 8: 8,
 12288: 9}

In [7]:
# Display the vocabulary array for inspection.
word_index

array([       -1,         0,      4096, ...,  -7445040,  12889736,
       619958144], dtype=int64)

In [8]:
# Split the positions of `dataset` into those whose value is in the vocabulary
# and those whose value is not. A single boolean mask replaces the per-element
# `i in in_word_index` check, which rescanned the whole index array each time.
known_mask = np.isin(dataset, word_index)
# Positions whose value exists in `word_index`.
in_word_index = np.where(known_mask)[0]
# Positions whose value is unknown, kept as a plain list of ints.
unseen_category = np.where(~known_mask)[0].tolist()

In [9]:
# Overwrite every out-of-vocabulary value with -1 so the vocabulary lookup
# below cannot fail. NOTE: this mutates `dataset` in place; the original
# values at those positions are lost until the cell that builds `dataset`
# is re-run.
# NOTE(review): -1 is presumably the reserved "unknown" entry of word_index —
# confirm against the training pipeline.
dataset[unseen_category] = -1

In [10]:
# Display `dataset` after the unseen values have been replaced with -1.
dataset

array([ -81925624,  210972616, -189258952, ...,         -1,         -8,
               -1], dtype=int64)

In [11]:
# Encode every value of `dataset` as its vocabulary index, stored as float32.
test_set = np.array([vocabulary[value] for value in dataset]).astype(np.float32)
test_set[:10]

array([ 3281., 10140., 10181.,  8177.,  9834., 14409.,  8531.,  8166.,
        6807.,  4700.], dtype=float32)

In [12]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    """Cut a 1-D series into (history, target) windows.

    For every position ``p`` in ``range(start_index + history_size, end_index)``
    one sample is produced: the ``history_size`` values before ``p`` as input
    and the ``target_size`` values starting at ``p`` as label.

    Args:
        dataset: 1-D NumPy array to slice.
        start_index: Index of the first value that may appear in a history window.
        end_index: Exclusive upper bound for ``p``. ``None`` means
            ``len(dataset) - target_size``.
        history_size: Number of past values per input window.
        target_size: Number of future values per label window.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays with shapes
        ``(n, history_size, 1)`` and ``(n, target_size, 1)``.

    Note:
        With the default ``end_index`` the largest ``p`` is
        ``len(dataset) - target_size - 1``, so the final element of
        ``dataset`` never appears in a label window.
    """
    first_position = start_index + history_size
    stop_position = end_index if end_index is not None else len(dataset) - target_size
    positions = range(first_position, stop_position)

    # A trailing feature axis of size 1 is added to each window:
    # (history_size,) -> (history_size, 1) and (target_size,) -> (target_size, 1).
    history_windows = [
        np.reshape(dataset[range(p - history_size, p)], (history_size, 1))
        for p in positions
    ]
    target_windows = [
        np.reshape(dataset[p:p + target_size], (target_size, 1))
        for p in positions
    ]
    return np.array(history_windows), np.array(target_windows)

In [13]:
# Window the whole encoded test series: 16 past steps as input, 8 future steps as label.
x_test, y_test = generate_timeseries(
    test_set,
    start_index=0,
    end_index=None,
    history_size=16,
    target_size=8,
)
x_test.shape, y_test.shape

((59274, 16, 1), (59274, 8, 1))

In [14]:
# Show the first (input window, label window) pair as a sanity check.
x_test[0], y_test[0]

(array([[ 3281.],
        [10140.],
        [10181.],
        [ 8177.],
        [ 9834.],
        [14409.],
        [ 8531.],
        [ 8166.],
        [ 6807.],
        [ 4700.],
        [ 4453.],
        [ 3203.],
        [ 5888.],
        [11018.],
        [11563.],
        [    0.]], dtype=float32),
 array([[1.000e+00],
        [0.000e+00],
        [1.291e+04],
        [0.000e+00],
        [6.358e+03],
        [2.888e+03],
        [1.700e+02],
        [6.866e+03]], dtype=float32))

In [15]:
# Identifier of the saved model version to evaluate; it selects the
# "version/<timestamp>/" directory used by the cells below.
timestamp = "20200909-155423"
timestamp

'20200909-155423'

In [16]:
# Load the saved Keras model for the selected version and print its layer summary.
model = keras.models.load_model("version/{}/model.h5".format(timestamp))
model.summary()

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_45 (Conv1D)           (None, 16, 64)            640       
_________________________________________________________________
max_pooling1d_29 (MaxPooling (None, 8, 64)             0         
_________________________________________________________________
lstm_25 (LSTM)               (None, 8, 64)             33024     
_________________________________________________________________
lstm_26 (LSTM)               (None, 8, 64)             33024     
_________________________________________________________________
time_distributed_11 (TimeDis (None, 8, 14882)          967330    
Total params: 1,034,018
Trainable params: 1,034,018
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Evaluate the loaded model on the windowed test set.
# NOTE(review): `result` is treated below as [loss, accuracy]; this presumably
# matches the metrics the model was compiled with — confirm.
result = model.evaluate(x_test, y_test)
result



[11.161510467529297, 0.5640732049942017]

In [18]:
# Save the evaluation metrics next to the model as a small two-line text file:
# a header row followed by the loss and accuracy values.
with open("version/{}/evaluate.txt".format(timestamp), 'w') as report:
    report.writelines([
        "loss, accuracy\n",
        "{}, {}".format(result[0], result[1]),
    ])