In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime

plt.rcParams["figure.figsize"] = (20, 5)

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [2]:
test_dataset = pd.read_csv("data/NU_test_set.csv")
test_dataset.head()

Unnamed: 0,t,t+1,delta,tokenized_data
0,103591821312,103591825408,4096,4096
1,103591825408,103591829504,4096,4096
2,103591829504,103591833600,4096,4096
3,103591833600,103591837696,4096,4096
4,103591837696,103591841792,4096,4096


In [3]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    data = []
    labels = []

    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        indices = range(i-history_size, i)
        # Reshape data from (history_size,) to (history_size, n_feature)
        data.append(np.reshape(dataset[indices], (history_size, 5)))
        labels.append(dataset[i:i+target_size])
    return np.array(data), np.array(labels)

In [4]:
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder()   #dtype=np.int64
encoded_data = encoder.fit_transform(test_dataset["tokenized_data"].values.reshape(-1, 1))
encoded_data[0], encoder.categories_

(<1x5 sparse matrix of type '<class 'numpy.float64'>'
 	with 1 stored elements in Compressed Sparse Row format>,
 [array([-6291456,       -1,        0,     4096,  2097152], dtype=int64)])

In [7]:
x_test, y_test = generate_timeseries(encoded_data.toarray(), 0, None, 16, 8)

In [8]:
x_test[0], y_test[0]

(array([[0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]),
 array([[0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]))

In [13]:
x_test.shape, y_test.shape

((15970, 16, 5), (15970, 8, 5))

In [39]:
model = tf.keras.models.load_model("version/second_model.h5")
model.summary()

Model: "sequential_53"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_53 (LSTM)               multiple                  220       
_________________________________________________________________
repeat_vector_6 (RepeatVecto multiple                  0         
_________________________________________________________________
lstm_54 (LSTM)               multiple                  220       
_________________________________________________________________
time_distributed_31 (TimeDis multiple                  30        
Total params: 470
Trainable params: 470
Non-trainable params: 0
_________________________________________________________________


In [16]:
np.argmax(model.predict(x_test[0].reshape(1, 16, 5)))

3

In [18]:
np.argmax(y_test[0])

3

In [40]:
label_list = [np.argmax(y_test[i]) for i in range(x_test.shape[0])]
label_list[:5]

[3, 3, 3, 3, 3]

In [42]:
result = []
for i in range(x_test.shape[0]):   
    result.append(np.argmax(model.predict(x_test[i].reshape(1, 16, 5))))

In [43]:
evaluation = 0
for i in range(x_test.shape[0]):
    if result[i] == label_list[i]:
        evaluation += 1

In [44]:
evaluation / x_test.shape[0]

0.6391984971822167