In [15]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime

plt.rcParams["figure.figsize"] = (20, 5)

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [16]:
test_dataset = pd.read_csv("data/NU_test_set.csv")
test_dataset.head()

Unnamed: 0,t,t+1,delta,tokenized_data
0,103591821312,103591825408,4096,4096
1,103591825408,103591829504,4096,4096
2,103591829504,103591833600,4096,4096
3,103591833600,103591837696,4096,4096
4,103591837696,103591841792,4096,4096


In [17]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    data = []
    labels = []

    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        indices = range(i-history_size, i)
        # Reshape data from (history_size,) to (history_size, n_feature)
        data.append(np.reshape(dataset[indices], (history_size, 5)))
        labels.append(dataset[i:i+target_size])
    return np.array(data), np.array(labels)

In [18]:
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(dtype=np.float32)   #dtype=np.int64
encoded_data = encoder.fit_transform(test_dataset["tokenized_data"].values.reshape(-1, 1))
encoded_data[0], encoder.categories_

(<1x5 sparse matrix of type '<class 'numpy.float32'>'
 	with 1 stored elements in Compressed Sparse Row format>,
 [array([-6291456,       -1,        0,     4096,  2097152], dtype=int64)])

In [19]:
x_test, y_test = generate_timeseries(encoded_data.toarray(), 0, None, 16, 8)

In [20]:
x_test[0], y_test[0]

(array([[0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]], dtype=float32),
 array([[0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]], dtype=float32))

In [21]:
x_test.shape, y_test.shape

((10478, 16, 5), (10478, 8, 5))

In [22]:
np.argmax(y_test[0])

3

In [23]:
y_true = [np.argmax(y_test[i]) for i in range(x_test.shape[0])]
y_true[:5]

[3, 3, 3, 3, 3]

In [24]:
timestamp = "20200815-170315"

In [25]:
model = tf.keras.models.load_model("version/{}/model.h5".format(timestamp))
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_2 (Bidirection multiple                  440       
_________________________________________________________________
repeat_vector_1 (RepeatVecto multiple                  0         
_________________________________________________________________
bidirectional_3 (Bidirection multiple                  640       
_________________________________________________________________
time_distributed_1 (TimeDist multiple                  55        
Total params: 1,135
Trainable params: 1,135
Non-trainable params: 0
_________________________________________________________________


In [26]:
y_pred = []
for i in range(x_test.shape[0]):   
    y_pred.append(np.argmax(model.predict(x_test[i].reshape(1, 16, 5))))

In [27]:
import sklearn.metrics as metrics

accuracy = metrics.accuracy_score(y_true, y_pred)
with open("version/{}/accuracy.txt".format(timestamp), "w") as t:
    t.write(str(accuracy.tolist()))

accuracy

0.0001908761213972132