In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Wide default figure size for the plots in this notebook.
plt.rcParams["figure.figsize"] = (20, 5)

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# The list is empty on CPU-only machines, so iterate rather than index [0];
# memory growth is also applied to every visible GPU, not just the first one.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [1]:
# Window sizes for the sequence model.
# NOTE(review): presumably PAST_HISTORY is the number of input timesteps and
# FUTURE_TARGET the number of predicted timesteps (later cells reshape inputs to
# 16 steps and the loaded model outputs 8 steps) — confirm against the training code.
PAST_HISTORY: int = 16
FUTURE_TARGET: int = 8

In [3]:
# Dataset identifier; the test set is read from "data/<dataset_name>_test_set.csv".
dataset_name: str = "SEG_2"

In [4]:
# Read the raw test sequence for the selected dataset as 64-bit integers,
# using a newline as the field delimiter.
test_set_path = "data/{}_test_set.csv".format(dataset_name)
data = np.genfromtxt(test_set_path, delimiter="\n", dtype=np.int64)
data

array([93617988376, 93536062752, 93747035368, ..., 92658792872,
       92658792864, 92654987192], dtype=int64)

In [5]:
# Differences between consecutive values: dataset[i] = data[i] - data[i + 1].
# A single vectorized subtraction replaces the per-element Python loop and keeps
# the int64 dtype even when `data` has fewer than two elements.
dataset = data[:-1] - data[1:]
dataset, len(dataset)

(array([  81925624, -210972616,  189258952, ...,  -36097352,          8,
           3805672], dtype=int64),
 59298)

In [6]:
# Load the vocabulary values; the position of a value in this array is its
# token index (see the `vocabulary` mapping built from it).
word_index = np.genfromtxt(
    "data/word_index.csv",
    dtype=np.int64,
    delimiter="\n",
)
vocab_size = word_index.shape[0]
vocab_size

14882

In [7]:
# Map each vocabulary value to its position in `word_index`.
vocabulary = dict(zip(word_index, range(vocab_size)))

# Preview the first ten (value -> index) pairs.
dict(list(vocabulary.items())[:10])

{-1: 0,
 0: 1,
 4096: 2,
 909517620: 3,
 -909517620: 4,
 8192: 5,
 -8: 6,
 -4096: 7,
 8: 8,
 12288: 9}

In [8]:
# Display the vocabulary values (NumPy abbreviates long arrays in the repr).
word_index

array([       -1,         0,      4096, ...,  -7445040,  12889736,
       619958144], dtype=int64)

In [9]:
# Split the positions of `dataset` into values that exist in the vocabulary and
# values that do not. One boolean mask serves both index lists, which avoids the
# per-element `i in in_word_index` scan (quadratic in the dataset length).
known_mask = np.isin(dataset, word_index)
in_word_index = np.where(known_mask)[0]
# Kept as a plain list of Python ints, as before.
unseen_category = np.where(~known_mask)[0].tolist()

In [10]:
# Overwrite every out-of-vocabulary difference with -1 (mutates `dataset` in place).
# In the vocabulary preview, -1 maps to index 0 — presumably the "unknown" token;
# TODO confirm against the code that produced word_index.csv.
dataset[unseen_category] = -1

In [11]:
# Inspect the differences after out-of-vocabulary values were replaced by -1.
dataset

array([-1, -1, -1, ..., -1,  8, -1], dtype=int64)

In [12]:
# Encode every difference as its vocabulary index. Every value is expected to be
# a key of `vocabulary` at this point; a missing key raises KeyError.
test_set = np.array([vocabulary[delta] for delta in dataset])
test_set[:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
# Identifier of the saved run; the model is loaded from "version/<timestamp>/model.h5".
timestamp: str = "20200826-201949"
timestamp

In [16]:
# Load the saved Keras model for the selected run and print its layer summary.
model_path = "version/{}/model.h5".format(timestamp)
model = keras.models.load_model(model_path)
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_6 (Bidirection (None, 354)               253464    
_________________________________________________________________
dropout_2 (Dropout)          (None, 354)               0         
_________________________________________________________________
repeat_vector_3 (RepeatVecto (None, 8, 354)            0         
_________________________________________________________________
bidirectional_7 (Bidirection (None, 8, 354)            753312    
_________________________________________________________________
dropout_3 (Dropout)          (None, 8, 354)            0         
_________________________________________________________________
time_distributed_3 (TimeDist (None, 8, 14882)          5283110   
Total params: 6,289,886
Trainable params: 6,289,886
Non-trainable params: 0
____________________________________________

In [18]:
# Group the test inputs into chunks of `batch_size` samples so predictions can be
# made (and reduced) one chunk at a time.
batch_size = 128

# Largest sample count that divides evenly into chunks; the leftover samples are
# handled separately in a later cell.
batch_chunk_size = (x_test.shape[0] // batch_size) * batch_size

# Use the PAST_HISTORY constant rather than repeating the window length literal.
x_test_chunk = x_test[:batch_chunk_size].reshape(-1, batch_size, PAST_HISTORY, 1)
x_test_chunk.shape

(463, 128, 16, 1)

In [19]:
# Predict one chunk at a time and keep only the arg-max class index per output
# step (presumably to avoid holding the full per-class score tensor in memory).
y_pred = []

for x in x_test_chunk:
    # Each chunk already has shape (batch_size, timesteps, 1), so no reshape is
    # needed. predict() returns a NumPy array, so np.argmax reduces it directly
    # instead of round-tripping through a TensorFlow tensor.
    y_pred.append(np.argmax(model.predict(x, batch_size=32), axis=-1))

In [20]:
# Stack the per-chunk predictions and flatten them into a single 1-D label vector.
y_pred = np.array(y_pred).ravel()
y_pred.shape

(474112,)

In [21]:
# Predict the samples left over after the full chunks in one batched call
# instead of one predict() call per sample.
x_test_remainder = x_test[batch_chunk_size:]

if len(x_test_remainder) > 0:
    y_pred_remainder = np.argmax(
        model.predict(x_test_remainder.reshape(-1, PAST_HISTORY, 1), batch_size=32),
        axis=-1,
    )
else:
    # No leftover samples: keep an integer dtype so that concatenating with the
    # chunk predictions does not upcast the labels to float.
    y_pred_remainder = np.array([], dtype=np.int64)

# Flatten in sample-major order, matching the layout of the chunk predictions.
y_pred_remainder = np.ravel(y_pred_remainder)
y_pred_remainder.shape

(80,)

In [22]:
# Append the remainder predictions after the full-chunk predictions.
# NOTE(review): this rebinds `y_pred`, so re-running only this cell appends the
# remainder a second time; re-run from the prediction cells to reset it.
y_pred = np.r_[y_pred, y_pred_remainder]
y_pred.shape

(474192,)

In [24]:
# Precision, recall and F1 of the flattened predictions, once per averaging
# strategy; p, r and f hold the scores in the same order as `average_method`.
average_method = ["micro", "macro", "weighted"]
p, r, f = [], [], []

# Flatten the targets once; every metric compares them with the 1-D y_pred.
y_true_flat = np.ravel(y_test)

for method in average_method:
    precision, recall, f1 = (
        scorer(y_true_flat, y_pred, average=method)
        for scorer in (precision_score, recall_score, f1_score)
    )
    p.append(precision)
    r.append(recall)
    f.append(f1)

In [23]:
# Compute the accuracy before opening the file, so a failure in the metric does
# not leave a truncated accuracy.csv behind.
accuracy = accuracy_score(np.ravel(y_test), y_pred)

with open("accuracy.csv", "w") as c:
    c.write(str(accuracy))

# Show the value as the cell output.
accuracy

0.7240548132402065

In [25]:
# Write the precision scores as a two-row CSV: a header with the averaging
# methods and one row of values. Both rows are comma-joined without padding or a
# trailing comma, so header and data have the same number of fields, and each
# row ends with a newline.
with open("precision.csv", "w") as c:
    c.write(",".join(average_method) + "\n")
    c.write(",".join(str(score) for score in p) + "\n")

In [26]:
# Write the recall scores as a two-row CSV: a header with the averaging methods
# and one row of values. Both rows are comma-joined without padding or a
# trailing comma, so header and data have the same number of fields, and each
# row ends with a newline.
with open("recall.csv", "w") as c:
    c.write(",".join(average_method) + "\n")
    c.write(",".join(str(score) for score in r) + "\n")

In [27]:
# Write the F1 scores as a two-row CSV: a header with the averaging methods and
# one row of values. Both rows are comma-joined without padding or a trailing
# comma, so header and data have the same number of fields, and each row ends
# with a newline.
with open("f1.csv", "w") as c:
    c.write(",".join(average_method) + "\n")
    c.write(",".join(str(score) for score in f) + "\n")