In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Default figure size for every plot in this notebook (width, height in inches).
plt.rcParams["figure.figsize"] = (20, 5)

# Ask TensorFlow to grow GPU memory on demand. The loop covers every visible GPU
# and is a no-op on CPU-only machines, where the device list is empty and
# indexing element [0] would raise IndexError.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [2]:
# PAST_HISTORY: number of consecutive tokens in each input window (the test set
# is reshaped into windows of this length further down).
#PAST_HISTORY = 16  # LSTM
PAST_HISTORY = 64   # Bidirectional(LSTM)
# FUTURE_TARGET: number of tokens right after each window that are used as targets.
FUTURE_TARGET = 8

In [3]:
# Run identifier; it selects which saved model is loaded from
# "version/<timestamp>/model.h5" later in the notebook.
#timestamp = "20200909-155423"  # LSTM
timestamp = "20200909-143624"   # Bidirectional(LSTM)
timestamp

'20200909-143624'

In [4]:
# Prefix of the test-set file, which is read from "data/<dataset_name>_test_set.csv".
dataset_name = "SEG_CNNLSTM"

In [5]:
# Read the raw test sequence as 64-bit integers, using newline as the delimiter.
test_set_path = "data/{}_test_set.csv".format(dataset_name)
data = np.genfromtxt(test_set_path, delimiter="\n", dtype=np.int64)
data

array([93617988376, 93536062752, 93747035368, ..., 92658792872,
       92658792864, 92654987192], dtype=int64)

In [6]:
# Differences between consecutive values, computed as data[i] - data[i+1]
# (previous minus next, i.e. the negative of np.diff). This is one vectorized
# subtraction instead of a Python-level loop, and the result keeps the integer
# dtype of `data` even when `data` has fewer than two elements.
dataset = data[:-1] - data[1:]
dataset, len(dataset)

(array([  81925624, -210972616,  189258952, ...,  -36097352,          8,
           3805672], dtype=int64),
 59298)

In [7]:
# Load the vocabulary values as 64-bit integers; the vocabulary size is the
# number of entries read.
word_index_path = "data/word_index.csv"
word_index = np.genfromtxt(word_index_path, delimiter="\n", dtype=np.int64)
vocab_size = len(word_index)
vocab_size

14882

In [8]:
# Map each vocabulary value to its position in word_index (its token id).
vocabulary = {value: token_id for token_id, value in enumerate(word_index)}
# Preview the first ten entries.
dict(list(vocabulary.items())[:10])

{-1: 0,
 0: 1,
 4096: 2,
 909517620: 3,
 -909517620: 4,
 8192: 5,
 -8: 6,
 -4096: 7,
 8: 8,
 12288: 9}

In [9]:
# Show the vocabulary values; a value's position in this array is its token id.
word_index

array([       -1,         0,      4096, ...,  -7445040,  12889736,
       619958144], dtype=int64)

In [10]:
# Boolean mask of the deltas that occur in the vocabulary.
known_mask = np.isin(dataset, word_index)
in_word_index = np.where(known_mask)[0]

# Ascending positions of the deltas that are NOT in the vocabulary, as a plain
# list of ints. The mask is inverted directly instead of testing `i in array`
# for every position, which scanned the whole index array on each iteration.
unseen_category = np.flatnonzero(~known_mask).tolist()

In [11]:
# Overwrite every out-of-vocabulary delta in place with -1 so that it can be
# looked up in `vocabulary` afterwards (the vocabulary preview maps -1 to id 0).
dataset[unseen_category] = -1

In [12]:
# Inspect the deltas after the out-of-vocabulary entries were replaced with -1.
dataset

array([-1, -1, -1, ..., -1,  8, -1], dtype=int64)

In [13]:
# Translate every delta into its token id through the vocabulary lookup.
test_set = np.array([vocabulary[delta] for delta in dataset])
test_set[:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [14]:
# Keep only whole PAST_HISTORY-sized windows after setting aside FUTURE_TARGET tokens.
# The length comes from `dataset`, which has one entry per token of the
# untruncated test_set and is never shortened. Deriving it from test_set itself
# made the cell trim one more window each time it was re-run.
total_dataset_size = (len(dataset) - FUTURE_TARGET)//PAST_HISTORY * PAST_HISTORY
test_set = test_set[:total_dataset_size]
total_dataset_size

59264

In [15]:
batch_size = 128
# Number of tokens consumed by one full batch of windows.
tokens_per_batch = batch_size * PAST_HISTORY
# Largest token count that divides evenly into full batches.
n_tokens = test_set.shape[0]
batch_chunk_size = n_tokens - n_tokens % tokens_per_batch

In [16]:
# Arrange the tokens that fill whole batches as
# (n_batches, batch_size, PAST_HISTORY, 1).
full_batch_tokens = test_set[:batch_chunk_size]
x_test_batch = full_batch_tokens.reshape((-1, batch_size, PAST_HISTORY, 1))
x_test_batch.shape

(7, 128, 64, 1)

In [17]:
# Windows left over after the full batches, shaped (n_windows, PAST_HISTORY, 1).
leftover_tokens = test_set[batch_chunk_size:]
# The final window is dropped so the inputs line up with y.
x_test_remainder = leftover_tokens.reshape((-1, PAST_HISTORY, 1))[:-1]
x_test_remainder.shape

(29, 64, 1)

In [18]:
# For each window, the targets are the FUTURE_TARGET tokens that open the next
# window. Slices that run past the end of test_set simply contribute fewer
# (or no) tokens.
n_windows = total_dataset_size // PAST_HISTORY
y_test = np.array([
    token
    for window in range(1, n_windows + 1)
    for token in test_set[PAST_HISTORY*window:PAST_HISTORY*window + FUTURE_TARGET]
])

In [19]:
# Sanity check: total number of target tokens collected.
y_test.shape

(7400,)

In [20]:
# Flatten the targets to one dimension so they can be compared with the
# flattened predictions.
y_test = y_test.ravel()

In [21]:
# Confirm the shape of the targets after flattening.
y_test.shape

(7400,)

In [22]:
# Load the saved network for the selected run and print its layer summary.
model_path = "version/{}/model.h5".format(timestamp)
model = keras.models.load_model(model_path)
model.summary()

Model: "sequential_135"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_168 (Conv1D)          (None, 64, 32)            128       
_________________________________________________________________
max_pooling1d_112 (MaxPoolin (None, 32, 32)            0         
_________________________________________________________________
bidirectional_16 (Bidirectio (None, 354)               297360    
_________________________________________________________________
dropout_23 (Dropout)         (None, 354)               0         
_________________________________________________________________
repeat_vector_8 (RepeatVecto (None, 8, 354)            0         
_________________________________________________________________
bidirectional_17 (Bidirectio (None, 8, 354)            753312    
_________________________________________________________________
dropout_24 (Dropout)         (None, 8, 354)         

In [23]:
# Run the model on one full batch at a time and keep, for every predicted
# position, the index of the highest-scoring class.
y_pred = [
    tf.argmax(
        model.predict(batch.reshape(batch_size, PAST_HISTORY, 1), batch_size=batch_size),
        axis=-1,
    )
    for batch in x_test_batch
]

In [24]:
# Stack the per-batch predictions and flatten them into one 1-D label array.
y_pred = np.array(y_pred).ravel()
y_pred.shape

(7168,)

In [25]:
# Predict the leftover windows (those that did not fill a whole batch) with a
# single model.predict() call instead of one call per window.
if len(x_test_remainder) > 0:
    remainder_scores = model.predict(x_test_remainder, batch_size=batch_size)
    # Index of the highest-scoring class for every predicted position.
    y_pred_remainder = np.argmax(remainder_scores, axis=-1)
else:
    # No leftover windows: keep an integer dtype so that joining with y_pred
    # does not turn the predicted labels into floats.
    y_pred_remainder = np.empty(0, dtype=np.int64)

y_pred_remainder = np.ravel(y_pred_remainder)
y_pred_remainder.shape

(232,)

In [26]:
# Append the leftover-window predictions after the full-batch predictions.
y_pred = np.concatenate((y_pred, y_pred_remainder))
y_pred.shape

(7400,)

In [27]:
# Persist the overall accuracy as a single bare number.
with open("accuracy.csv", "w") as accuracy_file:
    overall_accuracy = accuracy_score(y_test, y_pred)
    accuracy_file.write(str(overall_accuracy))

In [28]:
average_method = ["micro", "macro", "weighted"]

# One score per averaging strategy, in the same order as `average_method`.
p = [precision_score(y_test, y_pred, average=avg) for avg in average_method]
r = [recall_score(y_test, y_pred, average=avg) for avg in average_method]
f = [f1_score(y_test, y_pred, average=avg) for avg in average_method]

In [29]:
# Write the precision scores as a two-line CSV: a header row with the averaging
# methods, then one row of values. Both rows use a plain "," separator and no
# trailing comma, so the header and the data row have the same number of fields.
with open("precision.csv", "w") as c:
    c.write(",".join(average_method))
    c.write("\n")
    c.write(",".join(str(score) for score in p))
    c.write("\n")

In [30]:
# Write the recall scores as a two-line CSV: a header row with the averaging
# methods, then one row of values. Both rows use a plain "," separator and no
# trailing comma, so the header and the data row have the same number of fields.
with open("recall.csv", "w") as c:
    c.write(",".join(average_method))
    c.write("\n")
    c.write(",".join(str(score) for score in r))
    c.write("\n")

In [31]:
# Write the F1 scores as a two-line CSV: a header row with the averaging
# methods, then one row of values. Both rows use a plain "," separator and no
# trailing comma, so the header and the data row have the same number of fields.
with open("f1.csv", "w") as c:
    c.write(",".join(average_method))
    c.write("\n")
    c.write(",".join(str(score) for score in f))
    c.write("\n")