In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
# Iterating over the device list (rather than indexing element 0) keeps this cell
# working on CPU-only machines, where the list is empty, and applies the same
# setting to every GPU, which TensorFlow requires when several GPUs are visible.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [3]:
# Name of the dataset under evaluation; it is substituted into the test-set
# path "data/<dataset_name>_test_set.csv" when the test data is loaded.
dataset_name = "SEG_2_retrain"

In [4]:
# Identifier of the saved model version: the model is loaded from
# "version/<timestamp>" and the evaluation CSV files are written to that same directory.
timestamp = "20201123-064550"

In [5]:
# Restore the saved Keras model from the directory of the selected version.
model_dir = "version/{}".format(timestamp)
model = keras.models.load_model(model_dir)

In [6]:
# Print the layer-by-layer architecture and parameter counts of the loaded model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_2 (Bidirection (None, 354)               23322936  
_________________________________________________________________
dropout_2 (Dropout)          (None, 354)               0         
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 8, 354)            0         
_________________________________________________________________
bidirectional_3 (Bidirection (None, 8, 354)            753312    
_________________________________________________________________
dropout_3 (Dropout)          (None, 8, 354)            0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 8, 16293)          5784015   
Total params: 29,860,263
Trainable params: 29,860,263
Non-trainable params: 0
__________________________________________

In [7]:
# Read the vocabulary file (one integer id per line) and report how many entries it holds.
vocabulary_path = "static/vocabulary.csv"
vocabulary = np.genfromtxt(vocabulary_path, dtype=np.int64, delimiter="\n")
vocab_size = np.shape(vocabulary)[0]
vocab_size

16293

In [8]:
# Settings shared by the dataset pipelines below.
param_list = {
    "PAST_HISTORY": 16,        # length of each input window (tokens)
    "FUTURE_TARGET": 8,        # length of each target window (tokens)
    "BATCH_SIZE": 128,         # windows per batch
    "VOCAB_SIZE": vocab_size,  # depth of the one-hot encoding
}

In [9]:
# Load the test sequence for the selected dataset: one integer token id per line.
test_set_path = "data/{}_test_set.csv".format(dataset_name)
test_set = np.genfromtxt(test_set_path, dtype=np.int64, delimiter="\n")
test_set

array([ 172, 2711,  326, ...,    0,    6,    0], dtype=int64)

In [11]:
# Input pipeline: slide a window of PAST_HISTORY consecutive tokens over the test
# sequence one step at a time. The last FUTURE_TARGET tokens are excluded from the
# source sequence. Each window is one-hot encoded, then windows are grouped into batches.
past_history = param_list["PAST_HISTORY"]
x_test = (
    tf.data.Dataset.from_tensor_slices(test_set[:-param_list["FUTURE_TARGET"]])
    .window(size=past_history, shift=1, stride=1, drop_remainder=True)
    # window() yields nested datasets; batching each one turns it into a single tensor.
    .flat_map(lambda window: window.batch(past_history))
    .map(lambda tokens: tf.one_hot(tokens, param_list["VOCAB_SIZE"], axis=-1))
    .batch(param_list["BATCH_SIZE"])
)

In [12]:
# Target pipeline: starting PAST_HISTORY tokens into the test sequence, slide a
# window of FUTURE_TARGET consecutive tokens one step at a time.
future_target = param_list["FUTURE_TARGET"]
# Integer-id target windows, kept under their own name before one-hot encoding.
y_test_slices = (
    tf.data.Dataset.from_tensor_slices(test_set[param_list["PAST_HISTORY"]:])
    .window(size=future_target, shift=1, stride=1, drop_remainder=True)
    # window() yields nested datasets; batching each one turns it into a single tensor.
    .flat_map(lambda window: window.batch(future_target))
)
# One-hot encoded, batched targets.
y_test = (
    y_test_slices
    .map(lambda tokens: tf.one_hot(tokens, param_list["VOCAB_SIZE"], axis=-1))
    .batch(param_list["BATCH_SIZE"])
)

In [13]:
# Pair each batch of one-hot inputs with the corresponding batch of one-hot targets.
test_data = tf.data.Dataset.zip((x_test, y_test))

In [14]:
# Evaluate the model on the paired test dataset.
# NOTE(review): unpacking into exactly two values assumes the model was compiled
# with a single metric (presumably accuracy) — confirm against the training setup.
loss, acc = model.evaluate(test_data)



In [15]:
# Predicted token id for every target position, computed one batch at a time.
# The model emits a full vocabulary-sized score vector for each of the 8 output
# steps, so calling model.predict on the whole dataset would hold one float per
# (window, step, vocabulary entry) in memory before the arg-max. Reducing each
# batch immediately keeps only the integer ids. The result has the same values
# and shape as taking the arg-max over the full prediction array.
y_pred = np.concatenate(
    [
        tf.argmax(model.predict_on_batch(input_batch), axis=-1).numpy()
        for input_batch in x_test
    ],
    axis=0,
)

In [16]:
# Collect every integer-id target window into a single NumPy array.
y_true = np.array(list(y_test_slices.as_numpy_iterator()))

In [17]:
# Precision, recall and F1 over all target positions, once per averaging method.
average_method = ["micro", "macro", "weighted"]

# The scikit-learn scorers take 1-D label arrays, so flatten both arrays once.
y_true_flat = np.ravel(y_true)
y_pred_flat = np.ravel(y_pred)

# One list per metric; entries follow the order of `average_method`.
p = [precision_score(y_true_flat, y_pred_flat, average=method) for method in average_method]
r = [recall_score(y_true_flat, y_pred_flat, average=method) for method in average_method]
f = [f1_score(y_true_flat, y_pred_flat, average=method) for method in average_method]

In [18]:
# Tabulate the scores: one row per metric, one column per averaging method.
scores_by_metric = {"precision": p, "recall": r, "f1": f}
report = pd.DataFrame.from_dict(scores_by_metric, orient="index", columns=average_method)
report

Unnamed: 0,micro,macro,weighted
precision,0.080348,9.6e-05,0.006456
recall,0.080348,0.001199,0.080348
f1,0.080348,0.000178,0.011951


In [25]:
# Single-row table holding the loss and accuracy returned by model.evaluate.
accuracy = pd.DataFrame({"loss": [loss], "accuracy": [acc]})
accuracy

Unnamed: 0,loss,accuracy
0,14.813964,0.080348


In [27]:
# Write both result tables to the directory of the evaluated model version.
report_path = "version/{}/report.csv".format(timestamp)
accuracy_path = "version/{}/accuracy.csv".format(timestamp)

report.to_csv(report_path)                   # keeps the metric names as the index column
accuracy.to_csv(accuracy_path, index=False)  # single row, so the index is omitted