In [5]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime

# Default size (width, height) applied to the figures drawn in this notebook.
plt.rcParams["figure.figsize"] = (20, 5)

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# The setting is applied to every visible GPU because TensorFlow requires memory
# growth to be configured identically across GPUs. Iterating also means a
# CPU-only host (empty list) is a no-op rather than an IndexError from
# `physical_devices[0]`.
physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, enable=True)

In [6]:
# Dataset identifier; substituted into the test-set CSV path when the data is loaded.
dataset_name = "StreamBench_3G1P"

In [7]:
# Identifier of the saved model version; substituted into the "version/{}/..."
# paths the notebook loads the model from and writes its results to.
timestamp = "20200817-092343"

In [8]:
# Build the CSV path for the selected dataset, load it, and preview the first rows.
test_set_path = "data/{}_test_set.csv".format(dataset_name)
test_dataset = pd.read_csv(test_set_path)
test_dataset.head()

Unnamed: 0,t,t+1,delta,tokenized_data
0,101626552320,101626552320,0,0
1,101626552320,101626556416,4096,4096
2,101626556416,101626556416,0,0
3,101626556416,101626560512,4096,4096
4,101626560512,101626560512,0,0


In [9]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size, n_features):
    """Cut a sequence into sliding (history, target) window pairs.

    For every position ``i`` from ``start_index + history_size`` up to (but not
    including) ``end_index``, the ``history_size`` rows before ``i`` form one
    input window and the ``target_size`` rows starting at ``i`` form its label.

    Args:
        dataset: Sequence of samples; it must support ``len()``, indexing with
            a ``range`` and indexing with a slice (e.g. a NumPy array).
        start_index: Position of the first row of the first history window.
        end_index: Exclusive upper bound for ``i``. ``None`` means
            ``len(dataset) - target_size``.
        history_size: Number of rows in each input window.
        target_size: Number of rows in each label window.
        n_features: Second dimension each input window is reshaped to.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. ``data`` stacks the input
        windows, each reshaped to ``(history_size, n_features)``; ``labels``
        stacks the matching ``dataset[i:i + target_size]`` slices.
    """
    first_target = start_index + history_size
    last_target = len(dataset) - target_size if end_index is None else end_index
    targets = range(first_target, last_target)

    # Force every history window into shape (history_size, n_features).
    windows = [
        np.reshape(dataset[range(t - history_size, t)], (history_size, n_features))
        for t in targets
    ]
    horizons = [dataset[t:t + target_size] for t in targets]
    return np.array(windows), np.array(horizons)

In [10]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the token column; the encoder expects a 2-D (n_samples, 1) input.
# NOTE(review): the encoder is fitted on the test set itself, so its column order
# matches the model's output classes only if the training data produced the same
# set of token values -- confirm against the training notebook.
token_column = test_dataset["tokenized_data"].values.reshape(-1, 1)
test_encoder = OneHotEncoder(dtype=np.float32)
encoded_test_data = test_encoder.fit_transform(token_column)
encoded_test_data[0], test_encoder.categories_

(<1x5 sparse matrix of type '<class 'numpy.float32'>'
 	with 1 stored elements in Compressed Sparse Row format>,
 [array([-6291456,       -1,        0,     4096,  2097152], dtype=int64)])

In [11]:
# Slice the dense one-hot matrix into 16-step input windows, each paired with the
# 8 steps that follow it; the feature width is the number of encoder categories.
x_test, y_test = generate_timeseries(
    dataset=encoded_test_data.toarray(),
    start_index=0,
    end_index=None,
    history_size=16,
    target_size=8,
    n_features=len(test_encoder.categories_[0]),
)

In [12]:
# Restore the network saved for the selected version and print its layer summary.
model_path = "version/{}/model.h5".format(timestamp)
model = tf.keras.models.load_model(model_path)
model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_21 (Bidirectio multiple                  137216    
_________________________________________________________________
dropout_4 (Dropout)          multiple                  0         
_________________________________________________________________
repeat_vector_11 (RepeatVect multiple                  0         
_________________________________________________________________
bidirectional_22 (Bidirectio multiple                  164352    
_________________________________________________________________
dropout_5 (Dropout)          multiple                  0         
_________________________________________________________________
time_distributed_10 (TimeDis multiple                  645       
Total params: 302,213
Trainable params: 302,213
Non-trainable params: 0
_______________________________________________

In [13]:
# Turn the one-hot label windows into class indices: argmax over the last
# (one-hot) axis, then flatten window by window into a single flat list. This
# produces the same sequence as extending a list one window at a time, without
# the Python-level loop.
y_true = np.argmax(y_test, axis=-1).ravel().tolist()

In [14]:
# Run inference on the whole test set in one batched call instead of issuing a
# separate `model.predict` for every window, which repeats the per-call setup
# cost for each sample. The input shape comes from `x_test` itself rather than
# a hard-coded (1, 16, 5) reshape.
predicted_scores = model.predict(x_test)
# Most likely class for every predicted step, flattened window by window so the
# order lines up with the flattened ground-truth labels.
y_pred = np.argmax(predicted_scores, axis=-1).ravel().tolist()

In [15]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_true, y_pred)
# Persist the score next to the model. `float()` accepts both a NumPy scalar and
# a plain Python float, whereas `.tolist()` exists only on the NumPy type; the
# text written to the file is the same either way.
with open("version/{}/accuracy_3G1P.txt".format(timestamp), "w") as accuracy_file:
    accuracy_file.write(str(float(accuracy)))

accuracy

0.5024073853010227

In [16]:
from sklearn.metrics import classification_report

# Build the precision/recall/F1 report as a table (transposed so each class or
# average is a row), save it alongside the model, and print it.
report_dict = classification_report(y_true, y_pred, output_dict=True)
report = pd.DataFrame(report_dict).transpose()
report_path = "version/{}/report_3G1P.csv".format(timestamp)
report.to_csv(report_path)
print(report)

precision    recall  f1-score        support
0              0.014039  0.752841  0.027565     704.000000
1              0.000000  0.000000  0.000000     120.000000
2              0.997961  0.993167  0.995558  100544.000000
3              0.088889  0.000808  0.001602   98960.000000
4              0.008679  0.751389  0.017160     720.000000
accuracy       0.502407  0.502407  0.502407       0.502407
macro avg      0.221914  0.499641  0.208377  201048.000000
weighted avg   0.542913  0.502407  0.498825  201048.000000
