In [66]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime

plt.rcParams["figure.figsize"] = (20, 5)

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [67]:
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
'''
log_dir = "logs/fit/" + timestamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
version_dir = "version/" + timestamp 

os.makedirs(version_dir)
'''
timestamp

'20200825-150332'

In [68]:
dataset_name = "SEG_2"

In [266]:
dataset = np.genfromtxt("data/{}_train_set.csv".format(dataset_name), delimiter="\n", dtype=np.int64)
dataset

array([90096539952,      528712,       73032, ...,  5992956672,
        -753929088,   639534672], dtype=int64)

In [267]:
word_index = np.genfromtxt("data/word_index.csv", delimiter="\n", dtype=np.int64)
vocab_size = len(word_index)
vocab_size

225

In [91]:
param_list = dict()

param_list["PAST_HISTORY"] = 16
param_list["FUTURE_TARGET"] = 8
param_list["BATCH_SIZE"] = 128
param_list["EPOCHS"] = 1
param_list["BUFFER_SIZE"] = 200000
param_list["VOCAB_SIZE"] = vocab_size
param_list["EMBEDDING_DIM"] = 128
param_list["NUM_1_NEURONS"] = 64
param_list["NUM_2_NEURONS"] = 64

In [198]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    data = []
    labels = []

    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        indices = range(i-history_size, i)
        # Reshape data from (history_size,) to (history_size, n_feature)
        #data.append(np.reshape(dataset[indices], (history_size, 1)))
        data.append(dataset[indices])
        labels.append(np.reshape(dataset[i:i+target_size], (target_size, 1)))
        #labels.append(dataset[i:i+target_size])
    return np.array(data), np.array(labels)

In [199]:
x_train, y_train = generate_timeseries(dataset, 0, None, param_list["PAST_HISTORY"], param_list["FUTURE_TARGET"])
x_train.shape, y_train.shape

((14858, 16), (14858, 8, 1))

In [200]:
x_train[0], y_train[0]

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64),
 array([[  0],
        [  0],
        [  0],
        [  0],
        [  0],
        [933],
        [  0],
        [  0]], dtype=int64))

In [172]:
model_1 = keras.models.Sequential()
model_1.add(keras.layers.Embedding(param_list["VOCAB_SIZE"], param_list["EMBEDDING_DIM"]))
'''
model.add(keras.layers.Bidirectional(tf.keras.layers.LSTM(param_list["NUM_1_NEURONS"])))
model.add(keras.layers.RepeatVector(param_list["FUTURE_TARGET"]))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(param_list["NUM_2_NEURONS"], return_sequences=True)))
model.add(keras.layers.TimeDistributed(tf.keras.layers.Dense(param_list["EMBEDDING_DIM"], activation="relu")))
model.add(tf.keras.layers.Dense(1, activation="relu"))
'''
model_1.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [180]:
result_1 = model_1.predict(x_train[0].reshape(1, -1))
result_1, result_1.shape

(array([[[ 0.03348713, -0.0321422 , -0.0002039 , ..., -0.01538205,
          -0.00037723, -0.04587115],
         [ 0.03348713, -0.0321422 , -0.0002039 , ..., -0.01538205,
          -0.00037723, -0.04587115],
         [ 0.03348713, -0.0321422 , -0.0002039 , ..., -0.01538205,
          -0.00037723, -0.04587115],
         ...,
         [ 0.03348713, -0.0321422 , -0.0002039 , ..., -0.01538205,
          -0.00037723, -0.04587115],
         [ 0.03348713, -0.0321422 , -0.0002039 , ..., -0.01538205,
          -0.00037723, -0.04587115],
         [ 0.03348713, -0.0321422 , -0.0002039 , ..., -0.01538205,
          -0.00037723, -0.04587115]]], dtype=float32),
 (1, 16, 128))

In [181]:
model_2 = keras.models.Sequential()
model_2.add(keras.layers.Embedding(param_list["VOCAB_SIZE"], param_list["EMBEDDING_DIM"]))
model_2.add(keras.layers.Bidirectional(tf.keras.layers.LSTM(param_list["NUM_1_NEURONS"])))
'''
model.add(keras.layers.RepeatVector(param_list["FUTURE_TARGET"]))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(param_list["NUM_2_NEURONS"], return_sequences=True)))
model.add(keras.layers.TimeDistributed(tf.keras.layers.Dense(param_list["EMBEDDING_DIM"], activation="relu")))
model.add(tf.keras.layers.Dense(1, activation="relu"))
'''
model_2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [184]:
result_2 = model_2.predict(x_train[0].reshape(1, -1))
result_2, result_2.shape

(array([[-0.0323175 ,  0.01080733, -0.02124918, -0.03182497,  0.0005349 ,
          0.03013883,  0.00514005, -0.00704384,  0.00064479, -0.02233502,
         -0.03173504,  0.03177533,  0.02380591, -0.04436284, -0.0002885 ,
          0.03022791, -0.00095697,  0.00453   ,  0.0079164 , -0.01633008,
         -0.01089192, -0.0180753 ,  0.02040696, -0.02448608,  0.04001582,
          0.01959519,  0.0419482 ,  0.03068886, -0.05885827, -0.01642068,
         -0.01179666,  0.02060026,  0.00935058, -0.00333466, -0.03220222,
         -0.00065259,  0.03831847, -0.00028345, -0.02940718,  0.05537249,
          0.0318084 ,  0.03244494,  0.03810416,  0.01265741,  0.03713877,
         -0.0210963 , -0.01847055, -0.03113708,  0.00825988,  0.0211153 ,
          0.02339498, -0.01074204, -0.0211601 ,  0.03770156, -0.03256163,
          0.00466607, -0.02375534, -0.00655294, -0.02509974,  0.00518663,
          0.06740782, -0.01615395,  0.01820631, -0.05055143,  0.01211054,
         -0.0065441 , -0.03464903, -0.

In [185]:
model_3 = keras.models.Sequential()
model_3.add(keras.layers.Embedding(param_list["VOCAB_SIZE"], param_list["EMBEDDING_DIM"]))
model_3.add(keras.layers.Bidirectional(tf.keras.layers.LSTM(param_list["NUM_1_NEURONS"])))
model_3.add(keras.layers.RepeatVector(param_list["FUTURE_TARGET"]))
'''
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(param_list["NUM_2_NEURONS"], return_sequences=True)))
model.add(keras.layers.TimeDistributed(tf.keras.layers.Dense(param_list["EMBEDDING_DIM"], activation="relu")))
model.add(tf.keras.layers.Dense(1, activation="relu"))
'''
model_3.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [186]:
result_3 = model_3.predict(x_train[0].reshape(1, -1))
result_3, result_3.shape

(array([[[-0.01109056,  0.0153303 ,  0.03808117, ...,  0.01914384,
          -0.00045225,  0.02542185],
         [-0.01109056,  0.0153303 ,  0.03808117, ...,  0.01914384,
          -0.00045225,  0.02542185],
         [-0.01109056,  0.0153303 ,  0.03808117, ...,  0.01914384,
          -0.00045225,  0.02542185],
         ...,
         [-0.01109056,  0.0153303 ,  0.03808117, ...,  0.01914384,
          -0.00045225,  0.02542185],
         [-0.01109056,  0.0153303 ,  0.03808117, ...,  0.01914384,
          -0.00045225,  0.02542185],
         [-0.01109056,  0.0153303 ,  0.03808117, ...,  0.01914384,
          -0.00045225,  0.02542185]]], dtype=float32),
 (1, 8, 128))

In [206]:
model = keras.models.Sequential()
model.add(keras.layers.Embedding(param_list["VOCAB_SIZE"], param_list["EMBEDDING_DIM"]))
model.add(keras.layers.Bidirectional(tf.keras.layers.LSTM(param_list["NUM_1_NEURONS"])))
model.add(keras.layers.RepeatVector(param_list["FUTURE_TARGET"]))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(param_list["NUM_2_NEURONS"], return_sequences=True)))
model.add(keras.layers.TimeDistributed(tf.keras.layers.Dense(param_list["EMBEDDING_DIM"])))
#model.add(tf.keras.layers.Dense(1))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [207]:
model_history = model.fit(x_train, y_train, batch_size=param_list["BATCH_SIZE"], validation_split=0.2, epochs=param_list["EPOCHS"])



In [203]:
result = model.predict(x_train[0].reshape(1, -1))
result, result.shape

(array([[[0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.]]], dtype=float32),
 (1, 8, 1))

In [255]:
model_4 = keras.models.Sequential()
model_4.add(keras.layers.Embedding(param_list["VOCAB_SIZE"], param_list["EMBEDDING_DIM"]))
model_4.add(tf.keras.layers.Dense(param_list["EMBEDDING_DIM"]))
#model_4.add(tf.keras.layers.Dense(1))
model_4.compile(optimizer='adam', loss=keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])

In [262]:
model_4_history = model_4.fit(x_train, x_train, batch_size=param_list["BATCH_SIZE"], validation_split=0.2, epochs=param_list["EPOCHS"])



In [263]:
result_4 = model_4.predict(x_train[30].reshape(1, -1))
result_4, result_4.shape

(array([[[nan, nan, nan, ..., nan, nan, nan]],
 
        [[nan, nan, nan, ..., nan, nan, nan]],
 
        [[nan, nan, nan, ..., nan, nan, nan]],
 
        ...,
 
        [[nan, nan, nan, ..., nan, nan, nan]],
 
        [[nan, nan, nan, ..., nan, nan, nan]],
 
        [[nan, nan, nan, ..., nan, nan, nan]]], dtype=float32),
 (16, 1, 128))