In [56]:
import tensorflow as tf
from tensorflow import keras
import kerastuner
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime

# Default figure size for every plot in this notebook (wide and short).
plt.rcParams["figure.figsize"] = (20, 5)

# Enable on-demand GPU memory growth for every GPU TensorFlow can see.
# Looping over the list (instead of indexing [0]) keeps this cell runnable on
# CPU-only machines, where the list is empty, and configures all GPUs
# consistently on multi-GPU hosts.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [67]:
# Timestamp that tags the artifacts written by this run.
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# TensorBoard logging is currently disabled; kept here for reference:
# log_dir = "logs/fit/" + timestamp
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Per-run output directory. exist_ok=True keeps the cell re-runnable when the
# directory is already present.
version_dir = "version/" + timestamp

os.makedirs(version_dir, exist_ok=True)
timestamp

'20200826-201949'

In [17]:
# Dataset identifier; it is interpolated into the training CSV file name below.
dataset_name = "SEG_2"

In [18]:
# Read the training series: one value per line, parsed as float32
# (np.int64 was the alternative dtype noted by the author).
train_csv_path = "data/{}_train_set.csv".format(dataset_name)
dataset = np.genfromtxt(
    train_csv_path,
    delimiter="\n",
    dtype=np.float32,
)
dataset

array([0., 0., 0., ..., 1., 3., 1.], dtype=float32)

In [19]:
# Load the word index file (one integer per line); its number of entries is
# used as the vocabulary size.
word_index_path = "data/word_index.csv"
word_index = np.genfromtxt(
    word_index_path,
    delimiter="\n",
    dtype=np.int64,
)
vocab_size = len(word_index)
vocab_size

14882

In [60]:
# All tunable settings for windowing, the model and training, in one place.
param_list = {
    # Window sizes: steps fed to the model / steps it has to predict.
    "PAST_HISTORY": 16,
    "FUTURE_TARGET": 8,
    # Training settings.
    "BATCH_SIZE": 128,
    "EPOCHS": 100,
    "BUFFER_SIZE": 200000,
    # Width of the softmax output layer.
    "VOCAB_SIZE": vocab_size,
    "LEARNING_RATE": 0.01,
    # Units of the first and second LSTM layers.
    "NUM_1_NEURONS": 177,
    "NUM_2_NEURONS": 177,
    # Dropout rates applied after the first and second LSTM layers.
    "DROPOUT_1": 0.1,
    "DROPOUT_2": 0.2,
}

In [61]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    """Slice a series into (history, future) training windows.

    One sample is produced for every position ``i`` in
    ``range(start_index + history_size, end_index)``: the ``history_size``
    values before ``i`` form the input window and the ``target_size`` values
    starting at ``i`` form the label window.

    Args:
        dataset: 1-D array-like of values, one feature per time step.
        start_index: Index of the first value that may appear in a history
            window.
        end_index: Exclusive upper bound for ``i``. ``None`` means
            ``len(dataset) - target_size``.
        history_size: Number of past steps in each input window.
        target_size: Number of future steps in each label window.

    Returns:
        Tuple ``(data, labels)`` of arrays shaped ``(n, history_size, 1)`` and
        ``(n, target_size, 1)`` with the dtype of ``dataset``. When no window
        fits, ``n`` is 0 and the trailing dimensions are still present.

    Raises:
        IndexError: If an explicit ``end_index`` makes a window reach past the
            end of ``dataset``.
    """
    series = np.asarray(dataset)

    first_target = start_index + history_size
    if end_index is None:
        end_index = len(series) - target_size

    # Position of the first label step of every sample (empty when nothing fits).
    target_starts = np.arange(first_target, end_index)
    n_samples = len(target_starts)

    # Broadcast the start positions against per-window offsets to obtain one
    # row of indices per sample, then gather all windows in a single step.
    history_idx = target_starts[:, None] + np.arange(-history_size, 0)
    future_idx = target_starts[:, None] + np.arange(target_size)

    # Add the trailing feature axis: (n, steps) -> (n, steps, n_feature=1).
    data = series[history_idx].reshape(n_samples, history_size, 1)
    labels = series[future_idx].reshape(n_samples, target_size, 1)
    return data, labels

In [62]:
# Window the whole series; end_index=None lets generate_timeseries pick the
# upper bound from the dataset length and the target size.
x_train, y_train = generate_timeseries(
    dataset,
    start_index=0,
    end_index=None,
    history_size=param_list["PAST_HISTORY"],
    target_size=param_list["FUTURE_TARGET"],
)
x_train.shape, y_train.shape

((14858, 16, 1), (14858, 8, 1))

In [63]:
# Sanity check: display one input window together with its label window.
x_train[10], y_train[10]

(array([[  0.],
        [  0.],
        [  0.],
        [  0.],
        [  0.],
        [  0.],
        [  0.],
        [  0.],
        [  0.],
        [  0.],
        [  0.],
        [933.],
        [  0.],
        [  0.],
        [  0.],
        [  0.]], dtype=float32),
 array([[ 0.],
        [ 0.],
        [48.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.]], dtype=float32))

In [64]:
# Sequence-to-sequence stack:
#   1. a bidirectional LSTM that returns only its final output,
#   2. RepeatVector to repeat that output once per step to predict,
#   3. a bidirectional LSTM that returns the full sequence,
#   4. a per-step softmax over VOCAB_SIZE classes.
# Dropout is applied after each recurrent layer.
model = keras.models.Sequential([
    keras.layers.Bidirectional(keras.layers.LSTM(param_list["NUM_1_NEURONS"])),
    keras.layers.Dropout(param_list["DROPOUT_1"]),
    keras.layers.RepeatVector(param_list["FUTURE_TARGET"]),
    keras.layers.Bidirectional(
        keras.layers.LSTM(param_list["NUM_2_NEURONS"], return_sequences=True)
    ),
    keras.layers.Dropout(param_list["DROPOUT_2"]),
    keras.layers.TimeDistributed(
        keras.layers.Dense(param_list["VOCAB_SIZE"], activation='softmax')
    ),
])

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
model.compile(
    optimizer=keras.optimizers.Adam(param_list["LEARNING_RATE"]),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [65]:
# Train the model, holding out 20% of the samples for validation.
model_history = model.fit(
    x_train,
    y_train,
    epochs=param_list["EPOCHS"],
    batch_size=param_list["BATCH_SIZE"],
    validation_split=0.2,
)

Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [66]:
# Predict for a single training window; a leading batch axis of size 1 is
# added so the input has shape (1, steps, 1).
single_window = x_train[10000][np.newaxis]
result = model.predict(single_window)
result, result.shape

(array([[[6.1038168e-05, 9.8713338e-01, 6.2270291e-09, ...,
          6.7727048e-21, 2.3094301e-11, 2.6334306e-11],
         [8.0672351e-07, 2.2673817e-02, 8.1424520e-12, ...,
          4.0316848e-22, 1.2918821e-14, 1.5611544e-14],
         [2.5613365e-06, 7.0023197e-01, 9.7059321e-09, ...,
          5.1029115e-18, 2.8300067e-13, 3.4164991e-13],
         ...,
         [1.6759045e-06, 4.8838500e-02, 2.2756441e-10, ...,
          2.6029939e-18, 4.0208989e-14, 4.7051952e-14],
         [5.7494722e-06, 6.5169364e-01, 1.1887892e-08, ...,
          2.3820406e-16, 8.6826640e-13, 1.0649063e-12],
         [2.2729300e-06, 5.4909896e-02, 7.6255331e-09, ...,
          1.0153624e-17, 2.8463692e-12, 3.1653406e-12]]], dtype=float32),
 (1, 8, 14882))

In [68]:
# Persist the trained model as HDF5 inside this run's version directory.
model_path = "version/{}/model.h5".format(timestamp)
model.save(model_path)