In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp_api
import kerastuner
import numpy as np
import pandas as pd
import os
import json
import datetime
import dill

In [2]:
from tensorflow.keras.layers import (
    Dense, 
    Dropout,
    LSTMCell,
    RNN
)

In [3]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up
# front. Memory growth has to be configured identically on every visible GPU,
# so apply it to each device; on a CPU-only machine the list is empty and the
# loop is simply skipped (indexing [0] would raise IndexError there).
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [4]:
# Directory name for this experiment's artifacts. NOTE(review): the SavedModel
# export further down hard-codes the same "ex" prefix rather than reading this
# variable — presumably they are meant to stay in sync.
log_dir = "ex"

In [5]:
# Label for this dataset/experiment. Not referenced by the visible cells —
# presumably used for naming runs or logs elsewhere; verify before removing.
dataset_name = "SEG_AR"

In [6]:
# Fixed (non-tuned) settings shared by the data pipeline and the model.
static_params = dict(
    PAST_HISTORY=16,      # input window length passed to generate_timeseries
    FUTURE_TARGET=8,      # label window length / number of predicted steps
    BATCH_SIZE=512,       # batch size of the tf.data pipelines
    BUFFER_SIZE=200000,   # shuffle buffer size of the training pipeline
    EPOCHS=500,           # not referenced in the visible cells
    VOCAB_SIZE=16293,     # one-hot depth and size of the softmax output
)

In [7]:
# Hyperparameters used to build and compile the model below.
hparams = dict(
    HP_LSTM_1_UNITS=64,       # units of the first LSTM cell
    HP_LSTM_2_UNITS=64,       # units of the second LSTM cell
    HP_LSTM_1_DROPOUT=0.5,    # dropout of the first LSTM cell
    HP_LSTM_2_DROPOUT=0.5,    # dropout of the second LSTM cell
    HP_LEARNING_RATE=1e-3,    # learning rate handed to the Nadam optimizer
)

In [8]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    """Cut a 1-D series into (history, target) sliding-window pairs.

    For every position ``p`` in ``[start_index + history_size, end_index)`` the
    ``history_size`` values before ``p`` become one input window and the
    ``target_size`` values starting at ``p`` become its label window.

    Args:
        dataset: 1-D NumPy array holding the series.
        start_index: Index of the first value allowed in an input window.
        end_index: Exclusive upper bound for ``p``. ``None`` means
            ``len(dataset) - target_size``.
        history_size: Number of past values per input window.
        target_size: Number of future values per label window.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays shaped
        ``(n, history_size, 1)`` and ``(n, target_size, 1)``. When no position
        qualifies, both are empty arrays of shape ``(0,)``.

    Note:
        With ``end_index=None`` the final position ``len(dataset) - target_size``
        is excluded, so the last value of the series never appears in a label.
    """
    first_pos = start_index + history_size
    stop_pos = len(dataset) - target_size if end_index is None else end_index

    windows, targets = [], []
    for pos in range(first_pos, stop_pos):
        past = dataset[range(pos - history_size, pos)]
        future = dataset[pos:pos + target_size]
        # Append a trailing feature axis: (steps,) -> (steps, 1).
        windows.append(np.reshape(past, (history_size, 1)))
        targets.append(np.reshape(future, (target_size, 1)))

    return np.array(windows), np.array(targets)

In [9]:
# Load the training series (one integer token id per line) and window it.
train_set = np.genfromtxt("data/SEG_train_set.csv", delimiter="\n", dtype=np.int32)
x_train, y_train = generate_timeseries(train_set, 0, None, static_params["PAST_HISTORY"], static_params["FUTURE_TARGET"])
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Shuffle individual windows *before* batching. Shuffling after batch() only
# permutes whole batches, so every batch would contain the same run of
# consecutive, heavily overlapping windows in every epoch.
train_data = train_data.cache().shuffle(static_params["BUFFER_SIZE"]).batch(static_params["BATCH_SIZE"])

In [10]:
# Validation split: same windowing as the training data, batched but not shuffled.
val_set = np.genfromtxt("data/SEG_val_set.csv", delimiter="\n", dtype=np.int32)
x_val, y_val = generate_timeseries(
    val_set,
    0,
    None,
    static_params["PAST_HISTORY"],
    static_params["FUTURE_TARGET"],
)
val_data = (
    tf.data.Dataset.from_tensor_slices((x_val, y_val))
    .cache()
    .batch(static_params["BATCH_SIZE"])
)

In [11]:
class SEGAutoRegressive(keras.Model):
    """Two-layer LSTM that emits `output_steps` predictions autoregressively.

    The input window is one-hot encoded and run through two stacked LSTM
    layers to produce the first prediction and the LSTM states (`warmup`).
    Each following step feeds the previous softmax prediction back into the
    same LSTM cells (`call`).

    Args:
        units_1: Number of units of the first LSTM cell.
        units_2: Number of units of the second LSTM cell.
        dropout_1: Dropout rate of the first LSTM cell.
        dropout_2: Dropout rate of the second LSTM cell.
        output_steps: Number of steps to predict.
        output_size: Size of the softmax output. It is also used as the
            one-hot depth of the inputs, because predictions are fed back
            into the first LSTM cell and must match its input width.
    """

    def __init__(self, units_1, units_2, dropout_1, dropout_2, output_steps, output_size):
        super().__init__()
        self.output_steps = output_steps
        self.output_size = output_size
        self.units_1 = units_1
        self.units_2 = units_2
        self.dropout_1 = dropout_1
        self.dropout_2 = dropout_2

        self.lstm_cell_1 = LSTMCell(units_1, dropout=dropout_1)
        self.lstm_cell_2 = LSTMCell(units_2, dropout=dropout_2)

        # The RNN wrappers share their weights with the bare cells, which are
        # stepped manually inside `call`.
        self.lstm_rnn_1 = RNN(self.lstm_cell_1, return_state=True, return_sequences=True)
        self.lstm_rnn_2 = RNN(self.lstm_cell_2, return_state=True)
        self.dense = Dense(output_size, activation="softmax")

    @tf.function
    def warmup(self, inputs, training=None):
        """Consume the input window and return the first prediction and states.

        Args:
            inputs: Integer token ids; the size-1 axis 2 is squeezed away after
                one-hot encoding, i.e. the expected layout is (batch, time, 1).
            training: Forwarded to both RNN layers.

        Returns:
            Tuple `(prediction, state_1, state_2)`: the softmax output of the
            dense layer and the state lists of the first and second LSTM.
        """
        # Use the model's own output size as one-hot depth (instead of a
        # notebook-level constant) so that inputs and fed-back predictions
        # always share the same width.
        # (batch, time, 1) -> one_hot -> (batch, time, 1, depth) -> (batch, time, depth)
        onehot_inputs = tf.squeeze(tf.one_hot(inputs, self.output_size), axis=2)

        # With return_state=True each RNN returns [output, *states].
        x_1, *state_1 = self.lstm_rnn_1(onehot_inputs, training=training)
        x_2, *state_2 = self.lstm_rnn_2(x_1, training=training)

        # prediction.shape => (batch, output_size)
        prediction = self.dense(x_2)

        return prediction, state_1, state_2

    @tf.function
    def call(self, inputs, training=None):
        """Predict `output_steps` distributions for each input window.

        Args:
            inputs: Integer token ids laid out as (batch, time, 1).
            training: Forwarded to the warm-up pass and to the LSTM cells.

        Returns:
            Float tensor of shape (batch, output_steps, output_size).
        """
        # A TensorArray collects the outputs of the dynamically unrolled loop.
        predictions = tf.TensorArray(tf.float32, size=self.output_steps, clear_after_read=False)

        # Initialize the LSTM states and obtain the first prediction.
        prediction, state_1, state_2 = self.warmup(inputs, training=training)
        predictions = predictions.write(0, prediction)

        # Run the remaining prediction steps.
        for i in tf.range(1, self.output_steps):
            # Use the last prediction as the next input.
            x = prediction

            # Execute one step of each LSTM cell.
            x_1, state_1 = self.lstm_cell_1(x, states=state_1, training=training)
            x_2, state_2 = self.lstm_cell_2(x_1, states=state_2, training=training)

            # Convert the LSTM output to a prediction and store it.
            prediction = self.dense(x_2)
            predictions = predictions.write(i, prediction)

        # predictions.shape => (time, batch, features)
        predictions = predictions.stack()

        # predictions.shape => (batch, time, features)
        predictions = tf.transpose(predictions, [1, 0, 2])

        return predictions

In [12]:
# Build the autoregressive model from the hyperparameters and fixed settings.
model = SEGAutoRegressive(
    units_1=hparams["HP_LSTM_1_UNITS"],
    units_2=hparams["HP_LSTM_2_UNITS"],
    dropout_1=hparams["HP_LSTM_1_DROPOUT"],
    dropout_2=hparams["HP_LSTM_2_DROPOUT"],
    output_steps=static_params["FUTURE_TARGET"],
    output_size=static_params["VOCAB_SIZE"],
)

# Labels are integer token ids, hence the sparse categorical loss.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Nadam(hparams["HP_LEARNING_RATE"]),
    metrics=['accuracy'],
)

In [13]:
# Smoke test: run one training window through the untrained model.
# reshape adds the batch axis: (history, 1) -> (1, history, 1).
sample_window = x_train[2].reshape(1, -1, 1)
model.predict(sample_window)

array([[[6.1402556e-05, 6.1380131e-05, 6.1333849e-05, ...,
         6.1377206e-05, 6.1327686e-05, 6.1369428e-05],
        [6.1399835e-05, 6.1381084e-05, 6.1335770e-05, ...,
         6.1379462e-05, 6.1327475e-05, 6.1366438e-05],
        [6.1396109e-05, 6.1382001e-05, 6.1339226e-05, ...,
         6.1382154e-05, 6.1328763e-05, 6.1363004e-05],
        ...,
        [6.1386723e-05, 6.1383231e-05, 6.1351078e-05, ...,
         6.1388018e-05, 6.1337836e-05, 6.1354665e-05],
        [6.1384831e-05, 6.1383085e-05, 6.1354403e-05, ...,
         6.1388768e-05, 6.1341467e-05, 6.1353312e-05],
        [6.1383471e-05, 6.1382736e-05, 6.1357263e-05, ...,
         6.1388964e-05, 6.1345039e-05, 6.1352745e-05]]], dtype=float32)

In [14]:
# Pull the inputs of one training batch into `x_sample` for ad-hoc inspection.
# Note that the loop variables `x` and `y` remain bound in the notebook
# namespace afterwards; `x_sample` is not used by the visible cells.
for x, y in train_data.take(1):
    x_sample = x

In [15]:
#history = model.fit(train_data, epochs=1)

In [16]:
# Print the per-layer parameter counts of the model.
model.summary()

Model: "seg_auto_regressive"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
lstm_cell (LSTMCell)         multiple                  4187648
_________________________________________________________________
lstm_cell_1 (LSTMCell)       multiple                  33024
_________________________________________________________________
rnn (RNN)                    multiple                  4187648
_________________________________________________________________
rnn_1 (RNN)                  multiple                  33024
_________________________________________________________________
dense (Dense)                multiple                  1059045
Total params: 5,279,717
Trainable params: 5,279,717
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Export as a SavedModel whose serving signature is `call`, traced for int32
# inputs of shape (batch, time, 1).
serving_input_spec = tf.TensorSpec(shape=[None, None, 1], dtype=tf.int32, name="input")
tf.saved_model.save(
    model,
    "ex/model_4",
    signatures=model.call.get_concrete_function(serving_input_spec),
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ex/model_4\assets


In [10]:
# Reload a previously exported SavedModel.
# NOTE(review): this reads "version/20200918-103208", not the "ex/model_4"
# export written above, and the cell's execution count restarts at 10 —
# presumably it was run in a different kernel session. Confirm the directory
# exists before relying on Restart & Run All.
new_model = tf.saved_model.load("version/20200918-103208")

In [11]:
# List the signatures exposed by the loaded SavedModel.
new_model.signatures.items()

ItemsView(_SignatureMap({'serving_default': <tensorflow.python.saved_model.load._WrapperFunction object at 0x000002B1A7670F88>}))

In [12]:
# Run the first training window through the restored serving signature.
inference = new_model.signatures["serving_default"]
first_window = tf.constant(x_train[0].reshape(1, -1, 1))  # (1, history, 1)
inference(first_window)

{'output_0': <tf.Tensor: shape=(1, 8, 16293), dtype=float32, numpy=
 array([[[2.61560619e-01, 1.05555393e-01, 3.22286412e-02, ...,
          1.17059717e-05, 8.97260907e-06, 1.40956763e-05],
         [2.61344552e-01, 1.05482891e-01, 3.22125517e-02, ...,
          1.17148556e-05, 8.97976952e-06, 1.41057408e-05],
         [2.61348903e-01, 1.05484381e-01, 3.22129093e-02, ...,
          1.17146810e-05, 8.97961036e-06, 1.41055179e-05],
         ...,
         [2.61362463e-01, 1.05488814e-01, 3.22138779e-02, ...,
          1.17141390e-05, 8.97916107e-06, 1.41048795e-05],
         [2.61363626e-01, 1.05489217e-01, 3.22139934e-02, ...,
          1.17140789e-05, 8.97913105e-06, 1.41048322e-05],
         [2.61364222e-01, 1.05489418e-01, 3.22140381e-02, ...,
          1.17140607e-05, 8.97910104e-06, 1.41047849e-05]]], dtype=float32)>}