In [14]:
import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp_api
import kerastuner
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime
import dill

plt.rcParams["figure.figsize"] = (20, 5)

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)  # Off when Distributed Environment

In [15]:
from tensorflow.keras.layers import TimeDistributed, Dense, Conv1D, MaxPooling1D, Bidirectional, LSTM, Dropout

In [16]:
dataset_name = "SEG_CNNLSTM"

In [17]:
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
timestamp

'20200910-214530'

In [18]:
dataset = np.genfromtxt("data/{}_train_set.csv".format(dataset_name), delimiter="\n", dtype=np.float32) #np.int64
dataset

array([0., 0., 0., ..., 1., 3., 1.], dtype=float32)

In [19]:
with open("tuner.pkl", "rb") as t:
    tuner = dill.load(t)

tuner

EOFError: Ran out of input

In [27]:
# Static Parameters 
static_params = dict()

static_params["PAST_HISTORY"] = 16
static_params["FUTURE_TARGET"] = 8
static_params["BATCH_SIZE"] = 512
static_params["EPOCHS"] = 1000
static_params["LOSS_FUNCTION"] = 'sparse_categorical_crossentropy'
static_params["VAL_SPLIT"] = 0.2
static_params["METRIC_ACCURACY"] = 'accuracy'
'''
word_index = np.genfromtxt("data/word_index.csv", delimiter="\n", dtype=np.int64)
vocab_size = len(word_index)
static_params["VOCAB_SIZE"] = vocab_size
'''
static_params["VOCAB_SIZE"] = 14482

In [21]:
'''
import dill

with open("static/SparseCategoryEncoderDecoder.pkl", 'rb') as f:
    SparseCategoryEncoderDecoder = dill.load(f)

static_params["VOCAB_SIZE"] = SparseCategoryEncoderDecoder.vocab_size

with open("static/static_params.json", "w") as j :
    json.dump(static_params, j, indent=4)
'''

'\nimport dill\n\nwith open("static/SparseCategoryEncoderDecoder.pkl", \'rb\') as f:\n    SparseCategoryEncoderDecoder = dill.load(f)\n\nstatic_params["VOCAB_SIZE"] = SparseCategoryEncoderDecoder.vocab_size\n\nwith open("static/static_params.json", "w") as j :\n    json.dump(static_params, j, indent=4)\n'

In [22]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    data = []
    labels = []

    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        indices = range(i-history_size, i)
        # Reshape data from (history_size,) to (history_size, n_feature)
        data.append(np.reshape(dataset[indices], (history_size, 1)))
        #data.append(dataset[indices])
        labels.append(np.reshape(dataset[i:i+target_size], (target_size, 1)))
        #labels.append(dataset[i:i+target_size])
    return np.array(data), np.array(labels)

In [23]:
x_train, y_train = generate_timeseries(dataset, 0, None, static_params["PAST_HISTORY"], static_params["FUTURE_TARGET"])
x_train.shape, y_train.shape

((14858, 16, 1), (14858, 8, 1))

train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.cache().batch(static_params["BATCH_SIZE"]).shuffle(static_params["BUFFER_SIZE"]).repeat()

In [24]:
model = keras.Sequential()
model.add(Conv1D(filters=32 , kernel_size=5, padding='causal', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.45))
model.add(Bidirectional(LSTM(232, return_sequences=True)))
model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(168, return_sequences=True)))
model.add(Dropout(0.4))
model.add(TimeDistributed(Dense(static_params["VOCAB_SIZE"], activation="softmax")))

model.compile(
    optimizer=keras.optimizers.Nadam(learning_rate=1e-4),
    loss=static_params["LOSS_FUNCTION"],
    metrics=[static_params["METRIC_ACCURACY"]]
)

In [28]:
model_history = model.fit(x_train, y_train, 
batch_size=static_params["BATCH_SIZE"], validation_split=0.2, epochs=static_params["EPOCHS"],
callbacks=[keras.callbacks.EarlyStopping('val_accuracy')]
)

Epoch 1/1000
Epoch 2/1000
