In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp_api
import kerastuner
import numpy as np
import pandas as pd
import os
import json
import datetime
import dill

In [2]:
from tensorflow.keras.layers import (
    TimeDistributed, 
    Dense, 
    Conv1D, 
    MaxPooling1D, 
    Bidirectional, 
    LSTM, 
    Dropout
)

In [3]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up
# front. Iterating over the device list (rather than indexing [0]) keeps this
# cell working on CPU-only machines, where the list is empty, and applies the
# same setting to every GPU, which TensorFlow requires on multi-GPU hosts.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, enable=True)

In [4]:
# Every run gets its own TensorBoard log folder and model-version folder,
# both named after the second at which this cell was executed.
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/" + timestamp
version_dir = "version/" + timestamp

# exist_ok=True keeps the cell idempotent: re-running it within the same
# second (or after a partial failure) no longer raises FileExistsError.
os.makedirs(log_dir, exist_ok=True)
os.makedirs(version_dir, exist_ok=True)
timestamp

'20200916-184946'

In [5]:
# Label for the dataset used in this run. NOTE(review): not referenced by any
# other cell visible in this notebook — confirm whether it is still needed.
dataset_name = "SEG_AR"

In [7]:
# Read the fixed (non-tuned) run settings from disk and echo them so the
# values used for this run are visible in the notebook output.
with open("static/static_params.json", 'r') as j:
    raw_params = j.read()
static_params = json.loads(raw_params)
static_params

{'PAST_HISTORY': 16,
 'FUTURE_TARGET': 8,
 'BATCH_SIZE': 1024,
 'BUFFER_SIZE': 200000,
 'EPOCHS': 500}

In [7]:
# Hyperparameter values for this run, keyed by the names that are logged to
# TensorBoard's HParams plugin further down.
hparams = dict(
    HP_BATCH_SIZE=1024,
    HP_CONV1D_FILTERS=80,
    HP_CONV1D_KERNEL_SIZE=3,
    HP_CONV1D_DROPOUT=0.3,
    HP_LSTM_1_UNITS=40,
    HP_LSTM_2_UNITS=168,
    HP_LSTM_1_DROPOUT=0.3,
    HP_LSTM_2_DROPOUT=0.2,
    HP_LEARNING_RATE=1e-3,
)

In [8]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    """Slice a univariate series into (history, target) training windows.

    For every position ``i`` in ``range(start_index + history_size, end_index)``
    one sample is produced: the ``history_size`` values before ``i`` as input
    and the ``target_size`` values starting at ``i`` as label.

    Args:
        dataset: 1-D sequence of values (NumPy array or anything
            ``np.asarray`` accepts).
        start_index: Index of the first value that may appear in a history
            window. Must be non-negative.
        end_index: Exclusive upper bound for ``i``. ``None`` means
            ``len(dataset) - target_size``.
        history_size: Number of past values per input window.
        target_size: Number of future values per label window.

    Returns:
        Tuple ``(data, labels)`` of arrays shaped
        ``(n_samples, history_size, 1)`` and ``(n_samples, target_size, 1)``.
        When no window fits, both arrays are empty but keep those trailing
        dimensions.

    Raises:
        ValueError: If ``start_index`` is negative, or if ``end_index`` would
            require a target window that runs past the end of ``dataset``.
    """
    series = np.asarray(dataset)

    if start_index < 0:
        # A negative start would make the history slice wrap around the array.
        raise ValueError("start_index must be non-negative, got %r" % (start_index,))

    first_target = start_index + history_size
    if end_index is None:
        end_index = len(series) - target_size

    # Largest i whose full target window [i, i + target_size) still fits.
    last_valid = len(series) - target_size
    if first_target < end_index and end_index - 1 > last_valid:
        raise ValueError(
            "end_index=%r needs target windows beyond the end of the series "
            "(length %d, target_size %d)" % (end_index, len(series), target_size)
        )

    positions = range(first_target, end_index)
    # Plain slices give the same values as indexing with a range of indices,
    # without building an index array per window.
    data = [
        np.reshape(series[i - history_size:i], (history_size, 1))
        for i in positions
    ]
    labels = [
        np.reshape(series[i:i + target_size], (target_size, 1))
        for i in positions
    ]

    if not data:
        # Keep the trailing dimensions so downstream code sees a consistent rank.
        return (
            np.empty((0, history_size, 1), dtype=series.dtype),
            np.empty((0, target_size, 1), dtype=series.dtype),
        )
    return np.array(data), np.array(labels)

In [9]:
# Build the training pipeline: load the raw series, cut it into
# (history, target) windows, and wrap them in a tf.data pipeline.
train_set = np.genfromtxt("data/SEG_train_set.csv", delimiter="\n", dtype=np.float32)
x_train, y_train = generate_timeseries(train_set, 0, None, static_params["PAST_HISTORY"], static_params["FUTURE_TARGET"])
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Shuffle BEFORE batching so individual windows are mixed each epoch.
# Shuffling after batch() only reorders whole batches, leaving every batch
# made of the same consecutive windows.
train_data = train_data.cache().shuffle(static_params["BUFFER_SIZE"]).batch(static_params["BATCH_SIZE"])

In [10]:
# Validation pipeline: same windowing as the training data, but only cached
# and batched (no shuffling).
val_set = np.genfromtxt("data/SEG_val_set.csv", delimiter="\n", dtype=np.float32)
x_val, y_val = generate_timeseries(
    val_set,
    0,
    None,
    static_params["PAST_HISTORY"],
    static_params["FUTURE_TARGET"],
)
val_data = (
    tf.data.Dataset.from_tensor_slices((x_val, y_val))
    .cache()
    .batch(static_params["BATCH_SIZE"])
)

In [12]:
# Size of the softmax output layer. The static_params shown when the JSON was
# loaded contain no "VOCAB_SIZE" entry, so a plain lookup raises KeyError.
# Prefer the configured value when present; otherwise derive the smallest
# size that covers every label seen in the train/validation windows.
# NOTE(review): the fallback assumes the labels are integer class ids
# starting at 0 (as sparse_categorical_crossentropy expects) — confirm, and
# ideally add VOCAB_SIZE to static/static_params.json.
vocab_size = static_params.get("VOCAB_SIZE")
if vocab_size is None:
    vocab_size = int(max(y_train.max(), y_val.max())) + 1

# Conv1D -> pooling -> two stacked bidirectional LSTMs -> per-timestep softmax.
model = keras.Sequential()
model.add(Conv1D(filters=hparams["HP_CONV1D_FILTERS"], kernel_size=hparams["HP_CONV1D_KERNEL_SIZE"], padding='causal', activation='relu'))
# pool_size=2 halves the time axis of the convolution output.
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(hparams["HP_CONV1D_DROPOUT"]))
model.add(Bidirectional(LSTM(hparams["HP_LSTM_1_UNITS"], return_sequences=True)))
model.add(Dropout(hparams["HP_LSTM_1_DROPOUT"]))
model.add(Bidirectional(LSTM(hparams["HP_LSTM_2_UNITS"], return_sequences=True)))
model.add(Dropout(hparams["HP_LSTM_2_DROPOUT"]))
# One class distribution per remaining timestep.
model.add(TimeDistributed(Dense(vocab_size, activation="softmax")))

model.compile(
    optimizer=keras.optimizers.Nadam(hparams["HP_LEARNING_RATE"]),
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy']
)

In [20]:
# Train the model while logging to TensorBoard. The hyperparameters are
# written inside the summary writer's context so the HParams plugin can
# associate them with this run's log directory.
with tf.summary.create_file_writer(log_dir).as_default():
    hp_api.hparams(hparams)
    history = model.fit(
        train_data,
        validation_data=val_data,
        # Use the configured epoch budget instead of a hard-coded single
        # epoch; with epochs=1 the EarlyStopping callback below could never
        # trigger.
        epochs=static_params["EPOCHS"],
        callbacks=[
            # Stop once validation accuracy has not improved for 3 epochs.
            keras.callbacks.EarlyStopping('val_accuracy', patience=3),
            keras.callbacks.TensorBoard(log_dir),
        ],
    )



In [61]:
# Persist the trained model as an HDF5 file inside this run's version folder.
model.save(version_dir + "/model.h5") 