In [16]:
import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp_api
import kerastuner
import numpy as np
import pandas as pd
import os
import json
import datetime
import dill
import IPython

In [3]:
from tensorflow.keras.layers import (
    TimeDistributed, 
    Dense, 
    Conv1D, 
    MaxPooling1D, 
    Bidirectional, 
    LSTM, 
    Dropout
)

In [4]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [58]:
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + timestamp
version_dir = "version/" + timestamp 

os.makedirs(log_dir)
os.makedirs(version_dir)
timestamp

'20200911-114924'

In [8]:
dataset_name = "SEG_CNNLSTM"

In [38]:
with open("static/static_params.json", 'r') as j:
    static_params = json.load(j)

with open("static/test_pipeline.pkl", "rb") as p:
    pipeline = dill.load(p)

static_params["VOCAB_SIZE"] = pipeline["sparse_category_encoder"].vocab_size
static_params

{'PAST_HISTORY': 16,
 'FUTURE_TARGET': 8,
 'BATCH_SIZE': 1024,
 'BUFFER_SIZE': 200000,
 'EPOCHS': 500,
 'VOCAB_SIZE': 16293}

In [54]:
# Hyper Parameters
HP_BATCH_SIZE = hp_api.HParam('batch_size', hp_api.Discrete([1024]))
#HP_EPOCHS = hp.HParam('epochs', hp.Discrete([500]))
HP_CONV1D_FILTERS = hp_api.HParam('conv1d_filters', hp_api.Discrete([80]))
HP_CONV1D_KERNEL_SIZE = hp_api.HParam('conv1d_kernel_size', hp_api.Discrete([3]))
HP_CONV1D_DROPOUT = hp_api.HParam('conv1d_dropout', hp_api.Discrete([0.3]))
HP_LSTM_1_UNITS = hp_api.HParam('lstm_1_units', hp_api.Discrete([40]))
HP_LSTM_2_UNITS = hp_api.HParam('lstm_2_units', hp_api.Discrete([168]))
HP_LSTM_1_DROPOUT = hp_api.HParam('lstm_1_dropout', hp_api.Discrete([0.3]))
HP_LSTM_2_DROPOUT = hp_api.HParam('lstm_2_dropout', hp_api.Discrete([0.2]))
HP_LEARNING_RATE = hp_api.HParam('learning_rate', hp_api.Discrete([1e-3]))


with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
  hp_api.hparams_config(
    hparams=[HP_BATCH_SIZE, HP_CONV1D_FILTERS, HP_CONV1D_KERNEL_SIZE, HP_CONV1D_DROPOUT, HP_LSTM_1_UNITS, HP_LSTM_2_UNITS, HP_LSTM_1_DROPOUT, HP_LSTM_2_DROPOUT, HP_LEARNING_RATE],
    metrics=[hp_api.Metric('accuracy', display_name='Accuracy')],
  )

In [55]:
hparams = {
    HP_CONV1D_FILTERS: HP_CONV1D_FILTERS.domain.values[0],
    HP_CONV1D_KERNEL_SIZE: HP_CONV1D_KERNEL_SIZE.domain.values[0],
    HP_CONV1D_DROPOUT: HP_CONV1D_DROPOUT.domain.values[0],
    HP_LSTM_1_UNITS: HP_LSTM_1_UNITS.domain.values[0],
    HP_LSTM_2_UNITS: HP_LSTM_2_UNITS.domain.values[0],
    HP_LSTM_1_DROPOUT: HP_LSTM_1_DROPOUT.domain.values[0],
    HP_LSTM_2_DROPOUT: HP_LSTM_2_DROPOUT.domain.values[0],
    HP_LEARNING_RATE: HP_LEARNING_RATE.domain.values[0]
}

In [56]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    data = []
    labels = []

    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        indices = range(i-history_size, i)
        # Reshape data from (history_size,) to (history_size, n_feature)
        data.append(np.reshape(dataset[indices], (history_size, 1)))
        #data.append(dataset[indices])
        labels.append(np.reshape(dataset[i:i+target_size], (target_size, 1)))
        #labels.append(dataset[i:i+target_size])
    return np.array(data), np.array(labels)

In [34]:
train_set = np.r_[np.genfromtxt("data/SEG_train_set.csv", delimiter="\n", dtype=np.float32), np.genfromtxt("data/SEG_val_set.csv", delimiter="\n", dtype=np.float32)]
x_train, y_train = generate_timeseries(train_set, 0, None, static_params["PAST_HISTORY"], static_params["FUTURE_TARGET"])
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.cache().batch(static_params["BATCH_SIZE"]).shuffle(static_params["BUFFER_SIZE"])

In [45]:
test_set = np.genfromtxt("data/SEG_test_set_original.csv", delimiter="\n", dtype=np.float32)
processed_test_set = pipeline.transform(test_set)
x_test, y_test = generate_timeseries(processed_test_set, 0, None, static_params["PAST_HISTORY"], static_params["FUTURE_TARGET"])
x_test = x_test.astype(np.float32)
y_test = y_test.astype(np.float32)

In [57]:
model = keras.Sequential()
model.add(Conv1D(filters=HP_CONV1D_FILTERS.domain.values[0], kernel_size=HP_CONV1D_KERNEL_SIZE.domain.values[0], padding='causal', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(HP_CONV1D_DROPOUT.domain.values[0]))
model.add(Bidirectional(LSTM(HP_LSTM_1_UNITS.domain.values[0], return_sequences=True)))
model.add(Dropout(HP_LSTM_1_DROPOUT.domain.values[0]))
model.add(Bidirectional(LSTM(HP_LSTM_2_UNITS.domain.values[0], return_sequences=True)))
model.add(Dropout(HP_LSTM_2_DROPOUT.domain.values[0]))
model.add(TimeDistributed(Dense(static_params["VOCAB_SIZE"], activation="softmax")))

model.compile(
    optimizer=keras.optimizers.Nadam(HP_LEARNING_RATE.domain.values[0]),
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy']
)

In [59]:
with tf.summary.create_file_writer('logs/hparam_tuning/' + timestamp).as_default():
    hp.hparams(hparams)
    history, accuracy = model.fit(train_data, epochs=static_params["EPOCHS"], callbacks=[keras.callbacks.EarlyStopping('accuracy', patience=3)])
    #tf.summary.scalar('accuracy', accuracy, step=1)

Epoch 152/500


TypeError: cannot unpack non-iterable History object

In [60]:
model.evaluate(x_test, y_test)



[3.302696704864502, 0.615440845489502]

In [61]:
model.save("best_model_2.h5") 