In [54]:
import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp_api
import kerastuner
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import json
import datetime
import dill

# Default figure size (width, height) for every matplotlib figure in this notebook.
plt.rcParams["figure.figsize"] = (20, 5)

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# Iterating over the device list (rather than indexing [0]) avoids an IndexError on
# a machine with no visible GPU and applies the same setting to every GPU.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)  # Off when Distributed Environment

In [3]:
# Dataset identifier; it is interpolated into the training CSV file name when the data is loaded.
dataset_name = "SEG"

In [4]:
# Current local time rendered as YYYYMMDD-HHMMSS.
run_started_at = datetime.datetime.now()
timestamp = f"{run_started_at:%Y%m%d-%H%M%S}"
timestamp

'20200828-121953'

In [11]:
# Load the training series for the selected dataset, one value per line, as float32.
# (The original author noted np.int64 as an alternative dtype.)
train_csv_path = "data/{}_train_set.csv".format(dataset_name)
dataset = np.genfromtxt(train_csv_path, delimiter="\n", dtype=np.float32)
dataset

array([0., 0., 0., ..., 1., 3., 1.], dtype=float32)

# Static Parameters
# Fixed (non-tuned) settings, declared in a single literal so the whole set is visible at once.
static_params = {
    "PAST_HISTORY": 16,
    "FUTURE_TARGET": 8,
    "BATCH_SIZE": 1024,
    "ACTIVATION": 'softmax',
    "LOSS_FUNCTION": 'sparse_categorical_crossentropy',
    "VAL_SPLIT": 0.2,
    "METRIC_ACCURACY": 'accuracy',
    "OPTIMIZER": 'adam',
}

import dill

# NOTE(review): dill.load can execute arbitrary code embedded in the file;
# only load a pickle that was produced by a trusted source.
with open("static/SparseCategoryEncoderDecoder.pkl", 'rb') as encoder_file:
    SparseCategoryEncoderDecoder = dill.load(encoder_file)

# The vocabulary size is read from the unpickled encoder/decoder object.
static_params["VOCAB_SIZE"] = SparseCategoryEncoderDecoder.vocab_size

# Persist the parameters as indented JSON so later cells can reload them from disk.
with open("static/static_params.json", "w") as params_file:
    json.dump(static_params, params_file, indent=4)

In [52]:
# Reload the persisted static parameters from the JSON file and display them.
with open("static/static_params.json", "r") as params_file:
    static_params = json.loads(params_file.read())
static_params

{'PAST_HISTORY': 16,
 'FUTURE_TARGET': 8,
 'BATCH_SIZE': 1024,
 'ACTIVATION': 'softmax',
 'LOSS_FUNCTION': 'sparse_categorical_crossentropy',
 'VAL_SPLIT': 0.2,
 'METRIC_ACCURACY': 'accuracy',
 'OPTIMIZER': 'adam',
 'VOCAB_SIZE': 14882}

In [12]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    """Cut a series into sliding (history, target) window pairs.

    For every position ``pos`` in ``range(start_index + history_size, end_index)``
    one sample is produced: the ``history_size`` values immediately before ``pos``
    as the input, and the ``target_size`` values starting at ``pos`` as the label.

    Args:
        dataset: Indexable series of values (assumes a 1-D NumPy array; other
            input types are not exercised here).
        start_index: Index of the first value that may appear in a history window.
        end_index: Exclusive upper bound for ``pos``. When ``None`` it is set to
            ``len(dataset) - target_size``.
        history_size: Number of past values per input window.
        target_size: Number of future values per label window.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays. Each input window is reshaped to
        ``(history_size, 1)`` and each label window to ``(target_size, 1)``.
    """
    first_pos = start_index + history_size
    stop_pos = len(dataset) - target_size if end_index is None else end_index

    history_windows = []
    target_windows = []
    for pos in range(first_pos, stop_pos):
        past_values = dataset[range(pos - history_size, pos)]
        future_values = dataset[pos:pos + target_size]
        # Add a trailing feature axis of length 1 to each window.
        history_windows.append(np.reshape(past_values, (history_size, 1)))
        target_windows.append(np.reshape(future_values, (target_size, 1)))

    return np.array(history_windows), np.array(target_windows)

In [15]:
# Window the whole series; end_index=None lets generate_timeseries derive the end from the series length.
x_train, y_train = generate_timeseries(
    dataset,
    start_index=0,
    end_index=None,
    history_size=static_params["PAST_HISTORY"],
    target_size=static_params["FUTURE_TARGET"],
)
x_train.shape, y_train.shape

((14858, 16, 1), (14858, 8, 1))

# Wrap the training windows in a tf.data input pipeline.
# "BUFFER_SIZE" is optional in static_params: when the key is absent, fall back to
# the number of samples, which gives a full shuffle of the dataset.
shuffle_buffer_size = static_params.get("BUFFER_SIZE", len(x_train))
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Shuffle before batching so individual samples are mixed across batches;
# shuffling after batch() would only permute whole batches.
train_data = (
    train_data
    .cache()
    .shuffle(shuffle_buffer_size)
    .batch(static_params["BATCH_SIZE"])
    .repeat()
)

In [17]:
def tensorboard_callback(log_dir, hist_freq=1):
    """Create a Keras TensorBoard callback.

    Args:
        log_dir: Forwarded to the callback as ``log_dir``.
        hist_freq: Forwarded to the callback as ``histogram_freq``; defaults to 1.

    Returns:
        The configured ``keras.callbacks.TensorBoard`` instance.
    """
    callback = keras.callbacks.TensorBoard(histogram_freq=hist_freq, log_dir=log_dir)
    return callback

In [41]:
def build_model(hp):
    """Build and compile the tunable sequence-to-sequence classifier.

    The network is a bidirectional LSTM, dropout, a RepeatVector that repeats the
    encoding FUTURE_TARGET times, a second bidirectional LSTM returning sequences,
    dropout, and a time-distributed Dense layer with VOCAB_SIZE units.

    Args:
        hp: Keras Tuner hyperparameter container. It supplies the unit counts
            (32-256, step 8), the dropout rates (0.1-0.5, step 0.05) and the
            Adam learning rate (one of 1e-2, 1e-3, 1e-4).

    Returns:
        The compiled ``keras.Sequential`` model, using the loss and metric
        named in ``static_params``.
    """
    # Hyperparameters are requested in the same order as the layers they configure.
    first_units = hp.Int("layer_1_units", min_value=32, max_value=256, step=8)  # step=30
    first_dropout = hp.Float("layer_1_dropout", min_value=0.1, max_value=0.5, step=0.05)
    second_units = hp.Int("layer_2_units", min_value=32, max_value=256, step=8)
    second_dropout = hp.Float("layer_2_dropout", min_value=0.1, max_value=0.5, step=0.05)

    output_layer = keras.layers.Dense(
        static_params["VOCAB_SIZE"],
        activation=static_params["ACTIVATION"],
    )

    model = keras.Sequential([
        keras.layers.Bidirectional(keras.layers.LSTM(first_units)),
        keras.layers.Dropout(first_dropout),
        keras.layers.RepeatVector(static_params["FUTURE_TARGET"]),
        keras.layers.Bidirectional(keras.layers.LSTM(second_units, return_sequences=True)),
        keras.layers.Dropout(second_dropout),
        keras.layers.TimeDistributed(output_layer),
    ])

    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=static_params["LOSS_FUNCTION"],
        metrics=[static_params["METRIC_ACCURACY"]],
    )
    return model

In [48]:
# Hyperband tuner over build_model's search space, with val_accuracy as the objective.
# Trials run under a MirroredStrategy; results are written to hyper_results/SEG.
mirrored_strategy = tf.distribute.MirroredStrategy()
tuner = kerastuner.tuners.Hyperband(
    hypermodel=build_model,
    objective='val_accuracy',
    max_epochs=250,
    factor=2,
    hyperband_iterations=3,
    tune_new_entries=True,
    distribution_strategy=mirrored_strategy,
    project_name="SEG",
    directory="hyper_results",
)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [50]:
# Early stopping monitors val_accuracy with the callback's default settings.
early_stopping = tf.keras.callbacks.EarlyStopping('val_accuracy')

# Run the search on the in-memory arrays; VAL_SPLIT of the data is held out for validation.
tuner.search(
    x_train,
    y_train,
    epochs=250,
    validation_split=static_params["VAL_SPLIT"],
    callbacks=[early_stopping],  # tensorboard_callback("logs/fit/" + timestamp)
)

Epoch 1/2
Epoch 2/2


Epoch 1/2
Epoch 2/2


Epoch 1/2
Epoch 2/2


Epoch 1/2
Epoch 2/2


Epoch 1/2
Epoch 2/2


Epoch 1/2
INFO:tensorflow:Error reported to Coordinator: 
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\coordinator.py", line 297, in stop_on_exception
    yield
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\distribute\mirrored_strategy.py", line 165, in _call_for_each_replica
    t.has_paused.wait()
  File "C:\ProgramData\Anaconda3\lib\threading.py", line 552, in wait
    signaled = self._cond.wait(timeout)
  File "C:\ProgramData\Anaconda3\lib\threading.py", line 296, in wait
    waiter.acquire()
KeyboardInterrupt


KeyboardInterrupt: 

In [55]:
# Serialize the tuner object with dill into the tuner's project directory.
tuner_pickle_path = "hyper_results/SEG/tuner.pkl"
with open(tuner_pickle_path, 'wb') as pickle_file:
    dill.dump(tuner, pickle_file)