In [31]:
import tensorflow as tf
from tensorflow import keras
import kerastuner
import numpy as np
import pandas as pd

In [32]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all up front.
# Every detected GPU is configured (TensorFlow expects the setting to be the same on
# all GPUs), and the loop simply does nothing on a CPU-only machine, where indexing
# physical_devices[0] would raise IndexError.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [33]:
# Base name of the dataset; it is substituted into the "data/{}_train_set.csv" and
# "data/{}_val_set.csv" paths when the train/validation CSVs are loaded.
dataset_name = "SEG_CNNLSTM_1_retrain"

In [34]:
# Label of this tuning run; it is passed to the tuner as project_name, so results are
# kept under the "tuner_results" directory in a sub-folder with this name.
hyper_tuning_version = "ht_1"

In [35]:
# Fixed settings shared by the data pipeline and the model builder.
param_list = {
    "PAST_HISTORY": 64,      # Hparam: length of each input window
    "FUTURE_TARGET": 8,      # length of each target window
    "BATCH_SIZE": 128,       # windows per batch
    "EPOCHS": 1000,
    "BUFFER_SIZE": 200000,
    "VOCAB_SIZE": 16293,     # vocab_size: one-hot depth and width of the softmax output
}

In [36]:
# Load the training sequence: one integer value per line of the CSV.
train_csv_path = "data/{}_train_set.csv".format(dataset_name)
train_set = np.genfromtxt(
    train_csv_path,
    delimiter="\n",
    dtype=np.int64,
)

In [37]:
# Training inputs: sliding windows of PAST_HISTORY consecutive values (step 1), taken
# from the series with its last FUTURE_TARGET values removed.
# Dataset.window() yields nested datasets rather than tensors, so each window is
# flattened back into a single tensor by batching it to the full window length.
x_train = (
    tf.data.Dataset
    .from_tensor_slices(train_set[:-param_list["FUTURE_TARGET"]])
    .window(size=param_list["PAST_HISTORY"], shift=1, stride=1, drop_remainder=True)
    .flat_map(lambda window: window.batch(param_list["PAST_HISTORY"]))
    .map(lambda tokens: tf.one_hot(tokens, depth=param_list["VOCAB_SIZE"], axis=-1))
    .batch(param_list["BATCH_SIZE"])
)

In [38]:
# Training targets: sliding windows of FUTURE_TARGET consecutive values (step 1),
# starting PAST_HISTORY values into the series so that window i follows input window i.
y_train = (
    tf.data.Dataset
    .from_tensor_slices(train_set[param_list["PAST_HISTORY"]:])
    .window(size=param_list["FUTURE_TARGET"], shift=1, stride=1, drop_remainder=True)
    .flat_map(lambda window: window.batch(param_list["FUTURE_TARGET"]))
    .map(lambda tokens: tf.one_hot(tokens, depth=param_list["VOCAB_SIZE"], axis=-1))
    .batch(param_list["BATCH_SIZE"])
)

In [39]:
# Pair each input batch with the target batch at the same position, giving
# (inputs, targets) elements. Both datasets are unshuffled and use the same
# BATCH_SIZE, so batch i of x_train lines up with batch i of y_train.
train_data = tf.data.Dataset.zip((x_train, y_train))

In [40]:
# Load the validation sequence: one integer value per line of the CSV.
val_csv_path = "data/{}_val_set.csv".format(dataset_name)
val_set = np.genfromtxt(
    val_csv_path,
    delimiter="\n",
    dtype=np.int64,
)

In [41]:
# Validation inputs: sliding windows of PAST_HISTORY consecutive values (step 1), taken
# from the series with its last FUTURE_TARGET values removed. Each nested window
# dataset is flattened to one tensor, one-hot encoded, then batched.
x_val = (
    tf.data.Dataset
    .from_tensor_slices(val_set[:-param_list["FUTURE_TARGET"]])
    .window(size=param_list["PAST_HISTORY"], shift=1, stride=1, drop_remainder=True)
    .flat_map(lambda window: window.batch(param_list["PAST_HISTORY"]))
    .map(lambda tokens: tf.one_hot(tokens, depth=param_list["VOCAB_SIZE"], axis=-1))
    .batch(param_list["BATCH_SIZE"])
)

In [42]:
# Validation targets: sliding windows of FUTURE_TARGET consecutive values (step 1),
# starting PAST_HISTORY values into the series so that window i follows input window i.
y_val = (
    tf.data.Dataset
    .from_tensor_slices(val_set[param_list["PAST_HISTORY"]:])
    .window(size=param_list["FUTURE_TARGET"], shift=1, stride=1, drop_remainder=True)
    .flat_map(lambda window: window.batch(param_list["FUTURE_TARGET"]))
    .map(lambda tokens: tf.one_hot(tokens, depth=param_list["VOCAB_SIZE"], axis=-1))
    .batch(param_list["BATCH_SIZE"])
)

In [43]:
# Pair each validation input batch with the target batch at the same position, giving
# (inputs, targets) elements. Both datasets are unshuffled and use the same
# BATCH_SIZE, so batch i of x_val lines up with batch i of y_val.
val_data = tf.data.Dataset.zip((x_val, y_val))

In [44]:
def build_model(hp):
    """Build and compile one candidate model for the hyperparameter search.

    The network is a Sequential stack: causal Conv1D -> MaxPooling1D ->
    bidirectional LSTM -> dropout -> RepeatVector(FUTURE_TARGET) ->
    bidirectional LSTM (returning sequences) -> dropout -> per-step softmax
    Dense of width VOCAB_SIZE. It is compiled with the Nadam optimizer,
    categorical cross-entropy loss and the accuracy metric.

    Args:
        hp: Hyperparameter container handed in by the tuner. Its ``Int``,
            ``Float`` and ``Choice`` methods are used to declare and sample
            the search space.

    Returns:
        The compiled ``keras.models.Sequential`` model.
    """
    # Declare the search space up front, in the same order the layers consume it.
    conv_filters = hp.Int("FILTERS", min_value=8, max_value=256, step=8)
    conv_kernel = hp.Int(
        "KERNEL_SIZE", min_value=8, max_value=param_list["PAST_HISTORY"], step=8
    )
    pool_width = hp.Int("POOL_SIZE", min_value=2, max_value=8, step=2)
    lstm1_units = hp.Int("NUM_1_NEURONS", min_value=32, max_value=256, step=8)
    dropout1_rate = hp.Float("DROPOUT_1", min_value=0.1, max_value=0.5, step=0.05)
    lstm2_units = hp.Int("NUM_2_NEURONS", min_value=32, max_value=256, step=8)
    dropout2_rate = hp.Float("DROPOUT_2", min_value=0.1, max_value=0.5, step=0.05)
    learning_rate = hp.Choice("LEARNING_RATE", [1e-2, 1e-3, 1e-4])

    layer_stack = [
        keras.layers.Conv1D(
            filters=conv_filters,
            kernel_size=conv_kernel,
            padding="causal",
            activation='relu',
        ),
        keras.layers.MaxPooling1D(pool_size=pool_width),
        # First recurrent stage returns only its final output vector ...
        keras.layers.Bidirectional(keras.layers.LSTM(lstm1_units)),
        keras.layers.Dropout(dropout1_rate),
        # ... which is repeated once per step of the target window.
        keras.layers.RepeatVector(param_list["FUTURE_TARGET"]),
        keras.layers.Bidirectional(
            keras.layers.LSTM(lstm2_units, return_sequences=True)
        ),
        keras.layers.Dropout(dropout2_rate),
        # Same softmax classifier applied independently at every output step.
        keras.layers.TimeDistributed(
            keras.layers.Dense(param_list["VOCAB_SIZE"], activation='softmax')
        ),
    ]
    model = keras.models.Sequential(layer_stack)

    optimizer = keras.optimizers.Nadam(learning_rate)
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [45]:
# Hyperband search over the space declared in build_model, ranking trials by
# validation loss. State is stored under directory/project_name; the captured log
# shows an existing oracle being reloaded from that location.
tuner = kerastuner.Hyperband(
    hypermodel=build_model,
    objective="val_loss",
    max_epochs=10,
    factor=3,
    hyperband_iterations=3,
    # Multi-GPU strategy left disabled by the author:
    # distribution_strategy=tf.distribute.MirroredStrategy(),
    directory="tuner_results",
    project_name=hyper_tuning_version,
)

INFO:tensorflow:Reloading Oracle from existing project tuner_results\ht_1\oracle.json


In [46]:
# Run the search. Hyperband sets the number of epochs for every trial itself
# (bounded by the tuner's max_epochs), so no `epochs` argument is passed here:
# the captured run log shows "Epoch 1/2" even though epochs=10 had been requested,
# i.e. the value given to search() was not what the trials used.
tuner.search(train_data, validation_data=val_data)

Epoch 1/2
      9/Unknown - 6s 693ms/step - loss: 7.8567 - accuracy: 0.5703

KeyboardInterrupt: 