In [3]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import json
import datetime

plt.rcParams["figure.figsize"] = (20, 5)

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [5]:
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + timestamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
version_dir = "version/" + timestamp 

os.makedirs(version_dir)

In [15]:
param_list = dict()

param_list["PAST_HISTORY"] = 16
param_list["FUTURE_TARGET"] = 8
param_list["TRAIN_SPLIT"] = 100000
param_list["BATCH_SIZE"] = 256
param_list["BUFFER_SIZE"] = 200000
param_list["EVALUATION_INTERVAL"] = 300
param_list["VAL_STEPS"] = 50

In [6]:
pattern_length = 4

dataset = pd.read_csv("data/SEG_dataset_{}.csv".format(pattern_length))
dataset.head()

Unnamed: 0,delta,t,t+1
0,-1587653544,95444983808,93857330264
1,6172925864,87674650304,93847576168
2,-6187909720,93857313352,87669403632
3,-6187918552,93857325056,87669406504
4,-6198466584,93857330264,87658863680


In [10]:
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(dataset, test_size=0.4, shuffle=False, random_state=42)
train_set, test_set

(             delta            t          t+1
 0      -1587653544  95444983808  93857330264
 1       6172925864  87674650304  93847576168
 2      -6187909720  93857313352  87669403632
 3      -6187918552  93857325056  87669406504
 4      -6198466584  93857330264  87658863680
 ...            ...          ...          ...
 122816       63120  93730480256  93730543376
 122817       65536  93730495504  93730561040
 122818       65536  93730497920  93730563456
 122819   -16673024  93747165384  93730492360
 122820       -5888  93730543376  93730537488
 
 [122821 rows x 3 columns],
            delta            t          t+1
 122821    878848  93730561040  93731439888
 122822 -11182192  93730563456  93719381264
 122823 -11128760  93730492360  93719363600
 122824  53784064  93730537488  93784321552
 122825  52884080  93731439888  93784323968
 ...          ...          ...          ...
 204697  29537720  92593157808  92622695528
 204698  11427864  92611267656  92622695520
 204699  47525224  926

In [12]:
from sklearn.preprocessing import MinMaxScaler
import joblib

minmax_scaler = MinMaxScaler()

train_set_scaled = minmax_scaler.fit_transform(train_set["delta"].values.reshape(-1, 1))
joblib.dump(minmax_scaler, "version/{}/scaler.pkl".format(timestamp))
train_set_scaled

array([[0.39820164],
       [0.89577508],
       [0.10325391],
       ...,
       [0.49999905],
       [0.49892585],
       [0.49999447]])

In [13]:
def generate_timeseries(dataset, start_index, end_index, history_size, target_size):
    data = []
    labels = []

    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        indices = range(i-history_size, i)
        # Reshape data from (history_size,) to (history_size, 1)
        data.append(np.reshape(dataset[indices], (history_size, 1)))
        labels.append(dataset[i:i+target_size])
    return np.array(data), np.array(labels)

In [16]:
x_train, y_train = generate_timeseries(train_set_scaled, 0, param_list["TRAIN_SPLIT"], param_list["PAST_HISTORY"], param_list["FUTURE_TARGET"])
x_val, y_val = generate_timeseries(train_set_scaled, param_list["TRAIN_SPLIT"], None, param_list["PAST_HISTORY"], param_list["FUTURE_TARGET"])

train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)).repeat()
train_data = train_data.cache().shuffle(param_list["BUFFER_SIZE"]).batch(param_list["BATCH_SIZE"])
val_data = tf.data.Dataset.from_tensor_slices((x_val, y_val)).repeat()
val_data = val_data.cache().batch(param_list["BATCH_SIZE"])

In [58]:
test_set_scaled = minmax_scaler.fit(test_set)

x_test, y_test = generate_timeseries(test_set_scaled, 0, None, param_list["PAST_HISTORY"], param_list["FUTURE_TARGET"])
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).repeat()
test_data = test_data.cache().batch(param_list["BATCH_SIZE"])

TypeError: object of type 'MinMaxScaler' has no len()

In [51]:
from tensorboard.plugins.hparams import api as hp

HP_NUM_LSTM_1_UNITS = hp.HParam("num_LSTM_1_units", hp.Discrete([32, 64, 128]))
HP_NUM_LSTM_2_UNITS = hp.HParam("num_LSTM_2_units", hp.Discrete([32, 64, 128]))
HP_DROPOUT = hp.HParam("dropout", hp.RealInterval(0.1, 0.5))

METRIC_ACCURACY = 'mae'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_NUM_LSTM_1_UNITS, HP_NUM_LSTM_2_UNITS, HP_DROPOUT],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='mae')]
    )    

In [57]:
def train_test_model(hparams):
    model = tf.keras.models.Sequential([
        tf.keras.layers.LSTM(hparams[HP_NUM_LSTM_1_UNITS], return_sequences=True, input_shape=x_train.shape[-2:]),
        tf.keras.layers.LSTM(hparams[HP_NUM_LSTM_2_UNITS]),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        tf.keras.layers.Dense(FUTURE_TARGET),
        tf.keras.layers.Activation("relu")
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    model.fit(train_data, epochs=1, steps_per_epoch=EVALUATION_INTERVAL, validation_data=val_data, validation_steps=VAL_STEPS, callbacks=[
        tf.keras.callbacks.TensorBoard(log_dir),  # log metrics
        hp.KerasCallback(log_dir, hparams),  # log hparams
    ],)
    _, mae = model.evaluate(x_test, y_test)
    return model

In [53]:
def run(run_dir, hparams):
  with tf.summary.create_file_writer(run_dir).as_default():
    hp.hparams(hparams)  # record the values used in this trial
    accuracy = train_test_model(hparams)
    tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)