In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os

from data_util import *

from data_generator import DataGenerator

from tensorflow.keras.layers import BatchNormalization, GlobalMaxPool1D, Bidirectional, Dense, Flatten, Conv2D, LeakyReLU, Dropout, LSTM, GRU, Input
from tensorflow.keras import Model, Sequential
from tensorflow.keras import datasets, layers, models
import tensorflow as tf
#import tensorflow_addons as tfa

from tensorboard.plugins.hparams import api as hp

%load_ext tensorboard
print(tf.__version__)

2.5.0-rc3


In [2]:
def reset_seed(seed_value=0):
    """Seed every pseudo-random generator used in this notebook.

    Seeds, in order: the `PYTHONHASHSEED` environment variable, Python's
    built-in `random` module, NumPy's legacy global generator and
    TensorFlow's global generator.

    Args:
        seed_value: integer seed applied to all generators. Defaults to 0,
            which is the value the notebook has always used.
    """
    # Imports are kept local so the function works regardless of which
    # cells have been executed before it.
    import os
    import random

    import numpy as np
    import tensorflow as tf

    # 1. `PYTHONHASHSEED` is read by the interpreter at start-up, so setting it
    #    here only affects processes spawned afterwards; it does not change
    #    hashing in the kernel that is already running.
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    # 2. Python built-in pseudo-random generator.
    random.seed(seed_value)
    # 3. NumPy global pseudo-random generator.
    np.random.seed(seed_value)
    # 4. TensorFlow global pseudo-random generator.
    tf.random.set_seed(seed_value)


In [13]:
#Model 

def add_deep_layers(x, drop, units):
    """Append a dropout layer followed by two ReLU dense layers to `x`.

    Args:
        x: Keras tensor to extend.
        drop: dropout rate passed to `Dropout`.
        units: width of each of the two `Dense` layers.

    Returns:
        The output tensor of the second dense layer.
    """
    out = Dropout(drop)(x)
    # Two identical fully connected layers, stacked.
    for _ in range(2):
        out = Dense(units, activation='relu')(out)
    return out

def add_attention(x):
    """Return the output of a scaled `Attention` layer called with `[x, x]`."""
    self_attention = tf.keras.layers.Attention(use_scale=True)
    return self_attention([x, x])

def generate_model(seqs, features, dim, dropout, lays, lays_seq):
    """Build the (uncompiled) sequence classifier.

    Args:
        seqs: length of each input sequence (first input dimension).
        features: number of features per step (second input dimension).
        dim: unit count for the LSTM layers and the main dense blocks.
        dropout: dropout rate used inside every dense block.
        lays: number of dense blocks placed after the bidirectional LSTM.
        lays_seq: number of dense blocks placed between the two LSTM stages,
            while the first LSTM's per-step outputs are still present.

    Returns:
        A Keras `Model` mapping the sequence input to a 2-unit softmax output.
    """
    sequence_input = Input(shape=(seqs, features))

    # Stage 1: LSTM that keeps its per-step outputs, then dense blocks.
    hidden = LSTM(units=dim, return_sequences=True)(sequence_input)
    for _ in range(int(lays_seq)):
        hidden = add_deep_layers(hidden, dropout, dim)

    # Stage 2: bidirectional LSTM that returns only its final output.
    hidden = Bidirectional(LSTM(dim, return_sequences=False))(hidden)

    # Stage 3: dense blocks on the resulting vector.
    for _ in range(int(lays)):
        hidden = add_deep_layers(hidden, dropout, dim)

    # Head: narrow to 50, then 20 units, then the L2-regularised softmax.
    hidden = add_deep_layers(hidden, dropout, 50)
    hidden = Dense(20, activation='relu')(hidden)
    output_regularizer = tf.keras.regularizers.l2(0.0001)
    class_probs = Dense(2, activation='softmax',
                        kernel_regularizer=output_regularizer)(hidden)

    return Model(inputs=[sequence_input], outputs=class_probs)

# Data directory handed to every `load_data` call in this notebook.
path = "./data/"


In [14]:

# Names under which results are reported.
METRIC_ACCURACY = 'accuracy'
METRIC_LOSS = 'loss'

# Discrete search space explored by the sweep.
HP_NUM_SEQS = hp.HParam('num_seqs', hp.Discrete([50, 100, 150]))
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([150, 600]))
HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0.5]))
HP_LAY_SEQ = hp.HParam('layers_seq', hp.Discrete([5]))
HP_LAY = hp.HParam('layers', hp.Discrete([5, 8]))

# Register the hyperparameters and the metric with the HParams plugin.
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[
            HP_NUM_SEQS,
            HP_NUM_UNITS,
            HP_DROPOUT,
            HP_LAY_SEQ,
            HP_LAY,
        ],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )

# Training data is loaded once and shared by every trial.
trainX, trainY, positiveX, positiveY, negativeX, negativeY = load_data(
    "omgusd", "train", path, balanced=False
)
print("Loaded: {} {} ".format(trainX.shape, trainY.shape))
# The last axis of the training array gives the per-step feature count.
features = trainX.shape[-1]

def train_test_model(hparams):
    """Train one model for `hparams` and return its validation accuracy.

    Args:
        hparams: mapping keyed by the `HP_*` HParam objects (HP_NUM_UNITS,
            HP_NUM_SEQS, HP_DROPOUT, HP_LAY, HP_LAY_SEQ) giving the value to
            use for each hyperparameter in this trial.

    Returns:
        The accuracy averaged over the evaluated validation sets, where the
        accuracy of a set is the mean of the accuracies measured separately
        on its positive and its negative samples.
    """
    # Every trial builds a fresh model; without this Keras keeps the state of
    # all previous models alive and memory grows over the sweep.
    tf.keras.backend.clear_session()
    reset_seed()

    dim = hparams[HP_NUM_UNITS]
    seqs = hparams[HP_NUM_SEQS]
    dropout = hparams[HP_DROPOUT]
    lays = hparams[HP_LAY]
    lays_seq = hparams[HP_LAY_SEQ]

    # presumably windows the flat training arrays into sequences of length
    # `seqs` -- TODO confirm against data_util.create_dataset
    x, y = create_dataset(trainX, trainY, seqs)

    model = generate_model(seqs=seqs,
                           features=features,
                           dim=dim,
                           dropout=dropout,
                           lays=lays,
                           lays_seq=lays_seq)

    # NOTE(review): the recorded runs report an accuracy of exactly 0.5; the
    # optimizer settings may be worth revisiting -- confirm.
    model.compile(
        optimizer=tf.keras.optimizers.Adadelta(learning_rate=0.001, rho=0.95, epsilon=1e-07),
        loss=tf.losses.CategoricalCrossentropy(),
        metrics=['accuracy']
    )

    model.fit(
        x,
        y,
        batch_size=64*2,
        shuffle=True,
        epochs=10,
        verbose=1
    )

    def average(lst):
        """Arithmetic mean of a non-empty list of numbers."""
        return sum(lst) / len(lst)

    def get_acc(db):
        """Mean of the positive-only and negative-only accuracies for `db`."""
        print("evaluating {}".format(db))
        _, _, positiveX_, positiveY_, negativeX_, negativeY_ = load_data(db, "val", path, balanced=True)
        pos_x, pos_y = create_dataset(positiveX_, positiveY_, seqs)
        neg_x, neg_y = create_dataset(negativeX_, negativeY_, seqs)
        # evaluate() returns [loss, accuracy] because the model is compiled
        # with a single 'accuracy' metric.
        _, acc_pos = model.evaluate(pos_x, pos_y)
        _, acc_neg = model.evaluate(neg_x, neg_y)
        return average([acc_pos, acc_neg])

    # Further validation sets (e.g. "btcusd", "ethusd", "ltcusd") can be
    # appended here; the reported accuracy is their mean.
    evals = [get_acc("")]
    accuracy = average(evals)
    print("accuracy: {}".format(accuracy))
    return accuracy

Loaded: (212129, 6) (212129, 2) 


In [15]:
# Best result of the sweep; both values are printed once all trials have finished.
best_hparams = {}
best_acc = 0

def run(run_dir, hparams):
    """Run one trial, log it for TensorBoard and track the best result.

    Args:
        run_dir: directory the summary file writer for this trial writes to.
        hparams: mapping of HParam objects to the values used in this trial.

    Returns:
        The accuracy returned by `train_test_model` for this trial.
    """
    # The sweep reports these module-level values at the end, so they have to
    # be updated here; otherwise the report always shows the initial values.
    global best_acc, best_hparams

    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        accuracy = train_test_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)

    if accuracy > best_acc:
        best_acc = accuracy
        # Stored by hyperparameter name so the final report is readable.
        best_hparams = {h.name: hparams[h] for h in hparams}

    return accuracy
        

In [None]:
# Full grid: one dict per combination of the discrete domains, in the same
# order as nested loops over seqs -> units -> dropout -> layers_seq -> layers.
hparams_list = [
    {
        HP_NUM_SEQS: seqs_value,
        HP_NUM_UNITS: units_value,
        HP_DROPOUT: dropout_value,
        HP_LAY_SEQ: lay_seq_value,
        HP_LAY: lay_value,
    }
    for seqs_value in HP_NUM_SEQS.domain.values
    for units_value in HP_NUM_UNITS.domain.values
    for dropout_value in HP_DROPOUT.domain.values
    for lay_seq_value in HP_LAY_SEQ.domain.values
    for lay_value in HP_LAY.domain.values
]

print("Total hparams: {}".format(len(hparams_list)))

# Each trial gets its own numbered run directory under logs/hparam_tuning.
session_num = 0
for session_num, hparams in enumerate(hparams_list):
    trial_label = "run-%d" % session_num
    print('--- Starting trial: %s' % trial_label)
    print({h.name: hparams[h] for h in hparams})
    run('logs/hparam_tuning/' + trial_label, hparams)
# Leave the counter at the number of completed trials.
session_num = len(hparams_list)

print("Best acc {} hparams {}".format(best_acc, best_hparams))

Total hparams: 12
--- Starting trial: run-0
{'num_seqs': 50, 'num_units': 150, 'dropout': 0.5, 'layers_seq': 5, 'layers': 5}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
evaluating 
accuracy: 0.5
--- Starting trial: run-1
{'num_seqs': 50, 'num_units': 150, 'dropout': 0.5, 'layers_seq': 5, 'layers': 8}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
evaluating 
accuracy: 0.5
--- Starting trial: run-2
{'num_seqs': 50, 'num_units': 600, 'dropout': 0.5, 'layers_seq': 5, 'layers': 5}
Epoch 1/10

In [None]:
# Open TensorBoard inline on the directory the sweep writes its run summaries to.
%tensorboard --logdir logs/hparam_tuning

In [None]:
#tf.keras.utils.plot_model(model, 'multi_input_and_output_model.png', show_shapes=True)

In [None]:


# NOTE(review): `filepath`, `model`, `train_generator` and `val_generator` are
# not defined by any cell visible in this notebook; this cell only runs if they
# already exist in the kernel -- confirm where they are meant to come from.

# NOTE(review): `load_weights_on_restart` looks to be a deprecated
# ModelCheckpoint argument in recent TF 2.x releases -- confirm before relying on it.
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='loss',
                             verbose=1,
                             save_best_only=True,
                             load_weights_on_restart=True)

# NOTE(review): `checkpoint` is created above but not registered here, so no
# checkpoints are written -- confirm whether that is intentional.
callbacks_list = []

# `Model.fit` accepts generators directly; `fit_generator` is deprecated in TF 2.x.
history = model.fit(train_generator,
                    validation_data=val_generator,
                    validation_steps=len(val_generator)-1,
                    steps_per_epoch=len(train_generator)-1,
                    epochs=20, verbose=1,
                    callbacks=callbacks_list)


In [None]:
# Training and validation loss per epoch, drawn on the same axes.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Training and validation accuracy per epoch, drawn on the same axes.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[curve])
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


In [None]:
import datetime

# Print the current local time, formatted to the minute.
print("{:%Y-%m-%d %H:%M}".format(datetime.datetime.now()))

In [None]:
    # Scratch variant of the network body: Dense + BatchNormalization in front
    # of the first LSTM, a unidirectional second LSTM, and a single sigmoid
    # output unit.
    # NOTE(review): `seqs`, `dim`, `lays_seq`, `lays` and `dropout` are not
    # defined at notebook scope in the visible cells, and no Model is built
    # from the result -- presumably meant to live inside a function; confirm.
    inputX = Input(shape=(seqs, features))
    x = Dense(dim)(inputX)
    x = BatchNormalization()(x)
    x = LSTM(units=dim, return_sequences=True)(x)
    # Dense blocks applied to the per-step outputs of the first LSTM.
    for lay in range(int(lays_seq)):
        x = add_deep_layers(x, dropout, dim)
    x = LSTM(dim, return_sequences=False)(x)
    
    # Dense blocks applied to the final output of the second LSTM.
    for lay in range(int(lays)):
        x = add_deep_layers(x, dropout, dim)
        
    x = add_deep_layers(x, dropout, 20)
    x = Dense(10)(x)
    x = Dense(1, activation='sigmoid')(x)