In [2]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from yahoo_fin import stock_info as si
from collections import deque

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
import random

import multiprocessing
    
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)
    
os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'

policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
tf.keras.mixed_precision.experimental.set_policy(policy) 

In [2]:
print(tf.config.list_physical_devices('GPU'))
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Num GPUs Available:  1


In [3]:
def create_model(sequence_length, units=256, cell=LSTM, n_layers=2, dropout=0.3,
                loss="mean_absolute_error", optimizer="rmsprop", bidirectional=False,layer_activation="linear"):
    model = Sequential()
    for i in range(n_layers):
        if i == 0:
            # first layer
            if bidirectional:
                model.add(Bidirectional(cell(units, return_sequences=True), input_shape=(None, sequence_length)))
            else:
                model.add(cell(units, return_sequences=True, input_shape=(None, sequence_length)))
        elif i == n_layers - 1:
            # last layer
            if bidirectional:
                model.add(Bidirectional(cell(units, return_sequences=False)))
            else:
                model.add(cell(units, return_sequences=False))
        else:
            # hidden layers
            if bidirectional:
                model.add(Bidirectional(cell(units, return_sequences=True)))
            else:
                model.add(cell(units, return_sequences=True))
        # add dropout after each layer
        model.add(Dropout(dropout))
    model.add(Dense(4, activation=layer_activation))
    model.compile(loss=loss, metrics=["mean_absolute_error"], optimizer=optimizer)
    return model

In [4]:
#def run_tensorflow():

window_size = 320

# create these folders if they does not exist
# Window size or the sequence length
N_STEPS = (window_size * 8) - 8
# Lookup step, 1 is the next day
#LOOKUP_STEP = int(run_dict[run]["LOOKUP_STEP"])

# test ratio size, 0.2 is 20%
TEST_SIZE = 0.3
# features to use
items = ["close", "ema", "high", "low", "open", "rsi", "sma", "volume"]
day_counts = [f"_{i}" for i in range(0, window_size -1)]
FEATURE_COLUMNS = []
for day_count in day_counts:
    for item in items:
        FEATURE_COLUMNS.append(f"{item}{day_count}")

TARGET_COLUMNS = []
for item in ["close", "high", "low", "open"]:
    TARGET_COLUMNS.append(f"{item}_{window_size - 1}")

# date now
date_now = time.strftime("%Y-%m-%d")

### model parameters

N_LAYERS = 3
# LSTM cell
CELL = LSTM
# 256 LSTM neurons
UNITS = 1000
# 40% dropout
DROPOUT = 0.25
# whether to use bidirectional RNNs
BIDIRECTIONAL = True

### training parameters

# mean absolute error loss
# LOSS = "mae"
# huber loss
LOSS = "huber_loss"
OPTIMIZER = "adam"
BATCH_SIZE = 64
EPOCHS = 25

LAYER_ACTIVATION = "elu"

# Stock market
ticker = "MIXED"
ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
# model name to save, making it as unique as possible based on parameters
model_name = f"{date_now}_{ticker}-{window_size}-{LOSS}-{OPTIMIZER}-{CELL.__name__}-{LAYER_ACTIVATION}-layers-{N_LAYERS}-units-{UNITS}"
if BIDIRECTIONAL:
    model_name += "-b"

#----------------------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------------------#

#try:
if not os.path.isdir("results"):
    os.mkdir("results")

if not os.path.isdir("logs"):
    os.mkdir("logs")

if not os.path.isdir("data"):
    os.mkdir("data")

# load the data
data = pd.read_csv(f"../data/processed/all_processed_{window_size}.csv")

# construct the model
model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
                    dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL, layer_activation=LAYER_ACTIVATION)

# some tensorflow callbacks
checkpointer = ModelCheckpoint(os.path.join("results", model_name + ".h5"), save_weights_only=True, save_best_only=True, verbose=1)
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))

X = data[FEATURE_COLUMNS]
y = data[TARGET_COLUMNS]

# convert to numpy arrays
X = np.array(X)
y = np.array(y)

# reshape X to fit the neural network
X = X.reshape((X.shape[0], 1, X.shape[1]))

# split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, shuffle=True)

with tf.device('/device:GPU:0'):
    history = model.fit(X_train, y_train,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(X_test, y_test),
                        callbacks=[checkpointer, tensorboard],
                        verbose=1)

model.save(os.path.join("results", model_name) + ".h5")

#except:
#    print("There was an attempt.")
tf.keras.backend.clear_session()

Train on 61009 samples, validate on 26148 samples
Epoch 1/25
Epoch 00001: val_loss improved from inf to 0.32513, saving model to results\2021-12-03_MIXED-320-huber_loss-adam-LSTM-elu-layers-3-units-1000-b.h5
Epoch 2/25
Epoch 00002: val_loss improved from 0.32513 to 0.29487, saving model to results\2021-12-03_MIXED-320-huber_loss-adam-LSTM-elu-layers-3-units-1000-b.h5
Epoch 3/25
Epoch 00003: val_loss improved from 0.29487 to 0.27895, saving model to results\2021-12-03_MIXED-320-huber_loss-adam-LSTM-elu-layers-3-units-1000-b.h5
Epoch 4/25
Epoch 00004: val_loss improved from 0.27895 to 0.26663, saving model to results\2021-12-03_MIXED-320-huber_loss-adam-LSTM-elu-layers-3-units-1000-b.h5
Epoch 5/25
Epoch 00005: val_loss did not improve from 0.26663
Epoch 6/25
Epoch 00006: val_loss improved from 0.26663 to 0.25748, saving model to results\2021-12-03_MIXED-320-huber_loss-adam-LSTM-elu-layers-3-units-1000-b.h5
Epoch 7/25
Epoch 00007: val_loss improved from 0.25748 to 0.25393, saving model to