In [1]:
import sys
if not './' in sys.path:
    sys.path.append('./')

In [2]:
import tensorflow as tf

# from tensorflow.keras.mixed_precision import experimental as mixed_precision
import pandas as pd
import numpy as np
import io
import os

from envs.stocks_env_multiaction import Stocks_env
from datasets import nyse
# from models.lstm_selfattention_embedding import ActorCritic
from models.lstm import ActorCritic

from IPython.display import clear_output
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
              tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
        
print(tf.config.experimental.list_logical_devices('GPU'))
tf.test.is_gpu_available()

# set up policy used in mixed precision
# from tensorflow.keras.mixed_precision import experimental as mixed_precision
# policy = mixed_precision.Policy('mixed_float16')
# mixed_precision.set_policy(policy)

1 Physical GPUs, 1 Logical GPUs
[LogicalDevice(name='/device:GPU:0', device_type='GPU')]
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [4]:
#data = nyse.load_data('../data/')
data, tokenized_industry, vocabulary_size = nyse.load_data_with_industry('../data/', column='GICS Sector')

In [5]:
# Hyper params:
lr               = 1e-4
run_lenght       = 30
window_size      = 5
hidden_dim       = 512
num_filters      = 128
lstm_units       = 1024
num_blocks       = 1
embedding_out    = 6
in_lstm_units    = 16
initial_money    = 100
test_seed        = 42
train_test_ratio = 0.2


# log
tested_model = 'best-lstm-2021_03_29-04:11:02'
save_directory = 'results/test-all/'
models_directory = 'results/models/'
identifier = "test-"+tested_model
test_summary_writer = tf.summary.create_file_writer('results/summaries/test/' + identifier)
mean_test_reward = tf.keras.metrics.Mean(name='mean_test_reward')


In [6]:
# initialize env
env = Stocks_env(data, window_size, run_lenght, random_reset=True, train_test_ratio=train_test_ratio,
                 tokenized_industry=tokenized_industry, test_seed=test_seed, initial_money=initial_money)
batch_size  = len(env.get_test_symbols())
num_inputs  = env.get_observation_space()
num_policies = env.get_action_space()

In [7]:
# initialize the model
model = ActorCritic(num_policies = num_policies, hidden_dim=hidden_dim, num_filters=num_filters, 
                    lstm_units=lstm_units, text_lenght=tokenized_industry.shape[1], 
                    vocabulary_size=vocabulary_size, embedding_out=embedding_out, in_lstm_units = in_lstm_units)
optimizer = tf.keras.optimizers.Adam(lr)
state = env.reset(training=False, batch_size=batch_size)
model(state[0], state[1])
model.load_weights(models_directory + tested_model + '.h5')

OSError: Unable to open file (unable to open file: name = 'results/models/best-lstm-2021_03_29-04:11:02.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
def test_env(record_days=False):
    state = env.reset(training=False, batch_size=batch_size, run_lenght=run_lenght, initial_money=initial_money)
    done = False
    operation_array = []
    days_array = []
    rewards_array = []
    total_profit = np.zeros(batch_size)
    while not done:
        _, dist = model(state[0], state[1])
        next_state, reward, done, operations, day, profit = env.step(dist.sample())
        state = next_state
        if record_days:
            operation_array.append(np.array(operations))
            days_array.append(np.array(day))
            rewards_array.append(np.array(reward))
        mean_test_reward(np.array(reward))
        total_profit += profit
    total_profit = total_profit/initial_money
    return operation_array, days_array, rewards_array, total_profit

In [None]:
repeat = 500

test_total_profits = []

for i in range(repeat):

    operation_array, days_array, rewards_array, test_total_profit = test_env(record_days=True)
    test_total_profits.append(test_total_profit)

    with test_summary_writer.as_default():
        tf.summary.scalar('mean_test_reward', mean_test_reward.result(), step=i)

    # serialize weights to HDF5
    if not os.path.exists(save_directory):
        os.makedirs(save_directory)
    if not os.path.exists(save_directory+'operations/'):
        os.makedirs(save_directory+'operations/')
    if not os.path.exists(save_directory+'endingdays/'):
        os.makedirs(save_directory+'endingdays/')
    if not os.path.exists(save_directory+'rewards/'):
        os.makedirs(save_directory+'rewards/')
    if not os.path.exists(save_directory+'profits/'):
        os.makedirs(save_directory+'profits/')
    pd.DataFrame(operation_array).to_csv(save_directory+"operations/{}-iteration{}.csv".format(identifier, i), 
                                         header=env.get_current_symbols(), index=None)
    pd.DataFrame(days_array).to_csv(save_directory+"endingdays/{}-iteration{}.csv".format(identifier, i), 
                                         header=env.get_current_symbols(), index=None)
    pd.DataFrame(rewards_array).to_csv(save_directory+"rewards/{}-iteration{}.csv".format(identifier, i), 
                                         header=env.get_current_symbols(), index=None)
    pd.DataFrame(test_total_profits).to_csv(save_directory+"profits/{}.csv".format(identifier),
                                            index=None)
    mean_test_reward.reset_states()

    print("{}: {}".format(i, np.mean(test_total_profits)))

In [None]:
# buy and hold
total_profit = 0
for symbol in env.get_test_symbols():
    sym_data = data.loc[data.symbol==symbol].drop(["symbol"], axis=1)
    profit = (sym_data.close.iloc[sym_data.shape[0]-1]-sym_data.close.iloc[0])/sym_data.close.iloc[0]/len(sym_data)
    total_profit += profit
total_profit/len(env.get_test_symbols())*30

In [None]:
# best
total_profit = 0
for symbol in env.get_test_symbols():
    sym_data = data.loc[data.symbol==symbol].drop(["symbol"], axis=1)
    profit = abs(sym_data.close.iloc[sym_data.shape[0]-1]-sym_data.close.iloc[0])/sym_data.close.iloc[0]/len(sym_data)
    total_profit += profit
total_profit/len(env.get_test_symbols())*30