In [1]:
import sys
# Make the notebook's working directory importable so the project-local
# packages imported below (envs, datasets, models) resolve. The membership
# guard keeps re-running this cell from appending duplicate entries.
if './' not in sys.path:
    sys.path.append('./')

In [2]:
import tensorflow as tf

# from tensorflow.keras.mixed_precision import experimental as mixed_precision
import pandas as pd
import numpy as np
import io
import os

from envs.stocks_env_multiaction import Stocks_env
from datasets import nyse
from models.lstm_selfattention_embedding import ActorCritic

from IPython.display import clear_output
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Turn on memory growth for every visible GPU so TensorFlow allocates GPU
# memory on demand, then report which devices were found.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

print(tf.config.list_logical_devices('GPU'))
# `tf.test.is_gpu_available()` is deprecated in favour of
# `tf.config.list_physical_devices('GPU')`; reuse the list queried above so the
# cell still displays a boolean as its result.
bool(gpus)

# set up policy used in mixed precision
# from tensorflow.keras.mixed_precision import experimental as mixed_precision
# policy = mixed_precision.Policy('mixed_float16')
# mixed_precision.set_policy(policy)

1 Physical GPUs, 1 Logical GPUs
[LogicalDevice(name='/device:GPU:0', device_type='GPU')]
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [4]:
# Alternative loader without industry information (kept for reference):
#data = nyse.load_data('../data/')
# Load the dataset from ../data/ together with its industry side-information.
# The call returns three values that later cells consume:
#   data               - exposes a `symbol` attribute (used to size the batch)
#   tokenized_industry - at least 2-D; its second dimension is passed to the
#                        model as `text_lenght`
#   vocabulary_size    - passed to the model
# NOTE(review): exact types/schema are defined in datasets.nyse — confirm there.
data, tokenized_industry, vocabulary_size = nyse.load_data_with_industry('../data/')

In [5]:
# Hyper params:
# Where each value is consumed in this notebook:
#   lr                                      -> Adam optimizer
#   run_lenght, window_size, initial_money,
#   test_seed, batch_size                   -> Stocks_env (constructor / reset)
#   hidden_dim, num_filters, lstm_units,
#   embedding_out, in_lstm_units            -> ActorCritic
#   num_blocks                              -> not referenced by any cell shown here
# NOTE(review): the model sizes presumably have to match the values the loaded
# checkpoint was trained with — confirm against the training notebook.
# NOTE(review): "lenght" is misspelled, but `run_lenght` / `text_lenght` are also
# the keyword names used by the env and model, so they are left as-is.
lr               = 1e-5
run_lenght       = 30
window_size      = 5
hidden_dim       = 512
num_filters      = 128
lstm_units       = 1024
num_blocks       = 1
embedding_out    = 6
in_lstm_units    = 16
initial_money    = 100
# test_seed=None is passed to the env unchanged.
# NOTE(review): presumably None means "unseeded", so runs are not reproducible — confirm.
test_seed        = None
# One batch entry per distinct symbol in the dataset.
batch_size       = len(np.unique(data.symbol))

# log
# Checkpoint name (without the '.h5' suffix) loaded from `models_directory`.
tested_model = 'model-stonks-2021_02_25-07:36:05'
# Root directory for the CSV results written by the evaluation loop.
save_directory = 'results/test-all/'
models_directory = 'results/models/'
# Tag used for the TensorBoard run directory and as a prefix of every CSV file.
# NOTE(review): the timestamp duplicates the one in `tested_model`; keep both in sync.
identifier = "test-2021_02_25-07:36:05"
test_summary_writer = tf.summary.create_file_writer('results/summaries/test/' + identifier)
# Running means updated inside test_env() and reset after every evaluation run.
mean_test_reward = tf.keras.metrics.Mean(name='mean_test_reward')
mean_test_daily_change = tf.keras.metrics.Mean(name='mean_test_daily_change')

In [6]:
# initialize env
# The environment is built over the full dataset with one batch entry per symbol
# (see `batch_size`).
# NOTE(review): train_test_ratio=1 presumably puts all data into a single split —
# confirm against Stocks_env, since the env is later reset with training=False.
env = Stocks_env(data, batch_size, window_size, run_lenght, train_test_ratio=1,
                 tokenized_industry=tokenized_industry, test_seed=test_seed, initial_money=initial_money)
# `num_inputs` is not used by any cell shown here; `num_policies` sizes the model output.
num_inputs  = env.get_observation_space()
num_policies = env.get_action_space()

In [7]:
# initialize the model
model = ActorCritic(num_policies = num_policies, hidden_dim=hidden_dim, num_filters=num_filters, 
                    lstm_units=lstm_units, text_lenght=tokenized_industry.shape[1], 
                    vocabulary_size=vocabulary_size, embedding_out=embedding_out, in_lstm_units = in_lstm_units)
# NOTE(review): the optimizer is created but not used by any cell shown here
# (this notebook only evaluates).
optimizer = tf.keras.optimizers.Adam(lr)
# Run a single forward pass on a real observation before loading the checkpoint;
# presumably this builds the model's variables so load_weights() has something
# to restore into — confirm against the ActorCritic implementation.
state = env.reset(training=False)
model(state[0], state[1])
model.load_weights(models_directory + tested_model + '.h5')

In [8]:
def test_env(record_days=False):
    """Play one evaluation episode with the global `model` on the global `env`.

    The environment is reset in non-training mode and stepped with actions
    sampled from the distribution returned by the model until the environment
    reports that the episode is done. Each step's reward and daily change are
    fed into the global `mean_test_reward` / `mean_test_daily_change` metrics.

    Args:
        record_days: when True, the per-step operations, day and reward values
            are collected (as numpy arrays); when False the three lists are
            returned empty.

    Returns:
        Tuple `(operation_array, days_array, rewards_array, total_profit)`.
        The first three are lists with one entry per step (empty unless
        `record_days` is set). `total_profit` is the per-step profit summed
        over the episode and divided by `initial_money`.
    """
    state = env.reset(training=False, batch_size=batch_size, run_lenght=run_lenght, initial_money=initial_money)
    operation_array, days_array, rewards_array = [], [], []
    total_profit = np.zeros(batch_size)
    finished = False
    while not finished:
        # The first model output is not needed for evaluation; only the
        # action distribution is used.
        _, policy = model(state[0], state[1])
        action = policy.sample()
        state, reward, finished, operations, day, daily_change, profit = env.step(action)
        if record_days:
            step_records = (
                (operation_array, operations),
                (days_array, day),
                (rewards_array, reward),
            )
            for log, value in step_records:
                log.append(np.array(value))
        mean_test_reward(np.array(reward))
        mean_test_daily_change(np.array(daily_change))
        total_profit += profit
    # Express the accumulated profit relative to the starting capital.
    return operation_array, days_array, rewards_array, total_profit / initial_money

In [9]:
# Number of independent evaluation episodes to run.
repeat = 100

# One entry per episode; each entry is the per-symbol profit returned by test_env().
test_total_profits = []

# Create every output directory once, before the loop. `exist_ok=True` replaces
# the check-then-create pairs and keeps the cell safe to re-run.
for subdir in ('operations', 'endingdays', 'rewards', 'profits'):
    os.makedirs(os.path.join(save_directory, subdir), exist_ok=True)

for i in range(repeat):

    print(i)
    operation_array, days_array, rewards_array, test_total_profit = test_env(record_days=True)
    test_total_profits.append(test_total_profit)

    with test_summary_writer.as_default():
        tf.summary.scalar('mean_test_reward', mean_test_reward.result(), step=i)
        # This metric is accumulated in test_env() and reset below, so report it
        # as well instead of silently discarding it.
        tf.summary.scalar('mean_test_daily_change', mean_test_daily_change.result(), step=i)

    # Write the per-step records of this episode as CSV, one column per symbol.
    symbols = env.get_current_symbols()
    per_step_logs = (
        ('operations', operation_array),
        ('endingdays', days_array),
        ('rewards', rewards_array),
    )
    for subdir, records in per_step_logs:
        pd.DataFrame(records).to_csv(
            os.path.join(save_directory, subdir, "{}-iteration{}.csv".format(identifier, i)),
            header=symbols, index=False)

    # Rewrite the cumulative profits file every iteration so partial results
    # survive an interrupted run.
    pd.DataFrame(test_total_profits).to_csv(
        os.path.join(save_directory, 'profits', "{}.csv".format(identifier)),
        index=False)

    # Start the next episode with fresh running means.
    mean_test_reward.reset_states()
    mean_test_daily_change.reset_states()

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
