In [1]:
import sys
# Put the notebook's own directory on the import path (presumably so the local
# `envs` and `datasets` packages imported below resolve); skipped when the
# entry is already present so re-running the cell does not add duplicates.
if './' not in sys.path:
    sys.path.append('./')

In [2]:
import pandas as pd
import numpy as np
import io
import os
from datetime import datetime
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from envs.stocks_env_multiaction import Stocks_env
from datasets import nyse
import tensorflow as tf

from IPython.display import clear_output
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Load the data set from ../data/. The loader returns three values; only the
# first one is used in this notebook.
# (Alternative loader kept for reference: data = nyse.load_data('../data/'))
# The two unused values get explicit throw-away names instead of `_`: rebinding
# `_` inside an IPython kernel stops it from tracking the last cell output.
data, _unused_a, _unused_b = nyse.load_data_with_industry('../data/')

In [4]:
# Hyper params:
seed             = 42    # handed to Stocks_env as test_seed
batch_size       = 256   # batch size handed to the Stocks_env constructor

# log
save_directory = 'results/bah/'
# Timestamp that makes each run's identifier unique. The time fields are joined
# with '_' rather than ':' because the identifier is embedded in output file and
# directory names, and ':' is not a legal file-name character on Windows.
date = datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
identifier = "bah-" + date

In [5]:
# Settings for the evaluation environment.
window_size = 1
run_lenght = 10  # spelling kept as-is: env.reset() is called with a `run_lenght` keyword
initial_money = 100
train_test_ratio = 0.2

env = Stocks_env(
    data,
    window_size,
    run_lenght,
    batch_size=batch_size,
    train_test_ratio=train_test_ratio,
    test_seed=seed,
    initial_money=initial_money,
)

# NOTE: this rebinds the `batch_size` hyper-parameter to the length of the
# environment's test-symbol list, which is the value the following cells use.
# Re-running this cell without first re-running the hyper-parameter cell
# therefore passes the overwritten value to Stocks_env.
batch_size = len(env.get_test_symbols())

In [6]:
def test_env(record_days=False):
    env.reset(training=False, batch_size=batch_size, run_lenght=run_lenght, initial_money=initial_money)
    done = False
    operation_array = []
    days_array = []
    rewards_array = []
    total_profit = np.zeros(batch_size)
    while not done:
        actions = [[1,0]]*batch_size
        _, reward, done, operations, day, profit = env.step(actions)
        if record_days:
            operation_array.append(np.array(operations))
            days_array.append(np.array(day))
            rewards_array.append(np.array(reward))
        mean_test_reward(np.array(reward))
        total_profit += profit
    total_profit = total_profit/initial_money
    return operation_array, days_array, rewards_array, total_profit

In [8]:
# Repeated evaluation: run `repeat` test episodes, log each episode's mean
# reward to a TensorBoard summary and write the per-episode traces as CSV files
# below `save_directory`.
save_directory = 'results/test-all/'
test_summary_writer = tf.summary.create_file_writer('results/summaries/test/' + identifier)
mean_test_reward = tf.keras.metrics.Mean(name='mean_test_reward')
# Newer Keras versions name the reset method reset_state(); older ones only
# provide reset_states(). Use whichever this installation has.
reset_mean_test_reward = getattr(mean_test_reward, 'reset_state', None) or mean_test_reward.reset_states

repeat = 100

test_total_profits = []

# Create the output folders once, up front. exist_ok=True makes this safe when
# they already exist and replaces a check-then-create pair per folder that
# previously ran on every iteration.
for subdir in ('', 'operations/', 'endingdays/', 'rewards/', 'profits/'):
    os.makedirs(save_directory + subdir, exist_ok=True)

for i in range(repeat):

    operation_array, days_array, rewards_array, test_total_profit = test_env(record_days=True)
    test_total_profits.append(test_total_profit)

    with test_summary_writer.as_default():
        tf.summary.scalar('mean_test_reward', mean_test_reward.result(), step=i)

    # Column headers for the per-step CSV files of this episode.
    symbols = env.get_current_symbols()

    # One CSV per episode and per trace kind; each row is one environment step.
    pd.DataFrame(operation_array).to_csv(save_directory+"operations/{}-iteration{}.csv".format(identifier, i),
                                         header=symbols, index=False)
    pd.DataFrame(days_array).to_csv(save_directory+"endingdays/{}-iteration{}.csv".format(identifier, i),
                                    header=symbols, index=False)
    pd.DataFrame(rewards_array).to_csv(save_directory+"rewards/{}-iteration{}.csv".format(identifier, i),
                                       header=symbols, index=False)
    # The profits file is rewritten in full after every episode, so an
    # interrupted run still leaves the results gathered so far on disk.
    pd.DataFrame(test_total_profits).to_csv(save_directory+"profits/{}.csv".format(identifier),
                                            index=False)

    # Start the next episode with an empty running mean.
    reset_mean_test_reward()
    # Running mean over all episodes so far, scaled by 30 / run_lenght.
    print("{}: {}".format(i, np.mean(test_total_profits)/run_lenght*30))

0: -0.0001492032599007476
1: -0.004973678941175092
2: -0.0015641374246647018
3: 0.00975324996893991
4: 0.0055536838600527836
5: 0.010784616390497902
6: 0.004133763282035133
7: 0.004963154364624164
8: 0.005113413642148282
9: 0.007004089951708279
10: 0.007410930491010144
11: 0.007416573163612946
12: 0.007827362547468618
13: 0.007221430360842185
14: 0.008835309023044136
15: 0.008476553176981799
16: 0.007733371263268878
17: 0.008019615386011454
18: 0.00903606559036222
19: 0.009186144522256216
20: 0.008627522689395945
21: 0.007662848775040835
22: 0.007940598921823463
23: 0.007839522433063848
24: 0.009734756384191056
25: 0.01077975316953718
26: 0.010443399710336193
27: 0.010686514315284841
28: 0.011051406499508079
29: 0.011135541684639438
30: 0.010836673824837346
31: 0.01036345988564249
32: 0.009953497908673198
33: 0.010725576546664237
34: 0.010277095266484078
35: 0.010323046797473097
36: 0.01071649026873508
37: 0.010717941338948728
38: 0.01067478277739096
39: 0.010696601884490637
40: 0.0104

In [13]:
# Variance of all recorded profits after scaling them by 30 / run_lenght
# (the same scaling the evaluation loop applies to its printed running mean).
scaled_profits = np.array(test_total_profits) / run_lenght * 30
np.var(scaled_profits)

0.018753545661428575