In [1]:
from agents.dpm_agent import Agent
from trading_env.environment import TradingEnv

In [2]:
import dill
import numpy as np
import tensorflow as tf

In [3]:
yf_file = "./data/archive_data/yf_data.dill"
with open(yf_file,'rb') as dill_file:
    yf_df = dill.load(dill_file)
    

In [4]:
symbols = yf_df['Symbol'].unique()
stocks = []
for symbol in symbols:
    df = yf_df[yf_df['Symbol'] == symbol]
    stocks.append(df.iloc[:,2:6].to_numpy())
stocks = np.array(stocks)

In [5]:
# Split into training, validation, and testing sets

n_total_data = stocks.shape[1]

split = [int(n_total_data*0.80),int(n_total_data*0.90)]
split_data = np.split(stocks,split,axis=1)

training_data = split_data[0]
validation_data = split_data[1]
test_data = split_data[2]

# Save testing set
with open('test_data.dill','wb') as dill_file:
    dill.dump(test_data,dill_file)

In [6]:
training_data.shape

(21, 4227, 4)

In [7]:
# Batch the data
data_len = training_data.shape[1]
div = list(map(int,np.linspace(0,data_len,41)))
batched_training_data = np.split(stocks,div[1:-1],axis=1)


In [8]:
n_stocks = stocks.shape[0]
n_stock_feats = stocks.shape[2]

agent = Agent(n_stocks,n_stock_feats)

In [9]:
env = TradingEnv(batched_training_data[0])
obs = env.reset()
done = False
last_raw_action = tf.zeros((1,n_stocks+1))
while not done:
    raw_action = agent.act(obs,last_raw_action) 
    obs, reward, done, _ = env.step(agent.model.softmax_layer(raw_action))
    last_raw_action = raw_action 
print('Untrained portfolio end value:')
print(env.portfolio_value_hist[-1])


Untrained portfolio end value:
tf.Tensor(957549.0, shape=(), dtype=float32)


In [10]:
batched_training_data[0].shape

(21, 105, 4)

In [11]:
def loss_val(stock_data):
    total_loss = tf.convert_to_tensor(0.0)
    env = TradingEnv(stock_data)
    obs = env.reset()
    done = False
    last_raw_action = tf.zeros((1,n_stocks+1))
    while not done:

        raw_action=agent.act(obs,last_raw_action)
        obs,reward,done,_=env.step(agent.model.softmax_layer(raw_action))
        last_raw_action=raw_action
        total_loss-=reward

    return total_loss, env



In [12]:
import datetime
time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
validation_log_dir = 'logs/grad_tape/'+time+'/valid'
validation_summary_writer = tf.summary.create_file_writer(validation_log_dir)

In [13]:
validation_loss = tf.keras.metrics.Mean('validation_loss', dtype=tf.float32)
EPOCHS = 5

for epoch in range(EPOCHS):

    for batch in batched_training_data:
        # Train on current batch
        with tf.GradientTape() as tape:
            tape.watch(agent.model.trainable_variables)
            loss, env = loss_val(batch)
        grad = tape.gradient(loss,agent.model.trainable_variables)
        grad = [g / tf.reduce_mean(tf.abs(g)) for g in grad]
        agent.opt.apply_gradients(zip(grad,agent.model.trainable_variables))

    # Evaluate performance on validation set:
    loss,env = loss_val(validation_data)
    print(env.portfolio_value_hist[-1])
    validation_loss(loss)
    with validation_summary_writer.as_default():
        tf.summary.scalar('loss',validation_loss.result(),step = epoch)



tf.Tensor(1501730.2, shape=(), dtype=float32)
tf.Tensor(1465503.1, shape=(), dtype=float32)
tf.Tensor(1457917.5, shape=(), dtype=float32)
tf.Tensor(1456738.4, shape=(), dtype=float32)
tf.Tensor(1455627.2, shape=(), dtype=float32)


In [None]:
#%load_ext tensorboard
#%tensorboard  --logdir logs/grad_tape/