In [1]:
from agents.dpm__agent import Agent
from trading_env.environment import TradingEnv

In [2]:
import dill
import numpy as np
import tensorflow as tf

In [3]:
# Deserialize the archived `yf_df` table from its dill snapshot on disk.
# NOTE(review): dill, like pickle, can run arbitrary code while loading;
# only point this at snapshots from a trusted source.
yf_file = "./data/archive_data/yf_data.dill"
with open(yf_file, mode='rb') as snapshot:
    yf_df = dill.load(snapshot)
    

In [4]:
# Build one 2-D matrix (rows x selected columns) per symbol, then stack them
# along a new leading axis so that axis 0 indexes symbols.
symbols = yf_df['Symbol'].unique()
stocks = []
for symbol in symbols:
    df = yf_df[yf_df['Symbol'] == symbol]
    # Positional columns 2..5 are the feature columns kept for each symbol.
    stocks.append(df.iloc[:, 2:6].to_numpy())

# Every symbol must contribute the same number of rows. Without this check,
# np.array() would build a ragged object array (or fail with an opaque
# message on newer NumPy), and the error would only surface later when
# stocks.shape[2] or np.split(..., axis=1) is evaluated.
row_counts = {sym: len(rows) for sym, rows in zip(symbols, stocks)}
if len(set(row_counts.values())) > 1:
    raise ValueError(
        f"Symbols have differing numbers of rows, cannot stack: {row_counts}"
    )

# np.stack also raises a clear ValueError when there are no symbols at all.
stocks = np.stack(stocks)

In [5]:
# Batch the data: 11 evenly spaced (truncated-to-int) boundaries define 10
# consecutive chunks along axis 1 of `stocks`.
# NOTE(review): 5284 is presumably stocks.shape[1] -- TODO confirm and derive
# it from the array instead of hard-coding it.
div = np.linspace(0, 5284, 11).astype(int).tolist()
interior_cuts = div[1:-1]  # np.split wants only the interior boundaries
batched_data = np.split(stocks, interior_cuts, axis=1)

# Segment the chunks: the last one is held out for testing, the one before it
# for validation, and every earlier chunk is used for training.
*training_stock_data, validation_stock_data, test_stock_data = batched_data



In [6]:
# Problem dimensions read off the stacked array: axis 0 counts stocks and
# axis 2 counts the per-stock feature columns.
n_stocks, n_stock_feats = stocks.shape[0], stocks.shape[2]
# NOTE(review): `window` is not referenced by any cell visible here;
# presumably an observation look-back length -- confirm or remove.
window = 64

In [7]:
# Create the trading agent, sized by the number of stocks and the number of
# feature columns per stock. This single instance is shared by all later cells.
agent = Agent(n_stocks,n_stock_feats)

In [8]:
# Baseline: play the still-untrained agent through the first training batch
# and report where the portfolio ends up.
env = TradingEnv(training_stock_data[0])
obs = env.reset()
# The first step has no previous action, so feed an all-zero one.
# NOTE(review): the extra "+1" slot is presumably a cash position -- confirm.
last_action = tf.zeros((1, n_stocks + 1))
done = False
while True:
    action = agent.act(obs, last_action)
    obs, reward, done, _ = env.step(action)
    last_action = action
    if done:
        break
print('Untrained portfolio end value:', env.portfolio_value_hist[-1], sep='\n')


Untrained portfolio end value:
tf.Tensor(1024432.06, shape=(), dtype=float32)


In [9]:
def loss_val(stock_data):
    """Play one full episode on ``stock_data`` and return its loss.

    A fresh ``TradingEnv`` is built from ``stock_data`` and stepped with the
    module-level ``agent`` until the environment reports ``done``. Each call to
    ``agent.act`` receives the current observation and the action taken on the
    previous step (all zeros, shape ``(1, n_stocks + 1)``, on the first step).

    Relies on the module-level names ``agent``, ``n_stocks``, ``TradingEnv``
    and ``tf``.

    Args:
        stock_data: data passed straight to the ``TradingEnv`` constructor.

    Returns:
        A ``(total_loss, env)`` tuple: ``total_loss`` starts at 0.0 and has
        every step's reward subtracted from it; ``env`` is the environment
        after the episode has finished, so callers can read its
        ``portfolio_value_hist``.
    """
    env = TradingEnv(stock_data)
    obs = env.reset()
    prev_action = tf.zeros((1, n_stocks + 1))
    total_loss = tf.convert_to_tensor(0.0)

    finished = False
    while not finished:
        action = agent.act(obs, prev_action)
        obs, reward, finished, _ = env.step(action)
        # Loss is the negated cumulative reward.
        total_loss = total_loss - reward
        prev_action = action

    return total_loss, env



In [10]:
# Final portfolio values, collected once per evaluation / training episode.
final_portfolio_values_validation_set = []
final_portfolio_values_training_set = []

for batch in training_stock_data:
    # Snapshot validation performance *before* this batch's update.
    _, env = loss_val(validation_stock_data)
    final_portfolio_values_validation_set.append(env.portfolio_value_hist[-1])

    # Forward pass over the whole training episode under the tape so the
    # episode loss can be differentiated w.r.t. the model weights.
    with tf.GradientTape() as tape:
        tape.watch(agent.model.trainable_variables)
        loss, env = loss_val(batch)

    # Where the portfolio ended on this training batch (pre-update weights).
    final_portfolio_values_training_set.append(env.portfolio_value_hist[-1])

    # One optimizer step per batch.
    grad = tape.gradient(loss, agent.model.trainable_variables)
    agent.opt.apply_gradients(zip(grad, agent.model.trainable_variables))

# One last validation snapshot after the final update, so the validation list
# ends up one entry longer than the training list.
_, env = loss_val(validation_stock_data)
final_portfolio_values_validation_set.append(env.portfolio_value_hist[-1])

In [11]:
# Final portfolio value of each training episode, one entry per training batch,
# recorded before that batch's gradient step was applied.
final_portfolio_values_training_set

[<tf.Tensor: shape=(), dtype=float32, numpy=1024432.06>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1134769.6>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1375342.4>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1704820.1>,
 <tf.Tensor: shape=(), dtype=float32, numpy=941240.94>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1401714.9>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1264007.9>,
 <tf.Tensor: shape=(), dtype=float32, numpy=2517363.0>]

In [12]:
# Final portfolio value on the validation batch: one entry taken before each
# training step, plus one taken after the last step.
final_portfolio_values_validation_set

[<tf.Tensor: shape=(), dtype=float32, numpy=1147479.4>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1137141.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1171535.6>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1274916.4>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1368940.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1446460.6>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1514675.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1475885.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1546953.6>]

In [13]:
# Gradients from the last training step, one tensor per entry of
# agent.model.trainable_variables.
# NOTE(review): this dumps every full tensor; consider displaying only the
# shapes or norms to keep the notebook output readable.
grad

[<tf.Tensor: shape=(1, 3, 4, 2), dtype=float32, numpy=
 array([[[[-0.05841961, -0.06230804],
          [-0.05359459, -0.05709395],
          [-0.05522793, -0.05856491],
          [-0.05576876, -0.05954437]],
 
         [[-0.06019116, -0.06434103],
          [-0.05514999, -0.05921707],
          [-0.05670406, -0.06068263],
          [-0.05719268, -0.06119261]],
 
         [[-0.06146988, -0.06585895],
          [-0.05739947, -0.06123728],
          [-0.05798244, -0.0621325 ],
          [-0.05998799, -0.06377127]]]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([-6.5459983e-07, -1.5492532e-07], dtype=float32)>,
 <tf.Tensor: shape=(1, 62, 2, 20), dtype=float32, numpy=
 array([[[[ 0.        ,  0.        ,  0.        , ..., -0.00145845,
            0.        ,  0.        ],
          [ 0.        ,  0.        ,  0.        , ..., -0.00271013,
            0.        ,  0.        ]],
 
         [[ 0.        ,  0.        ,  0.        , ..., -0.00096602,
            0.      