In [None]:
from agents.dpm__agent import Agent
from trading_env.environment import TradingEnv

In [None]:
import dill
import numpy as np
import tensorflow as tf

In [None]:
# Load the serialized price data from disk. Later cells index the result by a
# 'Symbol' column and slice it with .iloc, so it is presumably a pandas
# DataFrame -- confirm against whatever produced the file.
yf_file = "./data/archive_data/yf_data.dill"
# NOTE(review): dill.load, like pickle.load, can execute arbitrary code while
# deserializing. Only load files from a trusted source.
with open(yf_file,'rb') as dill_file:
    yf_df = dill.load(dill_file)
    

In [None]:
# One 2-D array per symbol (positional columns 2..5 of that symbol's rows),
# collected in order of first appearance and stacked along a new leading axis.
symbols = yf_df['Symbol'].unique()
stocks = np.array([
    yf_df[yf_df['Symbol'] == symbol].iloc[:, 2:6].to_numpy()
    for symbol in symbols
])

In [None]:
# Batch the data: cut axis 1 of `stocks` into `n_batches` contiguous chunks.
# The split points are derived from the actual length of axis 1 instead of a
# hard-coded row count, so the cell keeps working if the data file changes.
n_batches = 50
div = list(map(int, np.linspace(0, stocks.shape[1], n_batches + 1)))
# div[0] and div[-1] are the outer edges; np.split only wants interior cut points.
batched_data = np.split(stocks, div[1:-1], axis=1)

# Segment batches into training batches, 1 validation batch, 1 testing batch
training_stock_data = batched_data[:-2]
validation_stock_data = batched_data[-2]
test_stock_data = batched_data[-1]



In [None]:
# Sanity check: shape of the first training batch. Axis 0 indexes symbols,
# axis 1 the rows kept in this batch, axis 2 the selected feature columns.
training_stock_data[0].shape

In [None]:
# Problem dimensions read off the stacked `stocks` array.
# Number of symbols (leading axis).
n_stocks = stocks.shape[0]
# NOTE(review): `window` is not referenced by any other cell visible in this
# notebook -- confirm whether it is still needed.
window = 64
# Number of feature columns kept per symbol (last axis of the 3-D array).
n_stock_feats = stocks.shape[2]

In [None]:
# Build the agent from the two data dimensions computed above. Later cells use
# its `act`, `model` and `opt` attributes.
agent = Agent(n_stocks,n_stock_feats)

In [None]:
# Baseline: roll the still-untrained agent through the first training batch
# once and report where the portfolio ends up.
env = TradingEnv(training_stock_data[0])
obs = env.reset()
# There is no previous action on the first step, so start from zeros.
# presumably one slot per stock plus one extra (e.g. cash) -- confirm against Agent.
last_raw_action = tf.zeros((1, n_stocks + 1))
done = False
while not done:
    raw_action = agent.act(obs, last_raw_action)
    # The environment is stepped with the softmax-ed action, while the agent
    # is fed its own raw (pre-softmax) output on the next step.
    obs, reward, done, _ = env.step(agent.model.softmax_layer(raw_action))
    last_raw_action = raw_action
print('Untrained portfolio end value:', env.portfolio_value_hist[-1], sep='\n')


In [None]:
def loss_val(stock_data):
    """Run the global ``agent`` through one full episode on ``stock_data``.

    A fresh ``TradingEnv`` is built from ``stock_data``, reset, and stepped
    until it reports ``done``. On every step the agent's raw action is passed
    through ``agent.model.softmax_layer`` before being handed to the
    environment, and the raw action is fed back to the agent on the next step
    (zeros on the first step).

    Relies on the notebook-level names ``agent`` and ``n_stocks``.

    Args:
        stock_data: Data passed unchanged to ``TradingEnv``.

    Returns:
        A tuple ``(total_loss, env)`` where ``total_loss`` is the negated sum
        of the rewards collected over the episode and ``env`` is the finished
        environment (callers read ``env.portfolio_value_hist`` from it).
    """
    episode_env = TradingEnv(stock_data)
    observation = episode_env.reset()
    prev_raw_action = tf.zeros((1, n_stocks + 1))
    negative_return = tf.convert_to_tensor(0.0)

    finished = False
    while not finished:
        current_raw_action = agent.act(observation, prev_raw_action)
        portfolio_action = agent.model.softmax_layer(current_raw_action)
        observation, step_reward, finished, _ = episode_env.step(portfolio_action)
        # Loss is the negative of the reward, accumulated over the episode.
        negative_return = negative_return - step_reward
        prev_raw_action = current_raw_action

    return negative_return, episode_env



In [None]:
# Training loop.
#
# final_portfolio_values_validation_set: final validation portfolio value,
#   recorded before every gradient step and once more after all training.
# final_portfolio_values_training_set: one inner list per training batch,
#   holding the batch's final portfolio value before training followed by the
#   value from each training forward pass.
final_portfolio_values_validation_set = []
final_portfolio_values_training_set = []

# Number of gradient steps taken on each batch before moving to the next one.
n_iterations_per_batch = 5

for batch in training_stock_data:
    end_values_for_current_batch = []
    # Find end value of portfolio before training on the batch
    _, env = loss_val(batch)
    end_values_for_current_batch.append(env.portfolio_value_hist[-1])

    for iteration in range(n_iterations_per_batch):
        # Evaluate on the validation set before this gradient step (outside
        # the tape, so it does not contribute to the gradients).
        _, env = loss_val(validation_stock_data)
        print(env.portfolio_value_hist[-1])
        final_portfolio_values_validation_set.append(env.portfolio_value_hist[-1])

        # Forward pass on the current batch under the tape.
        with tf.GradientTape() as tape:
            # Trainable variables are watched automatically; the explicit
            # watch is kept from the original and is harmless.
            tape.watch(agent.model.trainable_variables)
            loss, env = loss_val(batch)
        # Record the final portfolio value of this forward pass (i.e. with the
        # weights as they were before the update below).
        end_values_for_current_batch.append(env.portfolio_value_hist[-1])

        grad = tape.gradient(loss, agent.model.trainable_variables)
        # Rescale each gradient to unit mean absolute value. divide_no_nan
        # returns 0 where the denominator is 0, so an all-zero gradient stays
        # zero instead of turning into NaN and corrupting the weights.
        grad = [tf.math.divide_no_nan(g, tf.reduce_mean(tf.abs(g))) for g in grad]
        agent.opt.apply_gradients(zip(grad, agent.model.trainable_variables))

    final_portfolio_values_training_set.append(end_values_for_current_batch)

# Final validation score after all batches have been trained on.
_, env = loss_val(validation_stock_data)
final_portfolio_values_validation_set.append(env.portfolio_value_hist[-1])

In [None]:
# One inner list per training batch: the final portfolio value before training
# on that batch, followed by one value per training iteration on it.
final_portfolio_values_training_set

In [None]:
# Final validation-set portfolio values in chronological order: one recorded
# before every training iteration, plus one after all training finished.
final_portfolio_values_validation_set

In [None]:
# Debug check on `grad`, which is left over from the last training iteration.
# Each entry was divided by its own mean absolute value there, so every
# printed value should be approximately 1 for any non-zero gradient.
for g in grad:
    print(tf.reduce_mean(tf.abs(g)))

In [None]:
# Inspect the softmax-ed action for the current `obs` with a zero previous
# action. NOTE(review): `obs` is the notebook-level variable last assigned by
# the untrained-rollout cell, so this depends on that cell having run.
agent.model.softmax_layer(agent.act(obs,tf.zeros((1,n_stocks+1))))

In [None]:
# Inspect the raw action divided by its plain sum (as opposed to the softmax
# used elsewhere). The action is evaluated once and reused so numerator and
# denominator are guaranteed to come from the same forward pass.
raw_action_probe = agent.act(obs, tf.zeros((1, n_stocks + 1)))
raw_action_probe / tf.reduce_sum(raw_action_probe)

In [None]:
# Display the model's trainable variables after training.
# NOTE(review): this dumps every variable in full; consider showing only
# names/shapes if the output becomes unwieldy.
agent.model.trainable_variables