In [1]:
from agents import DPM_Agent, agent_loss, sampled_agent_reward
from trading_env.environment import TradingEnv

In [2]:
import dill
import random
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# Load the archived `yf` price dataframe from its dill file.
# NOTE(review): dill.load can execute arbitrary code -- only load trusted files.
yf_file = "./data/archive_data/yf_data.dill"
with open(yf_file, mode='rb') as archive_handle:
    yf_df = dill.load(archive_handle)
    

In [4]:
# Preview the first five rows of the data set.
print('Head of data set:')
yf_df.head(n=5)

Head of data set:


Unnamed: 0,Date,Symbol,high,low,open,close,volume,adjclose
0,2000-01-03,AAPL,1.004464,0.907924,0.936384,0.999442,535796800,0.859423
1,2000-01-04,AAPL,0.987723,0.90346,0.966518,0.915179,512377600,0.786965
2,2000-01-05,AAPL,0.987165,0.919643,0.926339,0.928571,778321600,0.798481
3,2000-01-06,AAPL,0.955357,0.848214,0.947545,0.848214,767972800,0.729382
4,2000-01-07,AAPL,0.901786,0.852679,0.861607,0.888393,460734400,0.763932


In [5]:
# Preview the last five rows of the data set.
print('Tail of data set:')
yf_df.tail(n=5)

Tail of data set:


Unnamed: 0,Date,Symbol,high,low,open,close,volume,adjclose
190219,2020-12-24,WMT,144.389999,143.179993,143.539993,143.5,3018200,142.344345
190220,2020-12-28,WMT,145.300003,143.330002,144.039993,145.220001,6448300,144.050491
190221,2020-12-29,WMT,145.850006,143.589996,145.639999,144.300003,5979400,143.137909
190222,2020-12-30,WMT,145.149994,143.940002,144.880005,144.179993,6250400,143.01886
190223,2020-12-31,WMT,144.270004,142.850006,144.199997,144.149994,5938000,142.98909


Each line of the dataframe contains the values of a single symbol for a single day. 

In [6]:
# Collect the distinct ticker symbols that appear in the yf dataframe
symbols = yf_df.loc[:, 'Symbol'].unique()

# (The per-symbol dataframes are built in the data-splitting cell below.)


## Split data into training, validation, and test sets

In [7]:
# Generate array of symbols in the yf dataframe
symbols = yf_df['Symbol'].unique()

# Parse the dates once so the range checks below compare timestamps.
# This keeps the partition correct whether the 'Date' column holds datetimes
# or ISO-formatted strings (a plain string comparison against an un-padded
# bound such as '2019-1-1' would be lexicographic and select the wrong rows).
_yf_dates = pd.to_datetime(yf_df['Date'])

# Price columns fed to the agent (same columns the positional slice 2:6 selected)
price_columns = ['high', 'low', 'open', 'close']


def _date_mask(start, end):
    """Return a boolean mask over yf_df rows whose date lies in [start, end] (inclusive)."""
    return (_yf_dates >= start) & (_yf_dates <= end)


def _frames_per_symbol(mask):
    """Return one dataframe per symbol (in `symbols` order) restricted to `mask`."""
    return [yf_df[(yf_df['Symbol'] == symbol) & mask] for symbol in symbols]


def _to_price_array(frames, label):
    """Stack the price columns of each frame into an array indexed (symbol, day, price).

    Raises:
        ValueError: if the symbols do not all have the same number of rows,
            since a ragged input cannot form a regular 3-D array.
    """
    arrays = [frame[price_columns].to_numpy() for frame in frames]
    row_counts = {arr.shape[0] for arr in arrays}
    if len(row_counts) > 1:
        raise ValueError(
            f"{label} set is ragged: symbols have differing row counts {sorted(row_counts)}"
        )
    return np.array(arrays)


# Create date masks to partition data
training_start = '2000-1-1'
training_end = '2018-12-31'
training_mask = _date_mask(training_start, training_end)
validation_start = '2019-1-1'
validation_end = '2019-12-31'
validation_mask = _date_mask(validation_start, validation_end)
test_start = '2020-1-1'
test_end = '2020-12-31'
test_mask = _date_mask(test_start, test_end)

# Select stock data for each symbol,
# partition into training, validation, or test set
training_frames = _frames_per_symbol(training_mask)
validation_frames = _frames_per_symbol(validation_mask)
test_frames = _frames_per_symbol(test_mask)

# Dates are taken from the first symbol; the ragged check in _to_price_array
# ensures every symbol has the same number of rows as this one.
validation_dates = validation_frames[0]['Date']
test_dates = test_frames[0]['Date']

# Select only high, low, open, and close prices for each stock
training_data = _to_price_array(training_frames, 'training')
validation_data = _to_price_array(validation_frames, 'validation')
test_data = _to_price_array(test_frames, 'test')

# Save the test data for post training
with open('test_data.dill','wb') as dill_file:
    dill.dump({'dates':test_dates,'data':test_data},dill_file)

## Batch the data

The training data covers 19 years of trading -- from the beginning of 2000 to the end of 2018. We will partition the trading data into 40 batches. Note that there are approximately 253 trading days per year, so there are approximately 253*19/40 ~ 120 trading days per batch. Each batch provides the data for one training episode for the agent. The agent does not trade over the first 64 days (it uses this initial data to inform trading), so each training episode contains ~ 56 steps.

In [8]:
# Cut the training array along axis 1 into n_batches consecutive chunks
n_batches = 40
data_len = training_data.shape[1]
# n_batches + 1 evenly spaced edges from 0 to data_len, truncated to integers
div = [int(edge) for edge in np.linspace(0, data_len, n_batches + 1)]
# Only the interior edges are split points; the outer two are the array bounds
batched_training_data = np.split(training_data, div[1:-1], axis=1)


In [9]:
# Build the trading environment over the validation data
validation_env = TradingEnv(validation_data)
# Keep the environment's starting tick and stock count for later cells
start_idx, n_stocks = validation_env._start_tick, validation_env.n_stocks
# Instantiate the DPM agent with its default construction
agent = DPM_Agent()

## Untrained agent

Without training, the agent effectively selects a portfolio at random (more precisely, its randomly initialized parameters determine the portfolio). We will view its performance on the validation set.

In [10]:
# Sampling settings handed to sampled_agent_reward
# (presumably the number of stocks per sample and the number of samples -- confirm against agents.py)
SAMPLES = 100
STOCKS = 10

sar = sampled_agent_reward(TradingEnv,validation_data,[agent],STOCKS,SAMPLES)
plt.hist(sar[agent.name])
plt.title("Sample agent returns without training.")
plt.show()

# Normal-approximation 95% confidence interval for the mean return
m = np.mean(sar[agent.name])
s = np.std(sar[agent.name])
half_width = 1.96 * (s / np.sqrt(SAMPLES))
L = m - half_width
H = m + half_width

print(f"Mean return: {m:.4f}.")
# Report the standard deviation `s` (the mean `m` was printed here by mistake)
print(f"Standard deviation: {s:.4f}.")
print(f"95% confidence interval: [{L:.4f},{H:.4f}].")

In [None]:
# Run the untrained agent through the validation environment as a baseline
loss = agent_loss(validation_env,agent)
portfolio_vals = validation_env.portfolio_value_hist

plt.figure(figsize=(8,8))
plt.plot(validation_dates[start_idx:], portfolio_vals)
plt.xticks(rotation=45)
plt.title('Agent performance on validation set without training.')
plt.show()

# Summarise the portfolio value history recorded by the environment
for label, value in (
    ('Initial value of portfolio: ', portfolio_vals[0]),
    ('Final value of portfolio: ', portfolio_vals[-1]),
    ('Maximum value of portfolio: ', max(portfolio_vals)),
    ('Minimum value of portfolio: ', min(portfolio_vals)),
):
    print(label, value.numpy())

## Agent training

In [None]:
TRADING_DAYS_PER_YEAR = 253

#@tf.function
def train_step(agent,batch,training_reward):
    """Run one training step of `agent` on a single batch of price data.

    A fresh TradingEnv is built from `batch`, the agent's loss over that
    environment is evaluated, and one optimizer update is applied using
    gradients rescaled by their own mean absolute value.

    Args:
        agent: object exposing `model.trainable_variables` and `opt`.
        batch: price data passed straight to `TradingEnv`.
        training_reward: unused; kept so existing callers keep working.

    Returns:
        The loss value returned by `agent_loss` for this batch.
    """
    env = TradingEnv(batch)
    variables = agent.model.trainable_variables

    # Only the forward pass needs to be recorded on the tape.
    with tf.GradientTape() as tape:
        tape.watch(variables)
        loss = agent_loss(env,agent,dsct = 0.99)

    # Differentiate and update outside the tape context so these ops are not recorded.
    grad = tape.gradient(loss,variables)
    # Rescale each gradient by its mean magnitude. divide_no_nan yields 0 rather
    # than NaN for an all-zero gradient, and variables without a gradient are skipped.
    grads_and_vars = [
        (tf.math.divide_no_nan(g, tf.reduce_mean(tf.abs(g))), v)
        for g, v in zip(grad, variables)
        if g is not None
    ]
    agent.opt.apply_gradients(grads_and_vars)
    return loss


In [None]:
def Callback_EarlyStopping(RewardsList, min_delta=0.01, patience=30):
    """Decide whether training should stop, based on the reward history.

    The mean reward over the most recent `patience` entries is compared with
    the mean over the third-most-recent window of `patience` entries. Training
    stops when the relative improvement falls below `min_delta`.

    Args:
        RewardsList: sequence of per-epoch rewards, oldest first.
        min_delta: minimum relative improvement (as a fraction) required to continue.
        patience: window length, in epochs, used for both means.

    Returns:
        True if training should stop (NaN reward or insufficient improvement),
        False otherwise.
    """
    # Nothing recorded yet: there is no basis for stopping.
    if len(RewardsList) == 0:
        return False

    # NaN is the only value that compares unequal to itself.
    if RewardsList[-1] != RewardsList[-1]:
        print('NAN error')
        return True

    # No early stopping until at least 3*patience epochs are available
    if len(RewardsList)//patience < 3 :
        return False

    newest_first = RewardsList[::-1]
    mean_previous = np.mean(newest_first[2*patience:3*patience]) # third-last window
    mean_recent = np.mean(newest_first[:patience]) # last window

    delta = mean_recent - mean_previous # absolute change
    # Normalise by the magnitude of the baseline so the sign of the relative
    # change always matches the sign of the improvement, even for negative rewards.
    baseline = abs(mean_previous)
    if baseline == 0:
        # Relative change is undefined; keep only its direction.
        percent_delta = 0.0 if delta == 0 else float(np.sign(delta)) * float('inf')
    else:
        percent_delta = delta / baseline # relative change

    if percent_delta < min_delta :
        print(f"*CB_ES* Percent change in reward value: {percent_delta*1e2:.4f}")
        return True
    return False

In [None]:
train_avg_rewards = []
val_avg_rewards = []
MAX_EPOCHS = 150
n_substocks = 10      # stocks drawn per training batch and per validation sample
n_val_samples = 20    # sample-count argument passed to sampled_agent_reward each epoch
for epoch in range(MAX_EPOCHS):
    training_rewards = []
    for batch in batched_training_data:
        # Draw a fresh random subset of stocks for this batch and train on it.
        # (The subset used to be computed but the environment was built from
        # the full batch, so the sub-sampling never took effect.)
        subsample = random.sample(range(n_stocks), n_substocks)
        sub_batch = batch[subsample,:,:]

        env = TradingEnv(sub_batch,train_noise=0.5)
        variables = agent.model.trainable_variables

        # Only the forward pass needs to be recorded on the tape.
        with tf.GradientTape() as tape:
            tape.watch(variables)
            loss = agent_loss(env,agent,dsct = 0.999)

        # Differentiate and update outside the tape context.
        grad = tape.gradient(loss,variables)
        # Rescale each gradient by its mean magnitude. divide_no_nan yields 0
        # rather than NaN for an all-zero gradient; missing gradients are skipped.
        grads_and_vars = [
            (tf.math.divide_no_nan(g, tf.reduce_mean(tf.abs(g))), v)
            for g, v in zip(grad, variables)
            if g is not None
        ]
        agent.opt.apply_gradients(grads_and_vars)

        # Negated loss per traded tick, scaled to a trading year
        training_rewards.append(-TRADING_DAYS_PER_YEAR * loss/(env._end_tick-env._start_tick))

    train_avg_rewards.append(np.mean(training_rewards))

    # Score the current agent on the validation data
    sar = sampled_agent_reward(TradingEnv,validation_data,[agent],n_substocks,n_val_samples)
    val_avg_rewards.append(np.mean(sar[agent.name]))

    if Callback_EarlyStopping(val_avg_rewards):
        break


In the above code, note that a random subset of stocks is selected during each epoch for training and validation. The purpose of this is to temper the possibility of overfitting to a single overperforming or underperforming stock. However, as a result, the measured performance over epochs is noisy. 

Due to the noise we will plot a rolling average of the training and validation rewards.

In [None]:
# Gather the per-epoch reward histories into one frame
df = pd.DataFrame({
    'training rewards': train_avg_rewards,
    'validation rewards': val_avg_rewards,
})

# Smooth with a 10-epoch rolling mean (shorter windows allowed at the start)
smoothed = df.rolling(window=10, min_periods=1).mean()

sns.lineplot(data=smoothed)
plt.legend(loc='upper right', bbox_to_anchor=(1.5, 1))
plt.title("Training and validation returns per epoch")
plt.show()

In [None]:
# Sampling settings handed to sampled_agent_reward
# (presumably the number of stocks per sample and the number of samples -- confirm against agents.py)
SAMPLES = 100
STOCKS = 10

sar = sampled_agent_reward(TradingEnv,validation_data,[agent],STOCKS,SAMPLES)
plt.hist(sar[agent.name])
plt.title("Sample agent returns after training.")
plt.show()

# Normal-approximation 95% confidence interval for the mean return
m = np.mean(sar[agent.name])
s = np.std(sar[agent.name])
half_width = 1.96 * (s / np.sqrt(SAMPLES))
L = m - half_width
H = m + half_width

print(f"Mean return: {m:.4f}.")
# Report the standard deviation `s` (the mean `m` was printed here by mistake)
print(f"Standard deviation: {s:.4f}.")
print(f"95% confidence interval: [{L:.4f},{H:.4f}].")

In [None]:
# Simulate trained agent to view behavior after training
loss = agent_loss(validation_env,agent)
trained_portfolio_vals = validation_env.portfolio_value_hist

plt.figure(figsize=(8,8))
plt.plot(validation_dates[start_idx:], trained_portfolio_vals)
plt.xticks(rotation=45)
# `epoch` is the 0-based index of the last epoch that ran, so the number of
# completed training epochs is epoch + 1.
plt.title(f'Agent performance on validation set after {epoch + 1} training epochs.')
plt.show()

# Summarise the portfolio value history recorded by the environment
print('Initial value of portfolio: ',trained_portfolio_vals[0].numpy())
print('Final value of portfolio: ',trained_portfolio_vals[-1].numpy())
print('Maximum value of portfolio: ',max(trained_portfolio_vals).numpy())
print('Minimum value of portfolio: ',min(trained_portfolio_vals).numpy())

In [None]:
# Persist the trained agent to disk with dill
with open('dpm_agent.dill', mode='wb') as agent_handle:
    dill.dump(agent, agent_handle)