In [1]:
import torch
import os
import json
from cust_transf import DecisionTransformer

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Folder and file names of the saved weights and of the JSON file holding
# the hyper-parameters the model is constructed with.
directory = 'model'
model_name = 'AAPL_model.pt'
model_params = 'AAPL_model_params.json'

# load the model parameters
with open(os.path.join(directory, model_params)) as json_file:
    params = json.load(json_file)

# get the parameters (the constructor below takes them positionally, in this order)
state_dim = params['state_dim']
act_dim = params['act_dim']
n_blocks = params['n_blocks']
h_dim = params['h_dim']
context_len = params['context_len']
n_heads = params['n_heads']
drop_p = params['drop_p']

model = DecisionTransformer(state_dim, act_dim, n_blocks, h_dim, context_len, n_heads, drop_p).to(device)

# load the model in the directory
# map_location remaps the stored tensors onto `device`, so a checkpoint that was
# saved on a GPU can still be loaded when only the CPU is available.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(torch.load(os.path.join(directory, model_name), map_location=device))



  from .autonotebook import tqdm as notebook_tqdm


<All keys matched successfully>

In [2]:
import numpy as np
# Evaluate the model by rolling it out in an OpenAI Gym-style environment.

# the model has four inputs: norm_state, rtg, timestep, actions and three outputs: return_preds, state_preds, act_preds
# norm_state is the normalized state of the environment which is a tensor of shape (batch_size, seq_len, state_dim)
# rtg is the return to go which is a tensor of shape (batch_size, seq_len)
# timestep is the timestep of the environment which is a tensor of shape (batch_size, seq_len)
# actions is the actions taken by the agent which is a tensor of shape (batch_size, seq_len, act_dim)
# return_preds is the predicted return of the environment which is a tensor of shape (batch_size, seq_len)
# state_preds is the predicted state of the environment which is a tensor of shape (batch_size, seq_len, state_dim)

# The custom environment's step() takes one input, `actions` (a numpy.ndarray of shape (2,)), and returns four outputs: obs, reward, done, info.
# obs and reward are numpy.ndarray, done is a bool, and info is a dict.

def evaluate_on_env(model, device, context_len, env, rtg_target, rtg_scale, num_eval_ep=1, max_test_ep_len=1000, state_mean=None, state_std=None, render_mode='None'):
    """Roll the model out in ``env`` for ``num_eval_ep`` episodes and summarise the outcome.

    At every step the current state (read via ``env.render(mode=None)``) is
    normalised, the running return-to-go is updated, and the model is queried
    with at most the last ``context_len`` steps to obtain the next action.

    Args:
        model: object exposing ``eval()`` and ``forward(states, rtg, timestep, actions)``;
            the third element of the tuple returned by ``forward`` is used as the action predictions.
        device: torch device on which all tensors are created.
        context_len: maximum number of most recent steps fed to the model.
        env: environment exposing ``observation_space``, ``action_space``,
            ``reset()``, ``step(action)`` and ``render(mode=...)``.
        rtg_target: target return; divided by ``rtg_scale`` before use.
        rtg_scale: divisor applied to the target return and to every reward.
        num_eval_ep: number of episodes to run (must be >= 1).
        max_test_ep_len: maximum number of steps per episode.
        state_mean: optional per-dimension mean for state normalisation (defaults to zeros).
        state_std: optional per-dimension std for state normalisation (defaults to ones).
        render_mode: when equal to ``'Plot'``, ``env.render(mode='plot')`` is called on
            every step of one randomly chosen episode and the figures are collected.

    Returns:
        dict with ``'eval/statistics/end_balance'``, ``'eval/statistics/end_networth'`` and
        ``'eval/statistics/episode'`` (all from the last episode), ``'max_end_balance'``,
        ``'max_end_networth'`` (maxima over all episodes), ``'eval/avg_reward'``,
        ``'eval/avg_steps'``, and ``'frames'`` (only when ``render_mode == 'Plot'``).

    Raises:
        ValueError: if ``num_eval_ep`` is smaller than 1.
    """
    # Fail early with a clear message instead of an opaque error from
    # np.random.randint(0, 0) further down.
    if num_eval_ep < 1:
        raise ValueError('num_eval_ep must be at least 1, got {}'.format(num_eval_ep))

    eval_batch_size = 1 # required for forward pass

    results = {}
    total_reward = 0
    total_steps = 0

    state_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    # as_tensor accepts lists, numpy arrays and tensors alike (torch.tensor warns
    # when it is handed an existing tensor).
    if state_mean is None:
        state_mean = torch.zeros(state_dim).to(device)
    else:
        state_mean = torch.as_tensor(state_mean, device=device)

    if state_std is None:
        state_std = torch.ones(state_dim).to(device)
    else:
        state_std = torch.as_tensor(state_std, device=device)

    # same as timesteps used for training the transformer
    timestep = torch.arange(start=0, end=max_test_ep_len, step=1)
    timestep = timestep.repeat(eval_batch_size, 1).to(device)
    episode_stats = []
    frames = {}

    # pick a random episode (the one whose frames are collected in 'Plot' mode)
    rand_ep = np.random.randint(0, num_eval_ep)

    # evaluate
    model.eval()
    with torch.no_grad():
        for i in range(num_eval_ep):

            # zeros place holders
            actions = torch.zeros((eval_batch_size, max_test_ep_len, act_dim), dtype=torch.float32, device=device)
            states = torch.zeros((eval_batch_size, max_test_ep_len, state_dim), dtype=torch.float32, device=device)
            rtg = torch.zeros((eval_batch_size, max_test_ep_len, 1), dtype=torch.float32, device=device)

            # initialize environment
            env.reset()
            running_state = env.render(mode=None)
            running_reward = 0
            running_rtg = rtg_target/rtg_scale
            done = False
            for t in range(max_test_ep_len):
                total_steps += 1

                # add state in placeholder and normalize
                states[0, t] = torch.tensor(running_state).to(device)
                states[0, t] = (states[0, t] - state_mean)/state_std

                # calculate running rtg and add to placeholder
                running_rtg = running_rtg - (running_reward/rtg_scale)
                rtg[0, t] = running_rtg

                # feed the model the last `context_len` steps (or everything so far
                # while the episode is still shorter than the context)
                window = slice(max(0, t - context_len + 1), t + 1)
                _, _, act_preds = model.forward(states[:, window], rtg[:, window], timestep[:, window], actions[:, window])
                if t < context_len:
                    act = act_preds[0, t].detach()
                else:
                    act = act_preds[0, -1].detach()

                # step in environment using action
                _, running_reward, done, _ = env.step(act.cpu().numpy())
                running_state = env.render(mode=None)

                # add action in placeholder
                actions[0, t] = act
                total_reward += running_reward
                if render_mode == 'Plot' and i == rand_ep:
                    fig, step = env.render(mode='plot')
                    frames[step] = fig
                if done:
                    print('Episode finished after {} timesteps'.format(t+1))
                    break
            if not done:
                # the loop ran to completion, i.e. exactly max_test_ep_len steps were taken
                print('Episode finished after {} timesteps'.format(max_test_ep_len))

            # the balance and net worth are read from fixed positions of the final state
            end_state = env.render(mode=None)
            episode_stats.append({
                'end_balance': end_state[-6],
                'end_networth': end_state[-5],
                'episode': i,
            })

    # the 'eval/statistics/*' entries describe the last episode only
    for key, value in episode_stats[-1].items():
        results['eval/statistics/' + key] = value

    # find the max end_balance and end_networth across all episodes
    results['max_end_balance'] = max(ep['end_balance'] for ep in episode_stats)
    results['max_end_networth'] = max(ep['end_networth'] for ep in episode_stats)
    results['eval/avg_reward'] = total_reward/num_eval_ep
    results['eval/avg_steps'] = total_steps/num_eval_ep
    if render_mode == 'Plot':
        results['frames'] = frames

    return results
                


In [3]:
# import helper function for getting stock data
from getstock import get_stock_data_yf_between_with_indicators
# import time library
from datetime import datetime, timedelta
# get stock data with technical indicators
import json
import os

# Symbol passed to the data helper.
stock_name = 'AAPL'

# Length of the requested window, in days.
period = 365

# The window starts here ('YYYY-MM-DD') and ends `period` days later.
start_date = '2022-01-01'
start_date_obj = datetime.strptime(start_date, '%Y-%m-%d')
end_date_obj = start_date_obj + timedelta(days=period)
end_date = format(end_date_obj, '%Y-%m-%d')

# Sampling interval and the indicator columns requested from the helper.
interval = '1d'
indicators = ['Volume', 'volume_cmf', 'trend_macd', 'momentum_rsi']

stockdata = get_stock_data_yf_between_with_indicators(
    stock_name,
    start_date,
    end_date,
    interval,
    indicators,
)

# Build the test environment on top of the downloaded data.
from TradingEnvClass import StockTradingEnv

init_balance = 10000
max_step = len(stockdata) - 1

env = StockTradingEnv(stockdata, init_balance, max_step, random=True)

[*********************100%***********************]  1 of 1 completed


  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)
  logger.warn(


In [4]:
# Return-to-go settings: the target return, and the divisor that
# evaluate_on_env applies to the target and to each reward.
rtg_target = 50
rtg_scale = 0.75

# Run three evaluation episodes, collecting plot frames for one of them.
results = evaluate_on_env(
    model=model,
    device=device,
    context_len=context_len,
    env=env,
    rtg_target=rtg_target,
    rtg_scale=rtg_scale,
    num_eval_ep=3,
    max_test_ep_len=1000,
    render_mode='Plot',
)

Episode finished after 23 timesteps
77
[]
[]
[]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.df['Volume'] = dfvolume


ValueError: Boolean array expected for the condition, not float64

In [14]:
# Show the summary dict produced by evaluate_on_env.
# NOTE(review): this cell's execution count (14) is out of sequence with the cells
# before it, so the saved output may stem from a different evaluate_on_env call;
# re-run the notebook top to bottom to confirm.
print(results)

{'eval/statistics/end_balance': 24457.239860534668, 'eval/statistics/end_networth': 12403.509803771973, 'eval/statistics/episode': 0, 'max_end_balance': 24457.239860534668, 'max_end_networth': 12403.509803771973, 'eval/avg_reward': -13609.314943154017, 'eval/avg_steps': 251.0}
