### Algorithmic Trading with Deep Reinforcement Learning and Brazilian Stocks 

In [1]:
# General Imports
import math
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import quantstats as qs
import seaborn as sns

# Ray Imports
import ray
from ray import tune
from ray.tune.registry import register_env
import ray.rllib.agents.ppo as ppo

# TensorTrade Imports
import tensortrade.env.default as default
from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.exchanges import Exchange,ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.instruments import Instrument
from tensortrade.oms.wallets import Portfolio

---

### Ambiente de Treino


Para utilizar o ambiente personalizado devemos escrever uma função (e.g. `create_training_env(config)`) para criar uma instância de um `TradingEnv` a partir de um dicionário de configurações.



In [2]:
# Notebook-wide switches read by the environment factories and the model config.
ticker: str = "PETR4"        # selects the `<ticker>_train.csv` / `<ticker>_test.csv` files
use_lstm: bool = True        # add the LSTM settings to the model config
use_sentiments: bool = True  # when False, the `sent_score_*` columns are dropped from the features

In [3]:
def create_training_env(config):
    """
    Creates a Training Trading Environment.

    Reads the training CSV of the notebook-level ``ticker`` and assembles a
    TensorTrade environment from it: a simulated "B3" exchange priced by the
    ``close`` column, a portfolio holding 100,000 BRL and no stock, one float
    feature stream per dataset column after the first, a Sharpe-based reward
    scheme and a managed-risk (stop loss / take profit) action scheme.

    The notebook-level flag ``use_sentiments`` controls whether the
    ``sent_score_news`` / ``sent_score_twitter`` columns are kept as features.

    Parameters
    ----------
    config : dict-like
        Must provide ``"window_size"``, ``"reward_window_size"`` and
        ``"max_allowed_loss"``.

    Returns
    -------
    The environment object returned by ``default.create``.
    """

    # 1. Data and TensorTrade Objects (Exchange, Instruments and Portfolio)
    dataset = pd.read_csv(f'../../data/contextual_data_market_and_text/daily/train/{ticker}_train.csv')

    if not use_sentiments:
        # `errors='ignore'` skips whichever sentiment column is absent while still
        # dropping the one that exists, and does not hide unrelated failures.
        dataset = dataset.drop(columns=['sent_score_news', 'sent_score_twitter'], errors='ignore')

    # Price Series
    price = Stream.source(list(dataset["close"]), dtype="float").rename("BRL-ASSET")

    # Exchange
    b3_commission = 0.0035
    b3_options = ExchangeOptions(commission=b3_commission, is_live=False)
    b3_exchange = Exchange(name="B3",
                           service=execute_order,
                           options=b3_options)(price)

    # Instruments
    BRL = Instrument(symbol="BRL", precision=2, name="Brazilian Currency (Real)")
    ASSET = Instrument(symbol="ASSET", precision=2, name="Stock")

    # Portfolio
    cash = Wallet(exchange=b3_exchange, balance=100000 * BRL)  # Money
    asset = Wallet(exchange=b3_exchange, balance=0 * ASSET)  # Stock/Asset

    portfolio = Portfolio(base_instrument=BRL, wallets=[cash, asset])

    # One float stream per column, named after the column. The first column is
    # skipped (presumably the date column — TODO confirm against the CSV).
    features = [
        Stream.source(list(dataset[column]), dtype="float").rename(column)
        for column in dataset.columns[1:]
    ]
    feed = DataFeed(features)
    feed.compile()

    # 2. Rewards

    # Risk Adjusted Returns: A reward scheme that rewards the agent for increasing
    # its net worth, while penalizing more volatile strategies.
    reward_scheme = default.rewards.RiskAdjustedReturns(
        return_algorithm='sharpe',
        risk_free_rate=0.000429,  # ~11% per year (average Selic rate in 2022, through July)
        window_size=config["reward_window_size"]
    )

    # Alternative — Simple Profit: rewards incremental increases in net worth.
    # reward_scheme = default.rewards.SimpleProfit(
    #     window_size=config["reward_window_size"]  # look-back window for computing the reward
    # )

    # 3. Actions

    # Managed Risk Orders: A discrete action scheme that determines actions
    # based on managing risk, through setting a follow-up stop loss and take profit
    # on every order.
    action_scheme = default.actions.ManagedRiskOrders(
        stop=[0.075],  # A list of possible stop loss percentages for each order.
        take=[0.10],  # A list of possible take profit percentages for each order
        min_order_pct=1  # The minimum value when placing an order, calculated in percent over net_worth.
    )

    # 4. Visualization

    renderer_feed = DataFeed([
        Stream.source(list(dataset['date'])).rename("date"),
        Stream.source(list(dataset["open"]), dtype="float").rename("open"),
        Stream.source(list(dataset["high"]), dtype="float").rename("high"),
        Stream.source(list(dataset["low"]), dtype="float").rename("low"),
        Stream.source(list(dataset["close"]), dtype="float").rename("close"),
        Stream.source(list(dataset["volume"]), dtype="float").rename("volume")
    ])

    # 5. Environment

    # Creates the default `TradingEnv` of the project to be used in training
    # RL agents.
    environment = default.create(
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        feed=feed,
        window_size=config["window_size"],
        renderer_feed=renderer_feed,
        renderer=default.renderers.PlotlyTradingChart(display=True, auto_open_html=False, save_format="png"),
        max_allowed_loss=config["max_allowed_loss"]
    )

    return environment

# Shut down any running Ray session, then start a fresh local one with
# explicit CPU/GPU and memory limits (sizes are in bytes; `n << 20` is n MiB).
ray.shutdown()
ray.init(
    num_cpus=2,
    num_gpus=0,
    _memory=2000 << 20,
    object_store_memory=200 << 20,
    _driver_object_store_memory=100 << 20,
)

# Register the environment factory under the name referenced by the trainer config.
register_env("TrainingTradingEnv", create_training_env)

2022-08-03 22:40:51,336	INFO services.py:1338 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


---

Com o ambiente de negociação registrado, podemos treinar o agente utilizando o algoritmo PPO (Proximal Policy Optimization) implementado pela RLlib.

In [4]:
window_size: int = 10

env_config_training = {
    # Number of past samples (days or hours) in each observation
    "window_size": window_size,
    # Window used by the reward scheme: half the observation window, rounded up
    "reward_window_size": (window_size + 1) // 2,
    # If it goes past 50% loss during the iteration, we don't want to waste time on a "loser".
    "max_allowed_loss": 0.5,
}

# Hyperparameters. For a grid search, replace a constant with the matching line:
# FC_SIZE = tune.grid_search([[256, 256], [128, 64, 32]])
# LSTM_SIZE = tune.grid_search([64, 256])
# LEARNING_RATE = tune.grid_search([0.001, 0.00001])
# GAMMA = tune.grid_search([0.25, 0.50, 0.75])
FC_SIZE = [512, 256]
LSTM_SIZE = 256
LEARNING_RATE = 1e-5
GAMMA = 0.5

# Model settings: fully connected layers, plus the LSTM options when `use_lstm` is set
nn_model_config = {"fcnet_hiddens": FC_SIZE, "fcnet_activation": "relu"}
if use_lstm:
    nn_model_config["use_lstm"] = True
    nn_model_config["lstm_cell_size"] = LSTM_SIZE

In [5]:
# RLLib Models Reference:
# https://docs.ray.io/en/latest/rllib/rllib-models.html?highlight=MODEL_DEFAULTS#default-model-config-settings

PPO_config = {
    # --- Environment ---
    "env": "TrainingTradingEnv",
    "env_config": env_config_training,

    # --- Execution ---
    "log_level": "WARNING",
    "framework": "tf2",
    "eager_tracing": False,
    "ignore_worker_failures": True,
    "num_workers": 1,
    "num_envs_per_worker": 1,
    "num_gpus": 0,

    # --- Optimisation ---
    "clip_rewards": True,
    # NOTE(review): both `lr` and `lr_schedule` are set; the schedule starts at 1e-1
    # while `lr` is 1e-5 — confirm which of the two RLlib actually applies.
    "lr": LEARNING_RATE,
    # [timestep, learning rate] pairs
    "lr_schedule": [
        [0, 1e-1],
        [100, 1e-2],
        [1_000, 1e-3],
        [10_000, 1e-4],
        [100_000, 1e-5],
        [1_000_000, 1e-6],
        [10_000_000, 1e-7],
    ],
    "gamma": GAMMA,

    # --- Observations and model ---
    "observation_filter": "MeanStdFilter",
    "model": nn_model_config,

    # --- PPO-specific coefficients ---
    "lambda": 0.72,
    "vf_loss_coeff": 0.5,
    "entropy_coeff": 0.01,
}

In [6]:
# Train PPO with `PPO_config` for 5 training iterations, checkpointing after every
# iteration and once more at the end. The returned `analysis` object is used
# further down to inspect results and locate checkpoints.
analysis = tune.run(
    "PPO",
    name="MyExperiment",
    config=PPO_config,
    stop={"training_iteration": 5},
    metric="episode_reward_mean",
    mode="max",
    checkpoint_freq=1,
    checkpoint_at_end=True,
)

2022-08-03 22:41:01,165	ERROR syncer.py:111 -- Log sync requires rsync to be installed.
 pid=40496)[0m 2022-08-03 22:41:06,579	INFO trainer.py:712 -- Executing eagerly (framework='tf2'), with eager_tracing=False. For production workloads, make sure to set `eager_tracing=True` in order to match the speed of tf-static-graph (framework='tf'). For debugging purposes, `eager_tracing=False` is the best choice.


 pid=43856)[0m [<tensortrade.feed.core.base.IterableStream object at 0x00000192845F6490>, <tensortrade.feed.core.base.IterableStream object at 0x00000192845F65E0>, <tensortrade.feed.core.base.IterableStream object at 0x00000192845F6730>, <tensortrade.feed.core.base.IterableStream object at 0x00000192845F6790>, <tensortrade.feed.core.base.IterableStream object at 0x00000192845F68E0>, <tensortrade.feed.core.base.IterableStream object at 0x00000192845F6A30>, <tensortrade.feed.core.base.IterableStream object at 0x00000192845F6B80>, <tensortrade.feed.core.base.IterableStream object at 0x00000192845F6CD0>, <tensortrade.feed.core.base.IterableStream object at 0x00000192845F6E20>, <tensortrade.feed.core.base.IterableStream object at 0x00000192845F6F70>, <tensortrade.feed.core.base.IterableStream object at 0x0000019284649100>, <tensortrade.feed.core.base.IterableStream object at 0x0000019284649250>, <tensortrade.feed.core.base.IterableStream object at 0x00000192846493A0>, <tensortrade.feed.cor

Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


 pid=40496)[0m 2022-08-03 22:41:17,699	INFO trainable.py:124 -- Trainable.setup took 11.823 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


 pid=40496)[0m 2022-08-03 22:41:23,846	ERROR trainer.py:872 -- Error in train call, attempting to recover
 pid=40496)[0m Traceback (most recent call last):
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 867, in step
 pid=40496)[0m     result = self.step_attempt()
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\tracing\tracing_helper.py", line 451, in _resume_span
 pid=40496)[0m     return method(self, *_args, **_kwargs)
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 920, in step_attempt
 pid=40496)[0m     step_results = next(self.train_exec_impl)
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 756, in __next__
 pid=40496)[0m     return next(self.built_iterator)
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 783, in appl

Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


 pid=40496)[0m 2022-08-03 22:41:31,530	ERROR worker.py:84 -- Unhandled error (suppress with RAY_IGNORE_UNHANDLED_ERRORS=1): [36mray::RolloutWorker.set_weights()[39m (pid=43856, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000019281D435B0>)
 pid=40496)[0m   File "python\ray\_raylet.pyx", line 585, in ray._raylet.execute_task
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\_private\memory_monitor.py", line 156, in raise_if_low_memory
 pid=40496)[0m     raise RayOutOfMemoryError(
 pid=40496)[0m ray._private.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node DESKTOP-625611C is used (15.6 / 15.86 GB). The top 10 memory consumers are:
 pid=40496)[0m 
 pid=40496)[0m PID	MEM	COMMAND
 pid=40496)[0m 40496	3.72GiB	c:\Users\mathe\anaconda3\envs\tf\python.exe c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\w
 pid=40496)[0m 43856	3.39GiB	c:\Users\mathe\anaconda3\envs\tf\python.exe c:\Users\math

Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496




Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


 pid=40496)[0m 2022-08-03 22:41:40,179	ERROR worker.py:84 -- Unhandled error (suppress with RAY_IGNORE_UNHANDLED_ERRORS=1): [36mray::RolloutWorker.set_weights()[39m (pid=43856, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000019281D435B0>)
 pid=40496)[0m   File "python\ray\_raylet.pyx", line 585, in ray._raylet.execute_task
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\_private\memory_monitor.py", line 156, in raise_if_low_memory
 pid=40496)[0m     raise RayOutOfMemoryError(
 pid=40496)[0m ray._private.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node DESKTOP-625611C is used (15.5 / 15.86 GB). The top 10 memory consumers are:
 pid=40496)[0m 
 pid=40496)[0m PID	MEM	COMMAND
 pid=40496)[0m 40496	3.66GiB	c:\Users\mathe\anaconda3\envs\tf\python.exe c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\w
 pid=40496)[0m 43856	3.37GiB	c:\Users\mathe\anaconda3\envs\tf\python.exe c:\Users\math

Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


 pid=40496)[0m 2022-08-03 22:41:46,723	ERROR trainer.py:872 -- Error in train call, attempting to recover
 pid=40496)[0m Traceback (most recent call last):
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 867, in step
 pid=40496)[0m     result = self.step_attempt()
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\tracing\tracing_helper.py", line 451, in _resume_span
 pid=40496)[0m     return method(self, *_args, **_kwargs)
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 920, in step_attempt
 pid=40496)[0m     step_results = next(self.train_exec_impl)
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 756, in __next__
 pid=40496)[0m     return next(self.built_iterator)
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 783, in appl

Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


 pid=40496)[0m 2022-08-03 22:41:52,601	ERROR worker.py:84 -- Unhandled error (suppress with RAY_IGNORE_UNHANDLED_ERRORS=1): [36mray::RolloutWorker.set_weights()[39m (pid=43856, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000019281D435B0>)
 pid=40496)[0m   File "python\ray\_raylet.pyx", line 585, in ray._raylet.execute_task
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\_private\memory_monitor.py", line 156, in raise_if_low_memory
 pid=40496)[0m     raise RayOutOfMemoryError(
 pid=40496)[0m ray._private.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node DESKTOP-625611C is used (15.67 / 15.86 GB). The top 10 memory consumers are:
 pid=40496)[0m 
 pid=40496)[0m PID	MEM	COMMAND
 pid=40496)[0m 40496	3.67GiB	c:\Users\mathe\anaconda3\envs\tf\python.exe c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\w
 pid=40496)[0m 43856	3.35GiB	c:\Users\mathe\anaconda3\envs\tf\python.exe c:\Users\mat

Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


 pid=40496)[0m 2022-08-03 22:41:59,613	ERROR trainer.py:872 -- Error in train call, attempting to recover
 pid=40496)[0m Traceback (most recent call last):
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 867, in step
 pid=40496)[0m     result = self.step_attempt()
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\tracing\tracing_helper.py", line 451, in _resume_span
 pid=40496)[0m     return method(self, *_args, **_kwargs)
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 920, in step_attempt
 pid=40496)[0m     step_results = next(self.train_exec_impl)
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 756, in __next__
 pid=40496)[0m     return next(self.built_iterator)
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 783, in appl

Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,RUNNING,127.0.0.1:40496


2022-08-03 22:42:06,204	ERROR trial_runner.py:958 -- Trial PPO_TrainingTradingEnv_7ea96_00000: Error processing event.
Traceback (most recent call last):
  File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\tune\trial_runner.py", line 924, in _process_trial
    results = self.trial_executor.fetch_result(trial)
  File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\tune\ray_trial_executor.py", line 787, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\worker.py", line 1713, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::PPO.train()[39m (pid=40496, ip=127.0.0.1, repr=PPO)
  File "python\ray\_raylet.pyx", line 625, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line

Result for PPO_TrainingTradingEnv_7ea96_00000:
  date: 2022-08-03_22-41-17
  experiment_id: 927e64fe972d4e70bd078fecd0c84613
  hostname: DESKTOP-625611C
  node_ip: 127.0.0.1
  pid: 40496
  timestamp: 1659577277
  trial_id: 7ea96_00000
  


Trial name,status,loc
PPO_TrainingTradingEnv_7ea96_00000,ERROR,127.0.0.1:40496

Trial name,# failures,error file
PPO_TrainingTradingEnv_7ea96_00000,1,C:\Users\mathe\ray_results\MyExperiment\PPO_TrainingTradingEnv_7ea96_00000_0_2022-08-03_22-41-01\error.txt


 pid=40496)[0m 2022-08-03 22:42:07,237	ERROR worker.py:84 -- Unhandled error (suppress with RAY_IGNORE_UNHANDLED_ERRORS=1): [36mray::RolloutWorker.set_weights()[39m (pid=43856, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000019281D435B0>)
 pid=40496)[0m   File "python\ray\_raylet.pyx", line 585, in ray._raylet.execute_task
 pid=40496)[0m   File "c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\_private\memory_monitor.py", line 156, in raise_if_low_memory
 pid=40496)[0m     raise RayOutOfMemoryError(
 pid=40496)[0m ray._private.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node DESKTOP-625611C is used (15.63 / 15.86 GB). The top 10 memory consumers are:
 pid=40496)[0m 
 pid=40496)[0m PID	MEM	COMMAND
 pid=40496)[0m 43856	3.62GiB	c:\Users\mathe\anaconda3\envs\tf\python.exe c:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\w
 pid=40496)[0m 40496	3.53GiB	c:\Users\mathe\anaconda3\envs\tf\python.exe c:\Users\mat

TuneError: ('Trials did not complete', [PPO_TrainingTradingEnv_7ea96_00000])

In [None]:
# One DataFrame of training results per trial
dfs = analysis.trial_dataframes

# Draw every trial on one shared Axes and set the title once, afterwards.
# `set_title` returns a Text object, so chaining it onto `.plot(...)` and storing
# the result in `ax` would hand that Text to `plot(ax=...)` on the second trial.
fig, ax = plt.subplots()
for trial_results in dfs.values():
    trial_results["episode_reward_mean"].plot(ax=ax, legend=False)
ax.set_title('Episode Reward Mean (Per Iteration)')
ax.set_xlabel('Iteration')
ax.set_ylabel('Episode reward mean');

---

After training is complete, we want access to the agent's policy. We can get it by restoring the agent from a checkpoint with the following code.

In [None]:
# Get checkpoint
checkpoint_metric = "episode_reward_mean"

# (path, metric value) pairs for every checkpoint of the best trial
checkpoints = analysis.get_trial_checkpoints_paths(
    trial=analysis.get_best_trial(checkpoint_metric, mode="max"),
    metric=checkpoint_metric
)
if not checkpoints:
    raise RuntimeError("No checkpoint found for the best trial; did training complete?")

# Pick the checkpoint with the highest metric rather than whichever is listed first.
# Entries without a usable metric (None / NaN, e.g. before any episode finished)
# are ignored; if none is usable, fall back to the first entry.
scored_checkpoints = [(path, score) for path, score in checkpoints if pd.notna(score)]
if scored_checkpoints:
    checkpoint_path = max(scored_checkpoints, key=lambda entry: entry[1])[0]
else:
    checkpoint_path = checkpoints[0][0]
print(checkpoint_path)

# Configuration of the best trial (displayed as the cell output)
config = analysis.get_best_config(checkpoint_metric, mode='max')
config


In [None]:
# Build a PPO trainer with the training configuration, then restore its state
# from the selected checkpoint.
agent = ppo.PPOTrainer(config=PPO_config, env="TrainingTradingEnv")
agent.restore(checkpoint_path)

In [None]:
# Display the model object of the agent's policy, to see how the network is
# wrapped by the LSTM.
agent.get_policy().model

---

In [None]:
# Restore agent
agent.restore(checkpoint_path)

# Instantiate the environment
env = create_training_env(env_config_training)

# Run until episode ends
episode_reward = 0
done = False
obs = env.reset()

# Take the initial recurrent state from the policy instead of reading
# PPO_config['model']['lstm_cell_size'], which does not exist when
# `use_lstm` is False.
hidden_state = agent.get_policy().get_initial_state()

while not done:
    # `full_fetch=True` makes the call return (action, state, extra info)
    action, hidden_state, _ = agent.compute_single_action(obs,
                                                          state=hidden_state,
                                                          full_fetch=True)
    obs, reward, done, info = env.step(action)
    episode_reward += reward

env.render()

---

### Validation Set

In [None]:
def create_eval_env(config):
    """
    Creates the Evaluation Environment.

    Reads the test CSV of the notebook-level ``ticker`` and assembles a
    TensorTrade environment from it: a simulated "B3" exchange priced by the
    ``close`` column, a portfolio holding 100,000 BRL and no stock, one float
    feature stream per dataset column after the first, a Sharpe-based reward
    scheme and a managed-risk (stop loss / take profit) action scheme.

    The notebook-level flag ``use_sentiments`` controls whether the
    ``sent_score_news`` / ``sent_score_twitter`` columns are kept as features.

    Parameters
    ----------
    config : dict-like
        Must provide ``"window_size"``, ``"reward_window_size"`` and
        ``"max_allowed_loss"``.

    Returns
    -------
    tuple
        ``(environment, portfolio)``: the environment returned by
        ``default.create`` and the ``Portfolio`` it trades with.
    """

    dataset = pd.read_csv(f'../../data/contextual_data_market_and_text/daily/test/{ticker}_test.csv')

    if not use_sentiments:
        # `errors='ignore'` skips whichever sentiment column is absent while still
        # dropping the one that exists, and does not hide unrelated failures.
        dataset = dataset.drop(columns=['sent_score_news', 'sent_score_twitter'], errors='ignore')

    # Price Series
    price = Stream.source(list(dataset["close"]), dtype="float").rename("BRL-ASSET")

    b3_commission = 0.0035
    b3_options = ExchangeOptions(commission=b3_commission)
    b3_exchange = Exchange("B3", service=execute_order, options=b3_options)(price)

    # Instruments
    initial_amount = 100000
    BRL = Instrument("BRL", 2, "Brazilian Currency (Real)")
    ASSET = Instrument("ASSET", 2, "Stock")

    # Portfolio
    cash = Wallet(b3_exchange, initial_amount * BRL)  # Money
    asset = Wallet(b3_exchange, 0 * ASSET)  # Stocks/Assets

    portfolio = Portfolio(BRL, [cash, asset])

    # One float stream per column, named after the column. The first column is
    # skipped (presumably the date column — TODO confirm against the CSV).
    features = [
        Stream.source(list(dataset[column]), dtype="float").rename(column)
        for column in dataset.columns[1:]
    ]
    feed = DataFeed(features)
    feed.compile()

    # Rewards - Risk Adjusted Returns
    reward_scheme = default.rewards.RiskAdjustedReturns(
        return_algorithm='sharpe',
        risk_free_rate=0.000429,  # ~11% per year (average Selic rate in 2022, through July)
        window_size=config["reward_window_size"]
    )

    # Alternative — Simple Profit: rewards incremental increases in net worth.
    # reward_scheme = default.rewards.SimpleProfit(
    #     window_size=config['reward_window_size']  # look-back window for computing the reward
    # )

    # Actions: a discrete action scheme that determines actions based on managing risk.
    # NOTE(review): create_training_env uses stop=[0.075]; confirm that the wider
    # 15% stop loss is intentional for evaluation.
    action_scheme = default.actions.ManagedRiskOrders(
        stop=[0.15],
        take=[0.10],
        min_order_pct=1
    )

    # Visualization
    renderer_feed = DataFeed([
        Stream.source(list(dataset['date'])).rename("date"),
        Stream.source(list(dataset["open"]), dtype="float").rename("open"),
        Stream.source(list(dataset["high"]), dtype="float").rename("high"),
        Stream.source(list(dataset["low"]), dtype="float").rename("low"),
        Stream.source(list(dataset["close"]), dtype="float").rename("close"),
        Stream.source(list(dataset["volume"]), dtype="float").rename("volume")
    ])

    environment = default.create(
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        feed=feed,
        renderer_feed=renderer_feed,
        renderer=default.renderers.PlotlyTradingChart(display=True,
                                                      auto_open_html=False,
                                                      save_format="png"),
        window_size=config["window_size"],
        max_allowed_loss=config["max_allowed_loss"]
    )

    return environment, portfolio

In [None]:
env_config_evaluation = {
    # We want to look at the last N samples (days or hours)
    "window_size": window_size,
    # Window used by the reward scheme: half the observation window, rounded up
    "reward_window_size": int(math.ceil(window_size / 2)),
    # The maximum percentage of initial funds that is willing to
    # be lost before stopping the episode.
    "max_allowed_loss": 0.1,
}

render_test_env: bool = False
envs = list()
portfolios = list()
# Net worth series of each trial, keyed by column label ("Trial_<n>")
net_worth_by_trial = {}

for run in range(1, 101):
    # 1. Instantiate the evaluation environment
    env_test, portfolio = create_eval_env(env_config_evaluation)

    # 2. Run until episode ends
    done = False
    episode_reward = 0
    obs = env_test.reset()

    # 3. Take the initial recurrent state from the policy instead of reading
    #    PPO_config['model']['lstm_cell_size'], which does not exist when
    #    `use_lstm` is False.
    hidden_state = agent.get_policy().get_initial_state()

    while not done:
        action, hidden_state, _ = agent.compute_single_action(obs, state=hidden_state, full_fetch=True)
        obs, reward, done, info = env_test.step(action)
        episode_reward += reward

    # Final Performance Data
    df = pd.DataFrame(portfolio.performance)

    # Get Final Net Worth over Time (gaps forward-filled)
    net_worth_by_trial[f'Trial_{run}'] = df.loc["net_worth"].ffill()

    # Get Final Portfolio Data
    portfolios.append(portfolio)

    # Get Final Env Data
    envs.append(env_test)

    if render_test_env:
        env_test.render()

# Build the frame once, from all trials. Adding columns one by one to an empty
# DataFrame would align every later trial to the first trial's index and
# silently truncate trials that ran longer than the first one.
df_net_worths: pd.DataFrame = pd.DataFrame(net_worth_by_trial)

In [None]:
# Net worth of every evaluation trial over the episode, one line per trial
fig, ax = plt.subplots(figsize=(30, 20))
df_net_worths.plot(ax=ax)
# Dashed reference line at the initial amount of the portfolio (100,000 BRL)
ax.axhline(100000, color='black', linestyle='dashed')
ax.set_title('Net worth per evaluation trial (dashed line: initial amount)')
ax.set_xlabel('Step')
ax.set_ylabel('Net worth (BRL)')
plt.show();

In [None]:
# Distribution of the final net worth (last row) across all evaluation trials.
# The row is passed as a 1-D array; `.tail(1).values` is a 2-D (1, n_trials) array.
ax = sns.boxplot(x=df_net_worths.iloc[-1].values)
ax.set_xlabel('Final net worth (BRL)');

In [None]:
# Number of trials whose last-row net worth is above the initial 100,000
over_initial_amount = (df_net_worths.iloc[-1] > 100000).sum()
over_initial_amount

In [None]:
# Share of trials, in percent, counted in `over_initial_amount`
perc_trial_over_initial_amount = over_initial_amount / len(df_net_worths.columns) * 100
perc_trial_over_initial_amount

In [None]:
# Number <n> of the "Trial_<n>" column holding the highest value in the last row
best_trial = int(df_net_worths.iloc[-1].idxmax()[len('Trial_'):])
best_trial

In [None]:
# Show trades of the best_trial.
# Trial numbers start at 1 while the `envs` list is 0-based, hence the `- 1`.
envs[best_trial - 1].render()

In [None]:
# Ledger of the best trial's portfolio, converted to a DataFrame for display
ledger = portfolios[best_trial - 1].ledger.as_frame()
ledger

In [None]:
# Performance records of the best trial's portfolio (`best_trial` is 1-based, the list is 0-based)
df = pd.DataFrame(portfolios[best_trial - 1].performance)

In [None]:
# Plot every row of `df` as its own line (transposed so that rows become the plotted columns)
df.T.plot(figsize=(20,6))

In [None]:
# Plot the "B3:/BRL-ASSET" row (presumably the asset price on the B3 exchange — TODO confirm)
df.loc["B3:/BRL-ASSET"].plot(figsize=(20,6))

In [None]:
# Plot the "B3:/ASSET:/worth" row (presumably the worth of the ASSET wallet — TODO confirm)
df.loc["B3:/ASSET:/worth"].plot(figsize=(20,6))

---

### Net Worth Analysis (with Quantstats)

In [None]:
# Extend pandas functionality with quantstats metrics, etc.
qs.extend_pandas()

In [None]:
# Reload the test set to get the `date` column used as the net worth index
dataset = pd.read_csv(f'../../data/contextual_data_market_and_text/daily/test/{ticker}_test.csv')

# Net worth row of `df`, renamed to 'close' and indexed by the dataset dates.
# NOTE(review): assumes one performance record per dataset row — confirm the lengths match.
net_worth = df.loc["net_worth"].rename('close')
net_worth.index = pd.to_datetime(dataset['date'])

# Keep the last value of each calendar day, then index by plain `date` objects
net_worth = net_worth.resample('D').last()
net_worth.index = net_worth.index.date
net_worth = net_worth.rename_axis('date')

In [None]:
# Period-over-period returns of the net worth.
# The daily resampling applied to `net_worth` leaves NaN on calendar days without
# data. Drop those first: otherwise `pct_change` pads the gaps and reports
# artificial 0% returns for them, which distorts statistics such as the Sharpe ratio.
net_returns = net_worth.dropna().pct_change().dropna()
net_returns.index = pd.to_datetime(net_returns.index)
net_returns

In [None]:
# Show the Sharpe ratio of the evaluation returns.
# NOTE(review): no risk-free rate is passed here, while the reward scheme uses
# risk_free_rate=0.000429 — confirm this difference is intended.
qs.stats.sharpe(net_returns)

In [None]:
# Quantstats snapshot plot of the evaluation returns
qs.plots.snapshot(net_returns, title='Evaluation Set Performance')

In [None]:
# Optional (disabled): quantstats HTML report of the returns against "^BVSP"
# (presumably the Ibovespa index used as benchmark — TODO confirm).
# qs.reports.html(net_returns, "^BVSP")

---