In [1]:
import ray
import numpy as np
import pandas as pd
import yfinance as yf
import pandas_ta as ta
from datetime import datetime, timedelta

from ray import tune
from ray.tune.registry import register_env

import tensortrade.env.default as default

from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.exchanges import Exchange,ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.env.default.rewards import TensorTradeRewardScheme
from tensortrade.feed.core import Stream, DataFeed

from gym.spaces import Discrete
from tensortrade.env.default.actions import TensorTradeActionScheme
from tensortrade.env.generic import ActionScheme, TradingEnv
from tensortrade.core import Clock
from tensortrade.oms.instruments import ExchangePair, Instrument
from tensortrade.oms.wallets import Portfolio
from tensortrade.oms.orders import (
    Order,
    proportion_order,
    TradeSide,
    TradeType
)

import matplotlib.pyplot as plt

from tensortrade.env.generic import Renderer

import ray.rllib.agents.ppo as ppo

In [2]:
class BSH(TensorTradeActionScheme):
    """The ActionScheme interprets and applies the agent’s actions to the environment."""

    registered_name = "bsh"

    def __init__(self, cash: 'Wallet', asset: 'Wallet'):
        super().__init__()
        self.cash = cash
        self.asset = asset

        self.listeners = []
        self.action = 0

    @property
    def action_space(self):
        return Discrete(2)

    def attach(self, listener):
        self.listeners += [listener]
        return self

    def get_orders(self, action: int, portfolio: 'Portfolio'):
        order = None

        if abs(action - self.action) > 0:
            src = self.cash if self.action == 0 else self.asset
            tgt = self.asset if self.action == 0 else self.cash
            order = proportion_order(portfolio, 
                                     src, 
                                     tgt, 
                                     1.0)
            self.action = action

        for listener in self.listeners:
            listener.on_action(action)

        return [order]

    def reset(self):
        super().reset()
        self.action = 0

In [3]:
class PBR(TensorTradeRewardScheme):

    """ Position-based reward scheme (PBR).
    
    The RewardScheme computes the reward for 
    each time step based on the agent’s performance.
    """
    
    registered_name = "pbr"

    def __init__(self, price: 'Stream'):
        super().__init__()
        self.position = -1

        r = Stream.sensor(price, lambda p: p.value, dtype="float").diff()
        position = Stream.sensor(self, lambda rs: rs.position, dtype="float")

        reward = (r * position).fillna(0).rename("reward")

        self.feed = DataFeed([reward])
        self.feed.compile()

    def on_action(self, action: int):
        self.position = -1 if action == 0 else 1

    def get_reward(self, portfolio: 'Portfolio'):
        return self.feed.next()["reward"]

    def reset(self):
        self.position = -1
        self.feed.reset()

In [4]:
class PositionChangeChart(Renderer):
    """The Renderer renders a view of the environment and interactions."""
    
    def __init__(self, color: str = "orange"):
        self.color = "orange"

    def render(self, env, **kwargs):
        # The Observer generates the next observation for the agent.
        history = pd.DataFrame(env.observer.renderer_history)

        actions = list(history.action)
        p = list(history.price)

        buy = {}
        sell = {}

        for i in range(len(actions) - 1):
            a1 = actions[i]
            a2 = actions[i + 1]

            if a1 != a2:
                if a1 == 0 and a2 == 1:
                    buy[i] = p[i]
                else:
                    sell[i] = p[i]

        buy = pd.Series(buy)
        sell = pd.Series(sell)

        fig, axs = plt.subplots(1, 2, figsize=(15, 5))

        fig.suptitle("Performance")

        axs[0].plot(np.arange(len(p)), p, label="price", color=self.color)
        axs[0].scatter(buy.index, buy.values, marker="v", color="red") # BUY
        axs[0].scatter(sell.index, sell.values, marker="^", color="green") # SELL
        axs[0].set_title("Trading Chart")
        axs[0].legend(['Price', 'Buys', 'Sells'])

        performance_df = pd.DataFrame().from_dict(env.action_scheme.portfolio.performance, orient='index')
        performance_df.plot(ax=axs[1])
        axs[1].set_title("Net Worth")

        plt.show()

---

### Train

Now in order to use our custom environment in ray we must first write a function that creates an instance of the TradingEnv from a configuration dictionary.

In [5]:
def generate_train_test_datasets(ticker, train_test_split):
    """Get Yahoo! Finance Data for Train/Test Splits."""

    yf_ticker = yf.Ticker(ticker=f'{ticker}.SA')

    df = yf_ticker.history(period='1y', interval='1h')
    df.drop(['Dividends', 'Stock Splits'], axis=1, inplace=True)
    df["Volume"] = df["Volume"].fillna(0).astype(int)
    df.ta.log_return(append=True, length=16)
    df.ta.rsi(append=True, length=14)
    df.ta.macd(append=True, fast=12, slow=26)
    df.dropna(inplace=True)

    next_day = datetime.strptime(train_test_split, '%Y-%m-%d')
    next_day = next_day + timedelta(days=1)
    next_day = next_day.strftime('%Y-%m-%d')

    df_training = df.loc[:train_test_split].copy()
    df_evaluation = df.loc[next_day:].copy()

    df_training.dropna().to_csv('training.csv', index=True)
    df_evaluation.dropna().to_csv('evaluation.csv', index=True)

    return df_training, df_evaluation

In [6]:
df_train, df_test = generate_train_test_datasets('PETR4', '2021-08-27')

In [7]:
df_train.head()

Unnamed: 0,Open,High,Low,Close,Volume,LOGRET_16,RSI_14,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9
2021-02-08 15:00:00-03:00,28.379999,28.41,27.9,27.91,14302300,-0.013523,38.305879,0.085221,-0.284593,0.369814
2021-02-09 10:00:00-03:00,28.049999,28.370001,28.0,28.16,6090500,-0.00743,42.848306,0.03409,-0.268579,0.302669
2021-02-09 11:00:00-03:00,28.16,28.219999,27.57,27.77,10963100,-0.022785,38.131609,-0.037469,-0.272111,0.234642
2021-02-09 12:00:00-03:00,27.76,27.77,27.49,27.52,9910300,-0.037094,35.438578,-0.113051,-0.278154,0.165103
2021-02-09 13:00:00-03:00,27.51,27.809999,27.379999,27.690001,11529500,-0.039998,38.61343,-0.157417,-0.258016,0.100599


In [8]:
for c in df_train.columns:
    print (c)

Open
High
Low
Close
Volume
LOGRET_16
RSI_14
MACD_12_26_9
MACDh_12_26_9
MACDs_12_26_9


---

In [15]:
def create_training_env(config):
    """Creates Trading Environment. """
    
    ticker = 'PETR4'
    
    dataset = pd.read_csv(filepath_or_buffer="C:\\Users\\mathe\\Desktop\\Desktop\\Estudos\\Courses\\(Framework) TensorTrade\\training.csv", 
                          parse_dates=True).fillna(method='backfill').fillna(method='ffill')
    
    # Price Series
    price = Stream.source(list(dataset["Close"]), dtype="float").rename("BRL-ASSETS")
    
    b3_commission = 0.0035
    b3_options = ExchangeOptions(commission=b3_commission)
    b3_exchange = Exchange("B3", service=execute_order, options=b3_options)(price)
    
    # Instruments
    BRL = Instrument("BRL", 2, "Brazilian Currency")
    ASSETS = Instrument("ASSETS", 2, "Assets")

    # Portfolio
    cash = Wallet(b3_exchange, 10000 * BRL) # Money
    asset = Wallet(b3_exchange, 0 * ASSETS) # Stocks/Assets
    
    portfolio = Portfolio(BRL, [cash, asset])
    
    features = []
    for c in dataset.columns[1:]:
        s = Stream.source(list(dataset[c]), dtype="float").rename(dataset[c].name)
        features += [s]
    feed = DataFeed(features)
    feed.compile()
    
    # Rewards
    reward_scheme = default.rewards.RiskAdjustedReturns(
        return_algorithm='sharpe',
        risk_free_rate=0.005,
        window_size=7*5
    )
    
    # Actions
    action_scheme = default.actions.ManagedRiskOrders(
        stop=[0.05],
        take=[0.075],
        min_order_pct=0.5
    )
    
    # Visualization
    renderer_feed = DataFeed([
        Stream.source(list(dataset.index)).rename("date"),
        Stream.source(list(dataset["Close"]), dtype="float").rename("price"),
        Stream.sensor(action_scheme.broker, lambda b: len(b.unexecuted), dtype="float").rename("action")
    ])

    environment = default.create(
        feed=feed,
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        renderer_feed=renderer_feed,
        renderer=PositionChangeChart(),
        window_size=config["window_size"],
        max_allowed_loss=0.4
    )
    
    return environment

register_env("TradingEnv", create_training_env)

---

Now that the environment is registered we can run the training algorithm using the Proximal Policy Optimization (PPO) algorithm implemented in rllib.

In [16]:
env_config_training = {
    # We want to look at the last 14 samples (hours)
    "window_size": 7 * 5, # hours * days
    # And calculate reward based on the actions taken in the next 7 hours
    #"reward_window_size": 7,
    # If it goes past 10% loss during the iteration, we don't want to waste time on a "loser".
    "max_allowed_loss": 0.10,
}

analysis = tune.run(
    # We'll be using the builtin PPO agent in RLLib
    run_or_experiment="PPO",
    name="MyExperiment1",
    metric='episode_reward_mean',
    stop={
      "episode_reward_mean": 0.05
    },
    config={
        "env": "TradingEnv",
        "env_config": env_config_training,
        "log_level": "WARNING",
        "framework": "tf2",
        "eager_tracing": True,
        "ignore_worker_failures": True,
        "num_workers": 1,
        "num_gpus": 1,
        "clip_rewards": True,
        "lr": 8e-6,
        "lr_schedule": [
            [0, 1e-1],
            [int(1e2), 1e-2],
            [int(1e3), 1e-3],
            [int(1e4), 1e-4],
            [int(1e5), 1e-5],
            [int(1e6), 1e-6],
            [int(1e7), 1e-7]
        ],
        "gamma": 0,
        "observation_filter": "MeanStdFilter",
        "lambda": 0.72,
        "vf_loss_coeff": 0.5,
        "entropy_coeff": 0.01
    },
    checkpoint_at_end=True
)

Trial name,status,loc
PPO_TradingEnv_05259_00000,PENDING,


[2m[36m(PPO pid=9560)[0m 2022-02-01 12:45:50,985	INFO trainer.py:712 -- Executing eagerly (framework='tf2'), with eager_tracing=True. For production workloads, make sure to set `eager_tracing=True` in order to match the speed of tf-static-graph (framework='tf'). For debugging purposes, `eager_tracing=False` is the best choice.


Trial name,status,loc
PPO_TradingEnv_05259_00000,RUNNING,127.0.0.1:9560


[2m[36m(PPO pid=9560)[0m 2022-02-01 12:46:00,547	INFO trainable.py:124 -- Trainable.setup took 10.277 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Trial name,status,loc
PPO_TradingEnv_05259_00000,RUNNING,127.0.0.1:9560


 pid=9560)[0m 2022-02-01 12:46:03,190	ERROR trainer.py:872 -- Error in train call, attempting to recover
 pid=9560)[0m Traceback (most recent call last):
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 867, in step
 pid=9560)[0m     result = self.step_attempt()
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\tracing\tracing_helper.py", line 451, in _resume_span
 pid=9560)[0m     return method(self, *_args, **_kwargs)
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 920, in step_attempt
 pid=9560)[0m     step_results = next(self.train_exec_impl)
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 756, in __next__
 pid=9560)[0m     return next(self.built_iterator)
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 783, in apply_foreach
 

Trial name,status,loc
PPO_TradingEnv_05259_00000,RUNNING,127.0.0.1:9560


 pid=9560)[0m 2022-02-01 12:46:07,802	ERROR trainer.py:872 -- Error in train call, attempting to recover
 pid=9560)[0m Traceback (most recent call last):
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 867, in step
 pid=9560)[0m     result = self.step_attempt()
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\tracing\tracing_helper.py", line 451, in _resume_span
 pid=9560)[0m     return method(self, *_args, **_kwargs)
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 920, in step_attempt
 pid=9560)[0m     step_results = next(self.train_exec_impl)
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 756, in __next__
 pid=9560)[0m     return next(self.built_iterator)
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 783, in apply_foreach
 

Trial name,status,loc
PPO_TradingEnv_05259_00000,RUNNING,127.0.0.1:9560


 pid=9560)[0m 2022-02-01 12:46:12,168	ERROR trainer.py:872 -- Error in train call, attempting to recover
 pid=9560)[0m Traceback (most recent call last):
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 867, in step
 pid=9560)[0m     result = self.step_attempt()
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\tracing\tracing_helper.py", line 451, in _resume_span
 pid=9560)[0m     return method(self, *_args, **_kwargs)
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\rllib\agents\trainer.py", line 920, in step_attempt
 pid=9560)[0m     step_results = next(self.train_exec_impl)
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 756, in __next__
 pid=9560)[0m     return next(self.built_iterator)
 pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\util\iter.py", line 783, in apply_foreach
 

Trial name,status,loc
PPO_TradingEnv_05259_00000,RUNNING,127.0.0.1:9560


2022-02-01 12:46:17,908	ERROR trial_runner.py:958 -- Trial PPO_TradingEnv_05259_00000: Error processing event.
Traceback (most recent call last):
  File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\tune\trial_runner.py", line 924, in _process_trial
    results = self.trial_executor.fetch_result(trial)
  File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\tune\ray_trial_executor.py", line 787, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\worker.py", line 1713, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::PPO.train()[39m (pid=9560, ip=127.0.0.1, repr=PPO)
  File "python\ray\_raylet.pyx", line 625, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 629, in 

Result for PPO_TradingEnv_05259_00000:
  date: 2022-02-01_12-46-00
  experiment_id: 2647bf441fb342dc87789a4ec5ba3aa2
  hostname: DESKTOP-625611C
  node_ip: 127.0.0.1
  pid: 9560
  timestamp: 1643730360
  trial_id: 05259_00000
  


Trial name,status,loc
PPO_TradingEnv_05259_00000,ERROR,127.0.0.1:9560

Trial name,# failures,error file
PPO_TradingEnv_05259_00000,1,C:\Users\mathe\ray_results\MyExperiment1\PPO_TradingEnv_05259_00000_0_2022-02-01_12-45-45\error.txt


[2m[36m(PPO pid=9560)[0m 2022-02-01 12:46:18,754	ERROR worker.py:84 -- Unhandled error (suppress with RAY_IGNORE_UNHANDLED_ERRORS=1): [36mray::RolloutWorker.set_weights()[39m (pid=4832, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001E308DC51F0>)
[2m[36m(PPO pid=9560)[0m   File "python\ray\_raylet.pyx", line 585, in ray._raylet.execute_task
[2m[36m(PPO pid=9560)[0m   File "C:\Users\mathe\anaconda3\envs\tf\lib\site-packages\ray\_private\memory_monitor.py", line 156, in raise_if_low_memory
[2m[36m(PPO pid=9560)[0m     raise RayOutOfMemoryError(
[2m[36m(PPO pid=9560)[0m ray._private.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node DESKTOP-625611C is used (15.51 / 15.86 GB). The top 10 memory consumers are:
[2m[36m(PPO pid=9560)[0m 
[2m[36m(PPO pid=9560)[0m PID	MEM	COMMAND
[2m[36m(PPO pid=9560)[0m 11920	3.85GiB	C:\Users\mathe\anaconda3\envs\tf\python.exe -m ipykernel_launcher -f C:\Users

TuneError: ('Trials did not complete', [PPO_TradingEnv_05259_00000])

---

After training is complete, we would now like to get access to the agents policy. We can do that by restoring the agent using the following code.

In [11]:
# Get checkpoint
checkpoints = analysis.get_trial_checkpoints_paths(
    trial=analysis.get_best_trial("episode_reward_mean", mode="max"),
    metric="episode_reward_mean"    
)

checkpoint_path = checkpoints[0][0]

# Restore agent
agent = ppo.PPOTrainer(
    env="TradingEnv",
    config={
        "env_config": env_config_training,
        "log_level": "DEBUG",
        "framework": "tf2",
        "ignore_worker_failures": True,
        "num_workers": 1,
        "num_gpus": 1,
        "clip_rewards": True,
        "lr": 8e-6,
        "lr_schedule": [
            [0, 1e-1],
            [int(1e2), 1e-2],
            [int(1e3), 1e-3],
            [int(1e4), 1e-4],
            [int(1e5), 1e-5],
            [int(1e6), 1e-6],
            [int(1e7), 1e-7]
        ],
        "gamma": 0,
        "observation_filter": "MeanStdFilter",
        "model": {
            "fcnet_hiddens": [256, 256], # Hyperparameter grid search defined above
        },
        "lambda": 0.72,
        "vf_loss_coeff": 0.5,
        "entropy_coeff": 0.01
    },
)

2022-02-01 12:36:25,695	INFO trainer.py:712 -- Executing eagerly (framework='tf2'), with eager_tracing=False. For production workloads, make sure to set `eager_tracing=True` in order to match the speed of tf-static-graph (framework='tf'). For debugging purposes, `eager_tracing=False` is the best choice.
 pid=18976)[0m 2022-02-01 12:36:50,138	INFO rollout_worker.py:1705 -- Validating sub-env at vector index=0 ... (ok)
 pid=18976)[0m 2022-02-01 12:36:50,227	DEBUG rollout_worker.py:1534 -- Creating policy for default_policy
 pid=18976)[0m 2022-02-01 12:36:50,237	DEBUG catalog.py:706 -- Created preprocessor <ray.rllib.models.preprocessors.NoPreprocessor object at 0x0000014F6BCBC3D0>: Box([[-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 pid=18976)[0m  [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 pid=18976)[0m  [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 pid=18976)[0m  [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 pid=18976)[0m  [-inf -inf -inf -inf -inf -


2022-02-01 12:36:51,706	DEBUG rollout_worker.py:1534 -- Creating policy for default_policy
2022-02-01 12:36:51,717	DEBUG catalog.py:706 -- Created preprocessor <ray.rllib.models.preprocessors.NoPreprocessor object at 0x000002B2BF291550>: Box([[-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-inf -inf -in

---

After training is complete, we would now like to get access to the agents policy. We can do that by restoring the agent using the following code.

In [12]:
window_size = 7 * 5

# Restore agent
agent.restore(checkpoint_path)

# Instantiate the environment
env = create_training_env({
    "window_size": window_size,
    #"reward_window_size": 7
})

# Run until episode ends
episode_reward = 0
done = False
obs = env.reset()

while not done:
    action = agent.compute_single_action(obs)
    obs, reward, done, info = env.step(action)
    episode_reward += reward

env.render()

2022-02-01 12:36:56,624	INFO trainable.py:467 -- Restored on 127.0.0.1 from checkpoint: C:\Users\mathe\ray_results\MyExperiment1\PPO_TradingEnv_05f79_00000_0_2022-02-01_12-17-09\checkpoint_000036\checkpoint-36
2022-02-01 12:36:56,625	INFO trainable.py:475 -- Current state after restoring: {'_iteration': 36, '_timesteps_total': 0, '_time_total': 1138.403665304184, '_episodes_total': 157}


AttributeError: 'DataFrame' object has no attribute 'action'

---

### Validation Set

In [None]:
def create_eval_env(config):
    """Creates the Evaluation Environment."""
    dataset = pd.read_csv(filepath_or_buffer="C:\\Users\\mathe\\Desktop\\Desktop\\Estudos\\Courses\\(Framework) TensorTrade\\evaluation.csv", 
                          parse_dates=True).fillna(method='backfill').fillna(method='ffill')
    
    # Price Series
    price = Stream.source(list(dataset["Close"]), dtype="float").rename("BRL-ASSETS")
    
    b3_commission = 0.0035
    b3_options = ExchangeOptions(commission=b3_commission)
    b3_exchange = Exchange("B3", service=execute_order, options=b3_options)(price)
    
    # Instruments
    BRL = Instrument("BRL", 2, "Brazilian Currency")
    ASSETS = Instrument("ASSETS", , "Assets")

    # Portfolio
    cash = Wallet(b3_exchange, 1000 * BRL) # Money
    asset = Wallet(b3_exchange, 0 * ASSETS) # Stocks/Assets
    
    portfolio = Portfolio(BRL, [cash, asset])
    
    features = []
    for c in dataset.columns[1:]:
        s = Stream.source(list(dataset[c]), dtype="float").rename(dataset[c].name)
        features += [s]
    feed = DataFeed(features)
    feed.compile()
    
#     # Reward
#     reward_scheme = PBR(price=price)
    
#     # Actions
#     action_scheme = BSH(
#         cash=cash,
#         asset=asset
#     ).attach(reward_scheme)

    reward_scheme = default.actions.SimpleOrders()
    
    action_scheme = default.rewards.SimpleProfit()#.attach(reward_scheme)
    
    # Visualization
    renderer_feed = DataFeed([
        Stream.source(list(dataset.index)).rename("date"),
        Stream.source(list(dataset["Close"]), dtype="float").rename("price"),
        Stream.sensor(action_scheme, lambda s: s.action, dtype="float").rename("action")
    ])

    environment = default.create(
        feed=feed,
        portfolio=portfolio,
        action_scheme=reward_scheme,
        reward_scheme=action_scheme,
        renderer_feed=renderer_feed,
        renderer=PositionChangeChart(),
        window_size=config["window_size"],
        max_allowed_loss=0.4
    )
    
    return environment, portfolio

In [None]:
dataset = pd.read_csv(filepath_or_buffer="C:\\Users\\mathe\\Desktop\\Desktop\\Estudos\\Courses\\(Framework) TensorTrade\\evaluation.csv", 
                      parse_dates=True).fillna(method='backfill').fillna(method='ffill')
    
# PRICES
price_eval = Stream.source(list(dataset["Close"]), dtype="float").rename("BRL-TTC")

# Instantiate the environment
env, portfolio = create_eval_env({
    "window_size": 14
})

# Run until episode ends
episode_reward = 0
done = False
obs = env.reset()

while not done:
    action = agent.compute_single_action(obs)
    obs, reward, done, info = env.step(action)
    episode_reward += reward
    
env.render()

In [None]:
portfolio.ledger.as_frame().head(10)

In [None]:
df = pd.DataFrame(portfolio.performance)

In [None]:
df.T.plot()

In [None]:
df.loc["net_worth"].plot()

In [None]:
df.loc["B3:/BRL-ASSETS"].plot()

In [None]:
df.loc["B3:/ASSETS:/worth"].plot()

In [None]:
worth_streams = []
for wallet in portfolio.wallets:
    total_balance = Stream.sensor(
        wallet,
        lambda w: w.total_balance.as_float(),
        dtype="float"
    )
    
    symbol = wallet.instrument.symbol

    if symbol == portfolio.base_instrument.symbol:
        worth_streams += [total_balance]
    else:
        price = Stream.select(
        wallet.exchange.streams(),
            lambda s: s.name.endswith(symbol)
        )
        worth_streams += [(price * total_balance)]

net_worth = Stream.reduce(worth_streams).sum().rename("net_worth")

---