In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensortrade.env.generic import Renderer


class PositionChangeChart(Renderer):
    """Renderer that draws a two-panel summary figure for an episode.

    The left panel shows the recorded price series with a marker at every
    step where the recorded action changes; the right panel plots the
    portfolio's performance records.

    Parameters
    ----------
    color : str
        Matplotlib color used for the price line. Defaults to ``"orange"``.
    """

    def __init__(self, color: str = "orange"):
        # Honour the caller's choice; previously the argument was ignored and
        # the line was always drawn in orange.
        self.color = color

    def render(self, env, **kwargs):
        """Plot price with position-change markers next to the net worth.

        Parameters
        ----------
        env
            Environment to visualise. This method reads
            ``env.observer.renderer_history`` (records exposing ``action``
            and ``price`` columns) and
            ``env.action_scheme.portfolio.performance``.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        history = pd.DataFrame(env.observer.renderer_history)

        actions = list(history.action)
        prices = list(history.price)

        # Compare each action with the one that follows it. A 0 -> 1 change is
        # recorded as a buy at step i; every other change is recorded as a sell.
        buy = {}
        sell = {}
        for i, (current, following) in enumerate(zip(actions, actions[1:])):
            if current == following:
                continue
            if current == 0 and following == 1:
                buy[i] = prices[i]
            else:
                sell[i] = prices[i]

        # An explicit dtype keeps an empty marker set well-defined (no
        # object-dtype fallback / deprecation warning when nothing was traded).
        buy = pd.Series(buy, dtype="float64")
        sell = pd.Series(sell, dtype="float64")

        fig, axs = plt.subplots(1, 2, figsize=(15, 5))

        fig.suptitle("Performance")

        axs[0].plot(np.arange(len(prices)), prices, label="price", color=self.color)
        axs[0].scatter(buy.index, buy.values, marker="^", color="green")
        axs[0].scatter(sell.index, sell.values, marker="^", color="red")
        axs[0].set_title("Trading Chart")

        # from_dict is a classmethod; call it on the class rather than on a
        # throwaway empty DataFrame instance.
        performance_df = pd.DataFrame.from_dict(
            env.action_scheme.portfolio.performance, orient='index'
        )
        performance_df.plot(ax=axs[1])
        axs[1].set_title("Net Worth")

        plt.show()

In [2]:
import ray
import numpy as np
import pandas as pd

from ray import tune
from ray.tune.registry import register_env

import tensortrade.env.default as default

from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.oms.instruments import Instrument
from tensortrade.env.default.rewards import (
    TensorTradeRewardScheme,
    SimpleProfit,
    RiskAdjustedReturns,
    PBR,
)
from tensortrade.env.default.actions import (
    BSH
)

# Instruments shared by every environment built in create_env: the cash wallet
# is funded in USD and the asset wallet holds TTC.
# NOTE(review): the second positional argument is presumably the decimal
# precision of the instrument - confirm against Instrument's signature.
USD = Instrument("USD", 2, "U.S. Dollar")
TTC = Instrument("TTC", 8, "TensorTrade Coin")


def create_env(config):
    """Build a trading environment over a synthetic sine-wave price series.

    Parameters
    ----------
    config : dict
        Environment configuration. Must contain the key ``"window_size"``,
        which is forwarded to ``default.create``.

    Returns
    -------
    The environment object returned by ``default.create``, wired with a
    USD cash wallet, a TTC asset wallet, a BSH action scheme, a PBR reward
    scheme and a ``PositionChangeChart`` renderer.

    Raises
    ------
    KeyError
        If ``config`` has no ``"window_size"`` entry.
    """
    # Synthetic price: three sine periods over [0, 2*pi), oscillating between
    # 50 and 150 around a mean of 100.
    x = np.arange(0, 2*np.pi, 2*np.pi / 1001)
    y = 50*np.sin(3*x) + 100

    # (A second, never-read reassignment of `x` used to sit here; it was dead
    # code and has been removed.)
    p = Stream.source(y, dtype="float").rename("USD-TTC")

    bitfinex = Exchange("bitfinex", service=execute_order)(
        p
    )

    # Start fully in cash: 100000 USD and no TTC.
    cash = Wallet(bitfinex, 100000 * USD)
    asset = Wallet(bitfinex, 0 * TTC)

    portfolio = Portfolio(USD, [
        cash,
        asset
    ])

    # Observation features: the raw price, three rolling means of different
    # window lengths, and the log-return with its initial NaN replaced by 0.
    feed = DataFeed([
        p,
        p.rolling(window=10).mean().rename("fast"),
        p.rolling(window=50).mean().rename("medium"),
        p.rolling(window=100).mean().rename("slow"),
        p.log().diff().fillna(0).rename("lr")
    ])

    reward_scheme = PBR(price=p)

    # NOTE(review): attach() presumably registers the reward scheme as a
    # listener of the action scheme and returns the scheme itself - confirm.
    action_scheme = BSH(
        cash=cash,
        asset=asset
    ).attach(reward_scheme)

    # Separate feed consumed by the renderer: the price series plus the
    # action most recently stored on the action scheme.
    renderer_feed = DataFeed([
        Stream.source(y, dtype="float").rename("price"),
        Stream.sensor(action_scheme, lambda s: s.action, dtype="float").rename("action")
    ])

    environment = default.create(
        feed=feed,
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        renderer_feed=renderer_feed,
        renderer=PositionChangeChart(),
        window_size=config["window_size"],
        max_allowed_loss=0.6
    )
    return environment

# Register the factory under the name "TradingEnv" so that a trainer config
# can refer to the environment by that string (see the "env" config entry).
register_env("TradingEnv", create_env)

In [3]:
# Train PPO on the registered "TradingEnv" environment and keep the resulting
# analysis object; a checkpoint is written when the run ends.
analysis = tune.run(
    "PPO",
    # Stopping criterion on the mean episode reward reported by the trainer.
    stop={
      "episode_reward_mean": 500
    },
    config={
        "env": "TradingEnv",
        # Passed to create_env as its `config` argument.
        "env_config": {
            "window_size": 25
        },
        # DEBUG is very chatty (see the captured worker logs below this cell).
        "log_level": "DEBUG",
        "framework": "torch",
        "ignore_worker_failures": True,
        "num_workers": 1,
        "num_gpus": 0,
        # NOTE(review): the logged episode rewards are whole numbers, which is
        # consistent with per-step rewards being clipped - confirm the exact
        # clipping rule in the RLlib docs.
        "clip_rewards": True,
        # NOTE(review): this value appears to be superseded by "lr_schedule";
        # the first logged iteration reports cur_lr ~= 0.1, not 8e-6.
        "lr": 8e-6,
        # [timestep, learning rate] pairs: the rate drops by a factor of ten
        # at each listed timestep, from 1e-1 down to 1e-7.
        "lr_schedule": [
            [0, 1e-1],
            [int(1e2), 1e-2],
            [int(1e3), 1e-3],
            [int(1e4), 1e-4],
            [int(1e5), 1e-5],
            [int(1e6), 1e-6],
            [int(1e7), 1e-7]
        ],
        # Discount factor of zero: only the immediate reward is credited.
        "gamma": 0,
        "observation_filter": "MeanStdFilter",
        "lambda": 0.72,
        "vf_loss_coeff": 0.5,
        "entropy_coeff": 0.01
    },
    checkpoint_at_end=True
)

2023-03-16 18:23:24,898	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


[2m[36m(RolloutWorker pid=10644)[0m 2023-03-16 18:23:41,460	DEBUG rollout_worker.py:1948 -- Creating policy for default_policy
[2m[36m(RolloutWorker pid=10644)[0m 2023-03-16 18:23:41,460	DEBUG catalog.py:781 -- Created preprocessor <ray.rllib.models.preprocessors.NoPreprocessor object at 0x000002A572529FA0>: Box(-inf, inf, (25, 5), float32) -> (25, 5)
[2m[36m(RolloutWorker pid=10644)[0m 2023-03-16 18:23:41,470	INFO policy.py:1214 -- Policy (worker=1) running on CPU.
[2m[36m(RolloutWorker pid=10644)[0m 2023-03-16 18:23:41,470	INFO torch_policy_v2.py:110 -- Found 0 visible cuda devices.
[2m[36m(RolloutWorker pid=10644)[0m 2023-03-16 18:23:41,480	INFO util.py:122 -- Using connectors:
[2m[36m(RolloutWorker pid=10644)[0m 2023-03-16 18:23:41,480	INFO util.py:123 --     AgentConnectorPipeline
[2m[36m(RolloutWorker pid=10644)[0m         ClipRewardAgentConnector
[2m[36m(RolloutWorker pid=10644)[0m         ObsPreprocessorConnector
[2m[36m(RolloutWorker pid=10644)[0m   

Trial name,agent_timesteps_total,connector_metrics,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
PPO_TradingEnv_97786_00000,4000,"{'ClipRewardAgentConnector_ms': 0.0, 'ObsPreprocessorConnector_ms': 0.0, 'MeanStdObservationFilterAgentConnector_ms': 0.14282975878034318, 'StateBufferConnector_ms': 0.0, 'ViewRequirementAgentConnector_ms': 0.028623853410993303}","{'num_env_steps_sampled': 4000, 'num_env_steps_trained': 4000, 'num_agent_steps_sampled': 4000, 'num_agent_steps_trained': 4000}",{},2023-03-16_18-23-55,False,518.286,{},38,-10.4286,-49,7,7,f34957b411b7450bb141ce4fd8edcecd,DESKTOP-VQNVLBP,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 1.9787943588469619, 'cur_kl_coeff': 0.20000000000000004, 'cur_lr': 0.10000000000000002, 'total_loss': 0.3050557771517384, 'policy_loss': -0.19665496241231198, 'vf_loss': 0.9976007962739596, 'vf_explained_var': 0.0003735168646740657, 'kl': 0.04701697227046456, 'entropy': 0.6493055167377636, 'entropy_coeff': 0.01}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 128.0, 'num_grad_updates_lifetime': 465.5, 'diff_num_grad_updates_vs_sampler_policy': 464.5}}, 'num_env_steps_sampled': 4000, 'num_env_steps_trained': 4000, 'num_agent_steps_sampled': 4000, 'num_agent_steps_trained': 4000}",1,127.0.0.1,4000,4000,4000,4000,4000,4000,0,1,0,0,4000,"{'cpu_util_percent': 21.235, 'ram_util_percent': 53.620000000000005}",22016,{},{},{},"{'mean_raw_obs_processing_ms': 0.31153674841940143, 'mean_inference_ms': 1.1878294278549808, 'mean_action_processing_ms': 0.15516729242829674, 'mean_env_wait_ms': 0.547459291774194, 'mean_env_render_ms': 0.0}","{'episode_reward_max': 38.0, 'episode_reward_min': -49.0, 'episode_reward_mean': -10.428571428571429, 'episode_len_mean': 518.2857142857143, 'episode_media': {}, 'episodes_this_iter': 7, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [38.0, -6.0, -49.0, 9.0, -26.0, -39.0, 0.0], 'episode_lengths': [533, 527, 506, 518, 511, 516, 517]}, 'sampler_perf': 
{'mean_raw_obs_processing_ms': 0.31153674841940143, 'mean_inference_ms': 1.1878294278549808, 'mean_action_processing_ms': 0.15516729242829674, 'mean_env_wait_ms': 0.547459291774194, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'connector_metrics': {'ClipRewardAgentConnector_ms': 0.0, 'ObsPreprocessorConnector_ms': 0.0, 'MeanStdObservationFilterAgentConnector_ms': 0.14282975878034318, 'StateBufferConnector_ms': 0.0, 'ViewRequirementAgentConnector_ms': 0.028623853410993303}}",14.119,14.119,14.119,"{'training_iteration_time_ms': 14108.964, 'load_time_ms': 0.0, 'load_throughput': 0.0, 'learn_time_ms': 5216.867, 'learn_throughput': 766.744, 'synch_weights_time_ms': 9.97}",1678962235,0,4000,1,97786_00000,6.36223


[2m[36m(PPO pid=22016)[0m 2023-03-16 18:23:55,633	DEBUG filter_manager.py:34 -- Synchronizing filters ...
[2m[36m(PPO pid=22016)[0m 2023-03-16 18:23:55,633	DEBUG filter_manager.py:55 -- Updating remote filters ...
[2m[36m(PPO pid=22016)[0m 2023-03-16 18:23:55,643	DEBUG algorithm.py:2300 -- synchronized filters: defaultdict(<class 'ray.rllib.utils.filter.NoFilter'>, {'default_policy': <ray.rllib.utils.filter.MeanStdFilter object at 0x0000024E892D3820>})
[2m[36m(PPO pid=22016)[0m 2023-03-16 18:24:04,291	DEBUG train_ops.py:156 -- == sgd epochs for default_policy ==
[2m[36m(PPO pid=22016)[0m 2023-03-16 18:24:09,593	DEBUG filter_manager.py:34 -- Synchronizing filters ...
[2m[36m(PPO pid=22016)[0m 2023-03-16 18:24:09,593	DEBUG filter_manager.py:55 -- Updating remote filters ...
[2m[36m(PPO pid=22016)[0m 2023-03-16 18:24:09,599	DEBUG algorithm.py:2300 -- synchronized filters: defaultdict(<class 'ray.rllib.utils.filter.NoFilter'>, {'default_policy': <ray.rllib.utils.filter