In [1]:
import sys
from pathlib import Path
import json
import pickle
# sys.path.append(str(pathlib.Path().resolve().parent))

In [2]:
import ray
from ray.rllib.agents import ppo

In [4]:
# from rl_bot.envs import create_env
from rl_bot.data_loader import DataLoader
from rl_bot.preprocessor import Preprocessor

In [None]:
DATA_PATH = Path("./data/BTCUSDT/").resolve()

In [None]:
with open("./config/trainer.json", "r") as f:
    trainer_config = json.load(f)

In [None]:
config = {
    "env": "TradingEnv",
    "env_config": env_config,
    "log_level": "WARN",
    "framework": "torch",
    "num_workers": 4,
    "num_gpus": 0,
    "observation_filter": "MeanStdFilter",
    # "evaluation_config": {
    #     "env_config": env_config_eval,
    #     "explore": False,
    # },
    # "evaluation_interval": 1,
    # "evaluation_num_episodes": 1,
    "seed": 3407,
    # "output": {}
}

In [None]:
ray.init()

2022-01-27 21:34:26,626	INFO services.py:1410 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


{'node_ip_address': '172.17.0.2',
 'raylet_ip_address': '172.17.0.2',
 'redis_address': '172.17.0.2:6379',
 'object_store_address': '/tmp/ray/session_2022-01-27_21-34-24_347568_15506/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2022-01-27_21-34-24_347568_15506/sockets/raylet',
 'webui_url': '127.0.0.1:8265',
 'session_dir': '/tmp/ray/session_2022-01-27_21-34-24_347568_15506',
 'metrics_export_port': 60422,
 'gcs_address': '172.17.0.2:64868',
 'address': '172.17.0.2:6379',
 'node_id': 'aea63e9d39062eeda686c364ea5bca6e8bfb02e349a139f4f223d713'}

In [None]:
local_dir = "./ray_results"

In [None]:
from rl_bot.train import train

In [None]:
agent_class = ppo.PPOTrainer
analysis = train(agent_class, stop={"timesteps_total": 10000}, expt_name=None, config=config, local_dir=local_dir, resume=False)

In [None]:
env = create_env(env_config)

In [None]:
done = False
env.reset()
total_reward = 0
while not done:
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    total_reward += reward
    # env.render()

In [None]:
# import pandas as pd
# equity_curve = pd.Series(env.equity_curve[1:]).apply(np.log).diff()
# rewards = pd.Series(rewards)
# pd.Series(env.equity_curve).apply(np.log).diff().dropna()
# pd.concat([equity_curve, rewards], axis=1)

In [None]:
import os
import warnings
from typing import Optional

import pandas as pd

with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    from backtesting import Backtest, Strategy


In [None]:
class DRLStrategy(Strategy):
    env = None
    agent = None
    debug = False

    def init(self):
        self.observation = self.env.reset()
        self.done = False

    def next(self):
        # if self.data.index[-1] != self.env.current_time or self.done:
        if self.data.index[-1] != self.env.observer.datetime[self.env.current_step] or self.done:
            pass

        else:
            assert self.data.Close[-1] == self.env.observer.price, self.error()
            assert self._broker._cash == self.env.cash, self.error()
            assert self.equity == self.env.equity, self.error()
            if self.agent == "Random":
                action = self.env.action_space.sample()
                price = self.env.observer.price
                bet_proportion, atr_range = action[0], action[1]
                atr = self.env.actions.atr[self.env.current_step]

                trade_size = bet_proportion
                limit_price = self.env.observer.price + atr * atr_range
                if atr_range > 0:
                    self.buy(size=trade_size, limit=limit_price)
                else:
                    self.sell(size=trade_size, limit=limit_price)

            elif self.agent == "Buy&Hold":
                action = 2 if len(self.env.actions) == 3 else 1
            elif self.agent == "Sell&Hold":
                action = 0 if len(self.env.actions) == 2 else 0
            elif self.agent:
                action = self.agent.compute_single_action(
                    self.env.observer.observation, explore=False
                )
                bet_prob, atr_range = action[0], action[1]
                atr = self.env.actions.atr[self.env.current_step]
                limit_price = self.env.observer.price + atr * atr_range

                if bet_prob == 0:
                    pass

                elif atr_range > 0:
                    self.buy(size=bet_prob, limit=limit_price)

                elif atr_range < 0:
                    self.sell(size=bet_prob, limit=limit_price)
                print("Agent Action:", action)
            # do Trade
            self.env.action = action
            # if self.env.done:
            #     self.position.close()

            # else:
            #     self.env.actions.perform(self, action)

            if self.debug:
                self.render()

            self.observation, _, self.done, _ = self.env.step(action)

    def error(self):
        print("===" * 10, "DEBUG", "===" * 10)
        print("Env Step: ", self.env.current_step)
        print(
            "Env Position: ", self.env.position, "| Backtest Position: ", self.position
        )
        print(
            "Env Price: ",
            self.env.observer.price,
            "| Backtest Price: ",
            self.data.Close[-1],
        )
        print(
            "Env Price with fee: ",
            self.env._adjusted_price(1),
            "| Backtest Price with fee: ",
            self._broker._adjusted_price(1),
        )
        print("Env Equity: ", self.env.equity, "| Backtest Equity: ", self.equity)
        print(
            "Env Assets: ", self.env.cash, "| Backtest Cash: ", self._broker._cash
        )

        print("Env Trades: ", self.env.trades, "| Backtest Trades", self.trades)
        print("Env Position: ", self.env.position, "| Backtest Postion", self.position)
        print("===" * 10, "=====", "===" * 10)
        self.render()
        return "See Debug Above"

    def render(self):
        print("===" * 5, f"Backtesting ({self.data.index[-1]})", "===" * 5)
        print(f"Price: {self.data.Close[-1]}")
        print(f"Cash: {self._broker._cash}")
        print(f"Equity: {self.equity}")
        print(f"Orders: {self.orders}")
        print(f"Trades: {self.trades}")
        print(f"Position: {self.position}")
        print(f"Closed Trades: {self.closed_trades}")

    @property
    def trade_size(self):
        return self.env.trade_size

    @property
    def latest_high_price(self):
        return self.env.latest_high_price

    @property
    def latest_low_price(self):
        return self.env.latest_low_price


def backtest(
    env,
    agent="Random",
    save_dir: str = "./backtest-stats",
    plot: bool = True,
    open_browser: bool = True,
    debug: bool = False,
) -> pd.DataFrame:

    bt = Backtest(
        env.observer._ohlc,
        DRLStrategy,
        cash=env.initial_cash,
        commission=env.fee,
        trade_on_close=True,
        # hedging=True,
    )
    stats = bt.run(env=env, agent=agent, debug=debug)
    print(stats)



In [None]:
checkpoint_path = analysis.get_last_checkpoint()
config = analysis.get_best_config()

In [None]:
agent = agent_class(config=config)



In [None]:
backtest(env, agent)

Agent Action: [4.9837205e-01 9.5367432e-05]
Agent Action: [0.49879384 0.00058091]
Agent Action: [4.9818176e-01 4.4834614e-04]
Agent Action: [ 0.49957755 -0.00477242]
Agent Action: [0.49593642 0.00165784]
Agent Action: [0.49887908 0.00054801]
Agent Action: [0.497477   0.00056493]
Agent Action: [ 0.49582392 -0.00462258]
Agent Action: [ 0.4959151  -0.00710499]
Agent Action: [ 0.49749196 -0.00159246]
Agent Action: [ 0.50222385 -0.0059104 ]
Agent Action: [ 4.9902248e-01 -2.1106005e-04]
Agent Action: [ 0.49857488 -0.00375336]
Agent Action: [0.4972035  0.00070643]
Agent Action: [ 0.5000926  -0.00639635]
Agent Action: [0.5000066  0.00253367]
Agent Action: [ 0.4980044 -0.001706 ]
Agent Action: [ 0.50034195 -0.00458175]
Agent Action: [ 0.50169605 -0.00308532]
Agent Action: [ 0.49992397 -0.00121033]
Agent Action: [ 0.49815133 -0.00163901]
Agent Action: [0.4980322  0.00124705]
Agent Action: [ 0.4955605 -0.0045954]
Agent Action: [ 0.498055   -0.00808156]
Agent Action: [ 0.49386224 -0.00585502]
Agen

In [None]:
backtest(env, agent="Random")

Env Action [0.5161137  0.22363713]
Env Action [0.23641647 0.15904129]
Env Action [0.12763487 0.8474253 ]
Env Action [ 0.97099113 -0.5300258 ]
Env Action [0.38380986 0.8333787 ]
Env Action [ 0.84173805 -0.34494445]
Env Action [0.1786628  0.38150358]
Env Action [ 0.9464522 -0.9413309]
Env Action [ 0.03598088 -0.5854236 ]
Env Action [0.510787  0.9234985]
Env Action [0.36617345 0.70031357]
Env Action [0.8782901  0.76948315]
Env Action [ 0.713393  -0.4551287]
Env Action [0.305619   0.96739143]
Env Action [ 0.0120848  -0.96208614]
Env Action [0.19130264 0.79529935]
Env Action [ 0.9561785  -0.81637746]
Env Action [ 0.75182426 -0.4142144 ]
Env Action [0.22789927 0.19243744]
Env Action [ 0.9937113 -0.8169374]
Env Action [ 0.05354117 -0.90771735]
Env Action [ 0.05675332 -0.78072906]
Env Action [0.08309363 0.5925874 ]
Env Action [ 0.966822   -0.10449297]
Env Action [ 0.8177388  -0.08350809]
Env Action [0.32340226 0.00557926]
Env Action [0.2283891 0.6158019]
Env Action [0.7522972 0.5491858]
Env Ac