In [1]:
import os

import numpy as np
import pandas as pd
from sb3_contrib import QRDQN
from stable_baselines3 import DQN, A2C, PPO

from envs import TradingEnv
from utils import preprocessing

In [2]:
# Simulation parameters shared by the environments and the backtests.
lookback_window = 20  # forwarded to TradingEnv as `lookback_window`
fee = 0.000665  # forwarded as TradingEnv `fee` and Backtest `commission`; alternative tried: 0.00019
assets = 10_000  # forwarded as TradingEnv `assets` and Backtest `cash`

# Log directory; created up front when it does not exist yet.
log_dir = "./logs/"
os.makedirs(name=log_dir, exist_ok=True)

In [3]:
# Load the hourly ETH/USD candles and index them by the parsed "Date" column.
df = pd.read_csv("./data/3600/ethusd/2021-01-01.csv", parse_dates=[0])
df = df.set_index("Date")

# Positional split: first half for training, second half for evaluation.
split_index = len(df) // 2
train_df = df.iloc[:split_index]
eval_df = df.iloc[split_index:]

print(train_df.head(3))
print(eval_df.head(3))

                       Open    High     Low   Close    Volume
Date                                                         
2021-01-01 00:00:00  734.21  734.28  723.79  731.63  3015.608
2021-01-01 01:00:00  731.42  739.26  731.42  736.66   912.377
2021-01-01 02:00:00  737.14  737.46  725.51  727.42  4502.349
                       Open    High     Low   Close    Volume
Date                                                         
2021-04-01 00:00:00  1829.2  1839.7  1825.0  1834.7   929.029
2021-04-01 01:00:00  1834.6  1854.5  1833.0  1849.6  2446.906
2021-04-01 02:00:00  1849.9  1862.6  1838.6  1846.2  1712.854


In [4]:
# Settings that are identical for the training and the evaluation environment.
env_kwargs = dict(lookback_window=lookback_window, assets=assets, fee=fee)

# One environment per data split, each fed the raw frame and its preprocessed counterpart.
train_env = TradingEnv(
    df=train_df,
    preprocessed_df=preprocessing(train_df),
    **env_kwargs,
)
# train_env = make_vec_env(lambda: train_env, n_envs=1)

eval_env = TradingEnv(
    df=eval_df,
    preprocessed_df=preprocessing(eval_df),
    **env_kwargs,
)
# eval_env = make_vec_env(lambda: eval_env, n_envs=1)

In [None]:
def evaluate(model, env, render=True):
    """Run ``model`` for a single episode on ``env`` and summarise the rewards.

    Args:
        model: Any object exposing ``predict(observation)`` that returns an
            ``(action, extra)`` pair. Only the action is used, so every
            algorithm trained in this notebook can be passed, not just A2C.
        env: Environment whose ``reset()`` returns the first observation and
            whose ``step(action)`` returns ``(observation, reward, done, info)``.
        render: When true, ``env.render()`` is called after every step.

    Returns:
        A ``(mean_reward, std_reward)`` tuple computed with ``np.mean`` and
        ``np.std`` over the running cumulative reward recorded after each
        step of the episode.

    NOTE(review): the statistics are taken over the per-step cumulative
    reward curve of one episode, not over per-episode totals; this is the
    original behaviour and is kept as is -- confirm it is what is wanted.
    """
    observation = env.reset()
    done = False
    cumulative_rewards = []
    cumulative_reward = 0.0

    while not done:
        # The second value returned by predict() is deliberately discarded so
        # it can never overwrite the environment observation.
        action, _ = model.predict(observation)
        observation, reward, done, _info = env.step(action)

        cumulative_reward += reward

        if render:
            env.render()

        cumulative_rewards.append(cumulative_reward)

    mean_reward = np.mean(cumulative_rewards)
    std_reward = np.std(cumulative_rewards)

    return mean_reward, std_reward

In [5]:
# Ignore Bokeh deprecation warnings and every UserWarning.
# NOTE(review): the filters are installed before `backtesting` is imported,
# presumably so that warnings raised during that import are hidden as well --
# confirm before reordering these lines.
import warnings
from bokeh.util.warnings import BokehDeprecationWarning
warnings.simplefilter('ignore', BokehDeprecationWarning)
warnings.simplefilter('ignore', UserWarning)
from backtesting import Strategy, Backtest

class DRLStrategy(Strategy):
    """Backtesting strategy that replays the actions of a trained agent.

    ``model`` and ``env`` are class-level parameters that the notebook
    supplies through ``Backtest.run(model=..., env=...)``. On every bar the
    strategy mirrors each order in both the backtesting broker and
    ``env.broker`` and asserts that cash and equity of the two stay equal.

    Actions returned by ``model.predict``: ``0`` does nothing, ``1`` moves
    towards a long position, ``2`` moves towards a short position.
    """

    model = None  # object exposing predict(state) -> (action, extra)
    env = None  # environment stepped in lock-step with the backtest

    def init(self):
        """Reset the environment and store ``len(self.data.df) - 1`` as ``max_step``."""
        self.state = self.env.reset()
        self.max_step = len(self.data.df) - 1

    def _affordable_size(self):
        """Return the whole number of units the available margin buys at the last price, commission included."""
        unit_cost = self._broker.last_price * (1 + self._broker._commission)
        return self._broker.margin_available // unit_cost

    def next(self):
        """Sync the environment with the current bar, verify both brokers agree, then act."""
        self.step = len(self.data.df) - 1
        # Keep the backtest step and the environment step in sync.
        self.env.current_step = self.step
        self.env.broker.current_step = self.step

        # Nothing to do until the lookback window is filled or once the
        # environment reports a terminal state.
        if self.step < self.env.lookback_window or self.env.is_terminal:
            return

        self.env.update_state()
        # Both simulations must report identical cash and equity; a mismatch
        # means the two brokers have diverged.
        assert self._broker._cash == self.env.broker.assets, f"{self.step}/{self.max_step}: {self._broker._cash} != {self.env.broker.assets}"
        assert self.equity == self.env.broker.equity, f"{self.step}/{self.max_step}: {self.equity} != {self.env.broker.equity}"

        # One bar before ``max_step`` the environment-side position is closed.
        if self.step + 1 == self.max_step:
            self.env.broker.position.close()

        action, _ = self.model.predict(self.env.state)

        # Action 0 (and any action that would not change the current side)
        # falls through without placing an order.
        if action == 1 and not self.position.is_long:
            if self.position.is_short:
                self.position.close()
            else:
                size = self._affordable_size()
                if size != 0:
                    self.buy(size=size)
            self.env.buy()

        elif action == 2 and not self.position.is_short:
            if self.position.is_long:
                self.position.close()
            else:
                size = self._affordable_size()
                if size != 0:
                    self.sell(size=size)
            self.env.sell()

In [6]:
# Train every algorithm on the training environment, save it, then backtest
# it on both data splits with identical Backtest settings.
results_dir = "./results"
# Created explicitly so saving models and plots does not depend on a library
# creating the directory as a side effect.
os.makedirs(results_dir, exist_ok=True)

models = [DQN, A2C, PPO, QRDQN]
for model_class in models:
    # Reuse the log directory configured at the top of the notebook.
    model = model_class("MlpPolicy", train_env, verbose=0, tensorboard_log=log_dir)
    model.learn(50000)
    model_name = model.__class__.__name__
    model.save(f"{results_dir}/{model_name}")

    # Same backtest for the training split and the held-out split.
    for split_name, split_df, split_env in (
        ("train", train_df, train_env),
        ("eval", eval_df, eval_env),
    ):
        bt = Backtest(split_df, DRLStrategy, cash=assets, commission=fee, trade_on_close=True, exclusive_orders=False)
        stats = bt.run(model=model, env=split_env)
        bt.plot(filename=f"{results_dir}/BackTest-{model_name}-{split_name}")
        print(stats)


Start                     2021-01-01 00:00:00
End                       2021-03-31 23:00:00
Duration                     89 days 23:00:00
Exposure Time [%]                   78.333333
Equity Final [$]                 19118.388655
Equity Peak [$]                  19118.388655
Return [%]                          91.183887
Buy & Hold Return [%]              149.948745
Return (Ann.) [%]                 1284.973063
Volatility (Ann.) [%]             1381.975725
Sharpe Ratio                         0.929809
Sortino Ratio                        22.60283
Calmar Ratio                        41.356454
Max. Drawdown [%]                  -31.070678
Avg. Drawdown [%]                   -3.056943
Max. Drawdown Duration       45 days 01:00:00
Avg. Drawdown Duration        1 days 16:00:00
# Trades                                   28
Win Rate [%]                        71.428571
Best Trade [%]                      23.334842
Worst Trade [%]                    -11.459471
Avg. Trade [%]                    