In [1]:
import pandas as pd
import os

from envs import TradingEnv
from utils import preprocessing

from stable_baselines3 import DQN, A2C, PPO
from sb3_contrib import QRDQN

In [2]:
# Experiment settings shared by the cells below.
lookback_window = 20  # forwarded to TradingEnv(lookback_window=...)
assets = 10000  # forwarded to TradingEnv and backtest as `assets`
fee = 0.000665  # forwarded to TradingEnv and backtest; 0.00019 was an earlier alternative

# Directory for log output; created up front so later cells can write to it.
log_dir = "./logs/"
os.makedirs(log_dir, exist_ok=True)

In [3]:
# Read the candle file, parsing the first column as dates, and index it by "Date".
df = pd.read_csv("./data/3600/ethusd/2021-01-01.csv", parse_dates=[0])
df = df.set_index("Date")

# Chronological 50/50 split: first half for training, second half for evaluation.
split_at = len(df) // 2
train_df = df.iloc[:split_at]
eval_df = df.iloc[split_at:]

# Show the first three rows of each half as a quick sanity check.
print(train_df.head(3), eval_df.head(3), sep="\n")

                       Open    High     Low   Close    Volume
Date                                                         
2021-01-01 00:00:00  734.21  734.28  723.79  731.63  3015.608
2021-01-01 01:00:00  731.42  739.26  731.42  736.66   912.377
2021-01-01 02:00:00  737.14  737.46  725.51  727.42  4502.349
                       Open    High     Low   Close    Volume
Date                                                         
2021-04-01 00:00:00  1829.2  1839.7  1825.0  1834.7   929.029
2021-04-01 01:00:00  1834.6  1854.5  1833.0  1849.6  2446.906
2021-04-01 02:00:00  1849.9  1862.6  1838.6  1846.2  1712.854


In [4]:
# Settings shared by both environments; only the price data differs between them.
env_kwargs = dict(lookback_window=lookback_window, assets=assets, fee=fee)

# Each environment receives its raw frame plus the preprocessed version of that same frame.
# The environments are used as-is; the make_vec_env wrapping is left disabled here.
train_env = TradingEnv(df=train_df, preprocessed_df=preprocessing(train_df), **env_kwargs)
eval_env = TradingEnv(df=eval_df, preprocessed_df=preprocessing(eval_df), **env_kwargs)

In [5]:
from utils import backtest

In [6]:
# Train every algorithm on the training environment with an MLP policy,
# save it, then backtest it on both the training and the evaluation split.
SEED = 42  # fixed seed so that re-running this cell gives comparable results
TRAIN_TIMESTEPS = 5000  # timesteps passed to `learn` for each algorithm
# Rows of the backtest report that are removed before printing.
DROPPED_ROWS = ["Best Trade [%]", "Worst Trade [%]", "_strategy", "_equity_curve", "_trades"]

models = [DQN, A2C, PPO, QRDQN]
for model_class in models:
    # The class name doubles as the label for saved files and printed reports.
    model_name = model_class.__name__
    # Log to the directory configured (and created) in the settings cell.
    model = model_class("MlpPolicy", train_env, verbose=0, tensorboard_log=log_dir, seed=SEED)
    model.learn(TRAIN_TIMESTEPS)
    model.save(f"./results/{model_name}")

    stats_train = backtest(model, train_env, assets, fee, plot=False, plot_filename=f"./results/BackTest-{model_name}-train")
    stats_eval = backtest(model, eval_env, assets, fee, plot=False, plot_filename=f"./results/BackTest-{model_name}-eval")

    # Build the two-column report in one step instead of growing an empty frame.
    stats = pd.DataFrame({"train": stats_train, "eval": stats_eval})
    stats = stats.drop(DROPPED_ROWS, axis=0)
    print(model_name)
    print(stats)

DQN
                                      train                 eval
Start                   2021-01-01 00:00:00  2021-04-01 00:00:00
End                     2021-03-31 23:00:00  2021-06-30 00:00:00
Duration                   89 days 23:00:00     90 days 00:00:00
Exposure Time [%]                 95.277778            88.246182
Equity Final [$]                  2462.1602         11160.863056
Equity Peak [$]                10012.971749         14627.769389
Return [%]                       -75.378398            11.608631
Buy & Hold Return [%]            149.948745            21.191475
Return (Ann.) [%]                -99.661806            55.351435
Volatility (Ann.) [%]              0.562059           196.285187
Sharpe Ratio                            0.0             0.281995
Sortino Ratio                           0.0             0.950665
Calmar Ratio                            0.0             1.317467
Max. Drawdown [%]                -76.802745            -42.01354
Avg. Drawdown [%]    

In [7]:
# Build fresh environments for the CNN experiment that follows.
# NOTE(review): the output recorded for this cell is a TypeError about an
# 'obs_flatten' keyword that is not passed anywhere below, so that output
# looks stale -- re-run the cell to confirm it executes cleanly.
def _build_env(frame):
    """Create a TradingEnv over `frame` using the notebook-wide settings."""
    return TradingEnv(lookback_window=lookback_window, df=frame, preprocessed_df=preprocessing(frame), assets=assets, fee=fee)


train_env = _build_env(train_df)
eval_env = _build_env(eval_df)

TypeError: __init__() got an unexpected keyword argument 'obs_flatten'

In [None]:
from models.cnn_features_extractor import CustomCNN

In [None]:
# PPO with a CNN policy that uses the project's CustomCNN as feature extractor.
policy_kwargs = {
    "features_extractor_class": CustomCNN,
    "features_extractor_kwargs": {"features_dim": 128},
}
model = PPO(
    "CnnPolicy",
    train_env,
    policy_kwargs=policy_kwargs,
    tensorboard_log="./logs",
    verbose=0,
)
model_name = type(model).__name__
model.learn(10000)
# NOTE: this is the same "./results/PPO" path the MlpPolicy loop saves its PPO
# model to, so that earlier checkpoint is overwritten here.
model.save(f"./results/{model_name}")
# Drop the in-memory model; the next cell reloads it from disk.
del model

In [None]:
# Reload the PPO checkpoint from disk and backtest it on both data splits.
model = PPO.load("./results/PPO")
# Take the label from the loaded model itself instead of relying on a
# `model_name` left behind by an earlier cell; otherwise skipping the training
# cell would label this report and its plot files with a different algorithm.
model_name = model.__class__.__name__
stats_train = backtest(model, train_env, assets, fee, plot=True, plot_filename=f"./results/BackTest-{model_name}-train")
stats_eval = backtest(model, eval_env, assets, fee, plot=True, plot_filename=f"./results/BackTest-{model_name}-eval")

# Build the two-column report in one step, then remove the rows listed below
# before printing.
stats = pd.DataFrame({"train": stats_train, "eval": stats_eval})
stats = stats.drop(["Best Trade [%]", "Worst Trade [%]", "_strategy", "_equity_curve", "_trades"], axis=0)
print(model_name)
print(stats)

PPO
                                      train                 eval
Start                   2021-01-01 00:00:00  2021-04-01 00:00:00
End                     2021-03-31 23:00:00  2021-06-30 00:00:00
Duration                   89 days 23:00:00     90 days 00:00:00
Exposure Time [%]                 87.546296            87.875983
Equity Final [$]                8406.227844          5574.452909
Equity Peak [$]                14686.509833         12143.905628
Return [%]                       -15.937722           -44.255471
Buy & Hold Return [%]            149.948745            21.191475
Return (Ann.) [%]                -50.544363           -90.405551
Volatility (Ann.) [%]             51.791585            11.474288
Sharpe Ratio                            0.0                  0.0
Sortino Ratio                           0.0                  0.0
Calmar Ratio                            0.0                  0.0
Max. Drawdown [%]                -48.860988            -58.56959
Avg. Drawdown [%]    