In [1]:
import pandas as pd

from stable_baselines3 import DQN, PPO, A2C
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from sb3_contrib import QRDQN

from utils.data_loader import DataLoader
from utils.feature_enginner import FeatureEngineer
from envs.base import BaseTradingEnv
from models.features_extractor import CustomCNN

### Prepare Dataset

In [2]:
# Load the candle data from disk.
df = DataLoader.load_data("./data/3600/ethusd/2021-01-01.csv")

# Split the rows in half: the first half trains the agent, the second half evaluates it.
split_idx = len(df) // 2
df_train, df_eval = df[:split_idx], df[split_idx:]

# FeatureEngineer exposes the raw data and the normalized features,
# each keyed by "train" / "val".
feature_enginner = FeatureEngineer(df_train, df_eval)
data_train, data_val = (
    feature_enginner.entire_data["train"],
    feature_enginner.entire_data["val"],
)
features_train, features_eval = (
    feature_enginner.entire_normalized_features["train"],
    feature_enginner.entire_normalized_features["val"],
)

In [3]:
# Preview the first rows of the raw training frame.
data_train.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-03 01:00:00,763.26,770.72,762.63,769.78,3796.167
2021-01-03 02:00:00,770.36,784.59,765.7,783.61,7542.043
2021-01-03 03:00:00,782.81,786.05,776.31,780.64,4480.9
2021-01-03 04:00:00,781.17,785.52,773.85,780.87,954.168
2021-01-03 05:00:00,781.07,788.88,778.58,786.51,1414.752


In [4]:
# Preview the first rows of the normalized training features passed to the environment.
features_train.head()

Unnamed: 0_level_0,Price Log Diff,Volume Log Diff,SMA-20 Log Diff,SMA-50 Log Diff,MACD,RSI,BB Sigma-1 Upper Bound,BB Sigma-1 Lower Bound,BB Sigma-2 Upper Bound,BB Sigma-2 Lower Bound
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-01-03 01:00:00,-0.731082,0.399727,1.240058,0.639219,0.741964,1.273165,1.166905,1.14039,1.005889,0.935612
2021-01-03 02:00:00,-1.343157,-0.969791,1.705924,0.973054,0.902788,1.6796,1.48273,1.696311,1.177664,1.504663
2021-01-03 03:00:00,0.331145,0.73507,1.469065,0.871528,0.918387,1.470357,1.18697,1.55298,0.861988,1.451831
2021-01-03 04:00:00,0.014022,2.184168,1.367556,0.848126,0.87423,1.477364,1.036559,1.516252,0.686979,1.471406
2021-01-03 05:00:00,-0.520892,-0.556514,1.480696,0.967852,0.850743,1.649243,1.08511,1.681039,0.681729,1.66086


### Define Environment

#### Hyperparameters for Environment

In [5]:
# Both values are forwarded to TradingEnv when the train/eval environments are built.
# presumably the number of past bars included in each observation — TODO confirm in BaseTradingEnv
window_size = 20
# presumably the per-trade fee expressed as a fraction of trade value — TODO confirm in BaseTradingEnv
fee = 0.000665

To change the set of actions available to the agent, define your own `Actions` enum.  
The default provides only BUY and SELL.

In [6]:
from enum import Enum
class Actions(Enum):
    """Discrete actions handed to the trading environment via its ``actions`` argument."""

    Sell = 0
    Buy = 1
    # Optional third action, currently disabled.
    # NOTE(review): confirm BaseTradingEnv handles a Hold action before enabling.
    # Hold = 2

In [7]:
class TradingEnv(BaseTradingEnv):
    """Trading environment whose reward is based on position and trade returns."""

    def _calculate_reward(self):
        """Return the reward for the current step.

        The reward is ``self.position.profit_or_loss_pct`` unless the most
        recently closed trade exited at ``self.current_datetime``; in that
        case the trade's ``"ReturnPct"`` is returned instead.
        """
        open_position_pct = self.position.profit_or_loss_pct

        # No trade has been closed yet: only the open position can be rewarded.
        if self.closed_trades.empty:
            return open_position_pct

        last_trade = self.closed_trades.iloc[-1, :]

        # The latest trade was closed on an earlier step, so it was already accounted for.
        if last_trade["ExitTime"] != self.current_datetime:
            return open_position_pct

        # A trade was closed on this very step: reward its return.
        return last_trade["ReturnPct"]

In [8]:
# Both environments share the same settings and differ only in the data they replay.
env_kwargs = dict(window_size=window_size, fee=fee, actions=Actions)

train_env = TradingEnv(data_train, features_train, **env_kwargs)
eval_env = TradingEnv(data_val, features_eval, **env_kwargs)

### Define Agent

In [9]:
# Alternative agent configurations, kept for reference (uncomment one to switch).
#
# PPO with the custom CNN features extractor:
# policy_kwargs = dict(
#     features_extractor_class=CustomCNN,
#     features_extractor_kwargs=dict(features_dim=32),
# )
# model = PPO("CnnPolicy", train_env, verbose=0, tensorboard_log="./logs", policy_kwargs=policy_kwargs)
#
# PPO with an MLP policy:
# model = PPO("MlpPolicy", train_env, verbose=1, tensorboard_log="./logs")
#
# DQN with an MLP policy:
# model_class = DQN
# model = model_class("MlpPolicy", train_env, tensorboard_log="./logs", target_update_interval=500, learning_starts=500, learning_rate=1e-3)

# Active configuration: A2C with an MLP policy, logging to TensorBoard under ./logs.
model_class = A2C
model = model_class("MlpPolicy", train_env, tensorboard_log="./logs")

# The class name is reused as the file stem for the saved model and the backtest plots.
model_name = type(model).__name__

### Training And Evaluation

In [10]:
# Evaluate on the held-out environment every 1000 training steps, one episode per evaluation.
# An explicit EvalCallback is used instead of the `eval_env` / `eval_freq` /
# `n_eval_episodes` keyword arguments of `learn()`: those arguments were deprecated
# and later removed from Stable-Baselines3, while the callback works on old and new
# releases alike. The env is wrapped in Monitor so episode rewards/lengths are
# reported from the unmodified episode statistics.
eval_callback = EvalCallback(Monitor(eval_env), eval_freq=1000, n_eval_episodes=1)

model.learn(total_timesteps=15000, callback=eval_callback)

# Persist the trained agent under ./results/<model class name>.
model.save(f"./results/{model_name}")

# Drop the in-memory agent so the following cell has to reload it from disk.
del model

Eval num_timesteps=1000, episode_reward=66.97 +/- 0.00
Episode length: 2452.00 +/- 0.00
New best mean reward!
Eval num_timesteps=2000, episode_reward=54.74 +/- 0.00
Episode length: 2452.00 +/- 0.00
Eval num_timesteps=3000, episode_reward=53.14 +/- 0.00
Episode length: 2452.00 +/- 0.00
Eval num_timesteps=4000, episode_reward=62.89 +/- 0.00
Episode length: 2452.00 +/- 0.00
Eval num_timesteps=5000, episode_reward=82.59 +/- 0.00
Episode length: 2452.00 +/- 0.00
New best mean reward!
Eval num_timesteps=6000, episode_reward=80.94 +/- 0.00
Episode length: 2452.00 +/- 0.00
Eval num_timesteps=7000, episode_reward=79.12 +/- 0.00
Episode length: 2452.00 +/- 0.00
Eval num_timesteps=8000, episode_reward=104.40 +/- 0.00
Episode length: 2452.00 +/- 0.00
New best mean reward!
Eval num_timesteps=9000, episode_reward=106.59 +/- 0.00
Episode length: 2452.00 +/- 0.00
New best mean reward!
Eval num_timesteps=10000, episode_reward=109.77 +/- 0.00
Episode length: 2452.00 +/- 0.00
New best mean reward!
Eval n

In [11]:
# Reload the agent from the file written by the training cell, then print it
# to confirm that loading succeeded.
model = model_class.load(f"./results/{model_name}")
print(model)

<stable_baselines3.a2c.a2c.A2C object at 0x0000026A86A5F438>


In [12]:
from utils import backtest

In [13]:
# Backtest the reloaded agent on both splits; each call also writes a plot under ./results.
stats_train = backtest(
    model,
    train_env,
    plot=True,
    plot_filename=f"./results/BackTest-{model_name}-train",
)
stats_eval = backtest(
    model,
    eval_env,
    plot=True,
    plot_filename=f"./results/BackTest-{model_name}-eval",
)

# Put the two result sets side by side, one column per split, and display the table.
stats = pd.DataFrame().assign(train=stats_train, eval=stats_eval)
stats

NameError: name 'pd' is not defined