In [1]:
import os
import time
import pickle

import pandas as pd
import torch as th
from sklearn.preprocessing import StandardScaler
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import configure_logger
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines3.common.callbacks import CallbackList
from stable_baselines3 import PPO, A2C, DQN

from envs.trading_env import TradingEnv, SRTradingEnv
from utils.data_loader import DataLoader
from utils.preprocessor import Preprocessor
from utils.backtest import backtest
from utils.callbacks import CustomEvalCallback, CustomCheckpointCallback
from utils.utils import set_random_seed, send_line_notification



# Config

In [2]:
# Experiment configuration: everything a reader is expected to tune lives here.
algo = PPO  # RL algorithm class to train (PPO, A2C and DQN are imported above)
n_cpus = 8  # number of parallel training environments (used as n_envs below)
verbose = 0  # verbosity level handed to configure_logger
# Total training budget in environment steps. Kept as an int (not 3e5) because
# it is passed to model.learn(), whose total_timesteps parameter is an integer.
n_timesteps = 300_000

In [3]:
# Call the project's seeding helper with its default arguments before any data
# or model work (presumably fixes the RNG seeds for reproducibility -- confirm
# in utils.utils).
set_random_seed()

# 1. Prepare Data

In [4]:
# Load the BTC-USD price history at a one-day interval through the project's
# DataLoader; the displayed frame has Open/High/Low/Close/Volume columns
# indexed by date.
data = DataLoader.fetch_data(
    "BTC-USD",
    interval="1d",
)
data

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800
2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200
2014-09-19,424.102997,427.834991,384.532013,394.795990,37919700
2014-09-20,394.673004,423.295990,389.882996,408.903992,36863600
2014-09-21,408.084991,412.425995,393.181000,398.821014,26580100
...,...,...,...,...,...
2021-08-27,46894.554688,49112.785156,46394.281250,49058.667969,34511076995
2021-08-28,49072.585938,49283.503906,48499.238281,48902.402344,28568103401
2021-08-29,48911.250000,49644.113281,47925.855469,48829.832031,25889650240
2021-08-30,48834.851562,48925.605469,46950.273438,47054.984375,31847007016


# 2. Preprocessing

In [5]:
# Split data
# Positional 80/20 split: the first 80% of rows become the training set and
# the remaining rows the test set. No shuffling is applied.
data_len = len(data)
split_at = int(data_len * 0.8)
data_train, data_test = data.iloc[:split_at, :], data.iloc[split_at:, :]

# Report the first and last index label of each partition.
print(f"Train Sapn: {data_train.index[0]} to {data_train.index[-1]}")
print(f"Test: Span {data_test.index[0]} to {data_test.index[-1]}")

Train Sapn: 2014-09-17 00:00:00 to 2020-04-06 00:00:00
Test: Span 2020-04-07 00:00:00 to 2021-08-31 00:00:00


In [6]:
# Compute the feature frames for each partition independently (train first,
# then test), so the test features are derived from test prices only.
features_train, features_test = (
    Preprocessor.extract_features(prices) for prices in (data_train, data_test)
)

# Re-align prices and features per partition. In the recorded output the
# features start later than the raw prices (2014-10-20 vs 2014-09-17), so
# align_date presumably trims both frames to their common dates -- confirm in
# utils.preprocessor.
(data_train, features_train), (data_test, features_test) = (
    Preprocessor.align_date(prices, feats)
    for prices, feats in ((data_train, features_train), (data_test, features_test))
)
features_train

Unnamed: 0_level_0,DMI_Diff,MACD,RSI,MFI,CMF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-10-20,-4.586603,4.553430,48.756987,67.275681,0.072686
2014-10-21,-3.494113,4.121292,50.393685,64.905179,0.090156
2014-10-22,-3.412275,3.483206,48.858026,57.863374,0.100156
2014-10-23,-12.296532,1.374317,39.250383,41.722074,0.105009
2014-10-24,-12.956175,0.025295,39.226211,45.832680,0.167972
...,...,...,...,...,...
2020-04-02,-4.250508,169.823422,50.680893,62.724523,0.066235
2020-04-03,-4.041038,169.641528,49.851568,54.460562,0.066599
2020-04-04,-3.925794,172.777630,51.745213,61.430514,0.116273
2020-04-05,-3.745142,163.999144,50.573894,62.139303,0.110001


In [7]:
# Standardise the features. The scaler is fitted on the training partition
# only and then applied unchanged to the test partition.
scaler = StandardScaler()
scaled_train = scaler.fit_transform(features_train)
scaled_test = scaler.transform(features_test)

# Wrap the scaled values back into DataFrames indexed like the price frames.
# Only the index is supplied, so the columns get pandas' default labels.
features_train = pd.DataFrame(scaled_train, index=data_train.index)
features_test = pd.DataFrame(scaled_test, index=data_test.index)

# 3. Define Environment

In [8]:
# Environment class and vectorisation strategy shared by training and evaluation.
# To run the environments in subprocesses instead, set vec_env_cls = SubprocVecEnv.
env_class = TradingEnv
vec_env_cls = DummyVecEnv

# Constructor arguments for each partition's environment.
train_env_kwargs = {"df": data_train, "features": features_train}
eval_env_kwargs = {"df": data_test, "features": features_test}

# n_cpus parallel copies for training, a single copy for evaluation.
vec_env_train = make_vec_env(
    env_class,
    n_envs=n_cpus,
    env_kwargs=train_env_kwargs,
    vec_env_cls=vec_env_cls,
)
vec_env_eval = make_vec_env(
    env_class,
    n_envs=1,
    env_kwargs=eval_env_kwargs,
    vec_env_cls=vec_env_cls,
)

# 4. Define Agent

In [9]:
algo_name = algo.__name__

# Policy network: a shared 128-unit layer followed by separate 64 -> 32 branches
# for the policy (pi) and the value function (vf), all with PReLU activations.
policy_kwargs = dict(
    activation_fn=th.nn.PReLU,
    net_arch=[128, {"pi": [64, 32], "vf": [64, 32]}],
)
model: BaseAlgorithm = algo(
    "MlpPolicy",
    vec_env_train,
    device="cpu",
    policy_kwargs=policy_kwargs,
)
print(model.policy)

# Set Logger
# The logger writes under ./logs/ in a run directory named after the algorithm;
# that directory is reused below as the location for saved artefacts.
logger = configure_logger(
    verbose,
    tensorboard_log="./logs/",
    tb_log_name=algo_name,
)
model.set_logger(logger)
log_path = model.logger.dir
print(f"Training results are saved in {log_path}")

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential(
      (0): Linear(in_features=100, out_features=128, bias=True)
      (1): PReLU(num_parameters=1)
    )
    (policy_net): Sequential(
      (0): Linear(in_features=128, out_features=64, bias=True)
      (1): PReLU(num_parameters=1)
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): PReLU(num_parameters=1)
    )
    (value_net): Sequential(
      (0): Linear(in_features=128, out_features=64, bias=True)
      (1): PReLU(num_parameters=1)
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): PReLU(num_parameters=1)
    )
  )
  (action_net): Linear(in_features=32, out_features=2, bias=True)
  (value_net): Linear(in_features=32, out_features=1, bias=True)
)
Training results are saved in ./logs/PPO_4


# 5. Training

In [10]:
# Make Callback
# Evaluate on the held-out environment for one episode per evaluation and save
# the best model into log_path. eval_freq is divided by n_cpus; the recorded
# run shows one evaluation every 30,000 total timesteps (presumably because the
# callback counts vectorised steps -- confirm in utils.callbacks).
eval_callback = CustomEvalCallback(vec_env_eval, best_model_save_path=log_path, eval_freq=30000 // n_cpus, n_eval_episodes=1)
callback = CallbackList([eval_callback])

# Training
# learn() takes an integer step budget; cast in case n_timesteps was configured
# as a float such as 3e5.
start_time = time.time()
model = model.learn(int(n_timesteps), callback=callback)
total_time = time.time() - start_time
print(f"Took {total_time:.2f}s, {n_timesteps / total_time:.2f} FPS")

# Save
model.save(os.path.join(log_path, "final_model"))
# Persist the fitted scaler next to the model so the same normalisation can be
# re-applied later. The file must be opened for binary *writing* ('wb'):
# opening it with 'rb' fails because the file does not exist yet, and
# pickle.dump cannot write to a read-only handle.
with open(os.path.join(log_path, 'scaler.pickle'), 'wb') as f:
    pickle.dump(scaler, f)
# Drop the references to the training objects; the backtest cells reload the
# models from disk.
del model, vec_env_train, vec_env_eval

Eval num_timesteps=30000, episode_reward=-11.99
New best mean reward!
Eval num_timesteps=60000, episode_reward=2.94
New best mean reward!
Eval num_timesteps=90000, episode_reward=3.26
New best mean reward!
Eval num_timesteps=120000, episode_reward=20.39
New best mean reward!
Eval num_timesteps=150000, episode_reward=22.82
New best mean reward!
Eval num_timesteps=180000, episode_reward=19.89
Eval num_timesteps=210000, episode_reward=19.02
Eval num_timesteps=240000, episode_reward=22.20
Eval num_timesteps=270000, episode_reward=29.57
New best mean reward!
Eval num_timesteps=300000, episode_reward=24.29
Took 516.48s, 580.86 FPS


# 6. Backtest

In [11]:
# Prepare Environment for backtest
single_env_train = env_class(data_train, features_train)
single_env_eval = env_class(data_test, features_test)

In [12]:
# Backtest final model
final_model = algo.load(os.path.join(log_path, "final_model"), device="cpu")

stats = pd.DataFrame()
stats["train"] = backtest(final_model, single_env_train, plot=True)
stats["test"] = backtest(final_model, single_env_eval, plot=True)
stats

Unnamed: 0,train,test
Start,2014-10-20 00:00:00,2020-05-11 00:00:00
End,2020-04-06 00:00:00,2021-08-31 00:00:00
Duration,1995 days 00:00:00,477 days 00:00:00
Exposure Time [%],98.947896,95.578947
Equity Final [$],22174582114045.238281,616959.791686
Equity Peak [$],22424038794881.609375,1496825.985576
Return [%],2217458111.404524,-38.304021
Buy & Hold Return [%],1799.406085,447.716064
Return (Ann.) [%],2104.475259,-31.003199
Volatility (Ann.) [%],1619.315827,48.03318


In [13]:
# Backtest best model
best_model = algo.load(os.path.join(log_path, "best_model"), device='cpu')

stats = pd.DataFrame()
stats["train"] = backtest(best_model, single_env_train, plot=True)
stats["test"] = backtest(best_model, single_env_eval, plot=True)
stats.to_csv(os.path.join(log_path, "backtest_stats.csv"))
stats

Unnamed: 0,train,test
Start,2014-10-20 00:00:00,2020-05-11 00:00:00
End,2020-04-06 00:00:00,2021-08-31 00:00:00
Duration,1995 days 00:00:00,477 days 00:00:00
Exposure Time [%],98.947896,95.578947
Equity Final [$],26668794687337.882812,1386085.055199
Equity Peak [$],26968809766403.789062,2196270.801885
Return [%],2666879368.733788,38.608506
Buy & Hold Return [%],1799.406085,447.716064
Return (Ann.) [%],2180.140147,28.515151
Volatility (Ann.) [%],1663.224457,95.470533


In [14]:
# Report completion through the project's notification helper (the name
# suggests a LINE message -- confirm in utils.utils).
send_line_notification("Training | Finished")