In [1]:
import os
import time
import pickle

import pandas as pd
from sklearn.preprocessing import StandardScaler
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import configure_logger
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines3.common.callbacks import CallbackList
from stable_baselines3 import PPO, A2C

from envs.trading_env import TradingEnv, TradingEnv_v2
from utils.data_loader import DataLoader
from utils.preprocessor import Preprocessor
from utils.backtest import backtest
from utils.callbacks import CustomEvalCallback, CustomCheckpointCallback
from utils.utils import set_random_seed, send_line_notification
from utils.config import POLICY_KWARGS, A2C_HYPERPARAMETER


# Config

In [2]:
algo = PPO
n_cpus = 8
verbose = 0
n_timesteps = 3e5

In [3]:
set_random_seed()

# 1. Prepare Data

In [4]:
data = DataLoader.fetch_data("BTC-USD", interval="1d")
data

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800
2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200
2014-09-19,424.102997,427.834991,384.532013,394.795990,37919700
2014-09-20,394.673004,423.295990,389.882996,408.903992,36863600
2014-09-21,408.084991,412.425995,393.181000,398.821014,26580100
...,...,...,...,...,...
2021-08-30,48834.851562,48925.605469,46950.273438,47054.984375,31847007016
2021-08-31,47024.339844,48189.550781,46750.093750,47166.687500,34730363427
2021-09-01,47099.773438,49111.089844,46562.437500,48847.027344,39139399125
2021-09-02,48807.847656,50343.421875,48652.320312,49327.722656,39508070319


# 2. Preprocessing

In [5]:
# Split data
data_len = len(data)
data_train = data.iloc[: int(data_len * 0.8), :]
data_test = data.iloc[int(data_len * 0.8) :, :]
print(f"Train Sapn: {data_train.index[0]} to {data_train.index[-1]}")
print(f"Test: Span {data_test.index[0]} to {data_test.index[-1]}")

Train Sapn: 2014-09-17 00:00:00 to 2020-04-09 00:00:00
Test: Span 2020-04-10 00:00:00 to 2021-09-03 00:00:00


In [6]:
features_train = Preprocessor.extract_features(data_train)
features_test = Preprocessor.extract_features(data_test)
data_train, features_train = Preprocessor.align_date(data_train, features_train)
data_test, features_test = Preprocessor.align_date(data_test, features_test)
features_train

Unnamed: 0_level_0,DMI_Diff,MACD,STC,RSI,StochRSI,MFI,CMF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-11-27,11.149497,-0.855557,24.373770,50.149369,0.015584,52.859564,-0.121805
2014-11-28,6.539378,-0.354460,46.507056,52.802277,0.106584,61.658212,-0.103362
2014-11-29,7.938696,-0.099079,68.204734,52.378818,0.159107,66.758370,-0.168129
2014-11-30,7.655292,0.213258,84.102367,53.453635,0.149006,63.438094,-0.180264
2014-12-01,7.898296,0.457770,92.051184,53.977936,0.102972,59.287548,-0.189688
...,...,...,...,...,...,...,...
2020-04-05,-3.745142,163.999144,99.993896,50.573894,-0.020104,62.139303,0.110001
2020-04-06,2.855376,183.388838,99.996948,57.146334,0.002539,62.615990,0.155062
2020-04-07,5.314330,182.239223,99.998474,55.567481,-0.012826,62.618116,0.100709
2020-04-08,5.127950,184.009981,99.999237,57.650885,0.010543,62.347072,0.091503


In [7]:
scaler = StandardScaler()
features_train = pd.DataFrame(scaler.fit_transform(features_train), index=data_train.index)
features_test = pd.DataFrame(scaler.transform(features_test), index=data_test.index)

# 3. Define Environment

In [8]:
env_class = TradingEnv
vec_env_cls = DummyVecEnv  # or SubprocVecEnv
vec_env_train = make_vec_env(env_class, n_envs=n_cpus, env_kwargs={"df": data_train, "features": features_train}, vec_env_cls=vec_env_cls)
vec_env_eval = make_vec_env(env_class, n_envs=1, env_kwargs={"df": data_test, "features": features_test}, vec_env_cls=vec_env_cls)

# 4. Define Agnet

In [9]:
algo_name = algo.__name__
model: BaseAlgorithm = algo("MlpPolicy", vec_env_train, policy_kwargs=POLICY_KWARGS, device="cpu")
print(model.policy)

# Set Logger
logger = configure_logger(verbose, tensorboard_log="./logs/", tb_log_name=algo_name)
model.set_logger(logger)
log_path = model.logger.dir
print(f"Training results are saved in {log_path}")

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential(
      (0): Linear(in_features=140, out_features=32, bias=True)
      (1): PReLU(num_parameters=1)
    )
    (policy_net): Sequential(
      (0): Linear(in_features=32, out_features=16, bias=True)
      (1): PReLU(num_parameters=1)
      (2): Linear(in_features=16, out_features=8, bias=True)
      (3): PReLU(num_parameters=1)
    )
    (value_net): Sequential(
      (0): Linear(in_features=32, out_features=16, bias=True)
      (1): PReLU(num_parameters=1)
      (2): Linear(in_features=16, out_features=8, bias=True)
      (3): PReLU(num_parameters=1)
    )
  )
  (action_net): Linear(in_features=8, out_features=2, bias=True)
  (value_net): Linear(in_features=8, out_features=1, bias=True)
)


  return f(*args, **kwds)


Training results are saved in ./logs/PPO_7


# 5. Training

In [10]:
# Make Callback
eval_callback = CustomEvalCallback(vec_env_eval, best_model_save_path=log_path, eval_freq=50000 // n_cpus, n_eval_episodes=1)
callback = CallbackList([eval_callback])

# Training
start_time = time.time()
model = model.learn(n_timesteps, callback=callback)
total_time = time.time() - start_time
print(f"Took {total_time:.2f}s, {n_timesteps / total_time:.2f} FPS")

# Save
model.save(os.path.join(log_path, "final_model"))
with open(os.path.join(log_path, 'scaler.pickle'), 'wb') as f:
    pickle.dump(scaler, f)
del model, vec_env_train, vec_env_eval

Eval num_timesteps=30000, episode_reward=-1.29
New best mean reward!
Eval num_timesteps=60000, episode_reward=1.40
New best mean reward!
Eval num_timesteps=90000, episode_reward=4.90
New best mean reward!
Eval num_timesteps=120000, episode_reward=11.67
New best mean reward!
Eval num_timesteps=150000, episode_reward=22.53
New best mean reward!
Eval num_timesteps=180000, episode_reward=22.77
New best mean reward!
Eval num_timesteps=210000, episode_reward=29.21
New best mean reward!
Eval num_timesteps=240000, episode_reward=30.76
New best mean reward!
Eval num_timesteps=270000, episode_reward=31.15
New best mean reward!
Eval num_timesteps=300000, episode_reward=33.10
New best mean reward!
Took 499.07s, 601.11 FPS


FileNotFoundError: [Errno 2] No such file or directory: './logs/PPO_7\\scaler.pickle'

# 6. Backtest

In [11]:
# Prepare Environment for backtest
single_env_train = env_class(data_train, features_train)
single_env_eval = env_class(data_test, features_test)

In [12]:
# Backtest final model
final_model = algo.load(os.path.join(log_path, "final_model"), device="cpu")

stats = pd.DataFrame()
stats["train"] = backtest(final_model, single_env_train, plot=True)
stats["test"] = backtest(final_model, single_env_eval, plot=True)
stats

Unnamed: 0,train,test
Start,2014-11-27 00:00:00,2020-07-08 00:00:00
End,2020-04-09 00:00:00,2021-09-03 00:00:00
Duration,1960 days 00:00:00,422 days 00:00:00
Exposure Time [%],98.878123,94.761905
Equity Final [$],156960.624098,111658.83016
Equity Peak [$],157582.222562,123351.083598
Return [%],56.960624,11.65883
Buy & Hold Return [%],1875.299345,427.202074
Return (Ann.) [%],8.753297,10.057935
Volatility (Ann.) [%],4.554263,28.032581


In [13]:
# Backtest best model
best_model = algo.load(os.path.join(log_path, "best_model"), device='cpu')

stats = pd.DataFrame()
stats["train"] = backtest(best_model, single_env_train, plot=True)
stats["test"] = backtest(best_model, single_env_eval, plot=True)
stats.to_csv(os.path.join(log_path, "backtest_stats.csv"))
stats

Unnamed: 0,train,test
Start,2014-11-27 00:00:00,2020-07-08 00:00:00
End,2020-04-09 00:00:00,2021-09-03 00:00:00
Duration,1960 days 00:00:00,422 days 00:00:00
Exposure Time [%],98.878123,94.761905
Equity Final [$],152197.366512,129364.73784
Equity Peak [$],153980.772905,134424.451641
Return [%],52.197367,29.364738
Buy & Hold Return [%],1875.299345,427.202074
Return (Ann.) [%],8.131283,25.075818
Volatility (Ann.) [%],4.63119,29.766264


In [None]:
send_line_notification("Training | Finished")