In [1]:
import pandas as pd
import pickle
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from stable_baselines3 import PPO

from envs.trading_env import TradingEnv
from utils.data_loader import DataLoader
from utils.preprocessor import Preprocessor
from utils.backtest import backtest

In [2]:
data = DataLoader.fetch_data('BTC-USD')
features = Preprocessor.extract_features(data)
data, features = Preprocessor.align_date(data, features)

[*********************100%***********************]  1 of 1 completed


In [3]:
kf = KFold(n_splits=3, shuffle=False)
for fold, (index_train, index_valid) in enumerate(kf.split(features)):
    data_train = data.iloc[index_train, :]
    data_valid = data.iloc[index_valid, :]
    features_train = features.iloc[index_train, :]
    features_valid = features.iloc[index_valid, :]
    
    scaler = StandardScaler()
    features_train = pd.DataFrame(scaler.fit_transform(features_train), index=features_train.index)
    features_valid = pd.DataFrame(scaler.transform(features_valid), index=features_valid.index)

    env_train = TradingEnv(data_train, features_train)
    env_valid = TradingEnv(data_valid, features_valid)

    model_class = PPO
    model_name = PPO.__name__
    model = model_class('MlpPolicy', env_train, tensorboard_log='./logs')
    epoch = 1e4
    eval_freq = epoch / 10
    model.learn(total_timesteps=epoch, eval_env=env_valid, eval_freq=eval_freq, n_eval_episodes=1)
    
    model.save(f'./models/{model_name}_{fold}')
    with open(f'./models/env_valid_{fold}.pickle', 'wb') as f:
        pickle.dump(env_valid, f)

Eval num_timesteps=1000, episode_reward=-5.28 +/- 0.00
Episode length: 807.00 +/- 0.00
New best mean reward!
Eval num_timesteps=2000, episode_reward=-5.28 +/- 0.00
Episode length: 807.00 +/- 0.00
Eval num_timesteps=3000, episode_reward=15.38 +/- 0.00
Episode length: 807.00 +/- 0.00
New best mean reward!
Eval num_timesteps=4000, episode_reward=15.38 +/- 0.00
Episode length: 807.00 +/- 0.00
Eval num_timesteps=5000, episode_reward=27.89 +/- 0.00
Episode length: 807.00 +/- 0.00
New best mean reward!
Eval num_timesteps=6000, episode_reward=27.89 +/- 0.00
Episode length: 807.00 +/- 0.00
Eval num_timesteps=7000, episode_reward=40.93 +/- 0.00
Episode length: 807.00 +/- 0.00
New best mean reward!
Eval num_timesteps=8000, episode_reward=40.93 +/- 0.00
Episode length: 807.00 +/- 0.00
Eval num_timesteps=9000, episode_reward=41.20 +/- 0.00
Episode length: 807.00 +/- 0.00
New best mean reward!
Eval num_timesteps=10000, episode_reward=41.20 +/- 0.00
Episode length: 807.00 +/- 0.00
Eval num_timesteps=

In [4]:
model = PPO.load('./models/PPO_0.zip')
with open('./models/env_valid_0', 'rb') as f:
    env_valid = pickle.load(f)

FileNotFoundError: [Errno 2] No such file or directory: './models/env_valid_0'

In [None]:
stats_valid = backtest(model, env_valid, plot=True, plot_filename=f"./results/BackTest-PPO-valid")
stats_valid