In [None]:
import os
import pandas as pd
import ray
from ray import tune
from ray.rllib.models import ModelCatalog

from src.envs.reward_func import equity_log_return_reward
from src.utils.data_loader import DataLoader
from src.utils.preprocessor import Preprocessor
from src.utils.trainer import Trainer
from src.utils.backtest import backtest

In [None]:
# Symbols this notebook has been pointed at; `ticker` selects the one used for this run.
tickers = ["ES=F", "^GSPC", "^N225"]
ticker = "^N225"
# Boundaries of the chronological train/eval split. `.loc` label slices include
# both endpoints (assumes `data` is indexed by date — TODO confirm against DataLoader).
train_end = "2019-12-31"
eval_start = "2020-01-01"

data = DataLoader.fetch_data(ticker, interval="1d", start="2009-01-01", end="2021-09-30")
# data = DataLoader.load_data("./data/LTCUSD.csv")
data_len = len(data)

# Split by calendar date. Positional alternative (80/20):
#   data.iloc[: int(data_len * 0.8), :] / data.iloc[int(data_len * 0.8) :, :]
_data_train = data.loc[:train_end, :]
_data_eval = data.loc[eval_start:, :]

# Fail with a clear message instead of an IndexError on `.index[0]` below
# when the fetched range does not cover one side of the split.
if len(_data_train) == 0 or len(_data_eval) == 0:
    raise ValueError(
        f"Empty split for {ticker}: train={len(_data_train)} rows, eval={len(_data_eval)} rows "
        f"(train_end={train_end}, eval_start={eval_start})"
    )

print(f"Training Span: {_data_train.index[0]} to {_data_train.index[-1]}, Length: {len(_data_train)}")
print(f"Evaluating Span: {_data_eval.index[0]} to {_data_eval.index[-1]}, Length: {len(_data_eval)}")

In [None]:
# Run the project's preprocessing on each split separately; each call is
# unpacked into a (data, features) pair that the env configs consume later.
data_train, features_train = Preprocessor.preprocessing(_data_train)
data_eval, features_eval = Preprocessor.preprocessing(_data_eval)

In [None]:
# data_train, features_train, data_eval, features_eval = Preprocessor.preprocessing(_data_train, _data_eval, use_tech_indicators=False)
# features_train

In [None]:
# --- Environment settings ---------------------------------------------------
window_size = 20
# NOTE(review): `fee` is not referenced by any config in this cell — confirm
# whether the environment is supposed to receive it.
fee = 0.001
reward_func = equity_log_return_reward

# The training and evaluation environments share reward and window settings;
# only the data frame and feature set differ.
train_env_config = {
    "df": data_train,
    "features": features_train,
    "reward_func": reward_func,
    "window_size": window_size,
}
eval_env_config = dict(train_env_config, df=data_eval, features=features_eval)

# --- Overrides applied on top of the agent's default config -----------------
user_config = {
    "env": "DescTradingEnv",
    "env_config": train_env_config,

    # Evaluation runs on the held-out split.
    "evaluation_num_workers": 4,
    "evaluation_interval": 1,
    "evaluation_num_episodes": 10,
    "evaluation_config": {
        "env_config": eval_env_config,
        "explore": True,  # alternative previously tried: False
    },

    # Optional model override (currently disabled, defaults are used):
    # "model": {
    #     "fcnet_hiddens": [64, 16],
    #     "custom_model": "bn_model",
    #     # "vf_share_layers": True,
    #     # "use_attention": True
    # },

    "num_workers": 4,  # parallelism
    "framework": "torch",
    "log_level": "WARN",  # "WARN", "DEBUG"
    "seed": 3407,
    "batch_mode": "complete_episodes",

    # Other disabled options:
    # "observation_filter": "MeanStdFilter",
    # "n_step": 5,
    # "noisy": True
}

In [None]:
# Look up the PPO agent class and its accompanying config through the project's Trainer helper.
agent_class, config = Trainer.get_agent_class("PPO")
# Overlay the notebook's settings in place. Assuming `config` is a plain dict, this is a
# shallow merge: a top-level key in user_config (e.g. "evaluation_config") replaces the
# existing value wholesale rather than being merged into it.
config.update(user_config)
# Instantiate the agent with the merged config.
agent = agent_class(config=config)

In [None]:
# Train through the project's Trainer helper; the result is unpacked into the
# agent and `last_checkpoint`. Note that `timesteps_total` is passed as a float (5e5).
agent, last_checkpoint = Trainer.learn(agent, timesteps_total=5e5, checkpoint_freq=10)
# Alternative to training: restore a saved checkpoint (note the path mixes "/" and "\" separators).
# agent.restore("./ray_results/PPO\checkpoint_000071\checkpoint-71")

In [None]:
# Build standalone environment instances from the env name and env configs held in `config`:
# one from the top-level (training) env_config, one from the evaluation env_config.
env_train = Trainer.get_env(config["env"], config["env_config"])
env_eval = Trainer.get_env(config["env"], config["evaluation_config"]["env_config"])

In [None]:
# Backtest the agent on the raw evaluation split using the evaluation environment;
# plotting is requested with the output name "PPO_backtest".
stats = backtest(_data_eval, env_eval, agent, plot=True, plot_filename="PPO_backtest")
# Bare last expression so the notebook displays the result.
stats

In [None]:
# stats = pd.DataFrame()
# stats["train"] = backtest(_data_train, env_train, agent, plot=True, plot_filename="PPO_train_backtest")
# stats["eval"] = backtest(_data_eval, env_eval, agent, plot=True, plot_filename="PPO_eval_backtest")
# stats

In [None]:
# from pprint import pprint
# observation = env_train.reset()
# policy_id = "default_policy"
# policy = agent.get_policy(policy_id)
# local_worker = agent.workers.local_worker()
# pp = local_worker.preprocessors[policy_id]
# done = False
# while not done:
#     obs, reward, done, _ = env_train.step(0)
#     filtered_observation = local_worker.filters[policy_id](observation)
#     print(local_worker.get_filters())
# pprint(observation)
# pprint(filtered_observation)

In [None]:
# NOTE(review): every other project import in this notebook goes through the `src.` package
# (e.g. `src.utils.trainer`), while this one imports `utils.utils` directly — confirm the path
# resolves on a fresh kernel, and consider moving the import up to the top import cell.
from utils.utils import send_line_notification
# Send a notification once execution reaches this point (i.e. after the cells above have run).
send_line_notification('Lab | Training Finished')

In [None]:
# Display the model object attached to the agent's policy (bare expression -> cell output).
agent.get_policy().model