In [1]:
import argparse
from pathlib import Path

import numpy as np
import pandas as pd
import ray
from ray.rllib.agents import a3c, ddpg, dqn, impala, ppo
from ray.rllib.models.torch.recurrent_net import RecurrentNetwork
from ray.rllib.models.torch.visionnet import VisionNetwork
from ray.tune.registry import register_env
from ray.tune.stopper import MaximumIterationStopper
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from ta.volatility import average_true_range

from rl_bot.data_loader import DataLoader
from rl_bot.envs import create_env
from rl_bot.preprocessor import Preprocessor
from rl_bot.simulate import simulate
from rl_bot.train import train
from rl_bot.util import get_agent_class
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Working directories used by the notebook (raw data, temporary pickles,
# and the location passed as RLlib's "output" setting in the config cell).
DATA_PATH = Path("/home/napnel/ml-bot/data/BTCUSDT/").resolve()
TMP_PATH = Path("/home/napnel/ml-bot/tmp/").resolve()
EXPERIENCE_PATH = Path("/home/napnel/ml-bot/experience/").resolve()

# parents=True: DATA_PATH is nested below a "data" directory that may not exist
# on a fresh machine; without it mkdir raises FileNotFoundError.
for _directory in (DATA_PATH, TMP_PATH, EXPERIENCE_PATH):
    _directory.mkdir(parents=True, exist_ok=True)

In [3]:
# Algorithm name handed to get_agent_class() to pick the trainer.
ALGO = "A3C"

# Checkpoint file restored into the agent, written as adjacent string literals
# (one per path component) so the run and checkpoint numbers are easy to spot.
CPT = (
    "/home/napnel/ml-bot/ray_results/"
    "A3C_2022-02-02_20-33-02/"
    "A3C_TradingEnv_e17c0_00000_0_2022-02-02_20-33-02/"
    "checkpoint_000010/"
    "checkpoint-10"
)

In [4]:
# Resolve the trainer class for ALGO through the project helper.
# The second return value is presumably that algorithm's default config — verify in rl_bot.util.
# NOTE(review): `agent_config` is rebound to a plain dict in the config cell below,
# so the value returned here is not used by the visible cells.
agent_class, agent_config = get_agent_class(ALGO)

In [20]:
# Trainer-level overrides.
# NOTE(review): this rebinds the `agent_config` name obtained from
# get_agent_class(ALGO) and is not merged into `config` below — confirm intended.
agent_config = dict(grad_clip=40.0)

# Network settings, referenced from config["model"].
model_config = dict(
    fcnet_hiddens=[1024, 256],
    free_log_std=True,
)

# Environment settings: train and eval differ only in the pickled frame they read.
env_config_train = dict(
    df_path=str(TMP_PATH / "df_train.pkl"),
    window_size=25,
    fee=0,
)
env_config_eval = dict(env_config_train, df_path=str(TMP_PATH / "df_test.pkl"))

# Full trainer configuration.
config = dict(
    env="TradingEnv",
    env_config=env_config_train,
    log_level="WARN",
    framework="torch",
    num_workers=4,
    num_gpus=0,
    # disabled: observation_filter="MeanStdFilter"
    model=model_config,
    evaluation_config=dict(
        env_config=env_config_eval,
        explore=False,
    ),
    evaluation_interval=1,
    evaluation_num_episodes=1,
    timesteps_per_iteration=10000,
    # disabled: batch_mode="complete_episodes"
    seed=3407,
    metrics_smoothing_episodes=50,
    output=str(EXPERIENCE_PATH),
)

In [21]:
# Roll out one episode of the restored agent on the evaluation environment.
#
# `config` is modified in place: the evaluation env_config replaces the training
# one, the "evaluation_config" entry is removed, a single worker is used and
# logging is routed to NoopLogger.
#
# The pop is guarded so the cell can be re-executed: on a second run
# "evaluation_config" is already gone and `config` already points at the
# evaluation env, so the step is skipped instead of raising KeyError.
_eval_overrides = config.pop("evaluation_config", None)
if _eval_overrides is not None:
    config["env_config"] = _eval_overrides["env_config"].copy()
config["num_workers"] = 1
config["logger_config"] = {"type": ray.tune.logger.NoopLogger}

env = create_env(config["env_config"])
agent = agent_class(config=config)
agent.restore(CPT)

# Step the environment until it reports done, recording every action and reward.
actions = []
rewards = []
done = False
obs = env.reset()
while not done:
    # explore=False: request the non-exploratory action from the policy.
    action = agent.compute_single_action(obs, explore=False, clip_action=True)
    obs, reward, done, info = env.step(action)
    actions.append(action)
    rewards.append(reward)

# Convert to arrays for the vectorised inspection done in later cells.
actions = np.array(actions)
rewards = np.array(rewards)

2022-02-02 20:46:43,529	INFO trainable.py:467 -- Restored on 172.21.128.176 from checkpoint: /home/napnel/ml-bot/ray_results/A3C_2022-02-02_20-33-02/A3C_TradingEnv_e17c0_00000_0_2022-02-02_20-33-02/checkpoint_000010/checkpoint-10
2022-02-02 20:46:43,531	INFO trainable.py:475 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': 0, '_time_total': 101.62753534317017, '_episodes_total': 12}


3.508984327316284


In [25]:
# Total of all strictly negative entries in the recorded actions
# (0.0 means the rollout produced no negative action values).
np.sum(actions[actions < 0])

0.0

In [None]:
# Run the project's simulate helper against the same checkpoint in "eval" mode.
# NOTE(review): `config` here is the dict as mutated in place by the rollout cell
# above (evaluation env_config promoted to "env_config", "evaluation_config"
# removed, num_workers=1, NoopLogger) — confirm simulate() expects that shape.
simulate(agent_class, config, CPT, mode="eval")