In [1]:
import os
import pathlib
import glob
import random
import argparse
from numpy import isin
import pandas as pd
from datetime import datetime
from typing import Dict, List, Any, Optional

import ray
from ray import tune
from ray.tune import Callback
from ray.tune import ProgressReporter
from ray.tune import CLIReporter
from ray.tune.logger import UnifiedLogger
from ray.tune.schedulers.pb2 import PB2
from src.envs import BaseTradingEnv
from src.utils import DataLoader, Preprocessor, backtest
from src.utils.misc import get_agent_class
from src.trainable.cross_validation_v2 import ExperimentCV

In [2]:
args = {
    "ticker": "^N225",
    "algo": "A2C",
    "max_timesteps": 10000,
    "metric": "evaluation/episode_reward_mean",
    "mode": "max",
    "num_samples": 1,
    "criteria": "timesteps_total",
    "perturb": 0.25,
    "seed": 3407,
    "logdir": "./experiments",
}
args = argparse.Namespace(**args)
args

Namespace(algo='A2C', criteria='timesteps_total', logdir='./experiments', max_timesteps=10000, metric='evaluation/episode_reward_mean', mode='max', num_samples=1, perturb=0.25, seed=3407, ticker='^N225')

In [3]:
# ray.shutdown()
ray.init(log_to_driver=False, num_gpus=0, local_mode=True)

config = {
    "env": "BaseTradingEnv",
    "env_config": {},
    "evaluation_interval": 1,
    "evaluation_num_episodes": 1,
    "evaluation_config": {
        "env_config": {},
        "explore": False,
    },
    "num_workers": 2,
    "framework": "torch",
    "log_level": "WARN",
    "timesteps_per_iteration": 2500,
    "num_gpus": 0,
    "seed": args.seed,
    "_algo": args.algo,
    "_ticker": args.ticker,
    "_n_splits": 3,
    # "lambda": tune.sample_from(lambda spec: random.uniform(0.9, 1.0)),
    # "lr": tune.sample_from(lambda spec: random.uniform(1e-3, 1e-5)),
}

# pb2 = PB2(
#     time_attr="timesteps_total",
#     metric="evaluation/episode_reward_mean",
#     mode="max",
#     perturbation_interval=2500,
#     quantile_fraction=0.25,  # copy bottom % with top %
#     # Specifies the hyperparam search space
#     hyperparam_bounds={
#         "lambda": [0.9, 1.0],
#         "lr": [1e-3, 1e-5],
#     },
# )

timelog = str(datetime.date(datetime.now())) + "_" + datetime.time(datetime.now()).strftime("%H-%M")

reporter = CLIReporter(
    {
        "episode_reward_mean": "episode_reward",
        "evaluation/episode_reward_mean": "eval/episode_reward",
        "timesteps_total": "steps",
        "episodes_total": "episodes",
    },
    max_report_frequency=20,
)

2021-11-15 10:00:59,835	INFO services.py:1252 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


In [4]:
analysis = tune.run(
    ExperimentCV,
    name=f"{args.algo}_{timelog}",
    num_samples=args.num_samples,
    metric=args.metric,
    mode=args.mode,
    stop={"timesteps_total": args.max_timesteps},
    config=config,
    progress_reporter=reporter,
    local_dir=args.logdir,
    resources_per_trial=tune.PlacementGroupFactory([{"CPU": 4}, {"CPU": 4}]),
    # scheduler=pb2,
    # reuse_actors=True,
    # verbose=2,
)



:task_name:bundle_reservation_check_func
:actor_name:ExperimentCV
[*********************100%***********************]  1 of 1 completed


2021-11-15 10:01:11,325	INFO trainer.py:760 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


:actor_name:RolloutWorker
:actor_name:RolloutWorker


2021-11-15 10:01:11,725	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_0\checkpoint_000000\checkpoint-0
2021-11-15 10:01:11,726	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 0, '_timesteps_total': None, '_time_total': 0.0, '_episodes_total': None}


['c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\A2C_2021-11-15_10-01\\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\\']
['c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\A2C_2021-11-15_10-01\\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\\splits_0\\checkpoint_000000']
c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_0\checkpoint_000000\checkpoint-0
agent logdir:  c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_2\


2021-11-15 10:01:16,780	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_1\checkpoint_000000\checkpoint-0
2021-11-15 10:01:16,781	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 0, '_timesteps_total': None, '_time_total': 0.0, '_episodes_total': None}


['c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\A2C_2021-11-15_10-01\\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\\splits_1\\checkpoint_000000']
c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_1\checkpoint_000000\checkpoint-0
agent logdir:  c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_2\


2021-11-15 10:01:23,885	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_2\checkpoint_000000\checkpoint-0
2021-11-15 10:01:23,886	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 0, '_timesteps_total': None, '_time_total': 0.0, '_episodes_total': None}


['c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\A2C_2021-11-15_10-01\\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\\splits_2\\checkpoint_000000']
c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_2\checkpoint_000000\checkpoint-0
agent logdir:  c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_2\


2021-11-15 10:01:34,295	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_0\checkpoint_000001\checkpoint-1
2021-11-15 10:01:34,295	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 1, '_timesteps_total': None, '_time_total': 4.988656997680664, '_episodes_total': 2}


== Status ==
Memory usage on this node: 10.5/31.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/13.66 GiB heap, 0.0/6.83 GiB objects
Result logdir: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01
Number of trials: 1/1 (1 RUNNING)
+-----------------------------------------+----------+-------+
| Trial name                              | status   | loc   |
|-----------------------------------------+----------+-------|
| ExperimentCV_BaseTradingEnv_84a04_00000 | RUNNING  |       |
+-----------------------------------------+----------+-------+


Result for ExperimentCV_BaseTradingEnv_84a04_00000:
  agent_timesteps_total: 5200.0
  custom_metrics: {}
  date: 2021-11-15_10-01-34
  done: false
  episode_len_mean: 1218.0
  episode_media: {}
  episode_reward_max: -0.16228584394204995
  episode_reward_mean: -0.37043221600976156
  episode_reward_min: -0.4715229780280037
  episodes_this_iter: 4.0
  episodes_total: 4.0
  evaluation:
    

2021-11-15 10:01:44,709	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_1\checkpoint_000001\checkpoint-1
2021-11-15 10:01:44,709	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 1, '_timesteps_total': None, '_time_total': 7.054163217544556, '_episodes_total': 4}


['c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\A2C_2021-11-15_10-01\\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\\splits_1\\checkpoint_000000', 'c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\A2C_2021-11-15_10-01\\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\\splits_1\\checkpoint_000001']
c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_1\checkpoint_000001\checkpoint-1
agent logdir:  c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_2\


2021-11-15 10:01:54,856	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_2\checkpoint_000001\checkpoint-1
2021-11-15 10:01:54,857	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 1, '_timesteps_total': None, '_time_total': 10.304469347000122, '_episodes_total': 6}


['c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\A2C_2021-11-15_10-01\\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\\splits_2\\checkpoint_000000', 'c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\A2C_2021-11-15_10-01\\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\\splits_2\\checkpoint_000001']
c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_2\checkpoint_000001\checkpoint-1
agent logdir:  c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01\ExperimentCV_BaseTradingEnv_84a04_00000_0_2021-11-15_10-01-09\splits_2\


2021-11-15 10:02:04,597	INFO tune.py:617 -- Total run time: 55.77 seconds (55.61 seconds for the tuning loop).


== Status ==
Memory usage on this node: 10.3/31.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/13.66 GiB heap, 0.0/6.83 GiB objects
Result logdir: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\A2C_2021-11-15_10-01
Number of trials: 1/1 (1 RUNNING)
+-----------------------------------------+----------+----------------------+------------------+-----------------------+---------+------------+
| Trial name                              | status   | loc                  |   episode_reward |   eval/episode_reward |   steps |   episodes |
|-----------------------------------------+----------+----------------------+------------------+-----------------------+---------+------------|
| ExperimentCV_BaseTradingEnv_84a04_00000 | RUNNING  | 172.22.130.108:14740 |        -0.370432 |            0.00213514 |    5200 |          4 |
+-----------------------------------------+----------+----------------------+------------------+-----------------------+---------+

In [5]:
analysis.results_df

Unnamed: 0_level_0,episode_reward_max,episode_reward_min,episode_reward_mean,episode_len_mean,episodes_this_iter,num_healthy_workers,timesteps_total,agent_timesteps_total,done,episodes_total,...,evaluation.sampler_perf.mean_raw_obs_processing_ms,evaluation.sampler_perf.mean_inference_ms,evaluation.sampler_perf.mean_action_processing_ms,evaluation.sampler_perf.mean_env_wait_ms,evaluation.sampler_perf.mean_env_render_ms,config.evaluation_config.explore,info.learner.default_policy.learner_stats.allreduce_latency,info.learner.default_policy.learner_stats.policy_entropy,info.learner.default_policy.learner_stats.policy_loss,info.learner.default_policy.learner_stats.vf_loss
trial_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
84a04_00000,0.137811,-0.554593,-0.268113,1218.0,6.666667,2.0,13000.0,13000.0,True,10.666667,...,0.10989,3.301562,0.031104,0.379002,0.0,False,0.0,219.655467,-0.267977,0.014139


In [6]:
trials = analysis.trials
trials

[ExperimentCV_BaseTradingEnv_84a04_00000]

In [7]:
trainer = Trainer(config=analysis.best_config)

NameError: name 'Trainer' is not defined

In [None]:
trainer.restore(analysis.best_checkpoint)

2021-11-11 15:24:30,210	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\PPO_2021-11-11_15-08\Trainer_BaseTradingEnv_cba21_00000_0_2021-11-11_15-08-31\splits_0\checkpoint_000025\checkpoint-25
2021-11-11 15:24:30,210	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 25, '_timesteps_total': None, '_time_total': 191.8985846042633, '_episodes_total': 82}
2021-11-11 15:24:30,222	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\PPO_2021-11-11_15-08\Trainer_BaseTradingEnv_cba21_00000_0_2021-11-11_15-08-31\splits_1\checkpoint_000025\checkpoint-25
2021-11-11 15:24:30,223	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 25, '_timesteps_total': None, '_time_total': 185.76498436927795, '_episodes_total': 82}
2021-11-11 15:24:30,235	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xia

In [None]:
checkpoint = pathlib.Path(analysis.best_checkpoint)
checkpoint.parent

WindowsPath('c:/Users/xiang-lab/Documents/DRL-Trading/experiments/PPO_2021-11-11_15-08/Trainer_BaseTradingEnv_cba21_00000_0_2021-11-11_15-08-31')

In [None]:
for i, agent in enumerate(trainer.agents):
    env_train = agent.workers.local_worker().env
    env_eval = agent.evaluation_workers.local_worker().env
    backtest(env_train, agent, save_dir=os.path.join(checkpoint.parent, f"splits_{i}", "last-stats-train"), plot=False)
    backtest(env_eval, agent, save_dir=os.path.join(checkpoint.parent, f"splits_{i}", "last-stats-eval"), plot=True)

