In [1]:
import os
import pathlib
import glob
import random
import argparse
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from typing import Dict, List, Any, Optional

import ray
from ray import tune
from ray.tune.suggest.repeater import Repeater
from ray.tune.suggest.bayesopt import BayesOptSearch
from ray.tune.suggest.suggestion import Searcher
from ray.tune import CLIReporter
from src.envs import BaseTradingEnv
from src.utils import backtest
from src.utils.misc import prepare_config_for_agent
from src.envs.base_env import LNS

from src.trainable.cross_validation_repeater import ExperimentCV

In [2]:
# Experiment settings, exposed as attributes (args.ticker, args.algo, ...) in
# the same way a parsed command line would be.
args = argparse.Namespace(
    ticker="^N225",
    algo="DQN",
    max_timesteps=15000,
    metric="evaluation/episode_reward_mean",
    mode="max",
    num_samples=1,
    criteria="timesteps_total",
    perturb=0.25,
    seed=340,
    local_dir="./experiments",
)
# Bare expression so the notebook displays the resulting Namespace.
args

Namespace(algo='DQN', criteria='timesteps_total', local_dir='./experiments', max_timesteps=15000, metric='evaluation/episode_reward_mean', mode='max', num_samples=1, perturb=0.25, seed=340, ticker='^N225')

In [3]:
# Start the local Ray runtime. ignore_reinit_error lets this cell be re-run in a
# live kernel without raising because Ray is already initialised.
ray.init(log_to_driver=False, num_gpus=0, local_mode=False, ignore_reinit_error=True)

# How many times the Repeater re-runs each suggested configuration.
NUM_REPEATS = 2

# Trainer/experiment configuration handed to every trial.
# NOTE(review): the underscore-prefixed keys are presumably consumed by
# ExperimentCV / prepare_config_for_agent rather than by RLlib itself -- confirm.
config = {
    "env": "BaseTradingEnv",
    "env_config": {},
    "evaluation_interval": 1,
    "evaluation_num_episodes": 1,
    "evaluation_config": {
        "env_config": {},
        "explore": False,
    },
    "num_workers": 4,
    "framework": "torch",
    "log_level": "WARN",
    "timesteps_per_iteration": 5000,
    "num_gpus": 0,
    "seed": args.seed,
    "_algo": args.algo,
    "_ticker": args.ticker,
    "_train_start": "2010-01-01",
    "_train_years": 1,
    "_eval_years": 1,
    # "lambda": tune.sample_from(lambda spec: random.uniform(0.9, 1.0)),
    # "lr": tune.sample_from(lambda spec: random.uniform(1e-3, 1e-5)),
}

# Read the clock once so the date and time parts of the experiment name cannot
# disagree when the cell happens to run across midnight.
timelog = datetime.now().strftime("%Y-%m-%d_%H-%M")

# Console progress table: result key -> column header.
reporter = CLIReporter(
    metric_columns={
        "episode_reward_mean": "episode_reward",
        "evaluation/episode_reward_mean": "eval/episode_reward",
        "timesteps_total": "steps",
        "episodes_total": "episodes",
    },
    max_report_frequency=60,
)

re_searcher = Repeater(BayesOptSearch(), repeat=NUM_REPEATS)
# re_searcher = Repeater(Searcher, repeat=3)

analysis = tune.run(
    ExperimentCV,
    name=f"{args.algo}_{timelog}",
    # With a Repeater, num_samples is the total trial count and should be a
    # multiple of `repeat`: args.num_samples suggestions x NUM_REPEATS runs each.
    num_samples=args.num_samples * NUM_REPEATS,
    metric=args.metric,
    mode=args.mode,
    # Stop criterion comes from args so it stays in sync with the settings cell.
    stop={args.criteria: args.max_timesteps},
    config=config,
    progress_reporter=reporter,
    checkpoint_freq=1,
    local_dir=args.local_dir,
    # Shorten trial directory names to the part before the first "__".
    trial_dirname_creator=lambda trial: str(trial).split("__")[0],
    resources_per_trial=tune.PlacementGroupFactory([{"CPU": 4}, {"CPU": 4}]),
    # resources_per_trial=tune.PlacementGroupFactory([{"CPU": 8}]),
    search_alg=re_searcher,
    verbose=1,
)


2021-11-17 22:17:02,330	INFO services.py:1252 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-11-17 22:17:11,435	ERROR syncer.py:73 -- Log sync requires rsync to be installed.


== Status ==
Memory usage on this node: 9.8/31.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/13.63 GiB heap, 0.0/6.81 GiB objects
Result logdir: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-17_22-17
Number of trials: 1/2 (1 PENDING)
+--------------------------------------+----------+-------+-------------------+
| Trial name                           | status   | loc   |   __trial_index__ |
|--------------------------------------+----------+-------+-------------------|
| ExperimentCV_BaseTradingEnv_ac511498 | PENDING  |       |                 0 |
+--------------------------------------+----------+-------+-------------------+


== Status ==
Memory usage on this node: 11.2/31.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/13.63 GiB heap, 0.0/6.81 GiB objects
Result logdir: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-17_22-17
Number of trials: 2/2 (1 RUNNING, 1 TERMINATED)

2021-11-17 22:18:19,112	INFO tune.py:617 -- Total run time: 67.88 seconds (67.72 seconds for the tuning loop).


== Status ==
Memory usage on this node: 11.4/31.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/13.63 GiB heap, 0.0/6.81 GiB objects
Result logdir: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-17_22-17
Number of trials: 2/2 (2 TERMINATED)
+--------------------------------------+------------+-------+-------------------+------------------+-----------------------+---------+------------+
| Trial name                           | status     | loc   |   __trial_index__ |   episode_reward |   eval/episode_reward |   steps |   episodes |
|--------------------------------------+------------+-------+-------------------+------------------+-----------------------+---------+------------|
| ExperimentCV_BaseTradingEnv_ac511498 | TERMINATED |       |                 0 |       0.00833306 |            -0.0685113 |   15024 |         60 |
| ExperimentCV_BaseTradingEnv_ac6a411e | TERMINATED |       |                 1 |       0.0338586  |             

In [4]:
# Gather the config of every trial; the result is keyed by trial log directory.
all_config = analysis.get_all_configs()
# Bare expression so the notebook displays the mapping.
all_config

{'c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\DQN_2021-11-17_22-17\\ExperimentCV_BaseTradingEnv_ac511498': {'__trial_index__': 0,
  '_algo': 'DQN',
  '_eval_years': 1,
  '_ticker': '^N225',
  '_train_start': '2010-01-01',
  '_train_years': 1,
  'env': 'BaseTradingEnv',
  'env_config': {},
  'evaluation_config': {'env_config': {}, 'explore': False},
  'evaluation_interval': 1,
  'evaluation_num_episodes': 1,
  'framework': 'torch',
  'log_level': 'WARN',
  'num_gpus': 0,
  'num_workers': 4,
  'seed': 340,
  'timesteps_per_iteration': 5000},
 'c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\DQN_2021-11-17_22-17\\ExperimentCV_BaseTradingEnv_ac6a411e': {'__trial_index__': 1,
  '_algo': 'DQN',
  '_eval_years': 1,
  '_ticker': '^N225',
  '_train_start': '2010-01-01',
  '_train_years': 1,
  'env': 'BaseTradingEnv',
  'env_config': {},
  'evaluation_config': {'env_config': {}, 'explore': False},
  'evaluation_interval': 1,
  'evaluation_num_episodes': 1,
  'framework'

In [5]:
# Checkpoint path that the analysis object reports as best for this run.
checkpoint = analysis.best_checkpoint
# Bare expression so the notebook displays the path.
checkpoint

'c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\DQN_2021-11-17_22-17\\ExperimentCV_BaseTradingEnv_ac6a411e\\checkpoint_000001\\checkpoint-1'

In [6]:
# Backtest
# For every trial: rebuild the agent config, restore the trial's best
# checkpoint, and backtest on the training and evaluation environments, plus a
# Buy&Hold baseline on the evaluation environment.
#
# Loop-local names carry a `trial_` prefix so this cell does not overwrite the
# notebook-level `config` (Tune config) and `checkpoint` (best checkpoint of the
# whole run) that earlier cells defined and displayed.
all_config = analysis.get_all_configs()
agent = None
for trial in analysis.trials:
    trial_config = all_config[trial.logdir].copy()
    agent_class, algo_config = prepare_config_for_agent(trial_config, pathlib.Path(trial.logdir).parent)

    # A single worker and no logging are enough for backtesting.
    algo_config["num_workers"] = 1
    algo_config["logger_config"] = {"type": ray.tune.logger.NoopLogger}

    # Build the agent once and reuse it for later trials.
    # NOTE(review): the return value of reset() is ignored; confirm that the
    # agent class actually applies the new config on reset.
    if agent is None:
        agent = agent_class(config=algo_config)
    else:
        agent.reset(algo_config)

    trial_checkpoint = analysis.get_best_checkpoint(trial)
    agent.restore(trial_checkpoint)

    # Environments are built fresh from this trial's env configs rather than
    # taken from the agent's local worker.
    env_train = BaseTradingEnv(**algo_config["env_config"])
    env_eval = BaseTradingEnv(**algo_config["evaluation_config"]["env_config"])
    print(backtest(env_train, agent, save_dir=os.path.join(trial.logdir, "last-stats-train"), plot=False))
    print(backtest(env_eval, agent, save_dir=os.path.join(trial.logdir, "best-stats-eval"), plot=True))
    backtest(env_eval, agent="Buy&Hold", save_dir=os.path.join(trial.logdir, "buy-and-hold"), plot=False)

2021-11-17 22:18:19,532	INFO dqn.py:142 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2021-11-17 22:18:19,532	INFO trainer.py:760 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2021-11-17 22:18:27,962	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-17_22-17\ExperimentCV_BaseTradingEnv_ac511498\checkpoint_000002\checkpoint-2
2021-11-17 22:18:27,963	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 2, '_timesteps_total': None, '_time_total': 14.638916730880737, '_episodes_total': 40}


Start                     2010-01-04 00:00:00
End                       2010-12-30 00:00:00
Duration                    360 days 00:00:00
Exposure Time [%]                   67.078189
Equity Final [$]                112288.390445
Equity Peak [$]                 112703.671695
Return [%]                           12.28839
Buy & Hold Return [%]               -3.996983
Return (Ann.) [%]                   12.771436
Volatility (Ann.) [%]               11.313421
Sharpe Ratio                         1.123254
Sortino Ratio                        1.793014
Calmar Ratio                         1.961608
Max. Drawdown [%]                   -6.510699
Avg. Drawdown [%]                   -1.620386
Max. Drawdown Duration      109 days 00:00:00
Avg. Drawdown Duration       21 days 00:00:00
# Trades                                   66
Win Rate [%]                        53.030303
Best Trade [%]                       3.813424
Worst Trade [%]                     -6.125002
Avg. Trade [%]                    

2021-11-17 22:18:29,329	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-17_22-17\ExperimentCV_BaseTradingEnv_ac6a411e\checkpoint_000001\checkpoint-1
2021-11-17 22:18:29,330	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 1, '_timesteps_total': None, '_time_total': 10.714633464813232, '_episodes_total': 20}


Start                     2011-01-04 00:00:00
End                       2011-12-30 00:00:00
Duration                    360 days 00:00:00
Exposure Time [%]                   73.061224
Equity Final [$]                 91946.803133
Equity Peak [$]                 113590.603615
Return [%]                          -8.053197
Buy & Hold Return [%]              -18.683703
Return (Ann.) [%]                     -8.2735
Volatility (Ann.) [%]               13.738851
Sharpe Ratio                        -0.562769
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                  -21.466079
Avg. Drawdown [%]                   -3.626745
Max. Drawdown Duration      219 days 00:00:00
Avg. Drawdown Duration       33 days 00:00:00
# Trades                                   54
Win Rate [%]                        44.444444
Best Trade [%]                       9.702088
Worst Trade [%]                     -5.908458
Avg. Trade [%]                    

In [7]:
# Shut down the Ray runtime that was started with ray.init() earlier in the notebook.
ray.shutdown()