In [1]:
import os
import pathlib
import glob
import random
import argparse
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from typing import Dict, List, Any, Optional

import ray
from ray import tune
from ray.tune.suggest.repeater import Repeater
from ray.tune.suggest.bayesopt import BayesOptSearch
from ray.tune.suggest.suggestion import Searcher
from ray.tune import CLIReporter
from src.envs import TradingEnv
from src.utils import backtest
from src.envs.actions import BuySell, LongNeutralShort
from src.trainable.cross_validation import ExperimentCV
from src.trainable.util import prepare_config_for_agent

In [2]:
args = {
    "ticker": "^N225",
    "algo": "DQN",
    "max_timesteps": 15000,
    "metric": "evaluation/episode_reward_mean",
    "mode": "max",
    "repeat": 2,
    "num_samples": 1,
    "seed": 340,
    "local_dir": "./experiments",
}
args = argparse.Namespace(**args)
args

Namespace(algo='DQN', local_dir='./experiments', max_timesteps=15000, metric='evaluation/episode_reward_mean', mode='max', num_samples=1, repeat=2, seed=340, ticker='^N225')

In [3]:
config = {
    "env": "TradingEnv",
    "env_config": {},
    "evaluation_interval": 1,
    "evaluation_num_episodes": 1,
    "evaluation_config": {
        "env_config": {},
        "explore": False,
    },
    "num_workers": 4,
    "framework": "torch",
    "log_level": "WARN",
    "timesteps_per_iteration": 5000,
    "num_gpus": 0,
    "seed": args.seed,
    "_algo": args.algo,
    "_ticker": args.ticker,
    "_train_start": "2010-01-01",
    "_train_years": 1,
    "_eval_years": 1,
    "_actions": "BuySell",
    # "lambda": tune.sample_from(lambda spec: random.uniform(0.9, 1.0)),
    # "lr": tune.sample_from(lambda spec: random.uniform(1e-3, 1e-5)),
}

In [4]:
ray.init(log_to_driver=False, num_gpus=0, local_mode=False)


timelog = str(datetime.date(datetime.now())) + "_" + datetime.time(datetime.now()).strftime("%H-%M")

reporter = CLIReporter(
    {
        "episode_reward_mean": "episode_reward",
        "evaluation/episode_reward_mean": "eval/episode_reward",
        "timesteps_total": "steps",
        "episodes_total": "episodes",
    },
    max_report_frequency=60,
)

re_searcher = Repeater(BayesOptSearch(), repeat=args.repeat)
# re_searcher = Repeater(Searcher, repeat=3)

analysis = tune.run(
    ExperimentCV,
    name=f"{args.algo}_{timelog}",
    num_samples=args.repeat * args.num_samples,
    metric=args.metric,
    mode=args.mode,
    stop={"timesteps_total": args.max_timesteps},
    config=config,
    progress_reporter=reporter,
    checkpoint_freq=1,
    local_dir=args.local_dir,
    trial_dirname_creator=lambda trial: str(trial).split("__")[0],
    resources_per_trial=tune.PlacementGroupFactory([{"CPU": 4}, {"CPU": 4}]),
    # resources_per_trial=tune.PlacementGroupFactory([{"CPU": 8}]),
    search_alg=re_searcher,
    verbose=1,
)


2021-11-18 14:30:39,437	INFO services.py:1252 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-11-18 14:30:48,533	ERROR syncer.py:73 -- Log sync requires rsync to be installed.


== Status ==
Memory usage on this node: 10.6/31.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/13.18 GiB heap, 0.0/6.59 GiB objects
Result logdir: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-18_14-30
Number of trials: 1/2 (1 PENDING)
+----------------------------------+----------+-------+-------------------+
| Trial name                       | status   | loc   |   __trial_index__ |
|----------------------------------+----------+-------+-------------------|
| ExperimentCV_TradingEnv_af9fad1c | PENDING  |       |                 0 |
+----------------------------------+----------+-------+-------------------+


== Status ==
Memory usage on this node: 11.9/31.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/13.18 GiB heap, 0.0/6.59 GiB objects
Result logdir: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-18_14-30
Number of trials: 2/2 (1 RUNNING, 1 TERMINATED)
+-----------------

2021-11-18 14:31:54,608	INFO tune.py:617 -- Total run time: 66.27 seconds (66.12 seconds for the tuning loop).


== Status ==
Memory usage on this node: 12.0/31.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/13.18 GiB heap, 0.0/6.59 GiB objects
Result logdir: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-18_14-30
Number of trials: 2/2 (2 TERMINATED)
+----------------------------------+------------+-------+-------------------+------------------+-----------------------+---------+------------+
| Trial name                       | status     | loc   |   __trial_index__ |   episode_reward |   eval/episode_reward |   steps |   episodes |
|----------------------------------+------------+-------+-------------------+------------------+-----------------------+---------+------------|
| ExperimentCV_TradingEnv_af9fad1c | TERMINATED |       |                 0 |        0.0536426 |             -0.258015 |   15024 |         60 |
| ExperimentCV_TradingEnv_afb7e150 | TERMINATED |       |                 1 |        0.0464222 |              0.216721 |   15024 

In [5]:
all_config = analysis.get_all_configs()
all_config

{'c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\DQN_2021-11-18_14-30\\ExperimentCV_TradingEnv_af9fad1c': {'__trial_index__': 0,
  '_actions': 'BuySell',
  '_algo': 'DQN',
  '_eval_years': 1,
  '_ticker': '^N225',
  '_train_start': '2010-01-01',
  '_train_years': 1,
  'env': 'TradingEnv',
  'env_config': {},
  'evaluation_config': {'env_config': {}, 'explore': False},
  'evaluation_interval': 1,
  'evaluation_num_episodes': 1,
  'framework': 'torch',
  'log_level': 'WARN',
  'num_gpus': 0,
  'num_workers': 4,
  'seed': 340,
  'timesteps_per_iteration': 5000},
 'c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\DQN_2021-11-18_14-30\\ExperimentCV_TradingEnv_afb7e150': {'__trial_index__': 1,
  '_actions': 'BuySell',
  '_algo': 'DQN',
  '_eval_years': 1,
  '_ticker': '^N225',
  '_train_start': '2010-01-01',
  '_train_years': 1,
  'env': 'TradingEnv',
  'env_config': {},
  'evaluation_config': {'env_config': {}, 'explore': False},
  'evaluation_interval': 1,
  'evaluati

In [6]:
checkpoint = analysis.best_checkpoint
checkpoint

'c:\\Users\\xiang-lab\\Documents\\DRL-Trading\\experiments\\DQN_2021-11-18_14-30\\ExperimentCV_TradingEnv_afb7e150\\checkpoint_000003\\checkpoint-3'

In [7]:
analysis.get_all_configs

<bound method Analysis.get_all_configs of <ray.tune.analysis.experiment_analysis.ExperimentAnalysis object at 0x000001CCDECA4608>>

In [8]:
# Backtest
all_config = analysis.get_all_configs()
agent = None
for trial in analysis.trials:
    config = all_config[trial.logdir].copy()
    config["_actions"] = BuySell
    # print(config)
    agent_class, algo_config = prepare_config_for_agent(config, pathlib.Path(trial.logdir).parent)

    algo_config["num_workers"] = 1
    algo_config["logger_config"] = {"type": ray.tune.logger.NoopLogger}
    # print(algo_config)
    # index = config.pop("__trial_index__")

    if agent is None:
        agent = agent_class(config=algo_config)
    else:
        agent.reset(algo_config)

    checkpoint = analysis.get_best_checkpoint(trial)
    agent.restore(checkpoint)

    # env_train = agent.workers.local_worker().env
    env_train = TradingEnv(**algo_config["env_config"])
    env_eval = TradingEnv(**algo_config["evaluation_config"]["env_config"])
    print(backtest(env_train, agent, save_dir=os.path.join(trial.logdir, "last-stats-train"), plot=False))
    print(backtest(env_eval, agent, save_dir=os.path.join(trial.logdir, "best-stats-eval"), plot=True))
    backtest(env_eval, agent="Buy&Hold", save_dir=os.path.join(trial.logdir, "buy-and-hold"), plot=False)

2021-11-18 14:31:54,923	INFO dqn.py:142 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2021-11-18 14:31:54,923	INFO trainer.py:760 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2021-11-18 14:32:03,012	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-18_14-30\ExperimentCV_TradingEnv_af9fad1c\checkpoint_000002\checkpoint-2
2021-11-18 14:32:03,013	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 2, '_timesteps_total': None, '_time_total': 13.619773864746094, '_episodes_total': 40}


Start                     2010-01-04 00:00:00
End                       2010-12-30 00:00:00
Duration                    360 days 00:00:00
Exposure Time [%]                   97.530864
Equity Final [$]                111518.700305
Equity Peak [$]                 118190.361063
Return [%]                            11.5187
Buy & Hold Return [%]               -3.996983
Return (Ann.) [%]                   11.969907
Volatility (Ann.) [%]               13.791396
Sharpe Ratio                         0.891974
Sortino Ratio                        1.314773
Calmar Ratio                         1.134242
Max. Drawdown [%]                  -10.553224
Avg. Drawdown [%]                   -3.192861
Max. Drawdown Duration      182 days 00:00:00
Avg. Drawdown Duration       33 days 00:00:00
# Trades                                   45
Win Rate [%]                        48.888889
Best Trade [%]                       8.896176
Worst Trade [%]                     -3.939312
Avg. Trade [%]                    

2021-11-18 14:32:04,403	INFO trainable.py:394 -- Restored on 172.22.130.108 from checkpoint: c:\Users\xiang-lab\Documents\DRL-Trading\experiments\DQN_2021-11-18_14-30\ExperimentCV_TradingEnv_afb7e150\checkpoint_000003\checkpoint-3
2021-11-18 14:32:04,403	INFO trainable.py:401 -- Current state after restoring: {'_iteration': 3, '_timesteps_total': None, '_time_total': 24.971120595932007, '_episodes_total': 60}


Start                     2011-01-04 00:00:00
End                       2011-12-30 00:00:00
Duration                    360 days 00:00:00
Exposure Time [%]                    97.55102
Equity Final [$]                 111772.20453
Equity Peak [$]                 115097.086062
Return [%]                          11.772205
Buy & Hold Return [%]              -18.683703
Return (Ann.) [%]                   12.128183
Volatility (Ann.) [%]               17.665607
Sharpe Ratio                         0.737499
Sortino Ratio                         1.13269
Calmar Ratio                         1.077061
Max. Drawdown [%]                  -11.260439
Avg. Drawdown [%]                   -5.310508
Max. Drawdown Duration      195 days 00:00:00
Avg. Drawdown Duration       69 days 00:00:00
# Trades                                   42
Win Rate [%]                        61.904762
Best Trade [%]                       7.223869
Worst Trade [%]                     -5.693272
Avg. Trade [%]                    

In [9]:
ray.shutdown()