Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix backtest #719

Merged
merged 10 commits into from Dec 7, 2021
144 changes: 119 additions & 25 deletions docs/component/strategy.rst
Expand Up @@ -84,31 +84,125 @@ Usage & Example
====================
``Portfolio Strategy`` can be specified in the ``Intraday Trading(Backtest)``; examples are as follows.

.. code-block:: python

from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import backtest
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
}
BACKTEST_CONFIG = {
"limit_threshold": 0.095,
"account": 100000000,
"benchmark": BENCHMARK,
"deal_price": "close",
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,

}
# use default strategy
strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)

# pred_score is the `prediction score` output by Model
report_normal, positions_normal = backtest(
pred_score, strategy=strategy, **BACKTEST_CONFIG
)
- daily

.. code-block:: python

from pprint import pprint

import qlib
import pandas as pd
from qlib.utils.time import Freq
from qlib.utils import flatten_dict
from qlib.contrib.evaluate import backtest_daily
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import TopkDropoutStrategy

# init qlib
qlib.init(provider_uri=<qlib data dir>)

CSI300_BENCH = "SH000300"
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
# pred_score, pd.Series
"signal": pred_score,
}


strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
report_normal, positions_normal = backtest_daily(
start_time="2017-01-01", end_time="2020-08-01", strategy=strategy_obj
)
# `analysis_freq` has to be defined before it is handed to `risk_analysis`;
# without this line the example stops with a NameError.
# `backtest_daily` is run above, so the frequency string is built from "day"
# (presumably this yields "1day" - confirm against `Freq.parse`).
analysis_freq = "{0}{1}".format(*Freq.parse("day"))

# Excess return is the strategy return minus the benchmark return,
# evaluated once ignoring trading cost and once with the cost subtracted.
excess_return = report_normal["return"] - report_normal["bench"]
analysis = dict()
analysis["excess_return_without_cost"] = risk_analysis(excess_return, freq=analysis_freq)
analysis["excess_return_with_cost"] = risk_analysis(excess_return - report_normal["cost"], freq=analysis_freq)

# Stack both results into a single frame keyed by the analysis name.
analysis_df = pd.concat(analysis)  # type: pd.DataFrame
pprint(analysis_df)



- nested decision execution

.. code-block:: python

from pprint import pprint

import qlib
import pandas as pd
from qlib.utils.time import Freq
from qlib.utils import flatten_dict
from qlib.backtest import backtest, executor
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import TopkDropoutStrategy

# init qlib
qlib.init(provider_uri=<qlib data dir>)

# Benchmark code passed to `backtest` through backtest_config["benchmark"].
CSI300_BENCH = "SH000300"
# Frequency used for the exchange ("freq" below) and to build the result lookup key.
FREQ = "day"
# Keyword arguments for TopkDropoutStrategy.
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
# pred_score, pd.Series (must be produced beforehand; it is not defined in this snippet)
"signal": pred_score,
}

# Keyword arguments for executor.SimulatorExecutor.
EXECUTOR_CONFIG = {
"time_per_step": "day",
"generate_portfolio_metrics": True,
}

# Keyword arguments forwarded to `backtest`; exchange settings are nested under "exchange_kwargs".
backtest_config = {
"start_time": "2017-01-01",
"end_time": "2020-08-01",
"account": 100000000,
"benchmark": CSI300_BENCH,
"exchange_kwargs": {
"freq": FREQ,
"limit_threshold": 0.095,
"deal_price": "close",
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
},
}

# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest: returns one dict of portfolio metrics and one dict of indicators
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
# key under which this frequency's results are stored (presumably "1day" for FREQ = "day" - TODO confirm)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))
# backtest info: the report and the positions recorded for `analysis_freq`
report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)

# analysis
analysis = dict()
you-n-g marked this conversation as resolved.
Show resolved Hide resolved
# Excess return = strategy return - benchmark return, analysed without trading cost ...
analysis["excess_return_without_cost"] = risk_analysis(
report_normal["return"] - report_normal["bench"], freq=analysis_freq
)
# ... and again with the trading cost subtracted.
analysis["excess_return_with_cost"] = risk_analysis(
report_normal["return"] - report_normal["bench"] - report_normal["cost"], freq=analysis_freq
)

# Stack both results into a single frame keyed by the analysis name.
analysis_df = pd.concat(analysis) # type: pd.DataFrame
# log metrics: flatten the "risk" column into a flat dict (not used further in this snippet)
analysis_dict = flatten_dict(analysis_df["risk"].unstack().T.to_dict())
# print out results
pprint(f"The following are analysis results of benchmark return({analysis_freq}).")
pprint(risk_analysis(report_normal["bench"], freq=analysis_freq))
pprint(f"The following are analysis results of the excess return without cost({analysis_freq}).")
pprint(analysis["excess_return_without_cost"])
pprint(f"The following are analysis results of the excess return with cost({analysis_freq}).")
pprint(analysis["excess_return_with_cost"])


To know more about the `prediction score` `pred_score` output by ``Forecast Model``, please refer to `Forecast Model: Model Training & Prediction <model.html>`_.

Expand Down
192 changes: 190 additions & 2 deletions examples/nested_decision_execution/workflow.py
@@ -1,9 +1,105 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
The expected result of `backtest` in the current version is as follows

'The following are analysis results of benchmark return(1day).'
risk
mean 0.000651
std 0.012472
annualized_return 0.154967
information_ratio 0.805422
max_drawdown -0.160445
'The following are analysis results of the excess return without cost(1day).'
risk
mean 0.001258
std 0.007575
annualized_return 0.299303
information_ratio 2.561219
max_drawdown -0.068386
'The following are analysis results of the excess return with cost(1day).'
risk
mean 0.001110
std 0.007575
annualized_return 0.264280
information_ratio 2.261392
max_drawdown -0.071842
[1706497:MainThread](2021-12-07 14:08:30,263) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_30minute.
pkl' has been saved as the artifact of the Experiment 2
'The following are analysis results of benchmark return(30minute).'
risk
mean 0.000078
std 0.003646
annualized_return 0.148787
information_ratio 0.935252
max_drawdown -0.142830
('The following are analysis results of the excess return without '
'cost(30minute).')
risk
mean 0.000174
std 0.003343
annualized_return 0.331867
information_ratio 2.275019
max_drawdown -0.074752
'The following are analysis results of the excess return with cost(30minute).'
risk
mean 0.000155
std 0.003343
annualized_return 0.294536
information_ratio 2.018860
max_drawdown -0.075579
[1706497:MainThread](2021-12-07 14:08:30,277) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_5minute.p
kl' has been saved as the artifact of the Experiment 2
'The following are analysis results of benchmark return(5minute).'
risk
mean 0.000015
std 0.001460
annualized_return 0.172170
information_ratio 1.103439
max_drawdown -0.144807
'The following are analysis results of the excess return without cost(5minute).'
risk
mean 0.000028
std 0.001412
annualized_return 0.319771
information_ratio 2.119563
max_drawdown -0.077426
'The following are analysis results of the excess return with cost(5minute).'
risk
mean 0.000025
std 0.001412
annualized_return 0.281536
information_ratio 1.866091
max_drawdown -0.078194
[1706497:MainThread](2021-12-07 14:08:30,287) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day
.pkl' has been saved as the artifact of the Experiment 2
'The following are analysis results of indicators(1day).'
value
ffr 0.945821
pa 0.000324
pos 0.542882
[1706497:MainThread](2021-12-07 14:08:30,293) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_30mi
nute.pkl' has been saved as the artifact of the Experiment 2
'The following are analysis results of indicators(30minute).'
value
ffr 0.982910
pa 0.000037
pos 0.500806
[1706497:MainThread](2021-12-07 14:08:30,302) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_5min
ute.pkl' has been saved as the artifact of the Experiment 2
'The following are analysis results of indicators(5minute).'
value
ffr 0.991017
pa 0.000000
pos 0.000000
[1706497:MainThread](2021-12-07 14:08:30,627) INFO - qlib.timer - [log.py:113] - Time cost: 0.014s | waiting `async_log` Done
"""


from copy import deepcopy
import qlib
import fire
import pandas as pd
from qlib.config import REG_CN, HIGH_FREQ_CONFIG
from qlib.data import D
from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
Expand All @@ -14,6 +110,13 @@


class NestedDecisionExecutionWorkflow:
# TODO: add test for nested workflow.
# 1) comparing same backtest
# - Basic test idea: the shared accumulated value are equal in multiple levels
# - Aligning the profit calculation between multiple levels and single levels.
# 2) comparing different backtest
# - Basic test idea:
# - the daily backtest will be similar to the multi-level one (the data quality makes this gap smaller)

market = "csi300"
benchmark = "SH000300"
Expand Down Expand Up @@ -167,8 +270,6 @@ def backtest(self):
par = PortAnaRecord(
recorder,
self.port_analysis_config,
risk_analysis_freq=["day", "30min", "5min"],
indicator_analysis_freq=["day", "30min", "5min"],
indicator_analysis_method="value_weighted",
)
par.generate()
Expand Down Expand Up @@ -199,6 +300,93 @@ def collect_data(self):
for trade_decision in data_generator:
print(trade_decision)

# the code below is for checking; users don't have to care about it
def check_diff_freq(self):
    """
    Check that accumulated backtest metrics agree between the 30-minute and daily reports.

    For each of ``account``, ``total_turnover`` and ``total_cost``, the column is loaded
    from the ``30minute``, ``5minute`` and ``1day`` ``report_normal`` artifacts of the first
    recorder listed for the ``backtest`` experiment. The columns are resampled to the last
    value of each day, and the ``30minute`` series is compared with the ``1day`` series.

    Raises
    ------
    AssertionError
        If the 30-minute and daily values of any checked metric differ on any day.
    """
    self._init_qlib()
    exp = R.get_exp(experiment_name="backtest")
    rec = next(iter(exp.list_recorders().values()))  # assuming this will get the latest recorder
    freqs = ["30minute", "5minute", "1day"]
    # Every key is checked in turn. The loop variable used to be overwritten with
    # "total_cost", which silently skipped "account" and "total_turnover".
    for check_key in "account", "total_turnover", "total_cost":
        acc_df = pd.DataFrame(
            {freq: rec.load_object(f"portfolio_analysis/report_normal_{freq}.pkl")[check_key] for freq in freqs}
        )
        # "5minute" is not compared below, but it stays in the frame so that `dropna`
        # keeps only the days on which all three frequencies have a value.
        acc_resam = acc_df.resample("1d").last().dropna()
        assert (
            acc_resam["30minute"] == acc_resam["1day"]
        ).all(), f"`{check_key}` differs between the 30minute and 1day reports"

def backtest_only_daily(self):
    """
    Run a single-layer, daily-only backtest as a reference for the nested execution.

    Because the daily-level and minute-level data are of low quality, the two kinds of
    backtest are hardly comparable. This run is therefore only meant for detecting serious
    bugs that make the results differ greatly.

    Sample output:

    .. code-block:: shell

        [1724971:MainThread](2021-12-07 16:24:31,156) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_1day.pkl'
        has been saved as the artifact of the Experiment 2
        'The following are analysis results of benchmark return(1day).'
        risk
        mean 0.000651
        std 0.012472
        annualized_return 0.154967
        information_ratio 0.805422
        max_drawdown -0.160445
        'The following are analysis results of the excess return without cost(1day).'
        risk
        mean 0.001375
        std 0.006103
        annualized_return 0.327204
        information_ratio 3.475016
        max_drawdown -0.024927
        'The following are analysis results of the excess return with cost(1day).'
        risk
        mean 0.001184
        std 0.006091
        annualized_return 0.281801
        information_ratio 2.998749
        max_drawdown -0.029568
        [1724971:MainThread](2021-12-07 16:24:31,170) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day.
        pkl' has been saved as the artifact of the Experiment 2
        'The following are analysis results of indicators(1day).'
        value
        ffr 1.0
        pa 0.0
        pos 0.0
        [1724971:MainThread](2021-12-07 16:24:31,188) INFO - qlib.timer - [log.py:113] - Time cost: 0.007s | waiting `async_log` Done

    """
    self._init_qlib()
    model = init_instance_by_config(self.task["model"])
    dataset = init_instance_by_config(self.task["dataset"])
    self._train_model(model, dataset)

    # Single executor layer stepping one day at a time.
    daily_executor = {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {
            "time_per_step": "day",
            "generate_portfolio_metrics": True,
            "verbose": True,
        },
    }
    # Top-k/dropout strategy that takes the trained model and dataset as its signal.
    topk_strategy = {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": {
            "signal": (model, dataset),
            "topk": 50,
            "n_drop": 5,
        },
    }

    # Work on a copy so the shared port_analysis_config is left untouched.
    pa_conf = deepcopy(self.port_analysis_config)
    pa_conf["strategy"] = topk_strategy
    pa_conf["executor"] = daily_executor
    pa_conf["backtest"]["benchmark"] = self.benchmark

    with R.start(experiment_name="backtest"):
        PortAnaRecord(R.get_recorder(), pa_conf).generate()


if __name__ == "__main__":
fire.Fire(NestedDecisionExecutionWorkflow)
6 changes: 4 additions & 2 deletions qlib/backtest/__init__.py
Expand Up @@ -186,8 +186,10 @@ def get_strategy_executor(
trade_exchange = get_exchange(**exchange_kwargs)

common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=trade_exchange)
trade_strategy = init_instance_by_config(strategy, accept_types=BaseStrategy, common_infra=common_infra)
trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra)
trade_strategy = init_instance_by_config(strategy, accept_types=BaseStrategy)
trade_strategy.reset_common_infra(common_infra)
trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor)
trade_executor.reset_common_infra(common_infra)

return trade_strategy, trade_executor

Expand Down