Skip to content

Commit

Permalink
Add backtest and backforward task (#1568)
Browse files Browse the repository at this point in the history
* * add TrainTask & BacktestTask;
* add BackForwardTask;
* adjust prompt_template.yaml, whose default config failed to backtest;
* run workflow in loop
* add update method to prompt_template.py

* remove debug code

* Adjust Learn Process
* add LearnManager class & use LearnManager to update system prompt;
* use qrun to replace recorder for training and backtesting;

* Adjust analyser
* analyser independent of recorder;
* rename analyser's workspace attribute;
* analyser load variable by recorder.

---------

Co-authored-by: Cadenza-Li <362237642@qq.com>
  • Loading branch information
Fivele-Li and Cadenza-Li committed Jun 30, 2023
1 parent 1326ac6 commit 7e84f3a
Show file tree
Hide file tree
Showing 8 changed files with 179 additions and 92 deletions.
54 changes: 28 additions & 26 deletions qlib/contrib/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,32 @@
from pathlib import Path
import numpy as np

from qlib.utils import class_casting

from ..data.dataset import DatasetH
from ..data.dataset.handler import DataHandlerLP
from ..log import get_module_logger
from ..contrib.eva.alpha import calc_ic, calc_long_short_return, calc_long_short_prec

logger = get_module_logger("analysis", logging.INFO)


class AnalyzerTemp:
def __init__(self, workspace=None, **kwargs):
self.workspace = Path(workspace) if workspace else "./"
def __init__(self, recorder, output_dir=None, **kwargs):
    """
    Bind the analyser to a recorder and an output directory.

    Parameters
    ----------
    recorder :
        the object whose ``load_object`` method is used by ``load`` to fetch stored records.
    output_dir : str or Path, optional
        the directory where generated figures are saved; the current directory when omitted.
    **kwargs :
        accepted for signature compatibility and not used here.
    """
    self.recorder = recorder
    # Always keep a Path here: the analysers call ``self.output_dir.joinpath(...)``,
    # which a plain "./" string fallback would not support.
    self.output_dir = Path(output_dir) if output_dir else Path("./")

def load(self, name: str):
    """
    Fetch a stored record by name through the bound recorder.

    It simply forwards to ``self.recorder.load_object``, so users don't have to care
    about `get_path` and `artifact_path`.

    Parameters
    ----------
    name : str
        the name of the file to be loaded.

    Return
    ------
    The stored records.
    """
    stored = self.recorder.load_object(name)
    return stored

def analyse(self, **kwargs):
"""
Expand All @@ -42,7 +55,10 @@ class HFAnalyzer(AnalyzerTemp):
def __init__(self, **kwargs):
super().__init__(**kwargs)

def analyse(self, pred=None, label=None):
def analyse(self):
pred = self.load("pred.pkl")
label = self.load("label.pkl")

long_pre, short_pre = calc_long_short_prec(pred.iloc[:, 0], label.iloc[:, 0], is_alpha=True)
ic, ric = calc_ic(pred.iloc[:, 0], label.iloc[:, 0])
metrics = {
Expand All @@ -65,13 +81,13 @@ def analyse(self, pred=None, label=None):
table = [[k, v] for (k, v) in metrics.items()]
plt.table(cellText=table, loc="center")
plt.axis("off")
plt.savefig(self.workspace.joinpath("HFAnalyzerTable.jpeg"))
plt.savefig(self.output_dir.joinpath("HFAnalyzerTable.jpeg"))
plt.clf()

plt.scatter(np.arange(0, len(pred)), pred.iloc[:, 0])
plt.scatter(np.arange(0, len(label)), label.iloc[:, 0])
plt.title("HFAnalyzer")
plt.savefig(self.workspace.joinpath("HFAnalyzer.jpeg"))
plt.savefig(self.output_dir.joinpath("HFAnalyzer.jpeg"))
return "HFAnalyzer.jpeg"


Expand All @@ -86,24 +102,10 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)

def analyse(self, dataset=None, **kwargs):
label = self.load("label.pkl")

with class_casting(dataset, DatasetH):
params = dict(segments="test", col_set="label", data_key=DataHandlerLP.DK_R)
try:
# Assume the backend handler is DataHandlerLP
raw_label = dataset.prepare(**params)
except TypeError:
# The argument number is not right
del params["data_key"]
# The backend handler should be DataHandler
raw_label = dataset.prepare(**params)
except AttributeError as e:
# The data handler is initialized with `drop_raw=True`...
# So raw_label is not available
logger.warning(f"Exception: {e}")
raw_label = None
plt.hist(raw_label)
plt.hist(label)
plt.title("SignalAnalyzer")
plt.savefig(self.workspace.joinpath("signalAnalysis.jpeg"))
plt.savefig(self.output_dir.joinpath("signalAnalysis.jpeg"))

return "signalAnalysis.jpeg"
15 changes: 15 additions & 0 deletions qlib/finco/cli_learn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import fire
from qlib.finco.workflow import LearnManager
from dotenv import load_dotenv
from qlib import auto_init


def main(prompt=None):
    """
    Entry point of the learn CLI: load the dotenv settings, then run a LearnManager.

    Parameters
    ----------
    prompt : optional
        forwarded unchanged to ``LearnManager.run``.
    """
    # Load the dotenv settings first so they are in place before the manager is created.
    load_dotenv(verbose=True, override=True)
    manager = LearnManager()
    manager.run(prompt)


if __name__ == "__main__":
    # Only when executed as a script: call qlib's auto_init() first, then let fire
    # expose `main` (and its `prompt` argument) as the command-line interface.
    auto_init()
    fire.Fire(main)
31 changes: 25 additions & 6 deletions qlib/finco/prompt_template.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,37 @@
from typing import Union
from pathlib import Path
from jinja2 import Template
import yaml

from qlib.finco.utils import Singleton
from qlib.finco import get_finco_path
import yaml
import os

class PormptTemplate(Singleton):

class PromptTemplate(Singleton):
    """
    Holder of the prompt templates defined in ``prompt_template.yaml``.

    Every top-level key of the yaml file except ``mods`` becomes an attribute holding a
    jinja2 ``Template``. Each entry under ``mods`` pre-renders the template named
    ``target_name`` with its params and stores the result as a new ``Template`` under
    the attribute ``{target_name}_{module_name}``.
    """

    def __init__(self) -> None:
        super().__init__()
        # Path(...) accepts both str and Path return values of get_finco_path().
        template_path = Path(get_finco_path()) / "prompt_template.yaml"
        # A context manager closes the file handle once the yaml has been parsed.
        with open(template_path, "r") as f:
            _template = yaml.load(f, Loader=yaml.FullLoader)

        for k, v in _template.items():
            if k == "mods":
                # "mods" is not a template itself; it is expanded in the loop below.
                continue
            self.__setattr__(k, Template(v))

        for target_name, module_to_render_params in _template["mods"].items():
            for module_name, params in module_to_render_params.items():
                self.__setattr__(
                    f"{target_name}_{module_name}",
                    Template(self.__getattribute__(target_name).render(**params)),
                )

    def get(self, key: str):
        """Return the template stored under ``key``, or an empty ``Template`` if absent."""
        return self.__dict__.get(key, Template(""))

    def update(self, key: str, value):
        """Set (or replace) the attribute ``key`` with ``value``."""
        self.__setattr__(key, value)

    def save(self, file_path: Union[str, Path]):
        """
        Dump the instance attributes to ``file_path`` as yaml.

        Parameters
        ----------
        file_path : Union[str, Path]
            destination file; missing parent directories are created.
        """
        file_path = Path(file_path)
        # parents=True also creates missing intermediate directories instead of failing.
        file_path.parent.mkdir(parents=True, exist_ok=True)

        # NOTE(review): self.__dict__ holds jinja2 Template objects, so the dump presumably
        # contains python-object tags rather than plain prompt text - confirm this is the
        # intended on-disk format.
        with open(file_path, "w") as f:
            yaml.dump(self.__dict__, f)
11 changes: 9 additions & 2 deletions qlib/finco/prompt_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,13 @@ SummarizeTask_user : |-
Here is my information: '{{information}}'
My intention is: {{user_prompt}}. Please provide me with a summary and recommendation based on my intention and the information I have provided. There are some figures which absolute path are: {{figure_path}}, You must display these images in markdown using the appropriate image format.
BackForwardTask_system : |-
Your task is adjusting system prompt in each task to fulfill user's intention
BackForwardTask_user : |-
Here is the final summary: '{{summary}}'
Tasks I have run are: {{task_finished}}, {{task}}'s system prompt is: {{system}}. User's intention is: {{user_prompt}}. you will adjust it to:
mods:
ConfigActionTask_system:
Dataset:
Expand Down Expand Up @@ -382,7 +389,7 @@ mods:
```
Reason: I choose the backtest parameters above because they are suitable for a low turnover strategy focusing on long-term returns in the China A stock market. The start and end times are set to cover a 4-year period, which is reasonable for a long-term strategy. The account value is set to 1,000,000 as a starting point, and the benchmark is set to SH000300, which represents the China A stock market.
Improve suggestion: You can try different time ranges for the backtest to evaluate the performance of the strategy in different market conditions. Also, you can adjust the costs (open_cost, close_cost, and min_cost) to better reflect the actual trading costs in the China A stock market.
ConfigActionTask_user:
Dataset:
target_component : |-
Expand All @@ -402,7 +409,7 @@ mods:
Backtest:
target_component : |-
Backtest
ImplementActionTask_system:
Dataset:
target_component : |-
Expand Down
72 changes: 40 additions & 32 deletions qlib/finco/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@

from qlib.finco.llm import APIBackend
from qlib.finco.tpl import get_tpl_path
from qlib.finco.prompt_template import PormptTemplate
from qlib.finco.prompt_template import PromptTemplate
from qlib.workflow.record_temp import HFSignalRecord, SignalRecord
from qlib.contrib.analyzer import HFAnalyzer, SignalAnalyzer
from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.finco.log import FinCoLog, LogColors
from qlib.finco.conf import Config
Expand All @@ -41,7 +40,7 @@ class Task:

def __init__(self) -> None:
self._context_manager = None
self.prompt_template = PormptTemplate()
self.prompt_template = PromptTemplate()
self.executed = False
self.continuous = Config().continuous_mode
self.logger = FinCoLog()
Expand Down Expand Up @@ -96,13 +95,11 @@ def interact(self, prompt: str, **kwargs) -> Any:

@property
def system(self):
return self.prompt_template.__getattribute__(
self.__class__.__name__ + "_system"
)
return self.prompt_template.get(self.__class__.__name__ + "_system")

@property
def user(self):
return self.prompt_template.__getattribute__(self.__class__.__name__ + "_user")
return self.prompt_template.get(self.__class__.__name__ + "_user")

def __str__(self):
return self.__class__.__name__
Expand Down Expand Up @@ -150,7 +147,7 @@ class PlanTask(Task):


class SLPlanTask(PlanTask):
def __init__(self,) -> None:
def __init__(self, ) -> None:
super().__init__()

def execute(self):
Expand Down Expand Up @@ -220,20 +217,22 @@ def execute(self):
return []


class RecorderTask(Task):
class TrainTask(Task):
"""
This Recorder task is responsible for analysing data such as index and distribution.
This train task is responsible for training model configure by yaml file.
"""

def __init__(self):
super().__init__()
self._output = None

def execute(self):
workflow_config = (
self._context_manager.get_context("workflow_config")
if self._context_manager.get_context("workflow_config")
else "workflow_config.yaml"
)

workspace = self._context_manager.get_context("workspace")
workflow_path = workspace.joinpath(workflow_config)
with workflow_path.open() as f:
Expand All @@ -246,33 +245,28 @@ def execute(self):
if confirm is False:
return []

model = init_instance_by_config(workflow["task"]["model"])
dataset = init_instance_by_config(workflow["task"]["dataset"])

with R.start(experiment_name="finCo"):
model.fit(dataset)
R.save_objects(trained_model=model)

# prediction
recorder = R.get_recorder()
sr = SignalRecord(model, dataset, recorder)
sr.generate()

self._context_manager.set_context("model", model)
self._context_manager.set_context("dataset", dataset)
self._context_manager.set_context("recorder", recorder)
command = f"qrun {workflow_path}"
self._output = subprocess.check_output(command, shell=True, cwd=workspace)

return [AnalysisTask()]

def summarize(self):
    """
    Store the captured ``qrun`` output in the context under this task's class name.

    Does nothing when no output has been captured (``self._output`` is None).
    """
    if self._output is None:
        return

    import locale  # local import keeps this block self-contained

    # TODO: it will be overridden by later commands
    # utf8 can't decode normally on Windows, but the "ANSI" codec alias only exists there
    # and raises LookupError on other platforms. The locale's preferred encoding is used
    # instead (presumably the same code page on Windows - confirm), and undecodable bytes
    # are replaced rather than aborting the task.
    encoding = locale.getpreferredencoding(False)
    self._context_manager.set_context(
        self.__class__.__name__, self._output.decode(encoding, errors="replace")
    )


class AnalysisTask(Task):
"""
This Recorder task is responsible for analysing data such as index and distribution.
"""

__ANALYZERS_PROJECT = {
HFAnalyzer.__name__: HFSignalRecord,
SignalAnalyzer.__name__: SignalRecord,
HFAnalyzer.__name__: HFAnalyzer,
SignalAnalyzer.__name__: SignalAnalyzer,
}
__ANALYZERS_DOCS = {
HFAnalyzer.__name__: HFAnalyzer.__doc__,
Expand Down Expand Up @@ -303,7 +297,7 @@ def execute(self):
ANALYZERS_DOCS=self.__ANALYZERS_DOCS,
),
)
analysers = response.split(",")
analysers = response.replace(" ", "").split(",")
confirm = self.interact(f"I select these analysers: {analysers}\n"
f"Are you sure you want to use? yes(Y/y), no(N/n) or prompt:")
if confirm is False:
Expand All @@ -317,15 +311,26 @@ def execute(self):
if isinstance(analysers, list) and len(analysers):
self.logger.info(f"selected analysers: {analysers}", plain=True)

workflow_config = (
self._context_manager.get_context("workflow_config")
if self._context_manager.get_context("workflow_config")
else "workflow_config.yaml"
)
workspace = self._context_manager.get_context("workspace")
workflow_path = workspace.joinpath(workflow_config)
with workflow_path.open() as f:
workflow = yaml.safe_load(f)

experiment_name = workflow["experiment_name"] if "experiment_name" in workflow else "workflow"
R.set_uri(Path.joinpath(workspace, 'mlruns').as_uri())

tasks = []
for analyser in analysers:
if analyser in self.__ANALYZERS_PROJECT.keys():
tasks.append(
self.__ANALYZERS_PROJECT.get(analyser)(
workspace=self._context_manager.get_context("workspace"),
model=self._context_manager.get_context("model"),
dataset=self._context_manager.get_context("dataset"),
recorder=self._context_manager.get_context("recorder"),
recorder=R.get_recorder(experiment_name=experiment_name),
output_dir=workspace
)
)

Expand Down Expand Up @@ -575,11 +580,14 @@ def execute(self) -> Any:
information=information, figure_path=figure_path, user_prompt=user_prompt
)

# todo: remove 'be' after test
be = APIBackend()
be.debug_mode = False
response = be.build_messages_and_create_chat_completion(
user_prompt=prompt_workflow_selection, system_prompt=self.system.render()
)

self._context_manager.set_context("summary", response)
self.save_markdown(content=response)
self.logger.info(f"Report has saved to {self.__DEFAULT_REPORT_NAME}", title="End")

Expand Down
3 changes: 2 additions & 1 deletion qlib/finco/tpl/sl/workflow_config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
experiment_name: finCo
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
Expand All @@ -14,7 +15,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
model: <MODEL>
dataset: <DATASET>
topk: 50
n_drop: 5
Expand Down
Loading

0 comments on commit 7e84f3a

Please sign in to comment.