Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add backtest and backforward task #1568

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 28 additions & 26 deletions qlib/contrib/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,32 @@
from pathlib import Path
import numpy as np

from qlib.utils import class_casting

from ..data.dataset import DatasetH
from ..data.dataset.handler import DataHandlerLP
from ..log import get_module_logger
from ..contrib.eva.alpha import calc_ic, calc_long_short_return, calc_long_short_prec

logger = get_module_logger("analysis", logging.INFO)


class AnalyzerTemp:
def __init__(self, workspace=None, **kwargs):
self.workspace = Path(workspace) if workspace else "./"
def __init__(self, recorder, output_dir=None, **kwargs):
self.recorder = recorder
self.output_dir = Path(output_dir) if output_dir else "./"

def load(self, name: str):
    """
    Fetch a stored object from the recorder by name.

    Thin convenience wrapper around ``self.recorder.load_object``, so that users
    don't have to care about `get_path` and `artifact_path`.

    Parameters
    ----------
    name : str
        the name of the file to be loaded.

    Return
    ------
    The stored records.
    """
    stored = self.recorder.load_object(name)
    return stored

def analyse(self, **kwargs):
"""
Expand All @@ -42,7 +55,10 @@ class HFAnalyzer(AnalyzerTemp):
def __init__(self, **kwargs):
    """Forward every keyword argument unchanged to the parent initializer."""
    super().__init__(**kwargs)

def analyse(self, pred=None, label=None):
def analyse(self):
pred = self.load("pred.pkl")
label = self.load("label.pkl")

long_pre, short_pre = calc_long_short_prec(pred.iloc[:, 0], label.iloc[:, 0], is_alpha=True)
ic, ric = calc_ic(pred.iloc[:, 0], label.iloc[:, 0])
metrics = {
Expand All @@ -65,13 +81,13 @@ def analyse(self, pred=None, label=None):
table = [[k, v] for (k, v) in metrics.items()]
plt.table(cellText=table, loc="center")
plt.axis("off")
plt.savefig(self.workspace.joinpath("HFAnalyzerTable.jpeg"))
plt.savefig(self.output_dir.joinpath("HFAnalyzerTable.jpeg"))
plt.clf()

plt.scatter(np.arange(0, len(pred)), pred.iloc[:, 0])
plt.scatter(np.arange(0, len(label)), label.iloc[:, 0])
plt.title("HFAnalyzer")
plt.savefig(self.workspace.joinpath("HFAnalyzer.jpeg"))
plt.savefig(self.output_dir.joinpath("HFAnalyzer.jpeg"))
return "HFAnalyzer.jpeg"


Expand All @@ -86,24 +102,10 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)

def analyse(self, dataset=None, **kwargs):
label = self.load("label.pkl")

with class_casting(dataset, DatasetH):
params = dict(segments="test", col_set="label", data_key=DataHandlerLP.DK_R)
try:
# Assume the backend handler is DataHandlerLP
raw_label = dataset.prepare(**params)
except TypeError:
# The argument number is not right
del params["data_key"]
# The backend handler should be DataHandler
raw_label = dataset.prepare(**params)
except AttributeError as e:
# The data handler is initialized with `drop_raw=True`...
# So raw_label is not available
logger.warning(f"Exception: {e}")
raw_label = None
plt.hist(raw_label)
plt.hist(label)
plt.title("SignalAnalyzer")
plt.savefig(self.workspace.joinpath("signalAnalysis.jpeg"))
plt.savefig(self.output_dir.joinpath("signalAnalysis.jpeg"))

return "signalAnalysis.jpeg"
15 changes: 15 additions & 0 deletions qlib/finco/cli_learn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import fire
from qlib.finco.workflow import LearnManager
from dotenv import load_dotenv
from qlib import auto_init


def main(prompt=None):
    """
    Command-line entry point: load the environment file, then run the learn workflow.

    Parameters
    ----------
    prompt : optional
        passed through unchanged to ``LearnManager.run``.
    """
    # Values from the .env file override variables already set in the environment.
    load_dotenv(verbose=True, override=True)
    LearnManager().run(prompt)


if __name__ == "__main__":
auto_init()
fire.Fire(main)
31 changes: 25 additions & 6 deletions qlib/finco/prompt_template.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,37 @@
from typing import Union
from pathlib import Path
from jinja2 import Template
import yaml

from qlib.finco.utils import Singleton
from qlib.finco import get_finco_path
import yaml
import os

class PormptTemplate(Singleton):

class PromptTemplate(Singleton):
def __init__(self) -> None:
super().__init__()
_template = yaml.load(open(os.path.join(get_finco_path(), "prompt_template.yaml"), "r"), Loader=yaml.FullLoader)
_template = yaml.load(open(Path.joinpath(get_finco_path(), "prompt_template.yaml"), "r"),
Loader=yaml.FullLoader)
for k, v in _template.items():
if k == "mods":
continue
self.__setattr__(k, Template(v))

for target_name, module_to_render_params in _template["mods"].items():
for module_name, params in module_to_render_params.items():
self.__setattr__(f"{target_name}_{module_name}", Template(self.__getattribute__(target_name).render(**params)))
self.__setattr__(f"{target_name}_{module_name}",
Template(self.__getattribute__(target_name).render(**params)))

def get(self, key: str):
    """
    Look up the attribute stored under `key` on this instance.

    Returns an empty ``Template("")`` when no such attribute has been set.
    """
    fallback = Template("")
    return vars(self).get(key, fallback)

def update(self, key: str, value):
    """Bind `value` to the attribute named `key`, replacing any existing one."""
    setattr(self, key, value)

def save(self, file_path: Union[str, Path]):
    """
    Dump this instance's attributes to a YAML file.

    Parameters
    ----------
    file_path : Union[str, Path]
        destination file; missing parent directories are created.
    """
    # Path() accepts both str and Path, so no isinstance branch is needed.
    file_path = Path(file_path)
    # parents=True: without it mkdir raises FileNotFoundError as soon as more
    # than the last directory level is missing.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the attributes set in __init__ are jinja2 Template objects, not
    # plain strings, so this dump is presumably not readable back by __init__ as a
    # prompt_template.yaml -- confirm the intended on-disk format.
    with open(file_path, 'w') as f:
        yaml.dump(self.__dict__, f)
11 changes: 9 additions & 2 deletions qlib/finco/prompt_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,13 @@ SummarizeTask_user : |-
Here is my information: '{{information}}'
My intention is: {{user_prompt}}. Please provide me with a summary and recommendation based on my intention and the information I have provided. There are some figures which absolute path are: {{figure_path}}, You must display these images in markdown using the appropriate image format.

BackForwardTask_system : |-
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will it be clearer if we use the following format?

BackForwardTask:
     system:
     user: 

Your task is adjusting system prompt in each task to fulfill user's intention

BackForwardTask_user : |-
Here is the final summary: '{{summary}}'
Tasks I have run are: {{task_finished}}, {{task}}'s system prompt is: {{system}}. User's intention is: {{user_prompt}}. you will adjust it to:

mods:
ConfigActionTask_system:
Dataset:
Expand Down Expand Up @@ -382,7 +389,7 @@ mods:
```
Reason: I choose the backtest parameters above because they are suitable for a low turnover strategy focusing on long-term returns in the China A stock market. The start and end times are set to cover a 4-year period, which is reasonable for a long-term strategy. The account value is set to 1,000,000 as a starting point, and the benchmark is set to SH000300, which represents the China A stock market.
Improve suggestion: You can try different time ranges for the backtest to evaluate the performance of the strategy in different market conditions. Also, you can adjust the costs (open_cost, close_cost, and min_cost) to better reflect the actual trading costs in the China A stock market.

ConfigActionTask_user:
Dataset:
target_component : |-
Expand All @@ -402,7 +409,7 @@ mods:
Backtest:
target_component : |-
Backtest

ImplementActionTask_system:
Dataset:
target_component : |-
Expand Down
72 changes: 40 additions & 32 deletions qlib/finco/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@

from qlib.finco.llm import APIBackend
from qlib.finco.tpl import get_tpl_path
from qlib.finco.prompt_template import PormptTemplate
from qlib.finco.prompt_template import PromptTemplate
from qlib.workflow.record_temp import HFSignalRecord, SignalRecord
from qlib.contrib.analyzer import HFAnalyzer, SignalAnalyzer
from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.finco.log import FinCoLog, LogColors
from qlib.finco.conf import Config
Expand All @@ -41,7 +40,7 @@ class Task:

def __init__(self) -> None:
self._context_manager = None
self.prompt_template = PormptTemplate()
self.prompt_template = PromptTemplate()
self.executed = False
self.continuous = Config().continuous_mode
self.logger = FinCoLog()
Expand Down Expand Up @@ -96,13 +95,11 @@ def interact(self, prompt: str, **kwargs) -> Any:

@property
def system(self):
return self.prompt_template.__getattribute__(
self.__class__.__name__ + "_system"
)
return self.prompt_template.get(self.__class__.__name__ + "_system")

@property
def user(self):
return self.prompt_template.__getattribute__(self.__class__.__name__ + "_user")
return self.prompt_template.get(self.__class__.__name__ + "_user")

def __str__(self):
return self.__class__.__name__
Expand Down Expand Up @@ -150,7 +147,7 @@ class PlanTask(Task):


class SLPlanTask(PlanTask):
def __init__(self,) -> None:
def __init__(self, ) -> None:
super().__init__()

def execute(self):
Expand Down Expand Up @@ -220,20 +217,22 @@ def execute(self):
return []


class RecorderTask(Task):
class TrainTask(Task):
"""
This Recorder task is responsible for analysing data such as index and distribution.
This train task is responsible for training the model configured by a YAML file.
"""

def __init__(self):
    """Initialise the base task and start with no captured command output."""
    super().__init__()
    # Stays None until `execute` stores the output of the training command;
    # `summarize` checks for None before using it.
    self._output = None

def execute(self):
workflow_config = (
self._context_manager.get_context("workflow_config")
if self._context_manager.get_context("workflow_config")
else "workflow_config.yaml"
)

workspace = self._context_manager.get_context("workspace")
workflow_path = workspace.joinpath(workflow_config)
with workflow_path.open() as f:
Expand All @@ -246,33 +245,28 @@ def execute(self):
if confirm is False:
return []

model = init_instance_by_config(workflow["task"]["model"])
dataset = init_instance_by_config(workflow["task"]["dataset"])

with R.start(experiment_name="finCo"):
model.fit(dataset)
R.save_objects(trained_model=model)

# prediction
recorder = R.get_recorder()
sr = SignalRecord(model, dataset, recorder)
sr.generate()

self._context_manager.set_context("model", model)
self._context_manager.set_context("dataset", dataset)
self._context_manager.set_context("recorder", recorder)
command = f"qrun {workflow_path}"
self._output = subprocess.check_output(command, shell=True, cwd=workspace)

return [AnalysisTask()]

def summarize(self):
    """
    Store the decoded output of the training command in the context.

    Nothing is stored when no output has been captured (``self._output`` is None).
    The text is saved under this task's class name.
    """
    if self._output is None:
        return

    import locale

    # "ANSI" is a codec alias that only resolves on Windows; on other platforms
    # bytes.decode("ANSI") raises LookupError. The locale's preferred encoding is
    # the portable equivalent (on Windows it is the ANSI code page, which is what
    # the command output uses there instead of utf8).
    encoding = locale.getpreferredencoding(False)
    # errors="replace": a stray undecodable byte in the command output should not
    # abort the summary step.
    text = self._output.decode(encoding, errors="replace")

    # TODO: this entry will be overridden by later commands
    self._context_manager.set_context(self.__class__.__name__, text)


class AnalysisTask(Task):
"""
This analysis task is responsible for analysing data such as index and distribution.
"""

__ANALYZERS_PROJECT = {
HFAnalyzer.__name__: HFSignalRecord,
SignalAnalyzer.__name__: SignalRecord,
HFAnalyzer.__name__: HFAnalyzer,
SignalAnalyzer.__name__: SignalAnalyzer,
}
__ANALYZERS_DOCS = {
HFAnalyzer.__name__: HFAnalyzer.__doc__,
Expand Down Expand Up @@ -303,7 +297,7 @@ def execute(self):
ANALYZERS_DOCS=self.__ANALYZERS_DOCS,
),
)
analysers = response.split(",")
analysers = response.replace(" ", "").split(",")
confirm = self.interact(f"I select these analysers: {analysers}\n"
f"Are you sure you want to use? yes(Y/y), no(N/n) or prompt:")
if confirm is False:
Expand All @@ -317,15 +311,26 @@ def execute(self):
if isinstance(analysers, list) and len(analysers):
self.logger.info(f"selected analysers: {analysers}", plain=True)

workflow_config = (
self._context_manager.get_context("workflow_config")
if self._context_manager.get_context("workflow_config")
else "workflow_config.yaml"
)
workspace = self._context_manager.get_context("workspace")
workflow_path = workspace.joinpath(workflow_config)
with workflow_path.open() as f:
workflow = yaml.safe_load(f)

experiment_name = workflow["experiment_name"] if "experiment_name" in workflow else "workflow"
R.set_uri(Path.joinpath(workspace, 'mlruns').as_uri())

tasks = []
for analyser in analysers:
if analyser in self.__ANALYZERS_PROJECT.keys():
tasks.append(
self.__ANALYZERS_PROJECT.get(analyser)(
workspace=self._context_manager.get_context("workspace"),
model=self._context_manager.get_context("model"),
dataset=self._context_manager.get_context("dataset"),
recorder=self._context_manager.get_context("recorder"),
recorder=R.get_recorder(experiment_name=experiment_name),
output_dir=workspace
)
)

Expand Down Expand Up @@ -575,11 +580,14 @@ def execute(self) -> Any:
information=information, figure_path=figure_path, user_prompt=user_prompt
)

# todo: remove 'be' after test
be = APIBackend()
be.debug_mode = False
response = be.build_messages_and_create_chat_completion(
user_prompt=prompt_workflow_selection, system_prompt=self.system.render()
)

self._context_manager.set_context("summary", response)
self.save_markdown(content=response)
self.logger.info(f"Report has saved to {self.__DEFAULT_REPORT_NAME}", title="End")

Expand Down
3 changes: 2 additions & 1 deletion qlib/finco/tpl/sl/workflow_config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
experiment_name: finCo
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
Expand All @@ -14,7 +15,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
model: <MODEL>
dataset: <DATASET>
topk: 50
n_drop: 5
Expand Down
Loading