Skip to content

Commit

Permalink
Merge branch 'microsoft:main' into stale
Browse files Browse the repository at this point in the history
  • Loading branch information
Derek-Wds committed May 9, 2021
2 parents 143c257 + aa1f9b4 commit 81bd2ca
Show file tree
Hide file tree
Showing 13 changed files with 236 additions and 28 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -49,7 +49,7 @@ Your feedbacks about the features are very important.
| Planning-based portfolio optimization | Under review: https://github.com/microsoft/qlib/pull/280 |
| Fund data supporting and analysis | Under review: https://github.com/microsoft/qlib/pull/292 |
| Point-in-Time database | Under review: https://github.com/microsoft/qlib/pull/343 |
| High-frequency trading | Initial opensource version under development |
| High-frequency trading | Under review: https://github.com/microsoft/qlib/pull/408 |
| Meta-Learning-based data selection | Initial opensource version under development |

Recent released features
Expand Down
5 changes: 5 additions & 0 deletions docs/component/data.rst
Expand Up @@ -182,6 +182,11 @@ The `trade unit` defines the unit number of stocks can be used in a trade, and t
qlib.init(provider_uri='~/.qlib/qlib_data/us_data', region=REG_US)
.. note::

PRs for new data sources are highly welcome! Users can contribute the code that crawls the data as a PR, like `the examples here <https://github.com/microsoft/qlib/tree/main/scripts>`_. We will then use that code to build a data cache on our server, which other users can use directly.


Data API
========================

Expand Down
3 changes: 3 additions & 0 deletions qlib/data/data.py
Expand Up @@ -522,6 +522,9 @@ def load_calendar(self, freq, future):
# if future calendar not exists, return current calendar
if not os.path.exists(fname):
get_module_logger("data").warning(f"{freq}_future.txt not exists, return current calendar!")
get_module_logger("data").warning(
"You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md"
)
fname = self._uri_cal.format(freq)
else:
fname = self._uri_cal.format(freq)
Expand Down
17 changes: 14 additions & 3 deletions qlib/log.py
Expand Up @@ -15,9 +15,10 @@
class MetaLogger(type):
def __new__(cls, name, bases, dict):
wrapper_dict = logging.Logger.__dict__.copy()
wrapper_dict.update(dict)
wrapper_dict["__doc__"] = logging.Logger.__doc__
return type.__new__(cls, name, bases, wrapper_dict)
for key in wrapper_dict:
if key not in dict and key != "__reduce__":
dict[key] = wrapper_dict[key]
return type.__new__(cls, name, bases, dict)


class QlibLogger(metaclass=MetaLogger):
Expand All @@ -39,6 +40,9 @@ def setLevel(self, level):
self.level = level

def __getattr__(self, name):
# During unpickling, python will call __getattr__. Use this line to avoid maximum recursion error.
if name in {"__setstate__"}:
raise AttributeError
return self.logger.__getattribute__(name)


Expand Down Expand Up @@ -159,3 +163,10 @@ def filter(self, record):
elif isinstance(self.param, list):
allow = not any([self.match_msg(p, record.msg) for p in self.param])
return allow


def set_global_logger_level(level: int):
    """Set the level of every handler attached to the ``qlib`` logger.

    Nothing happens when no ``qlib`` logger has been created yet.

    Parameters
    ----------
    level : int
        logging level (e.g. ``logging.INFO``) applied to each handler.
    """
    qlib_logger = logging.root.manager.loggerDict.get("qlib", None)
    # ``loggerDict`` stores a ``logging.PlaceHolder`` (which has no ``handlers``) when only
    # a child logger such as ``qlib.xxx`` has been requested, so only real loggers are touched.
    if isinstance(qlib_logger, logging.Logger):
        for _handler in qlib_logger.handlers:
            _handler.setLevel(level)
33 changes: 30 additions & 3 deletions qlib/workflow/__init__.py
Expand Up @@ -23,7 +23,10 @@ def __repr__(self):
@contextmanager
def start(
self,
*,
experiment_id: Optional[Text] = None,
experiment_name: Optional[Text] = None,
recorder_id: Optional[Text] = None,
recorder_name: Optional[Text] = None,
uri: Optional[Text] = None,
resume: bool = False,
Expand All @@ -45,8 +48,12 @@ def start(
Parameters
----------
experiment_id : str
id of the experiment one wants to start.
experiment_name : str
name of the experiment one wants to start.
recorder_id : str
id of the recorder under the experiment one wants to start.
recorder_name : str
name of the recorder under the experiment one wants to start.
uri : str
Expand All @@ -57,15 +64,24 @@ def start(
resume : bool
whether to resume the specific recorder with given name under the given experiment.
"""
run = self.start_exp(experiment_name, recorder_name, uri, resume)
run = self.start_exp(
experiment_id=experiment_id,
experiment_name=experiment_name,
recorder_id=recorder_id,
recorder_name=recorder_name,
uri=uri,
resume=resume,
)
try:
yield run
except Exception as e:
self.end_exp(Recorder.STATUS_FA) # end the experiment if something went wrong
raise e
self.end_exp(Recorder.STATUS_FI)

def start_exp(self, experiment_name=None, recorder_name=None, uri=None, resume=False):
def start_exp(
self, *, experiment_id=None, experiment_name=None, recorder_id=None, recorder_name=None, uri=None, resume=False
):
"""
Lower level method for starting an experiment. When use this method, one should end the experiment manually
and the status of the recorder may not be handled properly. Here is the example code:
Expand All @@ -79,8 +95,12 @@ def start_exp(self, experiment_name=None, recorder_name=None, uri=None, resume=F
Parameters
----------
experiment_id : str
id of the experiment one wants to start.
experiment_name : str
the name of the experiment to be started
recorder_id : str
id of the recorder under the experiment one wants to start.
recorder_name : str
name of the recorder under the experiment one wants to start.
uri : str
Expand All @@ -93,7 +113,14 @@ def start_exp(self, experiment_name=None, recorder_name=None, uri=None, resume=F
-------
An experiment instance being started.
"""
return self.exp_manager.start_exp(experiment_name, recorder_name, uri, resume)
return self.exp_manager.start_exp(
experiment_id=experiment_id,
experiment_name=experiment_name,
recorder_id=recorder_id,
recorder_name=recorder_name,
uri=uri,
resume=resume,
)

def end_exp(self, recorder_status=Recorder.STATUS_FI):
"""
Expand Down
8 changes: 5 additions & 3 deletions qlib/workflow/exp.py
Expand Up @@ -39,12 +39,14 @@ def info(self):
output["recorders"] = list(recorders.keys())
return output

def start(self, recorder_name=None, resume=False):
def start(self, *, recorder_id=None, recorder_name=None, resume=False):
"""
Start the experiment and set it to be active. This method will also start a new recorder.
Parameters
----------
recorder_id : str
the id of the recorder to be created.
recorder_name : str
the name of the recorder to be created.
resume : bool
Expand Down Expand Up @@ -238,14 +240,14 @@ def __init__(self, id, name, uri):
def __repr__(self):
return "{name}(id={id}, info={info})".format(name=self.__class__.__name__, id=self.id, info=self.info)

def start(self, recorder_name=None, resume=False):
def start(self, *, recorder_id=None, recorder_name=None, resume=False):
logger.info(f"Experiment {self.id} starts running ...")
# Get or create recorder
if recorder_name is None:
recorder_name = self._default_rec_name
# resume the recorder
if resume:
recorder, _ = self._get_or_create_rec(recorder_name=recorder_name)
recorder, _ = self._get_or_create_rec(recorder_id=recorder_id, recorder_name=recorder_name)
# create a new recorder
else:
recorder = self.create_recorder(recorder_name)
Expand Down
14 changes: 12 additions & 2 deletions qlib/workflow/expm.py
Expand Up @@ -33,7 +33,10 @@ def __repr__(self):

def start_exp(
self,
*,
experiment_id: Optional[Text] = None,
experiment_name: Optional[Text] = None,
recorder_id: Optional[Text] = None,
recorder_name: Optional[Text] = None,
uri: Optional[Text] = None,
resume: bool = False,
Expand All @@ -45,8 +48,12 @@ def start_exp(
Parameters
----------
experiment_id : str
id of the active experiment.
experiment_name : str
name of the active experiment.
recorder_id : str
id of the recorder to be started.
recorder_name : str
name of the recorder to be started.
uri : str
Expand Down Expand Up @@ -298,7 +305,10 @@ def client(self):

def start_exp(
self,
*,
experiment_id: Optional[Text] = None,
experiment_name: Optional[Text] = None,
recorder_id: Optional[Text] = None,
recorder_name: Optional[Text] = None,
uri: Optional[Text] = None,
resume: bool = False,
Expand All @@ -308,11 +318,11 @@ def start_exp(
# Create experiment
if experiment_name is None:
experiment_name = self._default_exp_name
experiment, _ = self._get_or_create_exp(experiment_name=experiment_name)
experiment, _ = self._get_or_create_exp(experiment_id=experiment_id, experiment_name=experiment_name)
# Set up active experiment
self.active_experiment = experiment
# Start the experiment
self.active_experiment.start(recorder_name, resume)
self.active_experiment.start(recorder_id=recorder_id, recorder_name=recorder_name, resume=resume)

return self.active_experiment

Expand Down
4 changes: 4 additions & 0 deletions scripts/README.md
Expand Up @@ -15,7 +15,11 @@
### Download CN Data

```bash
# daily data
python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

# 1min data (Optional for running non-high-frequency strategies)
python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min
```

### Download US Data
Expand Down
24 changes: 24 additions & 0 deletions scripts/data_collector/contrib/README.md
@@ -0,0 +1,24 @@
# Get future trading days

> `D.calendar(future=True)` will be used
## Requirements

```bash
pip install -r requirements.txt
```

## Collect Data

```bash
# collect future trading days and write them to <qlib_dir>/calendars/<freq>_future.txt
python future_trading_date_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data --freq day
```

## Parameters

- qlib_dir: qlib data directory
- freq: value from [`day`, `1min`], default `day`



87 changes: 87 additions & 0 deletions scripts/data_collector/contrib/future_trading_date_collector.py
@@ -0,0 +1,87 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import sys
from typing import List, Union
from pathlib import Path

import fire
import numpy as np
import pandas as pd
from loguru import logger

# get data from baostock
import baostock as bs

# Directory that contains this script.
CUR_DIR = Path(__file__).resolve().parent
# Make the directory two levels above this script importable, so that the
# ``data_collector`` package imported below can be resolved when run as a script.
sys.path.append(str(CUR_DIR.parent.parent))


from data_collector.utils import generate_minutes_calendar_from_daily


def read_calendar_from_qlib(qlib_dir: Path) -> pd.DataFrame:
    """Load the daily calendar stored at ``<qlib_dir>/calendars/day.txt``.

    Parameters
    ----------
    qlib_dir : Path
        qlib data directory.

    Returns
    -------
    pd.DataFrame
        the calendar file parsed without a header row, or an empty frame when the
        file does not exist.
    """
    day_file = qlib_dir / "calendars" / "day.txt"
    if day_file.exists():
        return pd.read_csv(day_file, header=None)
    return pd.DataFrame()


def write_calendar_to_qlib(qlib_dir: Path, date_list: List[str], freq: str = "day"):
    """Write the future calendar to ``<qlib_dir>/calendars/<freq>_future.txt``.

    Parameters
    ----------
    qlib_dir : Path
        qlib data directory.
    date_list : List[str]
        calendar entries, written one per line.
    freq : str
        frequency name used as the file-name prefix, by default "day".
    """
    calendar_dir = qlib_dir.joinpath("calendars")
    # The reader tolerates a missing calendar, so the directory may not exist yet;
    # create it instead of letting ``np.savetxt`` fail on a missing parent.
    calendar_dir.mkdir(parents=True, exist_ok=True)
    calendar_path = str(calendar_dir.joinpath(f"{freq}_future.txt"))

    np.savetxt(calendar_path, date_list, fmt="%s", encoding="utf-8")
    logger.info(f"write future calendars success: {calendar_path}")


def generate_qlib_calendar(date_list: List[str], freq: str) -> List[str]:
    """Convert a list of trading days into calendar entries of the requested frequency.

    Parameters
    ----------
    date_list : List[str]
        trading days.
    freq : str
        value from ["day", "1min"].

    Returns
    -------
    List[str]
        ``date_list`` itself for "day"; for "1min", the minute calendar produced by
        ``generate_minutes_calendar_from_daily`` formatted as "%Y-%m-%d %H:%M:%S".

    Raises
    ------
    ValueError
        if ``freq`` is neither "day" nor "1min".
    """
    if freq == "day":
        return date_list
    if freq == "1min":
        minute_list = generate_minutes_calendar_from_daily(date_list, freq=freq).tolist()
        return [pd.Timestamp(_minute).strftime("%Y-%m-%d %H:%M:%S") for _minute in minute_list]
    raise ValueError(f"Unsupported freq: {freq}")


def future_calendar_collector(qlib_dir: Union[str, Path], freq: str = "day"):
    """Collect future trading days from baostock and write them into a qlib data directory.

    The query covers January 1st of the year of the last day in the existing daily
    calendar (or of the current year when there is no calendar) through December 31st
    of the current year.

    Parameters
    ----------
    qlib_dir: str or Path
        qlib data directory
    freq: str
        value from ["day", "1min"], by default day

    Raises
    ------
    FileNotFoundError
        if ``qlib_dir`` does not exist.
    """
    qlib_dir = Path(qlib_dir).expanduser().resolve()
    if not qlib_dir.exists():
        raise FileNotFoundError(str(qlib_dir))

    lg = bs.login()
    if lg.error_code != "0":
        logger.error(f"login error: {lg.error_msg}")
        return
    try:
        # read daily calendar
        daily_calendar = read_calendar_from_qlib(qlib_dir)
        end_year = pd.Timestamp.now().year
        if daily_calendar.empty:
            start_year = end_year
        else:
            start_year = pd.Timestamp(daily_calendar.iloc[-1, 0]).year
        # Pass both bounds as "YYYY-MM-DD" strings, the same form already used for end_date.
        rs = bs.query_trade_dates(start_date=f"{start_year}-01-01", end_date=f"{end_year}-12-31")
        if rs.error_code != "0":
            # Do not overwrite the future calendar with an empty file when the query failed.
            logger.error(f"query trade dates error: {rs.error_msg}")
            return
        data_list = []
        # ``and`` short-circuits, so no further rows are fetched once an error is reported.
        while rs.error_code == "0" and rs.next():
            _row_data = rs.get_row_data()
            # presumably each row is [calendar_date, is_trading_day]; keep trading days only
            if int(_row_data[1]) == 1:
                data_list.append(_row_data[0])
        data_list = sorted(data_list)
        date_list = generate_qlib_calendar(data_list, freq=freq)
        write_calendar_to_qlib(qlib_dir, date_list, freq=freq)
    finally:
        # Always close the baostock session, even if collecting or writing failed.
        bs.logout()
    logger.info(f"get trading dates success: {start_year}-01-01 to {end_year}-12-31")


if __name__ == "__main__":
    # Expose ``future_calendar_collector`` as a command-line entry point through
    # python-fire; the accompanying README invokes it with ``--qlib_dir`` and ``--freq``.
    fire.Fire(future_calendar_collector)
5 changes: 5 additions & 0 deletions scripts/data_collector/contrib/requirements.txt
@@ -0,0 +1,5 @@
baostock
fire
numpy
pandas
loguru

0 comments on commit 81bd2ca

Please sign in to comment.