From 6bafd0a09bdf5ac343fa1a16d4d2cad2f81b2cc2 Mon Sep 17 00:00:00 2001 From: Chao Ning Date: Tue, 28 Dec 2021 15:58:49 +0000 Subject: [PATCH] Reformat example data names: use `{region}_data` for 1-day data, and `{region}_data_1min` for 1-min data (#781) * Fix high-freq data name from `yahoo_cn_1min` to `cn_data_1min` * re-format example data names using `qlib_{region}_{freq}`, e.g. qlib_cn_1d * re-format example data names using `{region}_{freq}`, e.g. us_1d and cn_1min * keep using `{region}_data` for 1day data, and change 1min data to `{region}_data_1min` --- examples/benchmarks/README.md | 2 +- examples/highfreq/workflow.py | 2 +- qlib/config.py | 2 +- qlib/tests/data.py | 1 + scripts/README.md | 2 +- scripts/data_collector/fund/README.md | 4 +-- scripts/data_collector/fund/collector.py | 4 +-- scripts/data_collector/yahoo/README.md | 40 +++++++++++------------ scripts/data_collector/yahoo/collector.py | 4 +-- 9 files changed, 31 insertions(+), 30 deletions(-) diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index 24f8870eea..bc8652dc56 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -9,7 +9,7 @@ Here are the results of each benchmark model running on Qlib's `Alpha360` and `A The numbers shown below demonstrate the performance of the entire `workflow` of each model. We will update the `workflow` as well as models in the near future for better results. 
diff --git a/examples/highfreq/workflow.py b/examples/highfreq/workflow.py index e9278cdb12..dc91f9b254 100644 --- a/examples/highfreq/workflow.py +++ b/examples/highfreq/workflow.py @@ -82,7 +82,7 @@ class HighfreqWorkflow: def _init_qlib(self): """initialize qlib""" - # use yahoo_cn_1min data + # use cn_data_1min data QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG, **self.SPEC_CONF} provider_uri = QLIB_INIT_CONFIG.get("provider_uri") GetData().qlib_data(target_dir=provider_uri, interval="1min", region=REG_CN, exists_skip=True) diff --git a/qlib/config.py b/qlib/config.py index 03b341dfe7..c29338f5a7 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -240,7 +240,7 @@ def set_conf_from_C(self, config_c): } HIGH_FREQ_CONFIG = { - "provider_uri": "~/.qlib/qlib_data/yahoo_cn_1min", + "provider_uri": "~/.qlib/qlib_data/cn_data_1min", "dataset_cache": None, "expression_cache": "DiskExpressionCache", "region": REG_CN, diff --git a/qlib/tests/data.py b/qlib/tests/data.py index b38fd7eee3..0c169c022f 100644 --- a/qlib/tests/data.py +++ b/qlib/tests/data.py @@ -47,6 +47,7 @@ def _download_data( url = self.merge_remote_url(file_name, dataset_version) resp = requests.get(url, stream=True) + resp.raise_for_status() if resp.status_code != 200: raise requests.exceptions.HTTPError() diff --git a/scripts/README.md b/scripts/README.md index 1816ed14f4..562ac77e2e 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -19,7 +19,7 @@ python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn # 1min data (Optional for running non-high-frequency strategies) -python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min +python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min ``` ### Download US Data diff --git a/scripts/data_collector/fund/README.md b/scripts/data_collector/fund/README.md index c729b7eaa7..ca6a47f925 100644 --- a/scripts/data_collector/fund/README.md +++ 
b/scripts/data_collector/fund/README.md @@ -18,10 +18,10 @@ pip install -r requirements.txt ```bash # download from eastmoney.com -python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d +python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d # normalize -python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_1d --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ +python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ # dump data cd qlib/scripts diff --git a/scripts/data_collector/fund/collector.py b/scripts/data_collector/fund/collector.py index cecabb33c5..f9d788fd31 100644 --- a/scripts/data_collector/fund/collector.py +++ b/scripts/data_collector/fund/collector.py @@ -279,7 +279,7 @@ def download_data( Examples --------- # get daily data - $ python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d + $ python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d """ super(Run, self).download_data(max_collector_count, delay, start, end, interval, check_data_length, limit_nums) @@ -296,7 +296,7 @@ def normalize_data(self, date_field_name: str = "date", symbol_field_name: str = Examples --------- - $ python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_1d --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ + $ python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir 
~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ """ super(Run, self).normalize_data(date_field_name, symbol_field_name) diff --git a/scripts/data_collector/yahoo/README.md b/scripts/data_collector/yahoo/README.md index aa358e719f..71f2b75f8e 100644 --- a/scripts/data_collector/yahoo/README.md +++ b/scripts/data_collector/yahoo/README.md @@ -44,17 +44,17 @@ pip install -r requirements.txt - examples: ```bash # cn 1d - python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1d --region cn + python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn # cn 1min - python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min + python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min # us 1d - python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_us_1d --region us --interval 1d + python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us --interval 1d # us 1min - python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_us_1min --region us --interval 1min + python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data_1min --region us --interval 1min # in 1d - python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_in_1d --region in --interval 1d + python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/in_data --region in --interval 1d # in 1min - python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_in_1min --region in --interval 1min + python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/in_data_1min --region in --interval 1min ``` ### Collector *YahooFinance* data to qlib @@ -77,17 +77,17 @@ pip install -r requirements.txt - examples: ```bash # cn 1d data - python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1d --start 
2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region CN + python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_data --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region CN # cn 1min data - python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1min --delay 1 --interval 1min --region CN + python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_data_1min --delay 1 --interval 1min --region CN # us 1d data - python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region US + python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_data --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region US # us 1min data - python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_1min --delay 1 --interval 1min --region US + python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_data_1min --delay 1 --interval 1min --region US # in 1d data - python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region IN + python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_data --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region IN # in 1min data - python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_1min --delay 1 --interval 1min --region IN + python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_data_1min --delay 1 --interval 1min --region IN ``` 2. 
normalize data: `python scripts/data_collector/yahoo/collector.py normalize_data` @@ -115,9 +115,9 @@ pip install -r requirements.txt - examples: ```bash # normalize 1d cn - python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1d --normalize_dir ~/.qlib/stock_data/source/cn_1d_nor --region CN --interval 1d + python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_data --normalize_dir ~/.qlib/stock_data/source/cn_1d_nor --region CN --interval 1d # normalize 1min cn - python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/qlib_cn_1d --source_dir ~/.qlib/stock_data/source/cn_1min --normalize_dir ~/.qlib/stock_data/source/cn_1min_nor --region CN --interval 1min + python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data --source_dir ~/.qlib/stock_data/source/cn_data_1min --normalize_dir ~/.qlib/stock_data/source/cn_1min_nor --region CN --interval 1min ``` 3. dump data: `python scripts/dump_bin.py dump_all` @@ -135,9 +135,9 @@ pip install -r requirements.txt - examples: ```bash # dump 1d cn - python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1d --freq day --exclude_fields date,symbol + python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/cn_data --freq day --exclude_fields date,symbol # dump 1min cn - python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1min --freq 1min --exclude_fields date,symbol + python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/qlib_data/cn_data_1min --freq 1min --exclude_fields date,symbol ``` ### Automatic update of daily frequency data(from yahoo finance) @@ -178,12 +178,12 @@ pip install -r requirements.txt # 1d data cn # freq=day, freq default day - qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1d", region="cn") + 
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", region="cn") df = D.features(D.instruments("all"), ["$close"], freq="day") # 1min data cn # freq=1min - qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1min", region="cn") + qlib.init(provider_uri="~/.qlib/qlib_data/cn_data_1min", region="cn") inst = D.list_instruments(D.instruments("all"), freq="1min", as_list=True) # get 100 symbols df = D.features(inst[:100], ["$close"], freq="1min") @@ -191,11 +191,11 @@ pip install -r requirements.txt # df = D.features(D.instruments("all"), ["$close"], freq="1min") # 1d data us - qlib.init(provider_uri="~/.qlib/qlib_data/qlib_us_1d", region="us") + qlib.init(provider_uri="~/.qlib/qlib_data/us_data", region="us") df = D.features(D.instruments("all"), ["$close"], freq="day") # 1min data us - qlib.init(provider_uri="~/.qlib/qlib_data/qlib_us_1min", region="cn") + qlib.init(provider_uri="~/.qlib/qlib_data/us_data_1min", region="us") inst = D.list_instruments(D.instruments("all"), freq="1min", as_list=True) # get 100 symbols df = D.features(inst[:100], ["$close"], freq="1min") diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 3e8dc7c3f0..594661b400 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -933,7 +933,7 @@ def normalize_data( Examples --------- $ python collector.py normalize_data --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region cn --interval 1d - $ python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/cn_1d --source_dir ~/.qlib/stock_data/source_cn_1min --normalize_dir ~/.qlib/stock_data/normalize_cn_1min --region CN --interval 1min + $ python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data --source_dir ~/.qlib/stock_data/source_cn_1min --normalize_dir ~/.qlib/stock_data/normalize_cn_1min --region CN --interval 1min """ if self.interval.lower() == "1min": if qlib_data_1d_dir 
is None or not Path(qlib_data_1d_dir).expanduser().exists(): @@ -974,7 +974,7 @@ def normalize_data_1d_extend( Examples --------- - $ python collector.py normalize_data_1d_extend --old_qlib_dir ~/.qlib/qlib_data/cn_1d --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region CN --interval 1d + $ python collector.py normalize_data_1d_extend --old_qlib_dir ~/.qlib/qlib_data/cn_data --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region CN --interval 1d """ _class = getattr(self._cur_module, f"{self.normalize_class_name}Extend") yc = Normalize(