Skip to content

Commit

Permalink
Reformat example data names: use {region}_data for 1-day data, and …
Browse files Browse the repository at this point in the history
…`{region}_data_1min` for 1-min data (#781)

* Fix high-freq data name from `yahoo_cn_1min` to `cn_data_1min`

* re-format example data names using `qlib_{region}_{freq}`, e.g. qlib_cn_1d

* re-format example data names using `{region}_{freq}`, e.g. us_1d and cn_1min

* keep using `{region}_data` for 1day data, and change 1min data to `{region}_data_1min`
  • Loading branch information
cning112 committed Dec 28, 2021
1 parent aed9c09 commit 6bafd0a
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 30 deletions.
2 changes: 1 addition & 1 deletion examples/benchmarks/README.md
Expand Up @@ -9,7 +9,7 @@ Here are the results of each benchmark model running on Qlib's `Alpha360` and `A

The numbers shown below demonstrate the performance of the entire `workflow` of each model. We will update the `workflow` as well as models in the near future for better results.
<!--
> If you need to reproduce the results below, please use the **v1** dataset: `python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1d --region cn --version v1`
> If you need to reproduce the results below, please use the **v1** dataset: `python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn --version v1`
>
> In the new version of qlib, the default dataset is **v2**. Since the data is collected from the YahooFinance API (which is not very stable), the results of *v2* and *v1* may differ -->

Expand Down
2 changes: 1 addition & 1 deletion examples/highfreq/workflow.py
Expand Up @@ -82,7 +82,7 @@ class HighfreqWorkflow:

def _init_qlib(self):
"""initialize qlib"""
# use yahoo_cn_1min data
# use cn_data_1min data
QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG, **self.SPEC_CONF}
provider_uri = QLIB_INIT_CONFIG.get("provider_uri")
GetData().qlib_data(target_dir=provider_uri, interval="1min", region=REG_CN, exists_skip=True)
Expand Down
2 changes: 1 addition & 1 deletion qlib/config.py
Expand Up @@ -240,7 +240,7 @@ def set_conf_from_C(self, config_c):
}

HIGH_FREQ_CONFIG = {
"provider_uri": "~/.qlib/qlib_data/yahoo_cn_1min",
"provider_uri": "~/.qlib/qlib_data/cn_data_1min",
"dataset_cache": None,
"expression_cache": "DiskExpressionCache",
"region": REG_CN,
Expand Down
1 change: 1 addition & 0 deletions qlib/tests/data.py
Expand Up @@ -47,6 +47,7 @@ def _download_data(

url = self.merge_remote_url(file_name, dataset_version)
resp = requests.get(url, stream=True)
resp.raise_for_status()
if resp.status_code != 200:
raise requests.exceptions.HTTPError()

Expand Down
2 changes: 1 addition & 1 deletion scripts/README.md
Expand Up @@ -19,7 +19,7 @@
python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

# 1min data (Optional for running non-high-frequency strategies)
python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min
python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min
```

### Download US Data
Expand Down
4 changes: 2 additions & 2 deletions scripts/data_collector/fund/README.md
Expand Up @@ -18,10 +18,10 @@ pip install -r requirements.txt
```bash

# download from eastmoney.com
python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d

# normalize
python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_1d --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ
python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ

# dump data
cd qlib/scripts
Expand Down
4 changes: 2 additions & 2 deletions scripts/data_collector/fund/collector.py
Expand Up @@ -279,7 +279,7 @@ def download_data(
Examples
---------
# get daily data
$ python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
$ python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
"""

super(Run, self).download_data(max_collector_count, delay, start, end, interval, check_data_length, limit_nums)
Expand All @@ -296,7 +296,7 @@ def normalize_data(self, date_field_name: str = "date", symbol_field_name: str =
Examples
---------
$ python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_1d --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ
$ python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ
"""
super(Run, self).normalize_data(date_field_name, symbol_field_name)

Expand Down
40 changes: 20 additions & 20 deletions scripts/data_collector/yahoo/README.md
Expand Up @@ -44,17 +44,17 @@ pip install -r requirements.txt
- examples:
```bash
# cn 1d
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1d --region cn
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
# cn 1min
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min
# us 1d
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_us_1d --region us --interval 1d
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us --interval 1d
# us 1min
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_us_1min --region us --interval 1min
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data_1min --region us --interval 1min
# in 1d
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_in_1d --region in --interval 1d
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/in_data --region in --interval 1d
# in 1min
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_in_1min --region in --interval 1min
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/in_data_1min --region in --interval 1min
```

### Collector *YahooFinance* data to qlib
Expand All @@ -77,17 +77,17 @@ pip install -r requirements.txt
- examples:
```bash
# cn 1d data
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region CN
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_data --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region CN
# cn 1min data
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1min --delay 1 --interval 1min --region CN
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_data_1min --delay 1 --interval 1min --region CN
# us 1d data
python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region US
python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_data --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region US
# us 1min data
python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_1min --delay 1 --interval 1min --region US
python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_data_1min --delay 1 --interval 1min --region US
# in 1d data
python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region IN
python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_data --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region IN
# in 1min data
python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_1min --delay 1 --interval 1min --region IN
python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_data_1min --delay 1 --interval 1min --region IN
```
2. normalize data: `python scripts/data_collector/yahoo/collector.py normalize_data`

Expand Down Expand Up @@ -115,9 +115,9 @@ pip install -r requirements.txt
- examples:
```bash
# normalize 1d cn
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1d --normalize_dir ~/.qlib/stock_data/source/cn_1d_nor --region CN --interval 1d
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_data --normalize_dir ~/.qlib/stock_data/source/cn_1d_nor --region CN --interval 1d
# normalize 1min cn
python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/qlib_cn_1d --source_dir ~/.qlib/stock_data/source/cn_1min --normalize_dir ~/.qlib/stock_data/source/cn_1min_nor --region CN --interval 1min
python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data --source_dir ~/.qlib/stock_data/source/cn_data_1min --normalize_dir ~/.qlib/stock_data/source/cn_1min_nor --region CN --interval 1min
```
3. dump data: `python scripts/dump_bin.py dump_all`

Expand All @@ -135,9 +135,9 @@ pip install -r requirements.txt
- examples:
```bash
# dump 1d cn
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1d --freq day --exclude_fields date,symbol
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/cn_data --freq day --exclude_fields date,symbol
# dump 1min cn
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1min --freq 1min --exclude_fields date,symbol
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/qlib_data/cn_data_1min --freq 1min --exclude_fields date,symbol
```

### Automatic update of daily frequency data(from yahoo finance)
Expand Down Expand Up @@ -178,24 +178,24 @@ pip install -r requirements.txt

# 1d data cn
# freq=day, freq default day
qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1d", region="cn")
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", region="cn")
df = D.features(D.instruments("all"), ["$close"], freq="day")

# 1min data cn
# freq=1min
qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1min", region="cn")
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data_1min", region="cn")
inst = D.list_instruments(D.instruments("all"), freq="1min", as_list=True)
# get 100 symbols
df = D.features(inst[:100], ["$close"], freq="1min")
# get all symbol data
# df = D.features(D.instruments("all"), ["$close"], freq="1min")

# 1d data us
qlib.init(provider_uri="~/.qlib/qlib_data/qlib_us_1d", region="us")
qlib.init(provider_uri="~/.qlib/qlib_data/us_data", region="us")
df = D.features(D.instruments("all"), ["$close"], freq="day")

# 1min data us
qlib.init(provider_uri="~/.qlib/qlib_data/qlib_us_1min", region="cn")
qlib.init(provider_uri="~/.qlib/qlib_data/us_data_1min", region="us")
inst = D.list_instruments(D.instruments("all"), freq="1min", as_list=True)
# get 100 symbols
df = D.features(inst[:100], ["$close"], freq="1min")
Expand Down
4 changes: 2 additions & 2 deletions scripts/data_collector/yahoo/collector.py
Expand Up @@ -933,7 +933,7 @@ def normalize_data(
Examples
---------
$ python collector.py normalize_data --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region cn --interval 1d
$ python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/cn_1d --source_dir ~/.qlib/stock_data/source_cn_1min --normalize_dir ~/.qlib/stock_data/normalize_cn_1min --region CN --interval 1min
$ python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data --source_dir ~/.qlib/stock_data/source_cn_1min --normalize_dir ~/.qlib/stock_data/normalize_cn_1min --region CN --interval 1min
"""
if self.interval.lower() == "1min":
if qlib_data_1d_dir is None or not Path(qlib_data_1d_dir).expanduser().exists():
Expand Down Expand Up @@ -974,7 +974,7 @@ def normalize_data_1d_extend(
Examples
---------
$ python collector.py normalize_data_1d_extend --old_qlib_dir ~/.qlib/qlib_data/cn_1d --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region CN --interval 1d
$ python collector.py normalize_data_1d_extend --old_qlib_dir ~/.qlib/qlib_data/cn_data --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region CN --interval 1d
"""
_class = getattr(self._cur_module, f"{self.normalize_class_name}Extend")
yc = Normalize(
Expand Down

0 comments on commit 6bafd0a

Please sign in to comment.