Reformat example data names: use {region}_data for 1-day data, and {region}_data_1min for 1-min data #781

Merged (4 commits, Dec 28, 2021)
2 changes: 1 addition & 1 deletion examples/benchmarks/README.md
@@ -9,7 +9,7 @@ Here are the results of each benchmark model running on Qlib's `Alpha360` and `A

The numbers shown below demonstrate the performance of the entire `workflow` of each model. We will update the `workflow` as well as models in the near future for better results.
<!--
- > If you need to reproduce the results below, please use the **v1** dataset: `python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1d --region cn --version v1`
+ > If you need to reproduce the results below, please use the **v1** dataset: `python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn --version v1`
>
> In the new version of qlib, the default dataset is **v2**. Since the data is collected from the YahooFinance API (which is not very stable), the results of *v2* and *v1* may differ -->

2 changes: 1 addition & 1 deletion examples/highfreq/workflow.py
@@ -82,7 +82,7 @@ class HighfreqWorkflow:

def _init_qlib(self):
"""initialize qlib"""
- # use yahoo_cn_1min data
+ # use cn_data_1min data
QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG, **self.SPEC_CONF}
provider_uri = QLIB_INIT_CONFIG.get("provider_uri")
GetData().qlib_data(target_dir=provider_uri, interval="1min", region=REG_CN, exists_skip=True)
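The rest of `_init_qlib` is cut off above. As a rough, self-contained sketch of how the renamed 1min directory ends up being used (assuming the merged config is passed straight to `qlib.init`, that `GetData` lives in `qlib.tests.data` — the module touched later in this PR — and with `SPEC_CONF` as a placeholder for the workflow's own overrides):

```python
import qlib
from qlib.config import HIGH_FREQ_CONFIG, REG_CN
from qlib.tests.data import GetData  # assumed import path for the GetData helper

SPEC_CONF = {}  # placeholder for workflow-specific overrides
QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG, **SPEC_CONF}
provider_uri = QLIB_INIT_CONFIG.get("provider_uri")  # ~/.qlib/qlib_data/cn_data_1min after this PR

# Download the CN 1min dataset into the renamed directory if it is missing,
# then initialize qlib against it.
GetData().qlib_data(target_dir=provider_uri, interval="1min", region=REG_CN, exists_skip=True)
qlib.init(**QLIB_INIT_CONFIG)
```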
2 changes: 1 addition & 1 deletion qlib/config.py
@@ -240,7 +240,7 @@ def set_conf_from_C(self, config_c):
}

HIGH_FREQ_CONFIG = {
-   "provider_uri": "~/.qlib/qlib_data/yahoo_cn_1min",
+   "provider_uri": "~/.qlib/qlib_data/cn_data_1min",
"dataset_cache": None,
"expression_cache": "DiskExpressionCache",
"region": REG_CN,
1 change: 1 addition & 0 deletions qlib/tests/data.py
@@ -47,6 +47,7 @@ def _download_data(

url = self.merge_remote_url(file_name, dataset_version)
resp = requests.get(url, stream=True)
+ resp.raise_for_status()
if resp.status_code != 200:
raise requests.exceptions.HTTPError()

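For reference, `Response.raise_for_status()` raises `requests.exceptions.HTTPError` on any 4xx/5xx response, surfacing the failing URL and status code before the bare non-200 check above runs. A minimal sketch of the same streaming-download pattern (the URL and chunk size are illustrative placeholders):

```python
import requests


def download_file(url: str, dest_path: str, chunk_size: int = 1024 * 1024) -> None:
    """Stream a file to disk, failing loudly on HTTP errors."""
    resp = requests.get(url, stream=True)
    # Raises requests.exceptions.HTTPError for 4xx/5xx responses.
    resp.raise_for_status()
    with open(dest_path, "wb") as f:
        for chunk in resp.iter_content(chunk_size=chunk_size):
            if chunk:
                f.write(chunk)


# Hypothetical usage; the real dataset URL comes from merge_remote_url() above.
# download_file("https://example.com/qlib_data_cn_1d_latest.zip", "cn_data.zip")
```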
2 changes: 1 addition & 1 deletion scripts/README.md
@@ -19,7 +19,7 @@
python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

# 1min data (Optional for running non-high-frequency strategies)
- python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min
+ python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min
```

### Download US Data
4 changes: 2 additions & 2 deletions scripts/data_collector/fund/README.md
@@ -18,10 +18,10 @@ pip install -r requirements.txt
```bash

# download from eastmoney.com
- python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
+ python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d

# normalize
- python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_1d --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ
+ python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ

# dump data
cd qlib/scripts
4 changes: 2 additions & 2 deletions scripts/data_collector/fund/collector.py
@@ -279,7 +279,7 @@ def download_data(
Examples
---------
# get daily data
- $ python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
+ $ python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
"""

super(Run, self).download_data(max_collector_count, delay, start, end, interval, check_data_length, limit_nums)
@@ -296,7 +296,7 @@ def normalize_data(self, date_field_name: str = "date", symbol_field_name: str =

Examples
---------
- $ python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_1d --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ
+ $ python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ
"""
super(Run, self).normalize_data(date_field_name, symbol_field_name)

40 changes: 20 additions & 20 deletions scripts/data_collector/yahoo/README.md
@@ -44,17 +44,17 @@ pip install -r requirements.txt
- examples:
```bash
# cn 1d
- python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1d --region cn
+ python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
# cn 1min
- python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min
+ python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min
# us 1d
- python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_us_1d --region us --interval 1d
+ python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us --interval 1d
# us 1min
- python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_us_1min --region us --interval 1min
+ python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data_1min --region us --interval 1min
# in 1d
- python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_in_1d --region in --interval 1d
+ python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/in_data --region in --interval 1d
# in 1min
- python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_in_1min --region in --interval 1min
+ python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/in_data_1min --region in --interval 1min
```
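The same downloads can also be triggered from Python. A minimal sketch using the `GetData` helper that `scripts/get_data.py` wraps (the import path and the `exists_skip` flag mirror the call in `examples/highfreq/workflow.py` earlier in this PR; the target directories follow the new `{region}_data` / `{region}_data_1min` convention):

```python
from qlib.tests.data import GetData  # assumed import path for the GetData helper

gd = GetData()

# 1-day data goes to {region}_data, 1-minute data to {region}_data_1min.
gd.qlib_data(target_dir="~/.qlib/qlib_data/cn_data", region="cn", interval="1d", exists_skip=True)
gd.qlib_data(target_dir="~/.qlib/qlib_data/cn_data_1min", region="cn", interval="1min", exists_skip=True)
gd.qlib_data(target_dir="~/.qlib/qlib_data/us_data", region="us", interval="1d", exists_skip=True)
```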

### Collect *YahooFinance* data to qlib
@@ -77,17 +77,17 @@ pip install -r requirements.txt
- examples:
```bash
# cn 1d data
- python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region CN
+ python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_data --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region CN
# cn 1min data
- python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1min --delay 1 --interval 1min --region CN
+ python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_data_1min --delay 1 --interval 1min --region CN
# us 1d data
- python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region US
+ python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_data --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region US
# us 1min data
- python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_1min --delay 1 --interval 1min --region US
+ python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_data_1min --delay 1 --interval 1min --region US
# in 1d data
- python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region IN
+ python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_data --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region IN
# in 1min data
- python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_1min --delay 1 --interval 1min --region IN
+ python collector.py download_data --source_dir ~/.qlib/stock_data/source/in_data_1min --delay 1 --interval 1min --region IN
```
2. normalize data: `python scripts/data_collector/yahoo/collector.py normalize_data`

@@ -115,9 +115,9 @@ pip install -r requirements.txt
- examples:
```bash
# normalize 1d cn
- python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1d --normalize_dir ~/.qlib/stock_data/source/cn_1d_nor --region CN --interval 1d
+ python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_data --normalize_dir ~/.qlib/stock_data/source/cn_1d_nor --region CN --interval 1d
# normalize 1min cn
- python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/qlib_cn_1d --source_dir ~/.qlib/stock_data/source/cn_1min --normalize_dir ~/.qlib/stock_data/source/cn_1min_nor --region CN --interval 1min
+ python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data --source_dir ~/.qlib/stock_data/source/cn_data_1min --normalize_dir ~/.qlib/stock_data/source/cn_1min_nor --region CN --interval 1min
```
3. dump data: `python scripts/dump_bin.py dump_all`

@@ -135,9 +135,9 @@ pip install -r requirements.txt
- examples:
```bash
# dump 1d cn
- python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1d --freq day --exclude_fields date,symbol
+ python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/cn_data --freq day --exclude_fields date,symbol
# dump 1min cn
- python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1min --freq 1min --exclude_fields date,symbol
+ python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/qlib_data/cn_data_1min --freq 1min --exclude_fields date,symbol
```

### Automatic update of daily frequency data (from yahoo finance)
@@ -178,24 +178,24 @@ pip install -r requirements.txt

# 1d data cn
# freq=day, freq default day
- qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1d", region="cn")
+ qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", region="cn")
df = D.features(D.instruments("all"), ["$close"], freq="day")

# 1min data cn
# freq=1min
- qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1min", region="cn")
+ qlib.init(provider_uri="~/.qlib/qlib_data/cn_data_1min", region="cn")
inst = D.list_instruments(D.instruments("all"), freq="1min", as_list=True)
# get 100 symbols
df = D.features(inst[:100], ["$close"], freq="1min")
# get all symbol data
# df = D.features(D.instruments("all"), ["$close"], freq="1min")

# 1d data us
- qlib.init(provider_uri="~/.qlib/qlib_data/qlib_us_1d", region="us")
+ qlib.init(provider_uri="~/.qlib/qlib_data/us_data", region="us")
df = D.features(D.instruments("all"), ["$close"], freq="day")

# 1min data us
- qlib.init(provider_uri="~/.qlib/qlib_data/qlib_us_1min", region="cn")
+ qlib.init(provider_uri="~/.qlib/qlib_data/us_data_1min", region="cn")
inst = D.list_instruments(D.instruments("all"), freq="1min", as_list=True)
# get 100 symbols
df = D.features(inst[:100], ["$close"], freq="1min")
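As a quick sanity check on the 1min data, the minute bars can be down-sampled to daily closes with pandas. A minimal sketch, assuming `D.features` returns its usual MultiIndex of `(instrument, datetime)` (the level names are an assumption here):

```python
import pandas as pd

# df is the 1min frame returned by D.features(...) above, indexed by (instrument, datetime).
daily_close = (
    df["$close"]
    .groupby([pd.Grouper(level="instrument"), pd.Grouper(level="datetime", freq="D")])
    .last()
)
print(daily_close.head())
```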
4 changes: 2 additions & 2 deletions scripts/data_collector/yahoo/collector.py
@@ -933,7 +933,7 @@ def normalize_data(
Examples
---------
$ python collector.py normalize_data --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region cn --interval 1d
- $ python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/cn_1d --source_dir ~/.qlib/stock_data/source_cn_1min --normalize_dir ~/.qlib/stock_data/normalize_cn_1min --region CN --interval 1min
+ $ python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data --source_dir ~/.qlib/stock_data/source_cn_1min --normalize_dir ~/.qlib/stock_data/normalize_cn_1min --region CN --interval 1min
"""
if self.interval.lower() == "1min":
if qlib_data_1d_dir is None or not Path(qlib_data_1d_dir).expanduser().exists():
@@ -974,7 +974,7 @@ def normalize_data_1d_extend(

Examples
---------
- $ python collector.py normalize_data_1d_extend --old_qlib_dir ~/.qlib/qlib_data/cn_1d --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region CN --interval 1d
+ $ python collector.py normalize_data_1d_extend --old_qlib_dir ~/.qlib/qlib_data/cn_data --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region CN --interval 1d
"""
_class = getattr(self._cur_module, f"{self.normalize_class_name}Extend")
yc = Normalize(