Please pay ATTENTION that the data is collected from Yahoo Finance and the data might not be perfect. We recommend users to prepare their own data if they have high-quality dataset. For more information, users can refer to the related document
Examples of abnormal data
We have considered STOCK PRICE ADJUSTMENT, but some price series seem still very abnormal.
pip install -r requirements.txt
# download from yahoo finance
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
# normalize
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1d --normalize_dir ~/.qlib/stock_data/source/cn_1d_nor --region CN --interval 1d
# dump data
cd qlib/scripts
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1d --freq day --exclude_fields date,adjclose,dividends,splits,symbol
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1d --region cn
import qlib
from qlib.data import D
qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1d", region="CN")
df = D.features(D.instruments("all"), ["$close"], freq="day")
# download from yahoo finance
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1min --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1min
# normalize
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1min --normalize_dir ~/.qlib/stock_data/source/cn_1min_nor --region CN --interval 1min
# dump data
cd qlib/scripts
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1min --freq 1min --exclude_fields date,adjclose,dividends,splits,symbol
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --interval 1min --region cn
import qlib
from qlib.data import D
qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1min", region="CN")
df = D.features(D.instruments("all"), ["$close"], freq="1min")
# download from yahoo finance
python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_1d --region US --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
# normalize
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/us_1d --normalize_dir ~/.qlib/stock_data/source/us_1d_nor --region US --interval 1d
# dump data
cd qlib/scripts
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/stock_data/source/qlib_us_1d --freq day --exclude_fields date,adjclose,dividends,splits,symbol
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_us_1d --region us
# using
import qlib
from qlib.data import D
qlib.init(provider_uri="~/.qlib/qlib_data/qlib_us_1d", region="US")
df = D.features(D.instruments("all"), ["$close"], freq="day")
pythono collector.py collector_data --help
- interval: 1min or 1d
- region: CN or US