## References
- [`workflow_by_code.ipynb`](https://github.com/microsoft/qlib/blob/v0.9.2/examples/workflow_by_code.ipynb)
- [Converting CSV Format into Qlib Format](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)

In [2]:
import pathlib
import shutil

CSV_PATH = pathlib.Path.home() / ".qlib" / "csv_data" / "bitfinex_data"
PROVIDER_URI = pathlib.Path.home() / ".qlib" / "qlib_data" / "bitfinex_data"

shutil.rmtree(CSV_PATH)
shutil.rmtree(PROVIDER_URI)

In [3]:
import csv
import os
import requests
from time import strftime, localtime


def fetch_bitfinex_candles(csv_path: pathlib.PosixPath, pair: str = "BTCUSD", limit: int = 10000):
    os.makedirs(CSV_PATH, exist_ok=True)
    # See: https://docs.bitfinex.com/reference/rest-public-candles
    url = f"https://api-pub.bitfinex.com/v2/candles/trade:1D:t{pair}/hist?limit={limit}"
    headers = {"accept": "application/json"}
    response = requests.get(url, headers=headers)
    with open(csv_path / f"{pair}.csv", "w") as candles_file:
        candles_writer = csv.writer(candles_file)
        candles_writer.writerow(["MTS", "OPEN", "CLOSE", "HIGH", "LOW", "VOLUME", "IS_MISSING"])
        candles = list(response.json())
        candles.reverse()  # Ascending order
        last_candle = None
        for candle in candles:
            MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000
            if last_candle is not None:
                for t in range(last_candle[0] + MILLISECONDS_IN_DAY, candle[0], MILLISECONDS_IN_DAY):
                    candles_writer.writerow([strftime("%Y-%m-%d", localtime(t // 1000)), *last_candle[1:], True])
            candles_writer.writerow([strftime("%Y-%m-%d", localtime(candle[0] // 1000)), *candle[1:], False])
            last_candle = candle


fetch_bitfinex_candles(CSV_PATH)

In [4]:
!python /usr/src/qlib/scripts/dump_bin.py dump_all --csv_path {CSV_PATH} --qlib_dir {PROVIDER_URI} --date_field_name "MTS"

[32m2023-07-05 14:34:58.034[0m | [1mINFO    [0m | [36m__main__[0m:[36m_get_all_date[0m:[36m275[0m - [1mstart get all date......[0m
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 13.06it/s]
[32m2023-07-05 14:34:58.111[0m | [1mINFO    [0m | [36m__main__[0m:[36m_get_all_date[0m:[36m294[0m - [1mend of get all date.
[0m
[32m2023-07-05 14:34:58.111[0m | [1mINFO    [0m | [36m__main__[0m:[36m_dump_calendars[0m:[36m297[0m - [1mstart dump calendars......[0m
[32m2023-07-05 14:34:58.132[0m | [1mINFO    [0m | [36m__main__[0m:[36m_dump_calendars[0m:[36m300[0m - [1mend of calendars dump.
[0m
[32m2023-07-05 14:34:58.133[0m | [1mINFO    [0m | [36m__main__[0m:[36m_dump_instruments[0m:[36m303[0m - [1mstart dump instruments......[0m
[32m2023-07-05 14:34:58.137[0m | [1mINFO    [0m | [36m__main__[0m:[36m_dump_instruments[0m:[36m305[0m - [1mend of instruments dump.
[0m
[32m2023-07-05 14:34:58.137[0m | [1mINFO   

In [5]:
import qlib
from qlib.constant import REG_US

MARKET = "all"  # instrument
BENCHMARK = ""  # feature

qlib.init(provider_uri=PROVIDER_URI, region=REG_US)

[68325:MainThread](2023-07-05 14:35:10,253) INFO - qlib.Initialization - [config.py:417] - default_conf: client.
[68325:MainThread](2023-07-05 14:35:10,431) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[68325:MainThread](2023-07-05 14:35:10,432) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/root/.qlib/qlib_data/bitfinex_data')}


In [6]:
from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.utils import flatten_dict

data_handler_config = {
    "start_time": "2014-01-01",
    "end_time": "2022-12-31",
    "fit_start_time": "2014-01-01",
    "fit_end_time": "2020-12-31",
    "instruments": MARKET,
}

task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            "segments": {
                "train": ("2014-01-01", "2020-12-31"),
                "valid": ("2021-01-01", "2021-12-31"),
                "test": ("2022-01-01", "2022-12-31"),
            },
        },
    },
}

# Model initiaiton
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])

# Start exp to train model
with R.start(experiment_name="train_model"):
    R.log_params(**flatten_dict(task))
    model.fit(dataset)
    R.save_objects(trained_model=model)
    rid = R.get_recorder().id

ModuleNotFoundError. CatBoostModel are skipped. (optional: maybe installing CatBoostModel can fix it.)
ModuleNotFoundError. XGBModel is skipped(optional: maybe installing xgboost can fix it).
ModuleNotFoundError.  PyTorch models are skipped (optional: maybe installing pytorch can fix it).


[68325:MainThread](2023-07-05 14:35:12,644) INFO - qlib.timer - [log.py:128] - Time cost: 0.211s | Loading data Done
[68325:MainThread](2023-07-05 14:35:12,646) INFO - qlib.timer - [log.py:128] - Time cost: 0.001s | DropnaLabel Done
[68325:MainThread](2023-07-05 14:35:14,047) INFO - qlib.timer - [log.py:128] - Time cost: 1.401s | CSZScoreNorm Done
[68325:MainThread](2023-07-05 14:35:14,048) INFO - qlib.timer - [log.py:128] - Time cost: 1.404s | fit & process data Done
[68325:MainThread](2023-07-05 14:35:14,048) INFO - qlib.timer - [log.py:128] - Time cost: 1.615s | Init data Done
[68325:MainThread](2023-07-05 14:35:14,051) INFO - qlib.workflow - [exp.py:258] - Experiment 1 starts running ...
[68325:MainThread](2023-07-05 14:35:14,102) INFO - qlib.workflow - [recorder.py:341] - Recorder 4530c31bb0564024a101d295c128ba7e starts running under Experiment 1 ...


Training until validation scores don't improve for 50 rounds
[20]	train's l2: 0	valid's l2: 0
[40]	train's l2: 0	valid's l2: 0
Early stopping, best iteration is:
[1]	train's l2: 0	valid's l2: 0


[68325:MainThread](2023-07-05 14:35:14,511) INFO - qlib.timer - [log.py:128] - Time cost: 0.171s | waiting `async_log` Done
