diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml new file mode 100644 index 0000000000..335dc20934 --- /dev/null +++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml @@ -0,0 +1,81 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + instruments: *market + data_loader: + class: QlibDataLoader + kwargs: + config: + feature: + - ["Resi($close, 15)/$close", "Std(Abs($close/Ref($close, 1)-1)*$volume, 5)/(Mean(Abs($close/Ref($close, 1)-1)*$volume, 5)+1e-12)", "Rsquare($close, 5)", "($high-$low)/$open", "Rsquare($close, 10)", "Corr($close, Log($volume+1), 5)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 5)", "Corr($close, Log($volume+1), 10)", "Rsquare($close, 20)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 60)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 10)", "Corr($close, Log($volume+1), 20)", "(Less($open, $close)-$low)/$open"] + - ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"] + label: + - ["Ref($close, -2)/Ref($close, -1) - 1"] + - ["LABEL0"] + freq: day + + learn_processors: + - class: DropnaLabel + - class: CSZScoreNorm + kwargs: + fields_group: label +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.2 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: DataHandlerLP + module_path: qlib.data.dataset.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at end of file diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index 201d2459db..a6ca658d16 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -58,7 +58,7 @@ def __init__( instruments=None, start_time=None, end_time=None, - data_loader: Tuple[dict, str, DataLoader] = None, + data_loader: Union[dict, str, DataLoader] = None, init_data=True, fetch_orig=True, ): @@ -71,7 +71,7 @@ def __init__( start_time of the original data. end_time : end_time of the original data. - data_loader : Tuple[dict, str, DataLoader] + data_loader : Union[dict, str, DataLoader] data loader to load the data. init_data : intialize the original data in the constructor. @@ -282,7 +282,7 @@ def __init__( instruments=None, start_time=None, end_time=None, - data_loader: Tuple[dict, str, DataLoader] = None, + data_loader: Union[dict, str, DataLoader] = None, infer_processors=[], learn_processors=[], process_type=PTYPE_A,