# 从中间开始的工作流
### 基于前面的训练和预测结果，直接进行回测

In [1]:
import qlib
import pandas as pd
from qlib.constant import REG_CN
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict

from qlib.data import D
import pickle

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


## 从pickle文件加载数据集

In [2]:
# Initialise qlib against the data directory under ~/.qlib, using the
# China region settings (REG_CN).
provider_uri = "~/.qlib/qlib_data/cn_data"
qlib.init(
    provider_uri=provider_uri,
    region=REG_CN,
)

# Benchmark used by the backtest: the CSI 300 index.
benchmark = "SH000300"
# No data-handler or task configuration is needed in this notebook any more,
# because the dataset is restored from disk instead of being rebuilt.

# Restore the previously saved dataset object from its pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load "dataset.pkl" if it was produced by a trusted earlier run.
with open("dataset.pkl", "rb") as dataset_file:
    dataset = pickle.load(dataset_file)

[115504:MainThread](2025-12-13 16:26:07,090) INFO - qlib.Initialization - [config.py:452] - default_conf: client.
[115504:MainThread](2025-12-13 16:26:07,094) INFO - qlib.Initialization - [__init__.py:75] - qlib successfully initialized based on client settings.
[115504:MainThread](2025-12-13 16:26:07,095) INFO - qlib.Initialization - [__init__.py:77] - data_path={'__DEFAULT_FREQ': PosixPath('/home/nbcctwya/.qlib/qlib_data/cn_data')}


## 回测实验

In [3]:
with R.start(experiment_name="backtest"):
    # Look up the recorder of the earlier training run by experiment name and
    # recorder id, then load the trained model pickle stored with that run.
    # (Per the original author's note, a recorder can also be looked up by
    # experiment id, e.g. R.get_recorder(experiment_id='1', recorder_id='xxx').)
    train_recorder = R.get_recorder(
        experiment_name='train',
        recorder_id='dbe52f002c4f491aa3308037f274ad99',
    )
    model = train_recorder.load_object("trained_model.pkl")

    # --- Executor section of the backtest configuration ---
    executor_config = {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {
            "time_per_step": "day",
            "generate_portfolio_metrics": True,
        },
    }

    # --- Strategy section: strategy class and its hyper-parameters ---
    strategy_kwargs = {
        # The signal is given as a (model, dataset) pair; the original author
        # notes that a prediction DataFrame (pred_df) holding the test-set
        # scores may be passed instead.
        "signal": (model, dataset),
        "topk": 50,
        "n_drop": 5,
        "only_tradable": False,  # marked by the original author as modified here
        "risk_degree": 0.95,
    }
    strategy_config = {
        "class": "TopkDropoutStrategy",  # name of the strategy class
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": strategy_kwargs,
    }

    # --- Backtest section: period, account, benchmark and exchange settings ---
    exchange_kwargs = {
        "freq": "day",  # use daily bars
        "limit_threshold": 0.095,  # daily price-limit (limit up / limit down) threshold
        "deal_price": "close",  # trades are filled at the close price
        "open_cost": 0.0005,  # commission rate for opening a position
        "close_cost": 0.0015,  # commission rate for closing a position
        "min_cost": 5,  # minimum cost charged per trade
        "impact_cost": 0.01,  # impact-cost rate, e.g. cost caused by slippage
        "trade_unit": 100,  # traded volume must be a multiple of 100 shares
    }
    backtest_config = {
        "start_time": "2017-01-01",  # start of the test period
        "end_time": "2020-08-01",  # end of the test period
        "account": 100000000,
        "benchmark": benchmark,  # benchmark defined in an earlier cell
        "exchange_kwargs": exchange_kwargs,
    }

    # Full portfolio-analysis configuration, assembled from the three sections.
    port_analysis_config = {
        "executor": executor_config,
        "strategy": strategy_config,
        "backtest": backtest_config,
    }

    # Recorder of the earlier prediction run.
    predict_recorder = R.get_recorder(
        experiment_name='predict',
        recorder_id='1498b86d21ca452abe2e2a7fbac0b1b6',
    )

    # Build the portfolio-analysis record on top of predict_recorder and run it.
    # According to the original author's note (and the captured run log), the
    # resulting pkl artifacts are saved under predict_recorder's
    # artifacts/portfolio_analysis directory, not under the directory of the
    # experiment started by this cell.
    pa_rec = PortAnaRecord(predict_recorder, port_analysis_config, "day")
    pa_rec.generate()

    # Show which recorder received the artifacts, and the info of the recorder
    # that is active for this "backtest" experiment.
    print(
        'predict_recorder.experiment_id', predict_recorder.experiment_id,
        'predict_recorder.id', predict_recorder.id,
    )
    print('info', R.get_recorder().info)

[115504:MainThread](2025-12-13 16:26:15,342) INFO - qlib.workflow - [exp.py:258] - Experiment 615977727392102105 starts running ...
[115504:MainThread](2025-12-13 16:26:15,400) INFO - qlib.workflow - [recorder.py:345] - Recorder 36fa5d5328644b1793f787255a304150 starts running under Experiment 615977727392102105 ...
usage: git diff --no-index [<options>] <path> <path>

Diff output format options
    -p, --patch           generate patch
    -s, --no-patch        suppress diff output
    -u                    generate patch
    -U, --unified[=<n>]   generate diffs with <n> lines context
    -W, --function-context
                          generate diffs with <n> lines context
    --raw                 generate the diff in raw format
    --patch-with-raw      synonym for '-p --raw'
    --patch-with-stat     synonym for '-p --stat'
    --numstat             machine friendly --stat
    --shortstat           output only the last line of --stat
    -X, --dirstat[=<param1,param2>...]
          

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

ModuleNotFoundError. CatBoostModel are skipped. (optional: maybe installing CatBoostModel can fix it.)
ModuleNotFoundError. XGBModel is skipped(optional: maybe installing xgboost can fix it).
ModuleNotFoundError.  PyTorch models are skipped (optional: maybe installing pytorch can fix it).


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

[115504:MainThread](2025-12-13 16:26:15,852) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


backtest loop:   0%|          | 0/871 [00:00<?, ?it/s]

  return np.nanmean(self.data)
  return np.nanmean(self.data)
  return np.nanmean(self.data)
[115504:MainThread](2025-12-13 16:26:40,091) INFO - qlib.workflow - [record_temp.py:515] - Portfolio analysis record 'port_analysis_1day.pkl' has been saved as the artifact of the Experiment 349379308721138139
[115504:MainThread](2025-12-13 16:26:40,100) INFO - qlib.workflow - [record_temp.py:540] - Indicator analysis record 'indicator_analysis_1day.pkl' has been saved as the artifact of the Experiment 349379308721138139


'The following are analysis results of benchmark return(1day).'
                       risk
mean               0.000477
std                0.012295
annualized_return  0.113561
information_ratio  0.598699
max_drawdown      -0.370479
'The following are analysis results of the excess return without cost(1day).'
                       risk
mean               0.000302
std                0.003202
annualized_return  0.071885
information_ratio  1.455039
max_drawdown      -0.075237
'The following are analysis results of the excess return with cost(1day).'
                       risk
mean               0.000106
std                0.003202
annualized_return  0.025324
information_ratio  0.512609
max_drawdown      -0.118296
'The following are analysis results of indicators(1day).'
     value
ffr    1.0
pa     0.0
pos    0.0


[115504:MainThread](2025-12-13 16:26:40,628) INFO - qlib.timer - [log.py:127] - Time cost: 0.000s | waiting `async_log` Done


predict_recorder.experiment_id 349379308721138139 predict_recorder.id 1498b86d21ca452abe2e2a7fbac0b1b6
info {'class': 'Recorder', 'id': '36fa5d5328644b1793f787255a304150', 'name': 'mlflow_recorder', 'experiment_id': '615977727392102105', 'start_time': '2025-12-13 16:26:15', 'end_time': None, 'status': 'RUNNING'}
