In [None]:
import qlib
import pandas as pd
from qlib.constant import REG_CN
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict

In [None]:
# Instrument universe name; used below as the data handler's "instruments" setting.
market = "csi300"
# Benchmark code; used below as the backtest's "benchmark" setting.
benchmark = "SH000300"

In [None]:
# Date ranges of the dataset splits. Declared first so the handler settings
# below are derived from them instead of repeating the date literals.
segments = {
    "train": ("2008-01-01", "2021-12-31"),
    "valid": ("2022-01-01", "2022-12-31"),
    "test": ("2023-01-01", "2025-08-01"),
}

# Settings passed to the Alpha158 handler.
# The fit window is tied to the training segment. It previously ended at
# 2025-12-31, which is later than the handler's own end_time and spans the
# validation and test periods, so any processor fitted on that window would
# have been exposed to out-of-sample data.
data_handler_config = {
    "start_time": segments["train"][0],
    "end_time": segments["test"][1],
    "fit_start_time": segments["train"][0],
    "fit_end_time": segments["train"][1],
    "instruments": market,
}

# Full task description: the model and the dataset are both instantiated from
# these "class" / "module_path" / "kwargs" entries later in the notebook.
task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            # NOTE(review): thread count is hard-coded; adjust to the machine in use.
            "num_threads": 20,
        },
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            "segments": segments,
        },
    },
}

In [None]:
# Directory holding the local Qlib data set, then initialise Qlib on it
# with the CN region settings.
provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
qlib.init(region=REG_CN, provider_uri=provider_uri)

In [None]:
# Build the model and the dataset (in that order) from their entries in the
# task configuration.
model, dataset = (
    init_instance_by_config(task[section]) for section in ("model", "dataset")
)

In [None]:
# Preview the tail of the data fetched from the dataset's handler.
dataset.handler.fetch().tail()

In [None]:
# Train inside a recorded "train_model" experiment: log the flattened task
# configuration as parameters, fit the model, and save it as an object.
with R.start(experiment_name="train_model"):
    flat_task_params = flatten_dict(task)
    R.log_params(**flat_task_params)

    model.fit(dataset)
    R.save_objects(trained_model=model)

    # Remember this run's recorder id; the backtest cell uses it to load the model.
    train_run = R.get_recorder()
    rid = train_run.id

In [None]:
# ---------------------------------
# prediction, backtest & analysis
# ---------------------------------
# Configuration consumed by PortAnaRecord: which executor and strategy to
# instantiate, and the backtest settings.
port_analysis_config = {
    "executor": {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": dict(time_per_step="day", generate_portfolio_metrics=True),
    },
    "strategy": {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": dict(model=model, dataset=dataset, topk=100, n_drop=10),
    },
    # The backtest window is exactly the test segment.
    "backtest": dict(
        start_time=segments["test"][0],
        end_time=segments["test"][1],
        account=100_000_000,
        benchmark=benchmark,
        exchange_kwargs=dict(
            freq="day",
            limit_threshold=0.095,
            deal_price="close",
            open_cost=0.0005,
            close_cost=0.0015,
            min_cost=5,
        ),
    ),
}

In [None]:
# Run prediction and portfolio analysis inside a recorded "backtest_analysis"
# experiment.
with R.start(experiment_name="backtest_analysis"):
    # Load the model that the training run saved under "trained_model".
    train_recorder = R.get_recorder(experiment_name="train_model", recorder_id=rid)
    model = train_recorder.load_object("trained_model")

    # Recorder of the run started by this `with` block; its id is kept so the
    # generated artifacts can be loaded in later cells.
    recorder = R.get_recorder()
    ba_rid = recorder.id

    # Prediction records.
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # Backtest and portfolio-analysis records.
    par = PortAnaRecord(recorder, port_analysis_config, "day")
    par.generate()

In [None]:
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D

# Re-open the backtest run's recorder and load the objects it stored.
recorder = R.get_recorder(experiment_name="backtest_analysis", recorder_id=ba_rid)
print(recorder)

pred_df = recorder.load_object("pred.pkl")

# The three portfolio-analysis objects share one path pattern; load them in
# the order report -> positions -> analysis.
report_normal_df, positions, analysis_df = (
    recorder.load_object(f"portfolio_analysis/{artifact}_1day.pkl")
    for artifact in ("report_normal", "positions_normal", "port_analysis")
)

In [None]:
# Draw the report graph from the loaded backtest report frame.
analysis_position.report_graph(report_normal_df)

In [None]:
# Draw the risk-analysis graph from the loaded analysis frame and report frame.
analysis_position.risk_analysis_graph(analysis_df, report_normal_df)

In [None]:
# Fetch the label column set for the test segment and give it the single
# flat column name "label".
label_df = dataset.prepare("test", col_set="label")
label_df.columns = pd.Index(["label"])

In [None]:
# Join labels and predictions column-wise, then align the result back onto
# the label index before drawing the score IC graph.
pred_label = (
    pd.concat([label_df, pred_df], axis="columns", sort=True)
    .reindex(label_df.index)
)
analysis_position.score_ic_graph(pred_label)

In [None]:
# Draw the model-performance graphs from the joined label/prediction frame.
analysis_model.model_performance_graph(pred_label)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from io import StringIO

# Hard-coded results table as raw text: one row per (topk, n_drop) setting
# with its annualized return and maximum drawdown.
data_str = """topk    n_drop  annualized_return   max_drawdown
10      1       0.206183            -0.199640
20      2       0.176035            -0.165403
30      3       0.086013            -0.129996
40      4       0.122226            -0.118913
50      5       0.126513            -0.082614
60      6       0.100692            -0.072711
70      7       0.094324            -0.066742
80      8       0.092860            -0.061932
90      9       0.092678            -0.055449
100     10      0.088257            -0.051606"""

# Parse the whitespace-separated text into a DataFrame. The separator is a raw
# string so the regex `\s+` is not read as an (invalid) string escape sequence.
df = pd.read_csv(StringIO(data_str), sep=r"\s+")

# Visualization: plot both metrics against TopK on a single set of axes,
# using explicit figure/axes objects rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df["topk"], df["annualized_return"], marker="o", label="Annualized Return")
ax.plot(df["topk"], df["max_drawdown"], marker="o", label="Max Drawdown")
ax.set_xlabel("TopK")
ax.set_ylabel("Value")
ax.set_title("Annualized Return & Max Drawdown vs TopK")
ax.legend()
ax.grid(True)
plt.show()

# Show the parsed table as the cell's output.
df
