In [None]:
import optuna
import pandas as pd

# Load the two parquet inputs from the working directory.
# NOTE(review): 'cb_data.pq' presumably holds the convertible-bond panel handed to cal_cagr — confirm.
df = pd.read_parquet(path='cb_data.pq')
# Companion dataset; not referenced by the cells visible here.
index = pd.read_parquet(path='index.pq')

In [None]:
# Basic settings: the date window forwarded to cal_cagr by the objective.
# NOTE(review): the strings look like YYYYMMDD — confirm the format cal_cagr expects.
start_date, end_date = '20220801', '20240325'  # (start date, end date)

In [None]:
# Search-space definition: candidate factor names. The objective selects factors
# by their position in this list, so every name must appear exactly once
# ('amount' used to be listed twice, which let one combination pick it twice).
# The trailing numbers are running counts of the entries so far.
factors = ['pre_close', 'open', 'high', 'low', 'close', 'pct_chg', 'vol',  # 7
           'amount', 'volatility_stk', 'mod_conv_prem', 'remain_cap', 'conv_prem',  # 12
           'turnover', 'theory_value', 'option_value', 'dblow',  # 16
           'theory_bias', 'ytm', 'cap_mv_rate', 'pure_value', 'bond_prem',  # 21
           'remain_size', 'theory_conv_prem', 'pb', 'pe_ttm', 'ps_ttm']  # 26

In [None]:
import itertools
from more_factor_test_origin_code import cal_cagr

# Cache of factor-index combinations keyed by (len(factors), num_factors).
# The combinations depend only on those two integers, so they are built once
# and reused by every trial instead of being regenerated on each call.
_FACTOR_COMBINATION_CACHE = {}


def _factor_index_combinations(num_factors):
    """Return every `num_factors`-sized combination of indices into `factors`, each as a list."""
    key = (len(factors), num_factors)
    if key not in _FACTOR_COMBINATION_CACHE:
        _FACTOR_COMBINATION_CACHE[key] = [
            list(combination)
            for combination in itertools.combinations(range(key[0]), num_factors)
        ]
    return _FACTOR_COMBINATION_CACHE[key]


def objective(trial, num_factors, hold_num, threshold_num, min, max):
    """Optuna objective: pick a factor combination, weight it, and score it with cal_cagr.

    Args:
        trial: Object exposing ``suggest_categorical(name, choices)`` (an Optuna trial).
        num_factors: Number of distinct factor indices to combine.
        hold_num, threshold_num, min, max: Forwarded unchanged to ``cal_cagr``; their
            meaning is defined there. ``min``/``max`` shadow the builtins inside this
            function, but the names are kept because callers pass them by keyword.

    Returns:
        Whatever ``cal_cagr`` returns for the sampled configuration.

    Suggested parameters (names are read back by other cells, so they must not change):
        ``factor_ids``: one list of indices into ``factors``.
        ``factor{k}_weight``: one of 1..5, for k = 1..num_factors.
        ``factor{k}_ascending``: True or False, for k = 1..num_factors.
    """
    # Choose one combination of unique factor indices.
    # NOTE(review): the choices are lists; Optuna is expected to warn that such
    # choices are unsuitable for persistent storage — confirm before switching
    # away from the default in-memory storage.
    # The returned list is shared with the cache; treat it as read-only.
    factor_ids = trial.suggest_categorical('factor_ids', _factor_index_combinations(num_factors))

    # Dict entries are evaluated in order, so each factor's weight is suggested
    # before its sort direction.
    rank_factors = [
        {
            'name': factors[factor_ids[i]],
            'weight': trial.suggest_categorical(f'factor{i + 1}_weight', [1, 2, 3, 4, 5]),
            'ascending': trial.suggest_categorical(f'factor{i + 1}_ascending', [True, False]),
        }
        for i in range(num_factors)
    ]

    cagr = cal_cagr(df, start_date, end_date, hold_num, threshold_num, min, max, rank_factors)
    print(rank_factors, cagr)
    return cagr

In [None]:
# Create a maximizing study that uses a TPESampler with a fixed seed, then run the search.
tpe_sampler = optuna.samplers.TPESampler(seed=1111)
study = optuna.create_study(direction='maximize', sampler=tpe_sampler)


def _run_trial(trial):
    """Evaluate one trial with this notebook's fixed objective settings."""
    return objective(trial, num_factors=4, hold_num=5, threshold_num=5, min=100, max=150)


study.optimize(_run_trial, n_trials=2000)

In [None]:
# Print the best parameters found by the study and the objective value they achieved.
best_params, best_value = study.best_params, study.best_value
print("最优参数：", best_params)
print("最优参数下的目标函数值：", best_value)

In [None]:
def convert_optuna_result_to_format(result):
    """Turn an Optuna parameter dict into the list of rank-factor dicts.

    Args:
        result: Mapping with a ``'factor_ids'`` sequence of indices into ``factors``
            plus ``'factor{k}_weight'`` and ``'factor{k}_ascending'`` entries for
            k = 1..len(factor_ids).

    Returns:
        A list with one ``{'name', 'weight', 'ascending'}`` dict per factor index,
        in the same order as ``result['factor_ids']``.
    """
    return [
        {
            'name': factors[factor_id],  # look the factor name up by its index
            'weight': result[f'factor{position}_weight'],
            'ascending': result[f'factor{position}_ascending'],
        }
        # Parameter names are 1-based, hence start=1.
        for position, factor_id in enumerate(result['factor_ids'], start=1)
    ]

In [None]:
# Rebuild the rank-factor list for the best parameters; the bare name on the
# last line makes the notebook display it as the cell output.
factor_combination = convert_optuna_result_to_format(result=best_params)
factor_combination