## 打板策略 + 期权敲敲乐

In [1]:
import sys
sys.path.append('..')

import warnings

import numpy as np
import pandas as pd 
import polars as pl
import quantstats as qs
from QHData.query import Query
from secuntial import Sequential
from QHFactor.transformer import Load_X_y

from estimator import LGBMRollingClassifier
from features_selector import LGBMFeatureSelector
from QHData.data_load import DataLoader,CodesLoader
from sklearn.model_selection import train_test_split

# Attach the quantstats helpers to pandas objects before any analysis runs.
qs.extend_pandas()
# Silence every warning category for the rest of the notebook session.
warnings.simplefilter('ignore')

No module named 'wtpy'
Error importing QUANTAXIS module: No module named 'QUANTAXIS'


In [5]:
# 1. Query data

seq = Sequential(name='STK_UPLIMIT500_1W')  # initialise the trading framework

q = Query()
# --------------------------------------------------------------------------------------
code_loader = CodesLoader()
code_hs300 = code_loader.load_stk_codes('hs300.json')       # CSI 300 constituents
code_zz500 = code_loader.load_stk_codes('zz500.json')       # CSI 500 constituents
# code_zz1000 = code_loader.load_stk_codes('zz1000.json')     # CSI 1000 constituents

df_stocks = q.fetch_stock_list()    # full-market stock information
# Drop *ST / ST names. na=False treats a missing name as "not ST"; without it the
# mask contains NaN and the `~` negation fails on the resulting object-dtype mask.
# NOTE(review): filter_codes is not applied to the load below - confirm whether it should be.
filter_codes = df_stocks[~df_stocks.name.str.contains('ST', na=False)].code.tolist()

# --------------------------------------------------------------------------------------

data_loader = DataLoader(start_date='2020-01-01', end_date='2025-03-09', freq='weekly')
stk_data, returns = data_loader.load_stock(codes=code_zz500, return_X_y=True)     # stock data
bench_market_data = data_loader.load_bench_market('000300')                       # market benchmark

# 2. Compute factors
factors = seq.calculate_factors(stk_data, window=24)

# Build labels: 1 for names above the 97th percentile of returns within each
# index-level-0 group (presumably one group per weekly period, given freq='weekly'),
# 0 otherwise. Series.quantile skips NaN, whereas np.percentile would return NaN for
# the whole group and silently label every row in that period as 0.
labels = returns.groupby(level=0, group_keys=False).apply(
    lambda x: (x > x.quantile(0.97)).astype(int)
)

[2025-03-12 22:55:18 - INFO] Model name: STK_UPLIMIT300_1W
[2025-03-12 22:55:18 - INFO] Training started at 2025-03-12 22:55:18
[2025-03-12 22:55:47 - INFO] # 2. calculate factors
[2025-03-12 22:55:47 - INFO] Factor calculation and processing started
Processing stocks: 100%|██████████| 300/300 [00:28<00:00, 10.46it/s]
[2025-03-12 22:56:42 - INFO] Factors calculated.
[2025-03-12 22:56:42 - INFO] Factors shape: (77099, 171)
[2025-03-12 22:56:42 - INFO] Number of factors: 171
[2025-03-12 22:56:42 - INFO] Factor types: {'alpha_001': dtype('float32'), 'alpha_002': dtype('float32'), 'alpha_003': dtype('float32'), 'alpha_004': dtype('float32'), 'alpha_005': dtype('float32'), 'alpha_006': dtype('float32'), 'alpha_007': dtype('float32'), 'alpha_008': dtype('float32'), 'alpha_009': dtype('float32'), 'alpha_010': dtype('float32'), 'alpha_011': dtype('float32'), 'alpha_012': dtype('float32'), 'alpha_013': dtype('float32'), 'alpha_014': dtype('float32'), 'alpha_015': dtype('float32'), 'alpha_016': 

In [None]:
# 3. Feature selection

# Missing factor values are replaced with 0 before pairing factors with labels.
X, y = Load_X_y().transform(factors.fillna(0), labels)

# Chronological hold-out: shuffle=False keeps the last 10% of rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=0.1,
)

# Select features on the training portion only, using a LightGBM classifier-based
# selector configured with n=20.
selector = LGBMFeatureSelector(model_type='clf', n=20)
features = seq.select_features(selector, X_train, y_train)

In [None]:
## 4. Model training

# Alternative label definition (kept for reference):
# labels = mm_returns.groupby(level=1, group_keys=False).apply(lambda x: ((x.shift(1) < 0.9) & (x > 0.95))).astype(int)

# Boolean mask of positive samples, derived from `labels` itself so the selection
# never depends on another object (`returns`) sharing the exact same row order.
is_positive = labels > 0

# Index of the positive samples
uplimit_index = labels[is_positive].index

# Re-sample the target for class balance: keep every positive ...
uplimit_returns = labels[is_positive]
# ... and draw an equal number of negatives at random (fixed seed for reproducibility).
samples_returns = (
    labels[~is_positive]
    .sample(n=len(uplimit_returns), random_state=42)
    .sort_index()
)

# Merge into the new, balanced target
targets = pd.concat([uplimit_returns, samples_returns], axis=0).sort_index()

# Build the new X, y from the selected features and the balanced target
train_x, train_y = Load_X_y().transform(features, targets)

# Train the model
model = seq.train_model(LGBMRollingClassifier, train_x, train_y)


In [None]:
# 5. Model tracking
config_data = {
    'features': features.columns.tolist()
}

# local_uri = 'http://192.168.215.4:5115'
remote_uri = 'http://192.168.31.220:5115'

run_id = seq.track_model(
    track_ui=remote_uri,
    exp_name=seq.name,
    model=model,
    params=model.get_params(),
    config=config_data,
    # metrics=performence,
    # The tag value is the plain model name; the previous `{seq.name}` built a
    # one-element set rather than a string.
    tags={'model': seq.name},
    # image=f'{seq.name}_performence.png',
)

# Persist the run_id locally so the model can be reloaded later by name
seq.update_run_ids(model_name=seq.name, run_id=run_id)

In [6]:

# 6. Load the tracked model and predict

from pathlib import Path

from QHMlflow.mlflowtracker import load_model_and_config

remote_uri = 'http://192.168.31.220:5115'

# Load the model and its config using the run_id stored under this model's name
run_ids = seq.load_run_ids()
model, config_file = load_model_and_config(remote_tracking_uri=remote_uri, run_id=run_ids[seq.name])

# Rows to predict: the 2025-03-09 slice, restricted to the features saved with the model
predict_data = factors.loc[['2025-03-09'], config_file['features']]

# Class probabilities rounded to 4 decimals; column 1 is the positive-class probability
predict_result = pd.DataFrame(
    index=predict_data.index,
    data=np.round(model.predict_proba(predict_data), 4),
    columns=[0, 1],
)

# Pick stocks: per index-level-0 group, the 5 highest positive-class probabilities above 0.5
selected_codes = (
    predict_result.loc[(predict_result[1] > 0.5), 1]
    .groupby(level=0, group_keys=False)
    .apply(lambda x: x.sort_values(ascending=False).head(5))
)
print(selected_codes.reset_index())

# to_csv does not create missing parent directories, so make sure the folder exists first.
positions_dir = Path('positions')
positions_dir.mkdir(parents=True, exist_ok=True)
selected_codes.to_csv(positions_dir / f'{seq.name}.csv')



Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

        date    code       1
0 2025-03-09  688396  0.5519
1 2025-03-09  600845  0.5479
2 2025-03-09  600760  0.5461
3 2025-03-09  000876  0.5459
4 2025-03-09  601360  0.5457


In [None]:
# Score every row of the factor table with the loaded model.
# NOTE(review): this spans the full factor history, which appears to include the
# rows used for training - confirm whether an out-of-sample window is intended.
feature_cols = config_file['features']
proba = model.predict_proba(factors[feature_cols])
pred = pd.DataFrame(data=proba, index=factors.index, columns=[0, 1])

# Per index-level-0 group, keep the 10 highest positive-class probabilities above 0.5.
confident = pred.loc[(pred[1] > 0.5), 1]
top_picks = confident.groupby(level=0, group_keys=False).apply(
    lambda s: s.sort_values(ascending=False).head(10)
)

# Returns of the picked names (first element of the Load_X_y output).
sy = Load_X_y().transform(returns, top_picks)[0]

# Floor each individual return at -4%, then average across the names in each group.
pf_returns = sy.clip(lower=-0.04).groupby(level=0).mean()

# Analyse with quantstats against the benchmark's close-to-close returns.
bench_close = bench_market_data.close
bench_market_returns = bench_close.pct_change().droplevel(1).dropna()

report_kwargs = dict(
    returns=pf_returns,
    benchmark=bench_market_returns,
    rf=0.02,
    periods_per_year=52,
)
qs.reports.full(**report_kwargs)
qs.reports.html(**report_kwargs)
 

In [None]:
import duckdb

# Show every saved weekly position file in one table.
# union_by_name=true merges files by column name so differing schemas still combine.
# The `code` column is pinned to VARCHAR: type auto-detection would otherwise read
# stock codes as integers and strip leading zeros (e.g. 000876 -> 876).
# NOTE(review): assumes every matched file has a `code` column - confirm.
duckdb.sql("""
    select *
    from read_csv_auto(
        'positions/*_1W.csv',
        union_by_name=true,
        types={'code': 'VARCHAR'}
    )
""").show()