In [1]:
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
import talib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from sklearn.metrics import mean_squared_error
def individual(data1):
    #时间升序排列
    data=data1[::-1].reset_index(drop=True)
    trade_date = data['trade_date']
    # 计算EMA值
    ema = talib.EMA(data['close'].values, timeperiod=20)
    # 计算价格波动率
    stddev = talib.STDDEV(data['close'].values, timeperiod=20, nbdev=1)
    # 计算价格斜率
    slope = talib.LINEARREG_SLOPE(data['close'].values, timeperiod=5)
    # 计算RSI值
    rsi = talib.RSI(data['close'].values, timeperiod=14)
    # 计算威廉指标值
    wr = talib.WILLR(data['high'].values, data['low'].values, data['close'].values, timeperiod=7)
    # 计算真实成交量 (Accumulation/Distribution Line, AD)
    ad = talib.AD(data['high'], data['low'], data['close'], data['vol'])
    # 计算成交量变化率指标 (Accumulation/Distribution Oscillator, ADOSC)
    adosc = talib.ADOSC(data['high'], data['low'], data['close'], data['vol'], fastperiod=3, slowperiod=10)
    # 计算累积/派发线 (On Balance Volume, OBV)
    obv = talib.OBV(data['close'], data['vol'])
    # 计算平均趋向指数 (Average Directional Movement Index Rating, ADXR)
    adxr = talib.ADXR(data['high'], data['low'], data['close'], timeperiod=14)
    # 计算成交量震荡指标 (Chaikin Oscillator, CMO)
    cmo = talib.CMO(data['close'], timeperiod=14)
    # 将涨跌幅作为预测标签
    label = data['change']
    # 将计算结果组合成一个DataFrame
    result = pd.DataFrame({
        'trade_date': trade_date,
        'ema': ema,
        'stddev': stddev,
        'slope': slope,
        'rsi': rsi,
        'wr': wr,
        'AD': ad,
        'ADOSC': adosc,
        'OBV': obv,
        'ADXR': adxr,
        'CMO': cmo,
        'label':label
    })
    result.set_index('trade_date',inplace=True)
    # 预测的是第二天的涨跌幅
    result['label'] = result['label'].shift(1)
    result = result.iloc[1:]
    #后向填充缺失值
    result = result.fillna(method='bfill')

    # 选择所有特征列进行归一化
    float_columns = result.drop(['label'],axis=1).select_dtypes(include=['float64','int64'])
    scaler = MinMaxScaler(feature_range=(-1, 1))
    result[float_columns.columns] = scaler.fit_transform(float_columns)
    
    return result

In [3]:
#文件路径
current_directory = os.getcwd()
subdirectory_name = "data"
folder_path = os.path.join(current_directory, subdirectory_name)
file_names = [f for f in os.listdir(folder_path) if f.endswith('.csv')]

all_data1 = pd.DataFrame()
for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    data1 = pd.read_csv(file_path)
    data1 = individual(data1)
    all_data1 = pd.concat([all_data1, data1])
all_data1=all_data1.drop('label',axis=1)
#计算市场某天的总体指标（均值实现）
market_avg = all_data1.groupby('trade_date').agg( 'mean')
market_avg

Unnamed: 0_level_0,ema,stddev,slope,rsi,wr,AD,ADOSC,OBV,ADXR,CMO
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
20230104,0.191683,-0.196397,0.201843,0.560198,0.083758,0.405375,0.123062,-0.219746,0.253685,0.560198
20230105,0.191683,-0.196397,0.201843,0.560198,0.083758,0.431917,0.123062,-0.159250,0.253685,0.560198
20230106,0.191683,-0.196397,0.201843,0.560198,0.083758,0.430841,0.123062,-0.148162,0.253685,0.560198
20230109,0.191683,-0.196397,0.201843,0.560198,0.083758,0.440000,0.123062,-0.109222,0.253685,0.560198
20230110,0.191683,-0.196397,0.173426,0.560198,0.083758,0.432012,0.123062,-0.112641,0.253685,0.560198
...,...,...,...,...,...,...,...,...,...,...
20231225,-0.565984,-0.502526,0.036899,-0.281567,0.058926,-0.694837,-0.076102,-0.164853,-0.084177,-0.281567
20231226,-0.578140,-0.528678,0.049065,-0.342204,-0.100674,-0.708294,-0.051449,-0.182510,-0.058449,-0.342204
20231227,-0.585288,-0.549756,0.013373,-0.273284,0.062730,-0.683926,-0.000979,-0.172334,-0.039121,-0.273284
20231228,-0.583042,-0.580414,0.112208,-0.005059,0.509992,-0.631147,0.143122,-0.110193,-0.043804,-0.005059


In [4]:
#定义函数，组合原指标和原指标与市场指标之差
def merge(result):
    result1 = result.drop(['label'],axis=1)
    #原指标与市场指标之差
    data = result1.sub(market_avg)
    #组合
    data = data.add_prefix('sub_')
    data1 = pd.concat([data, result], axis=1)
    #填充缺失值
    data1 = data1.fillna(method='ffill')
    data1 = data1.fillna(method='bfill')
    return data1

In [5]:
#数据集预览
merge(data1)

Unnamed: 0_level_0,sub_ema,sub_stddev,sub_slope,sub_rsi,sub_wr,sub_AD,sub_ADOSC,sub_OBV,sub_ADXR,sub_CMO,...,stddev,slope,rsi,wr,AD,ADOSC,OBV,ADXR,CMO,label
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20230104,-1.191683,-0.715547,-0.387504,-0.531851,-0.769708,-0.269929,-0.096652,-0.670320,-0.659842,-0.531851,...,-0.911944,-0.185661,0.028347,-0.685950,0.135445,0.026411,-0.890066,-0.406156,0.028347,0.40
20230105,-1.191683,-0.715547,-0.387504,-0.531851,-0.769708,-0.274075,-0.096652,-0.740699,-0.659842,-0.531851,...,-0.911944,-0.185661,0.028347,-0.685950,0.157842,0.026411,-0.899949,-0.406156,0.028347,0.38
20230106,-1.191683,-0.715547,-0.387504,-0.531851,-0.769708,-0.305722,-0.096652,-0.765294,-0.659842,-0.531851,...,-0.911944,-0.185661,0.028347,-0.685950,0.125119,0.026411,-0.913456,-0.406156,0.028347,-0.05
20230109,-1.191683,-0.715547,-0.387504,-0.531851,-0.769708,-0.338643,-0.096652,-0.815718,-0.659842,-0.531851,...,-0.911944,-0.185661,0.028347,-0.685950,0.101357,0.026411,-0.924940,-0.406156,0.028347,-0.19
20230110,-1.191683,-0.715547,-0.394493,-0.531851,-0.769708,-0.343749,-0.096652,-0.804011,-0.659842,-0.531851,...,-0.911944,-0.221067,0.028347,-0.685950,0.088263,0.026411,-0.916652,-0.406156,0.028347,-0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20231225,0.960968,-0.379098,-0.353562,-0.305725,-0.925035,-0.080881,0.204417,1.081340,-0.881705,-0.305725,...,-0.881623,-0.316663,-0.587292,-0.866109,-0.775718,0.128315,0.916487,-0.965882,-0.587292,0.22
20231226,0.950272,-0.331152,-0.527711,-0.450757,-0.870256,-0.109944,0.093273,1.084030,-0.853356,-0.450757,...,-0.859830,-0.478646,-0.792961,-0.970930,-0.818238,0.041824,0.901520,-0.911805,-0.792961,-1.78
20231227,0.937420,-0.285043,-0.553095,-0.503980,-0.703124,-0.119124,0.029195,1.089623,-0.804022,-0.503980,...,-0.834799,-0.539721,-0.777264,-0.640394,-0.803050,0.028215,0.917289,-0.843143,-0.777264,-1.16
20231228,0.930841,-0.255065,-0.481980,-0.476119,-0.549400,-0.132969,-0.068349,1.047448,-0.786763,-0.476119,...,-0.835479,-0.369772,-0.481178,-0.039409,-0.764115,0.074773,0.937255,-0.830567,-0.481178,0.06


In [6]:
def train(result,buy_parameter,sale_parameter):
    # 划分训练集和测试集
    X_train, X_test, y_train, y_test = train_test_split(result.drop('label',axis=1), result['label'] , test_size=0.2, random_state=42)
    #使用xgb进行回归
    model = xgb.XGBRegressor(objective ='reg:squarederror', random_state=42)

    # 在训练集上拟合模型
    model.fit(X_train, y_train)

    # 在训练集和测试集上进行预测
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    
    #设置购入条件，预测结果大于等于buy_parameter即可购入，实际为涨即算成功
    buy_indices = (y_test_pred >= buy_parameter)
    if not buy_indices.any():
        print('    没有购入条件')
    else:
        success_indices = (y_test > 0)
        success_rate = success_indices[buy_indices].sum() / buy_indices.sum()
        print('    多头成功率 ：'+'{:.5f}'.format(success_rate))
    
    #同理设置抛出条件
    sale_indices = (y_test_pred <= sale_parameter)
    if not sale_indices.any():
        print('    没有抛出条件')
    else:
        success_indices1 = (y_test < 0)
        success_rate1 = success_indices1[sale_indices].sum() / sale_indices.sum()
        print('    空头成功率 ：'+'{:.5f}'.format(success_rate1))

In [7]:
#根据投资策略设定参数（如预测涨幅大于0.2时可考虑买入，跌幅大于0.1时可考虑抛出）
buy_parameter = 0.2     
sale_parameter = -0.1

for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    data2 = pd.read_csv(file_path)
    print('股票'+file_name[:-4])
    data3 = merge(individual(data2))
    train(data3,buy_parameter,sale_parameter)

股票000001.SZ
    多头成功率 ：0.50000
    空头成功率 ：0.62500
股票000002.SZ
    没有购入条件
    空头成功率 ：0.60870
股票000063.SZ
    多头成功率 ：0.50000
    空头成功率 ：0.69231
股票000069.SZ
    没有购入条件
    空头成功率 ：1.00000
股票000100.SZ
    没有购入条件
    空头成功率 ：1.00000
股票000157.SZ
    多头成功率 ：0.66667
    空头成功率 ：0.50000
股票000166.SZ
    多头成功率 ：1.00000
    空头成功率 ：0.50000
股票000301.SZ
    多头成功率 ：0.66667
    空头成功率 ：0.80000
股票000333.SZ
    多头成功率 ：0.80000
    空头成功率 ：0.64000
股票000338.SZ
    多头成功率 ：0.80000
    空头成功率 ：0.53846
股票000408.SZ
    多头成功率 ：0.57143
    空头成功率 ：0.43750
股票000425.SZ
    多头成功率 ：0.66667
    空头成功率 ：0.44444
股票000538.SZ
    多头成功率 ：0.62500
    空头成功率 ：0.64286
股票000568.SZ
    多头成功率 ：0.53333
    空头成功率 ：0.70968
股票000596.SZ
    多头成功率 ：0.68421
    空头成功率 ：0.71429
股票000617.SZ
    多头成功率 ：1.00000
    空头成功率 ：0.60000
股票000625.SZ
    多头成功率 ：0.77778
    空头成功率 ：0.60000
股票000651.SZ
    多头成功率 ：0.33333
    空头成功率 ：0.65385
股票000661.SZ
    多头成功率 ：0.66667
    空头成功率 ：0.64000
股票000708.SZ
    多头成功率 ：1.00000
    空头成功率 ：0.70588
股票000723.SZ
    没有购入条件
 

    多头成功率 ：0.37500
    空头成功率 ：0.66667
股票600489.SH
    多头成功率 ：0.50000
    空头成功率 ：0.62500
股票600515.SH
    多头成功率 ：0.00000
    没有抛出条件
股票600519.SH
    多头成功率 ：0.57143
    空头成功率 ：0.66667
股票600547.SH
    多头成功率 ：0.60000
    空头成功率 ：0.66667
股票600570.SH
    多头成功率 ：0.44444
    空头成功率 ：0.66667
股票600584.SH
    多头成功率 ：0.93333
    空头成功率 ：0.55556
股票600585.SH
    多头成功率 ：0.33333
    空头成功率 ：0.50000
股票600588.SH
    多头成功率 ：0.75000
    空头成功率 ：0.66667
股票600600.SH
    多头成功率 ：0.64706
    空头成功率 ：0.84615
股票600606.SH
    没有购入条件
    没有抛出条件
股票600660.SH
    多头成功率 ：0.87500
    空头成功率 ：0.83333
股票600674.SH
    多头成功率 ：1.00000
    空头成功率 ：0.33333
股票600690.SH
    多头成功率 ：0.50000
    空头成功率 ：0.78947
股票600732.SH
    多头成功率 ：0.70000
    空头成功率 ：0.56667
股票600741.SH
    多头成功率 ：0.71429
    空头成功率 ：0.60000
股票600745.SH
    多头成功率 ：0.66667
    空头成功率 ：0.62500
股票600754.SH
    多头成功率 ：0.42857
    空头成功率 ：0.66667
股票600760.SH
    多头成功率 ：0.57895
    空头成功率 ：0.52941
股票600763.SH
    多头成功率 ：0.70588
    空头成功率 ：0.62069
股票600795.SH
    没有购入条件
    没有抛出条件
股票