In [None]:

import tushare as ts
import pandas as pd

def get_data(code, start_date, end_date):
    """Download daily quotes for one stock and return them in reversed row order.

    Side effect: stores the open price of the last returned row in the
    module-level ``last_open`` so other functions can read it. When the query
    yields no rows, ``last_open`` keeps its previous value and the empty frame
    is returned, so the caller's own emptiness check can report the problem.

    Args:
        code: Value passed to the query as ``ts_code``.
        start_date: First day to request, formatted ``YYYYMMDD``.
        end_date: Last day to request, formatted ``YYYYMMDD``.

    Returns:
        The ``daily`` query result, row order reversed, re-indexed 0..n-1.
    """
    global last_open

    # NOTE(review): this API token is committed in source; consider loading it
    # from configuration or an environment variable instead.
    pro = ts.pro_api('5ae1c2cea081bde0fb95295e7bfa302c5b87cc7bb967ccd5c3cee38a')
    stock_data = pro.query('daily', ts_code=code, start_date=start_date, end_date=end_date)

    # Reverse the rows so earlier dates come first (presumably the API returns
    # newest first -- confirm), then renumber the index 0, 1, 2, ...
    stock_data = stock_data[::-1]
    stock_data.reset_index(drop=True, inplace=True)

    # Positional access; the previous label lookup raised KeyError on an
    # empty frame before the caller could handle it.
    if len(stock_data) > 0:
        last_open = stock_data.open.iloc[-1]

    return stock_data



import numpy as np
from sklearn.preprocessing import MinMaxScaler

def preprocess(stock_data, day, seq_length, data_dim, output_dim, visual_window):
    """Slice daily quotes into windows and scale them for training and evaluation.

    Each labelled window holds ``seq_length + day`` consecutive rows: the first
    ``seq_length`` rows are the model input and the close of the last row
    (scaled, then multiplied by 10) is the label. Labelled windows are shuffled
    and split 70/30 into train and test; the scaler is fitted on the training
    rows only and reused for every other set.

    Args:
        stock_data: Frame with the columns 'open', 'close', 'high', 'low',
            'vol', 'pct_chg' and 'amount'.
        day: Number of rows between the end of the input and the label row.
        seq_length: Number of rows in each model input.
        data_dim: Number of feature columns used when reshaping.
        output_dim: Unused here.
        visual_window: How many of the most recent labelled windows to keep,
            unshuffled, for plotting.

    Returns:
        ``(trainX, trainY, testX, testY, appX, scaler, test_close, test_buy,
        test_sell, visualX, visualY)``. ``appX`` holds the last ``day`` input
        windows, which have no label yet. ``test_close``, ``test_buy`` and
        ``test_sell`` are scaled prices taken from each test window: the close
        of the last input row, the open of the row after it, and the close of
        the final row.
    """
    feature_columns = ['open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount']
    xy = np.array(stock_data[feature_columns].values)
    n_rows = len(xy)
    window = seq_length + day

    # Labelled windows: input rows plus the rows up to the label.
    labelled = [xy[start:start + window] for start in range(0, n_rows - window + 1)]
    dataXY = np.vstack(labelled).reshape(-1, window, data_dim)

    # Input-only windows whose label lies beyond the available data.
    unlabelled = [
        xy[start:start + seq_length]
        for start in range(n_rows - seq_length - day + 1, n_rows - seq_length + 1)
    ]
    app_dataX = np.vstack(unlabelled).reshape(-1, seq_length, data_dim)

    # Copy the most recent windows before shuffling reorders them in place.
    xy_visual = np.copy(dataXY[-visual_window:])
    np.random.shuffle(dataXY)

    train_size = int(len(dataXY) * 0.7)
    xy_train = np.array(dataXY[:train_size])
    xy_test = np.array(dataXY[train_size:])

    # The scaler works on 2-D data, so windows are flattened to rows and back.
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(xy_train.reshape((-1, data_dim)))
    train_scaled = train_scaled.reshape((-1, window, data_dim))
    trainX = train_scaled[:, 0:seq_length]
    trainY = train_scaled[:, -1, 1] * 10  # scaled close, enlarged for training

    test_scaled = scaler.transform(xy_test.reshape((-1, data_dim)))
    test_scaled = test_scaled.reshape((-1, window, data_dim))
    testX = test_scaled[:, 0:seq_length]
    testY = test_scaled[:, -1, 1] * 10

    # Prices used later to compute trading income.
    test_close = test_scaled[:, seq_length - 1, 1]
    test_buy = test_scaled[:, seq_length, 0]
    test_sell = test_scaled[:, -1, 1]

    appX = scaler.transform(app_dataX.reshape((-1, data_dim)))
    appX = appX.reshape((-1, seq_length, data_dim))

    visual_scaled = scaler.transform(xy_visual.reshape((-1, data_dim)))
    visual_scaled = visual_scaled.reshape((-1, window, data_dim))
    visualX = visual_scaled[:, 0:seq_length]
    visualY = visual_scaled[:, -1, 1] * 10

    return (trainX, trainY, testX, testY, appX, scaler,
            test_close, test_buy, test_sell, visualX, visualY)


from keras.layers import Input, Dense, LSTM, Reshape
from keras.models import Model
from keras import regularizers, callbacks

def train(code, day, trainX, trainY, seq_length, data_dim, output_dim):
    """Build, fit and save the Dense -> LSTM -> Dense regression network.

    Layer stack: one Dense layer, one LSTM layer, then four Dense layers, the
    last of which has ``output_dim`` units and no activation. The model is
    compiled with Adam and MSE loss, fitted with early stopping on
    ``val_loss`` (10% validation split), and saved as
    ``<code>(1)<day>.h5`` in the working directory.

    Args:
        code: Stock code, used in the saved file name.
        day: Prediction horizon, used in the saved file name.
        trainX: Training inputs; each sample is (seq_length, data_dim).
        trainY: Training labels.
        seq_length: Number of time steps per sample.
        data_dim: Number of features per time step.
        output_dim: Number of units in the output layer.

    Returns:
        The fitted Keras model.
    """
    rnn_units = 32

    def dense(width, activation, name):
        # Every Dense layer carries the same zero-strength L2 kernel regulariser.
        return Dense(width, activation=activation,
                     kernel_regularizer=regularizers.l2(0.0), name=name)

    # The input shape excludes the batch dimension.
    inputs = Input(shape=(seq_length, data_dim), name='dense_input')
    net = dense(rnn_units, 'relu', 'dense1')(inputs)

    # Reshape to (seq_length, rnn_units) before the recurrent layer.
    net = Reshape(target_shape=(seq_length, rnn_units), name='reshape2')(net)

    # NOTE(review): dropout=1.0 sits on the boundary of the documented 0..1
    # range; confirm what the installed Keras version does with it.
    net = LSTM(rnn_units, activation='tanh', dropout=1.0, name='lstm')(net)

    # Reshape to (rnn_units,) before the fully connected head.
    net = Reshape(target_shape=(rnn_units,), name='reshape3')(net)

    for index, width in enumerate((64, 32, 16), start=2):
        net = dense(width, 'relu', 'dense%d' % index)(net)
    outputs = dense(output_dim, None, 'dense5')(net)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    early_stop = callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto', baseline=None)

    # verbose=0 keeps the fit silent.
    model.fit(trainX, trainY, batch_size=256, epochs=400, verbose=0,
              callbacks=[early_stop], validation_split=0.1)

    # HDF5 output requires h5py.
    model.save(code + '(1)' + str(day) + '.h5')

    return model
    

def test(model, testX, testY, scaler, day, close_price, buy_price, sell_price, visualX, visualY):
    """Evaluate the model on the test windows and publish a buy-worthiness score.

    Predictions and labels are converted back to prices by undoing the x10
    enlargement and the min-max scaling (feature column 1 for close prices,
    column 0 for open prices). The simulated strategy buys one share at the
    next open whenever the predicted price exceeds the latest close, and sells
    at the close of the label row.

    Side effects: assigns the module globals ``_p``, ``_count``, ``_accuracy``,
    ``_model_income`` and ``_last_open``. Reads the module global
    ``last_open``.

    Args:
        model: Object whose ``predict`` returns scaled, x10 predictions.
        testX: Test inputs.
        testY: Test labels (scaled close x10), one per test input.
        scaler: Fitted scaler exposing ``data_range_`` and ``data_min_``.
        day: Unused here.
        close_price: Scaled close of the last input row of each test window.
        buy_price: Scaled open of the row after the input of each test window.
        sell_price: Scaled close of the final row of each test window.
        visualX: Inputs of the windows kept for plotting.
        visualY: Labels of the windows kept for plotting.

    Returns:
        ``(visualY2, visualPredict2)``: true and predicted prices for the
        plotting windows. ``visualPredict2`` keeps the shape returned by
        ``model.predict``.
    """
    close_range, close_min = scaler.data_range_[1], scaler.data_min_[1]
    open_range, open_min = scaler.data_range_[0], scaler.data_min_[0]

    # Flatten the predictions so they line up element-wise with the 1-D labels.
    # Left as (n, 1), subtracting the (n,) labels broadcasts to an n x n matrix
    # and the error statistics below would be taken over every pair.
    # (Assumes one output value per sample -- confirm if output_dim changes.)
    testPredict2 = model.predict(testX).reshape(-1) / 10 * close_range + close_min
    testY2 = testY / 10 * close_range + close_min

    close_price2 = close_price * close_range + close_min  # latest close: decides whether to buy
    buy_price2 = buy_price * open_range + open_min        # next open: cost of buying
    sell_price2 = sell_price * close_range + close_min    # close after holding: sale proceeds

    # Diagnostics only; not part of the published score.
    error_pct = np.abs(testPredict2 - testY2) / testY2 * 100
    mean_error = round(float(np.mean(error_pct)), 2)
    max_error = round(float(np.max(error_pct)), 2)

    tolerance = 1  # percent
    count = 0          # predictions within `tolerance` percent of the true price
    correct = 0        # predictions with the right up/down direction
    model_income = 0   # income of the model-driven strategy
    trade = 0          # number of trades made by the model-driven strategy
    max_income = 0     # income when buying exactly when the price really rises
    random_income = 0  # income when buying at random

    rows = zip(testPredict2, testY2, close_price2, buy_price2, sell_price2)
    for predicted, actual, last_close, buy_at, sell_at in rows:
        if abs(predicted - actual) / actual * 100 <= tolerance:
            count += 1

        # Direction is right when predicted and actual moves share a sign.
        if np.sign(predicted - last_close) == np.sign(actual - last_close):
            correct += 1

        if predicted > last_close:
            model_income = model_income + sell_at - buy_at
            trade += 1

        if actual > last_close:
            max_income = max_income + sell_at - buy_at

        if np.random.randint(0, 2):  # 0 or 1; buy on 1
            random_income = random_income + sell_at - buy_at

    n_samples = len(testY2)
    count = round(count / n_samples * 100, 2)
    accuracy = round(correct / n_samples * 100, 2)
    frequency = round(trade / n_samples * 100, 2)
    model_income = round(float(model_income), 2)
    max_income = round(float(max_income), 2)
    random_income = round(float(random_income), 2)

    visualPredict = model.predict(visualX)
    visualPredict2 = visualPredict / 10 * close_range + close_min
    visualY2 = visualY / 10 * close_range + close_min

    # Combine the within-tolerance ratio, the direction accuracy and the income
    # (relative to the latest open price) into one buy-worthiness weight.
    global _p
    global _count
    global _accuracy
    global _model_income
    global _last_open

    _p = (model_income/last_open)*((count+accuracy)/200)
    _count = count
    _accuracy = accuracy
    _model_income = model_income
    _last_open = last_open

    return visualY2, visualPredict2

    

def apply(model, appX, scaler):
    """Predict prices for the unlabelled windows.

    Args:
        model: Object whose ``predict`` returns scaled, x10 predictions.
        appX: Scaled input windows.
        scaler: Fitted scaler exposing ``data_range_`` and ``data_min_``.

    Returns:
        1-D array of predictions with the x10 enlargement and the min-max
        scaling of feature column 1 undone.
    """
    scaled_prediction = model.predict(appX)
    prices = scaled_prediction / 10 * scaler.data_range_[1] + scaler.data_min_[1]
    return prices.reshape(-1)
    

    
import matplotlib.pyplot as plt

def visualize(visualY2, visualPredict2, appPredict2, visual_window, day):
    """Plot true prices, model output and forward predictions on one figure.

    Args:
        visualY2: True prices of the plotting windows (blue line).
        visualPredict2: Model output for the same windows (orange line).
        appPredict2: Predictions for the unlabelled windows (red dots),
            drawn immediately after the model output on the x axis.
        visual_window: Number of plotting windows; sets the x-axis range.
        day: Prediction horizon; extends the x-axis range.
    """
    history_x = list(range(len(visualY2)))
    model_x = list(range(len(visualPredict2)))
    future_start = len(visualPredict2)
    future_x = list(range(future_start, future_start + len(appPredict2)))

    plt.figure(figsize=(16,8))
    plt.plot(history_x, visualY2, color='blue')
    plt.plot(model_x, visualPredict2, color='orange')
    plt.scatter(future_x, appPredict2, color='red')

    plt.xlim((0, visual_window + day))
    plt.legend(['True price', 'Model result', 'Prediction'], loc='upper left')
    plt.ylabel('price')
    plt.xlabel('time')

    plt.show()

    

import time
import datetime
import sys as sys
import gc

def main(code, day):
    """Run the whole pipeline for one stock and append its scores to the CSV.

    Fetches quotes for the ``days`` calendar days ending today, builds the
    windows, loads the model saved for this (code, day) pair or trains a new
    one, evaluates it and writes one row to ``file_name``.

    Relies on module globals: ``days`` and ``file_name`` (assigned by
    ``data_predict``) and ``_p``, ``_count``, ``_accuracy``, ``_model_income``,
    ``_last_open`` (assigned by ``test``).

    Args:
        code: Stock code to process.
        day: How many rows ahead the model predicts.
    """
    from keras import backend as K
    from keras.models import load_model

    end_date = datetime.datetime.now()
    start_date = end_date - datetime.timedelta(days=days)
    end_date = end_date.strftime('%Y%m%d')
    start_date = start_date.strftime('%Y%m%d')

    seq_length = 20      # window length
    data_dim = 7         # number of input features
    output_dim = 1       # number of predicted values
    visual_window = 200  # windows kept for plotting

    try:
        stock_data = get_data(code, start_date, end_date)
    except Exception:  # keep the batch going, but let Ctrl-C through
        print('代码不正确或无法获得该股票的数据')
        return

    if len(stock_data) == 0:
        print('代码不正确或无法获得该股票的数据')
        return

    # At least two windows are needed so the 70/30 split leaves a non-empty
    # training set; without this, a short history aborts the whole batch.
    min_rows = seq_length + day + 1
    if len(stock_data) < min_rows:
        print('%s的历史数据不足%d条，跳过' % (code, min_rows))
        return

    (trainX, trainY, testX, testY, appX, scaler,
     test_close, test_buy, test_sell, visualX, visualY) = preprocess(
        stock_data, day, seq_length, data_dim, output_dim, visual_window)

    try:
        model = load_model(code + '(1)' + str(day) + '.h5')
    except Exception:  # no usable saved model: train one
        print('第一次预测%d天内'%(day) + code + '的估价，需要一点时间建模')
        model = train(
            code, day, trainX, trainY, seq_length, data_dim, output_dim)

    # The returned plotting arrays are not needed in batch mode; apply() and
    # visualize() can be called here when a chart is wanted.
    test(model, testX, testY, scaler, day, test_close, test_buy, test_sell, visualX, visualY)

    df = pd.DataFrame([{
        'ts_code' : code,
        'predict_buy' : _p,
        'count' : _count,
        'accuracy' : _accuracy,
        'model_income' : _model_income,
        'last_open' : _last_open
    }])

    # Save after every stock so an interrupted run loses nothing. Existence is
    # checked now, not once at start-up: a flag computed before the first save
    # stays False and makes every later save overwrite the file.
    if os.path.isfile(file_name):
        df.to_csv(file_name,mode='a',header=False)
    else:
        df.to_csv(file_name)

    # Drop the Keras state built for this stock; deleting entries of locals()
    # does not release anything.
    K.clear_session()
    gc.collect()


            
import tushare as ts
import os

def acq_code(ty):
    """Return the stock codes of the chosen board that still need computing.

    Codes already present in the ``ts_code`` column of the results file
    (module global ``file_name``) are skipped. Side effect: assigns the module
    global ``file_exit`` to whether that file exists.

    Args:
        ty: Board selector matched against code prefixes: 0 -> '00' or '60',
            1 -> '60', 2 -> '000', 3 -> '002'. Any other value selects
            nothing.

    Returns:
        List of matching codes in the order returned by the API.
    """
    global file_exit

    # NOTE(review): this API token is committed in source; consider loading it
    # from configuration or an environment variable instead.
    pro = ts.pro_api('5ae1c2cea081bde0fb95295e7bfa302c5b87cc7bb967ccd5c3cee38a')
    pool = pro.stock_basic(exchange = '',list_status = 'L',adj = 'qfq',fields = 'ts_code')

    finished = set()
    file_exit = os.path.isfile(file_name)
    if file_exit:
        exit_code = pd.read_csv(file_name)
        exit_code = np.array(exit_code['ts_code'])
        print(exit_code)
        print('以上',len(exit_code),'条股票数据已保存在',file_name,'中，这些数据不会被重新计算。')
        finished = set(exit_code)

    prefixes_by_type = {
        0: ('00', '60'),
        1: ('60',),
        2: ('000',),
        3: ('002',),
    }
    prefixes = prefixes_by_type.get(ty)

    # An existing results file with no rows simply excludes nothing; it used
    # to exclude every stock.
    if prefixes is None:
        stock_code = []
    else:
        stock_code = [
            candidate for candidate in pool.ts_code
            if candidate.startswith(prefixes) and candidate not in finished
        ]

    print('完成筛选，只保留主板和中小板的股票共：',len(stock_code),'只股票代码……')

    return stock_code




import time

def _format_elapsed(seconds):
    """Format a duration in seconds as HH:MM:SS (hours may exceed 24)."""
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return '%02d:%02d:%02d' % (hours, minutes, secs)


def data_predict():
    """Entry point: ask for a board, then score every remaining stock on it.

    Assigns the module globals read by the other functions (``day``, ``days``,
    ``last_open``, ``_p``, ``predict_buy``, ``file_name``), reads the board
    selector from standard input, and calls ``main`` once per stock code
    returned by ``acq_code``, printing progress and timing before each one.
    """
    global day
    global days
    global last_open
    global _p
    global predict_buy
    global file_name

    # Per the original author's note, this appears to avoid server restarts.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    start_time = time.time()
    _time = time.time()

    day = int(5)             # prediction horizon
    days = int(1000)         # calendar days of history to fetch
    last_open = int(100000)  # latest open price, preset to a large value
    _p = 0                   # buy weight, preset to 0
    predict_buy = []
    file_name = 'data_predict.csv'  # results file

    ty = int(input("请输入股票类型，0表示主板和中小板，1表示上交所主板，2表示深交所主板，3表示深交所中小板："))
    stock_code = acq_code(ty)

    for k, i in enumerate(stock_code, start=1):
        # Durations are formatted directly; passing elapsed seconds through
        # time.localtime() treated them as a timestamp and added the local
        # timezone offset.
        now = time.time()
        total_time = _format_elapsed(now - start_time)
        this_time = _format_elapsed(now - _time)
        print('共：',len(stock_code),'只股票，正在计算第：',k,'只股票，股票代码：',i,'。上次耗时：',this_time,'秒，总耗时：',total_time,'秒。>>>')
        _time = time.time()

        main(i,day)

    print('全部完成！请查看文件：',file_name)



if __name__ == '__main__': # run the program only when executed as a script
    data_predict()



Using TensorFlow backend.


请输入股票类型，0表示主板和中小板，1表示上交所主板，2表示深交所主板，3表示深交所中小板：2
['000301.SZ' '000001.SZ' '000002.SZ' '000004.SZ' '000005.SZ' '000006.SZ'
 '000007.SZ' '000008.SZ' '000009.SZ' '000010.SZ' '000011.SZ' '000012.SZ'
 '000014.SZ' '000016.SZ' '000017.SZ' '000018.SZ' '000019.SZ' '000020.SZ'
 '000021.SZ' '000023.SZ' '000025.SZ' '000026.SZ' '000027.SZ' '000028.SZ'
 '000030.SZ' '000031.SZ' '000032.SZ' '000034.SZ' '000035.SZ' '000036.SZ'
 '000037.SZ' '000038.SZ' '000039.SZ' '000040.SZ' '000042.SZ' '000045.SZ'
 '000046.SZ' '000048.SZ' '000049.SZ' '000050.SZ' '000055.SZ' '000056.SZ'
 '000058.SZ' '000059.SZ' '000060.SZ' '000061.SZ' '000062.SZ' '000063.SZ'
 '000065.SZ' '000066.SZ' '000068.SZ' '000069.SZ' '000070.SZ' '000078.SZ'
 '000088.SZ' '000089.SZ' '000090.SZ' '000096.SZ' '000099.SZ' '000100.SZ'
 '000150.SZ' '000151.SZ' '000153.SZ' '000155.SZ' '000156.SZ' '000157.SZ'
 '000158.SZ' '000159.SZ' '000166.SZ' '000333.SZ' '000338.SZ' '000400.SZ'
 '000401.SZ' '000402.SZ' '000403.SZ' '000404.SZ' '000407.SZ' '000408.SZ'
 '0

第一次预测5天内000637.SZ的估价，需要一点时间建模
共： 304 只股票，正在计算第： 59 只股票，股票代码： 000638.SZ 。上次耗时： 08:00:18 秒，总耗时： 08:11:43 秒。>>>
第一次预测5天内000638.SZ的估价，需要一点时间建模
共： 304 只股票，正在计算第： 60 只股票，股票代码： 000639.SZ 。上次耗时： 08:00:15 秒，总耗时： 08:11:58 秒。>>>
第一次预测5天内000639.SZ的估价，需要一点时间建模
共： 304 只股票，正在计算第： 61 只股票，股票代码： 000650.SZ 。上次耗时： 08:00:17 秒，总耗时： 08:12:16 秒。>>>
第一次预测5天内000650.SZ的估价，需要一点时间建模


In [None]:
#这是第二次修改的版本：努力解决内存崩溃的问题


import tushare as ts
import pandas as pd

def get_data(code, start_date, end_date):
    """Download daily quotes for one stock and return them in reversed row order.

    Args:
        code: Value passed to the query as ``ts_code``.
        start_date: First day to request, formatted ``YYYYMMDD``.
        end_date: Last day to request, formatted ``YYYYMMDD``.

    Returns:
        The ``daily`` query result, row order reversed, re-indexed 0..n-1.
    """
    # NOTE(review): this API token is committed in source; consider loading it
    # from configuration or an environment variable instead.
    client = ts.pro_api('5ae1c2cea081bde0fb95295e7bfa302c5b87cc7bb967ccd5c3cee38a')
    quotes = client.query('daily', ts_code=code, start_date=start_date, end_date=end_date)

    # Reverse the rows so earlier dates come first (presumably the API returns
    # newest first -- confirm), then renumber the index 0, 1, 2, ...
    return quotes.iloc[::-1].reset_index(drop=True)



import numpy as np
from sklearn.preprocessing import MinMaxScaler

#数据预处理
def preprocess(stock_data, day, seq_length, data_dim, output_dim, visual_window):
    """Slice daily quotes into windows and scale them for training and evaluation.

    Each labelled window holds ``seq_length + day`` consecutive rows: the first
    ``seq_length`` rows are the model input and the close of the last row
    (scaled, then multiplied by 10) is the label. Labelled windows are shuffled
    and split 70/30 into train and test; the scaler is fitted on the training
    rows only and reused for every other set.

    Args:
        stock_data: Frame with the columns 'open', 'close', 'high', 'low',
            'vol', 'pct_chg' and 'amount'.
        day: Number of rows between the end of the input and the label row.
        seq_length: Number of rows in each model input.
        data_dim: Number of feature columns used when reshaping.
        output_dim: Unused here.
        visual_window: How many of the most recent labelled windows to keep,
            unshuffled, for plotting.

    Returns:
        ``(trainX, trainY, testX, testY, appX, scaler, test_close, test_buy,
        test_sell, visualX, visualY)``. ``appX`` holds the last ``day`` input
        windows, which have no label yet. ``test_close``, ``test_buy`` and
        ``test_sell`` are scaled prices taken from each test window: the close
        of the last input row, the open of the row after it, and the close of
        the final row.

    Raises:
        ValueError: If ``stock_data`` has fewer than ``seq_length + day`` rows,
            so not even one labelled window can be built.
    """
    feature_columns = ['open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount']
    xy = np.array(stock_data[feature_columns].values)
    n_rows = len(xy)
    window = seq_length + day

    # Fail with a clear message rather than letting np.vstack complain about
    # an empty list further down.
    if n_rows < window:
        raise ValueError(
            'need at least %d rows of quotes to build one window, got %d'
            % (window, n_rows))

    # Labelled windows: input rows plus the rows up to the label.
    labelled = [xy[start:start + window] for start in range(0, n_rows - window + 1)]
    dataXY = np.vstack(labelled).reshape(-1, window, data_dim)

    # Input-only windows whose label lies beyond the available data.
    unlabelled = [
        xy[start:start + seq_length]
        for start in range(n_rows - seq_length - day + 1, n_rows - seq_length + 1)
    ]
    app_dataX = np.vstack(unlabelled).reshape(-1, seq_length, data_dim)

    # Copy the most recent windows before shuffling reorders them in place.
    xy_visual = np.copy(dataXY[-visual_window:])
    np.random.shuffle(dataXY)

    train_size = int(len(dataXY) * 0.7)
    xy_train = np.array(dataXY[:train_size])
    xy_test = np.array(dataXY[train_size:])

    # The scaler works on 2-D data, so windows are flattened to rows and back.
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(xy_train.reshape((-1, data_dim)))
    train_scaled = train_scaled.reshape((-1, window, data_dim))
    trainX = train_scaled[:, 0:seq_length]
    trainY = train_scaled[:, -1, 1] * 10  # scaled close, enlarged for training

    test_scaled = scaler.transform(xy_test.reshape((-1, data_dim)))
    test_scaled = test_scaled.reshape((-1, window, data_dim))
    testX = test_scaled[:, 0:seq_length]
    testY = test_scaled[:, -1, 1] * 10

    # Prices used later to compute trading income.
    test_close = test_scaled[:, seq_length - 1, 1]
    test_buy = test_scaled[:, seq_length, 0]
    test_sell = test_scaled[:, -1, 1]

    appX = scaler.transform(app_dataX.reshape((-1, data_dim)))
    appX = appX.reshape((-1, seq_length, data_dim))

    visual_scaled = scaler.transform(xy_visual.reshape((-1, data_dim)))
    visual_scaled = visual_scaled.reshape((-1, window, data_dim))
    visualX = visual_scaled[:, 0:seq_length]
    visualY = visual_scaled[:, -1, 1] * 10

    return (trainX, trainY, testX, testY, appX, scaler,
            test_close, test_buy, test_sell, visualX, visualY)



from keras.layers import Input, Dense, LSTM, Reshape
from keras.models import Model
from keras import regularizers, callbacks

#计算模型
def train(code, day, trainX, trainY, seq_length, data_dim, output_dim):
    """Build, fit and save the Dense -> LSTM -> Dense regression network.

    Layer stack: one Dense layer, one LSTM layer, then four Dense layers, the
    last of which has ``output_dim`` units and no activation. The model is
    compiled with Adam and MSE loss, fitted with early stopping on
    ``val_loss`` (10% validation split), and saved as
    ``<code>(1)<day>.h5`` in the working directory.

    Args:
        code: Stock code, used in the saved file name.
        day: Prediction horizon, used in the saved file name.
        trainX: Training inputs; each sample is (seq_length, data_dim).
        trainY: Training labels.
        seq_length: Number of time steps per sample.
        data_dim: Number of features per time step.
        output_dim: Number of units in the output layer.

    Returns:
        The fitted Keras model.
    """
    rnn_units = 32

    def dense(width, activation, name):
        # Every Dense layer carries the same zero-strength L2 kernel regulariser.
        return Dense(width, activation=activation,
                     kernel_regularizer=regularizers.l2(0.0), name=name)

    # The input shape excludes the batch dimension.
    inputs = Input(shape=(seq_length, data_dim), name='dense_input')
    net = dense(rnn_units, 'relu', 'dense1')(inputs)

    # Reshape to (seq_length, rnn_units) before the recurrent layer.
    net = Reshape(target_shape=(seq_length, rnn_units), name='reshape2')(net)

    # NOTE(review): dropout=1.0 sits on the boundary of the documented 0..1
    # range; confirm what the installed Keras version does with it.
    net = LSTM(rnn_units, activation='tanh', dropout=1.0, name='lstm')(net)

    # Reshape to (rnn_units,) before the fully connected head.
    net = Reshape(target_shape=(rnn_units,), name='reshape3')(net)

    for index, width in enumerate((64, 32, 16), start=2):
        net = dense(width, 'relu', 'dense%d' % index)(net)
    outputs = dense(output_dim, None, 'dense5')(net)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    early_stop = callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto', baseline=None)

    # verbose=0 keeps the fit silent.
    model.fit(trainX, trainY, batch_size=256, epochs=400, verbose=0,
              callbacks=[early_stop], validation_split=0.1)

    # HDF5 output requires h5py.
    model.save(code + '(1)' + str(day) + '.h5')

    return model
    

    
#进行训练测试
def test(model, testX, testY, scaler, day, close_price, buy_price, sell_price, visualX, visualY, code):
    """Evaluate the model on the test windows and summarise the result as one row.

    Predictions and labels are converted back to prices by undoing the x10
    enlargement and the min-max scaling (feature column 1 for close prices,
    column 0 for open prices). The simulated strategy buys one share at the
    next open whenever the predicted price exceeds the latest close, and sells
    at the close of the label row. Three summary lines are printed.

    Args:
        model: Object whose ``predict`` returns scaled, x10 predictions.
        testX: Test inputs.
        testY: Test labels (scaled close x10), one per test input.
        scaler: Fitted scaler exposing ``data_range_`` and ``data_min_``.
        day: Unused here.
        close_price: Scaled close of the last input row of each test window.
        buy_price: Scaled open of the row after the input of each test window.
        sell_price: Scaled close of the final row of each test window.
        visualX: Inputs of the windows kept for plotting.
        visualY: Labels of the windows kept for plotting.
        code: Stock code written into the summary row.

    Returns:
        ``(visualY2, visualPredict2, predict_df)``: true and predicted prices
        for the plotting windows, and a one-row DataFrame with the columns
        'ts_code', 'predict_buy', 'count', 'accuracy' and 'model_income'.
        ``visualPredict2`` keeps the shape returned by ``model.predict``.
    """
    close_range, close_min = scaler.data_range_[1], scaler.data_min_[1]
    open_range, open_min = scaler.data_range_[0], scaler.data_min_[0]

    # Flatten the predictions so they line up element-wise with the 1-D labels.
    # Left as (n, 1), subtracting the (n,) labels broadcasts to an n x n matrix
    # and the error statistics below would be taken over every pair.
    # (Assumes one output value per sample -- confirm if output_dim changes.)
    testPredict2 = model.predict(testX).reshape(-1) / 10 * close_range + close_min
    testY2 = testY / 10 * close_range + close_min

    close_price2 = close_price * close_range + close_min  # latest close: decides whether to buy
    buy_price2 = buy_price * open_range + open_min        # next open: cost of buying
    sell_price2 = sell_price * close_range + close_min    # close after holding: sale proceeds

    # Diagnostics only; not part of the summary row.
    error_pct = np.abs(testPredict2 - testY2) / testY2 * 100
    mean_error = round(float(np.mean(error_pct)), 2)
    max_error = round(float(np.max(error_pct)), 2)

    tolerance = 1  # percent
    count = 0          # predictions within `tolerance` percent of the true price
    correct = 0        # predictions with the right up/down direction
    model_income = 0   # income of the model-driven strategy
    trade = 0          # number of trades made by the model-driven strategy
    max_income = 0     # income when buying exactly when the price really rises
    random_income = 0  # income when buying at random

    rows = zip(testPredict2, testY2, close_price2, buy_price2, sell_price2)
    for predicted, actual, last_close, buy_at, sell_at in rows:
        if abs(predicted - actual) / actual * 100 <= tolerance:
            count += 1

        # Direction is right when predicted and actual moves share a sign.
        if np.sign(predicted - last_close) == np.sign(actual - last_close):
            correct += 1

        if predicted > last_close:
            model_income = model_income + sell_at - buy_at
            trade += 1

        if actual > last_close:
            max_income = max_income + sell_at - buy_at

        if np.random.randint(0, 2):  # 0 or 1; buy on 1
            random_income = random_income + sell_at - buy_at

    n_samples = len(testY2)
    count = round(count / n_samples * 100, 2)
    print('误差小于' + str(tolerance) + '%的比例：', count)

    accuracy = round(correct / n_samples * 100, 2)
    print('预测涨或跌的正确率：', accuracy)

    frequency = round(trade / n_samples * 100, 2)
    model_income = round(float(model_income), 2)
    print('按照模型进行操作所得的收入：', model_income)

    max_income = round(float(max_income), 2)
    random_income = round(float(random_income), 2)

    visualPredict = model.predict(visualX)
    visualPredict2 = visualPredict / 10 * close_range + close_min
    visualY2 = visualY / 10 * close_range + close_min

    # Combine the within-tolerance ratio, the direction accuracy and the income
    # into one buy-worthiness weight.
    predict_df = pd.DataFrame([{
        'ts_code' : code,
        'predict_buy' : model_income*((count+accuracy)/200),
        'count' : count,
        'accuracy' : accuracy,
        'model_income' : model_income,
    }])

    return visualY2, visualPredict2, predict_df

    

def apply(model, appX, scaler):
    """Predict prices for the unlabelled windows.

    Args:
        model: Object whose ``predict`` returns scaled, x10 predictions.
        appX: Scaled input windows.
        scaler: Fitted scaler exposing ``data_range_`` and ``data_min_``.

    Returns:
        1-D array of predictions with the x10 enlargement and the min-max
        scaling of feature column 1 undone.
    """
    scaled_prediction = model.predict(appX)
    prices = scaled_prediction / 10 * scaler.data_range_[1] + scaler.data_min_[1]
    return prices.reshape(-1)
    

    
import matplotlib.pyplot as plt

#画图
def visualize(visualY2, visualPredict2, appPredict2, visual_window, day):
    """Plot true prices, model output and forward predictions on one figure.

    Args:
        visualY2: True prices of the plotting windows (blue line).
        visualPredict2: Model output for the same windows (orange line).
        appPredict2: Predictions for the unlabelled windows (red dots),
            drawn immediately after the model output on the x axis.
        visual_window: Number of plotting windows; sets the x-axis range.
        day: Prediction horizon; extends the x-axis range.
    """
    history_x = list(range(len(visualY2)))
    model_x = list(range(len(visualPredict2)))
    future_start = len(visualPredict2)
    future_x = list(range(future_start, future_start + len(appPredict2)))

    plt.figure(figsize=(16,8))
    plt.plot(history_x, visualY2, color='blue')
    plt.plot(model_x, visualPredict2, color='orange')
    plt.scatter(future_x, appPredict2, color='red')

    plt.xlim((0, visual_window + day))
    plt.legend(['True price', 'Model result', 'Prediction'], loc='upper left')
    plt.ylabel('price')
    plt.xlabel('time')

    plt.show()

    

import time
import datetime
import sys as sys
import gc

def main(code, day, c_days, file_name):
    """Run the whole pipeline for one stock and append its scores to the CSV.

    Fetches quotes for the ``c_days`` calendar days ending today, builds the
    windows, loads the model saved for this (code, day) pair or trains a new
    one, evaluates it and writes the one-row summary to ``file_name``.

    Args:
        code: Stock code to process.
        day: How many rows ahead the model predicts.
        c_days: Calendar days of history to request.
        file_name: CSV file the summary row is written to; created with a
            header when missing, appended to otherwise.
    """
    from keras import backend as K
    from keras.models import load_model

    # Date range of the sample data.
    end_date = datetime.datetime.now()
    start_date = end_date - datetime.timedelta(days = c_days)
    end_date = end_date.strftime('%Y%m%d')
    start_date = start_date.strftime('%Y%m%d')

    seq_length = 20      # window length
    data_dim = 7         # number of input features
    output_dim = 1       # number of predicted values
    visual_window = 200  # windows kept for plotting

    try:
        stock_data = get_data(code, start_date, end_date)
    except Exception:  # keep the batch going, but let Ctrl-C through
        print('代码不正确或无法获得该股票的数据')
        return

    if len(stock_data) == 0:
        print('代码不正确或无法获得该股票的数据')
        return

    # At least two windows are needed so the 70/30 split leaves a non-empty
    # training set; without this, a short history aborts the whole batch.
    min_rows = seq_length + day + 1
    if len(stock_data) < min_rows:
        print('%s的历史数据不足%d条，跳过' % (code, min_rows))
        return

    (trainX, trainY, testX, testY, appX, scaler,
     test_close, test_buy, test_sell, visualX, visualY) = preprocess(
        stock_data, day, seq_length, data_dim, output_dim, visual_window)

    try:
        model = load_model(code + '(1)' + str(day) + '.h5')
    except Exception:  # no usable saved model: train one
        print('第一次预测%d天内'%(day) + code + '的估价，需要一点时间建模')
        model = train(
            code, day, trainX, trainY, seq_length, data_dim, output_dim)

    # The plotting arrays are not needed in batch mode; apply() and
    # visualize() can be called here when a chart is wanted.
    visualY2, visualPredict2, predict_df = test(
        model, testX, testY, scaler, day, test_close, test_buy, test_sell, visualX, visualY, code)

    # Save after every stock so an interrupted run loses nothing.
    if os.path.isfile(file_name):
        predict_df.to_csv(file_name,mode='a',header=False)
    else:
        predict_df.to_csv(file_name)

    # Drop the Keras state built for this stock so memory does not grow with
    # every model created during a long batch.
    K.clear_session()
    gc.collect()
        
        

            
import tushare as ts
import os

def acq_code(ty, file_name):
    """Return the stock codes of the chosen board that still need computing.

    Codes already present in the ``ts_code`` column of ``file_name`` are
    skipped.

    Args:
        ty: Board selector matched against code prefixes: 0 -> '00' or '60',
            1 -> '60', 2 -> '000', 3 -> '002'. Any other value selects
            nothing.
        file_name: Results CSV from earlier runs; may not exist yet.

    Returns:
        List of matching codes in the order returned by the API.
    """
    # NOTE(review): this API token is committed in source; consider loading it
    # from configuration or an environment variable instead.
    pro = ts.pro_api('5ae1c2cea081bde0fb95295e7bfa302c5b87cc7bb967ccd5c3cee38a')
    pool = pro.stock_basic(exchange = '',list_status = 'L',adj = 'qfq',fields = 'ts_code')

    finished = set()
    if os.path.isfile(file_name):
        exit_code = pd.read_csv(file_name)
        exit_code = np.array(exit_code['ts_code'])
        print(exit_code)
        print('以上',len(exit_code),'条股票数据已保存在',file_name,'中，这些数据不会被重新计算。')
        finished = set(exit_code)

    prefixes_by_type = {
        0: ('00', '60'),
        1: ('60',),
        2: ('000',),
        3: ('002',),
    }
    prefixes = prefixes_by_type.get(ty)

    # An existing results file with no rows simply excludes nothing; it used
    # to exclude every stock.
    if prefixes is None:
        stock_code = []
    else:
        stock_code = [
            candidate for candidate in pool.ts_code
            if candidate.startswith(prefixes) and candidate not in finished
        ]

    print('完成筛选，只保留主板和中小板的股票共：',len(stock_code),'只股票代码……')

    return stock_code




import time

def _format_elapsed(seconds):
    """Format a duration in seconds as HH:MM:SS (hours may exceed 24)."""
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return '%02d:%02d:%02d' % (hours, minutes, secs)


def data_predict():
    """Entry point: ask for a board, then score every remaining stock on it.

    Reads the board selector from standard input and calls ``main`` once per
    stock code returned by ``acq_code``, printing progress and timing before
    each one and pausing three seconds after each one.
    """
    start_time = time.time()
    _time = time.time()

    day = int(5)                    # prediction horizon
    c_days = int(1000)              # calendar days of history to fetch
    file_name = 'data_predict.csv'  # results file

    ty = int(input("请输入股票类型，0表示主板和中小板，1表示上交所主板，2表示深交所主板，3表示深交所中小板："))
    stock_code = acq_code(ty, file_name)

    for k, i in enumerate(stock_code, start=1):
        # Durations are formatted directly; passing elapsed seconds through
        # time.localtime() treated them as a timestamp and added the local
        # timezone offset.
        now = time.time()
        total_time = _format_elapsed(now - start_time)
        this_time = _format_elapsed(now - _time)
        print('共：{}只股票，正在计算第：{}只股票，股票代码：{}。上次耗时：{}秒，总耗时：{}秒。>>>'.format(len(stock_code), k, i, this_time, total_time))
        _time = time.time()

        main(i, day, c_days, file_name)
        time.sleep(3)

    print('全部完成！请查看文件：',file_name)



if __name__ == '__main__': # run the program only when executed as a script
    data_predict()


Using TensorFlow backend.


请输入股票类型，0表示主板和中小板，1表示上交所主板，2表示深交所主板，3表示深交所中小板：2
['000001.SZ' '000002.SZ' '000004.SZ' '000005.SZ' '000006.SZ' '000007.SZ'
 '000008.SZ' '000009.SZ' '000010.SZ' '000011.SZ' '000012.SZ' '000014.SZ'
 '000016.SZ' '000017.SZ' '000019.SZ' '000020.SZ' '000021.SZ' '000023.SZ'
 '000025.SZ' '000026.SZ' '000027.SZ' '000028.SZ' '000030.SZ' '000031.SZ'
 '000032.SZ' '000034.SZ' '000035.SZ' '000036.SZ' '000037.SZ' '000038.SZ'
 '000039.SZ' '000040.SZ' '000042.SZ' '000045.SZ' '000046.SZ' '000048.SZ'
 '000049.SZ' '000050.SZ' '000055.SZ' '000056.SZ' '000058.SZ' '000059.SZ'
 '000060.SZ' '000061.SZ' '000062.SZ' '000063.SZ' '000065.SZ' '000066.SZ'
 '000068.SZ' '000069.SZ' '000070.SZ' '000078.SZ' '000088.SZ' '000089.SZ'
 '000090.SZ' '000096.SZ' '000099.SZ' '000100.SZ' '000150.SZ' '000151.SZ'
 '000153.SZ' '000155.SZ' '000156.SZ' '000157.SZ' '000158.SZ' '000159.SZ'
 '000166.SZ' '000301.SZ' '000333.SZ' '000338.SZ' '000400.SZ' '000401.SZ'
 '000402.SZ' '000403.SZ' '000404.SZ' '000407.SZ' '000408.SZ' '000409.SZ'
 '0

共：373只股票，正在计算第：48只股票，股票代码：000537.SZ。上次耗时：08:00:11秒，总耗时：08:07:10秒。>>>
第一次预测5天内000537.SZ的估价，需要一点时间建模
误差小于1%的比例： 10.36
预测涨或跌的正确率： 51.81
按照模型进行操作所得的收入： 6.1
共：373只股票，正在计算第：49只股票，股票代码：000538.SZ。上次耗时：08:00:08秒，总耗时：08:07:19秒。>>>
第一次预测5天内000538.SZ的估价，需要一点时间建模
误差小于1%的比例： 22.91
预测涨或跌的正确率： 58.66
按照模型进行操作所得的收入： 87.88
共：373只股票，正在计算第：50只股票，股票代码：000539.SZ。上次耗时：08:00:13秒，总耗时：08:07:32秒。>>>
第一次预测5天内000539.SZ的估价，需要一点时间建模
误差小于1%的比例： 34.87
预测涨或跌的正确率： 60.51
按照模型进行操作所得的收入： 3.47
共：373只股票，正在计算第：51只股票，股票代码：000540.SZ。上次耗时：08:00:14秒，总耗时：08:07:47秒。>>>
第一次预测5天内000540.SZ的估价，需要一点时间建模
误差小于1%的比例： 13.68
预测涨或跌的正确率： 47.37
按照模型进行操作所得的收入： 1.04
共：373只股票，正在计算第：52只股票，股票代码：000541.SZ。上次耗时：08:00:07秒，总耗时：08:07:55秒。>>>
第一次预测5天内000541.SZ的估价，需要一点时间建模
误差小于1%的比例： 33.85
预测涨或跌的正确率： 54.87
按照模型进行操作所得的收入： 1.76
共：373只股票，正在计算第：53只股票，股票代码：000543.SZ。上次耗时：08:00:15秒，总耗时：08:08:11秒。>>>
第一次预测5天内000543.SZ的估价，需要一点时间建模
误差小于1%的比例： 24.61
预测涨或跌的正确率： 56.54
按照模型进行操作所得的收入： 2.09
共：373只股票，正在计算第：54只股票，股票代码：000544.SZ。上次耗时：08:00:13秒，总耗时：08:08:25秒。>>>
第一次预测5天内00054