测试一下每支股票的最优参数

In [1]:
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import talib
import torch.utils.data as Data

我想要用lstm的网络来预测收盘价。  
x是价格对于，N日前的收盘价的比例
y是收盘价  

# 获取数据

In [2]:
sys.path.append('../../')
import  DataSource
zz500 = DataSource.get_zz500_codes() # 我看中证500的数据
# 我这里查看第一个股票的吧
N = 60 # 多少日的数据
# 我这里仅仅是记录如下的列的涨跌幅度
rate_columns = ['open', 'high', 'low', 'close']
close_index = rate_columns.index('close')
input_size = len(rate_columns)
seq_len = N    # 多少个时间序列的股票
output_size = 1 # 我只是输出一个今天的收盘价相比较昨日的涨跌比率
num_layers = 10 # 多少个gru合并
hidden_size = 20 # 隐藏层的宽度
batch_size = 16 # 一个批次有多少个数据
lr = 0.0001

In [3]:
def get_loader(code_name, N=60, rate_columns=['open', 'high', 'low', 'close'], batch_size=16, train_rate=0.9):
    dt = DataSource.get_data(code_name)
    dt_list = []
    for j in range(N+1):
        # N+1表示要多一天，比如前面30天，我要看看第31天的收盘价相对于第一天的收盘价是什么比例
        for i in rate_columns:
            dt_tmp = pd.DataFrame({f'{i}_rate_{j}':(dt[f'{i}'].shift(-j) - dt[f'preclose'])/dt[f'preclose']})
            dt_list.append(dt_tmp)
    
    dt2 = pd.concat(dt_list[:-(len(rate_columns))], axis=1)
    # x
    x_price = dt2.to_numpy()
    x_price = x_price[:-(N+1)]
    # 更改维度
    x_shape_old = x_price.shape
    x_shape_new_1 = x_shape_old[0]
    x_shape_new_3 = len(rate_columns)
    x_shape_new_2 = int(x_shape_old[1] / x_shape_new_3)
    x_price = x_price.reshape((x_shape_new_1, x_shape_new_2, x_shape_new_3))
    # y
    y_close = dt_list[-(len(rate_columns)- close_index)][f'close_rate_{N}'].to_numpy()[:-(N+1)]
    # 然后是批次的整数倍
    _x_y_len = int(x_price.shape[0]/batch_size) * batch_size
    
    x_price = x_price[-_x_y_len:]
    y_close = y_close[-_x_y_len:]
    x_price = x_price.reshape(x_price.shape[0]//batch_size, batch_size, x_price.shape[1], x_price.shape[2])
    y_close = y_close.reshape(y_close.shape[0]//batch_size, batch_size, 1)
    # 做成加载器
    dataset = Data.TensorDataset(torch.FloatTensor(x_price), torch.FloatTensor(y_close))
    train_loader, test_loader = Data.random_split(dataset,
                                                 lengths=[int(train_rate*len(dataset)), len(dataset)- int(train_rate*len(dataset))],
                                                 generator=torch.Generator().manual_seed(0))
    return train_loader, test_loader



In [4]:

# 这里做一个网络
import torch.nn as nn
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    def forward(self, x):
        out, _ = self.gru(x) # gru层
        out = self.fc(out[:,-1, :]) # 全连接层
        return out

In [5]:
# 训练
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = GRUModel(input_size, hidden_size, num_layers, output_size).to(device)
criterion = torch.nn.MSELoss() # 均方差误差函数
optimizer = optim.Adam(net.parameters(), lr=lr)

In [None]:
# 这里是遍历所有的股票
for i_code in range(len(zz500)):
    train_loader, test_loader = get_loader(zz500[i_code],batch_size=batch_size, train_rate=0.9)
    # 我这里用测试集来测试
    for epoch in range(10):  # 迭代
        running_loss_2 = []
        for i, data in enumerate(train_loader, 0):
                # 获取输入
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                # 正向传播，反向传播，优化
                optimizer.zero_grad()
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
    # 保存参数
    import os
    model_path = os.path.join('./模型', f"{zz500[i_code]}.pth")
    torch.save({'model': net.state_dict()}, model_path)
    
    # 这里测试
    with torch.no_grad():
        running_loss_2 = []
        for i, data in enumerate(test_loader, 0):
            # 获取输入
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # 正向传播，反向传播，优化
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            running_loss_2.append(loss.item())
        # 每一次显示一下损失的最大值和最小值,以及实际上的损失情况,损失是loss，这个是期望值和实际值的均方误差,然后用这个来除以最后的收盘价
        last_close = inputs[:, -1, close_index] # 取得最后的收盘价
        last_close2 = torch.reshape(last_close, outputs.shape) # 将
        real_loss_rate = torch.abs(outputs-labels)/last_close2
        real_loss_rate_2 = (sum(real_loss_rate)/len(real_loss_rate)).item() * 100
        # 
        last_close_rate = torch.abs(labels-last_close2)/last_close2
        last_close_rate_2 = (sum(last_close_rate)/len(last_close_rate)).item() * 100
        #
        print(f'{i_code+1}: {zz500[i_code]},real loss: {real_loss_rate_2:.5f}%, last close rate : {last_close_rate_2:.5f}%')
        print(outputs)
        print(labels)
        print(last_close2)
        break
        print('结果离谱')


tensor([[-0.0827],
        [-0.0437],
        [-0.0560],
        [ 0.0256],
        [ 0.0730],
        [ 0.0673],
        [ 0.0522],
        [ 0.0405],
        [ 0.1211],
        [ 0.1922],
        [ 0.1835],
        [ 0.1038],
        [ 0.1099],
        [ 0.1358],
        [ 0.1775],
        [ 0.1811]], device='cuda:0')
tensor([[0.0114],
        [0.0243],
        [0.0172],
        [0.0814],
        [0.1189],
        [0.1506],
        [0.1014],
        [0.0981],
        [0.1502],
        [0.2384],
        [0.2516],
        [0.1511],
        [0.1112],
        [0.1530],
        [0.1996],
        [0.2304]], device='cuda:0')
tensor([[0.0049],
        [0.0369],
        [0.0016],
        [0.0753],
        [0.1083],
        [0.1042],
        [0.1289],
        [0.0837],
        [0.1547],
        [0.2004],
        [0.2214],
        [0.1742],
        [0.1471],
        [0.1227],
        [0.1784],
        [0.1944]], device='cuda:0')
1: sh.600004,loss: 0.00311,real loss: 439.32381%, last close rate 