In [6]:
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import talib
import torch.utils.data as Data

In [7]:
# Put the directory two levels up on the import path; the project-local
# DataSource / Utils modules imported below are presumably found there.
sys.path.append('../../')
import DataSource
import Utils

# Codes of the CSI 500 constituents; the notebook works with the first one.
zz500 = DataSource.get_zz500_codes()

# Price fields whose movement is tracked, and where 'close' sits among them.
rate_columns = ['open', 'high', 'low', 'close']
close_index = rate_columns.index('close')

# Window length in days, also used as the sequence length.
N = 60
seq_len = N

# Network dimensions.
input_size = len(rate_columns)  # one input feature per tracked price field
output_size = 1                 # single value: close change ratio versus the previous close
num_layers = 2                  # number of stacked GRU layers
hidden_size = 128               # width of the hidden layers

# Optimisation settings.
batch_size = 64                 # samples per batch
lr = 1e-4                       # learning rate

In [8]:
def min_max_normalization(row):
    """Rescale ``row`` linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        row: Object exposing ``min()`` / ``max()`` and element-wise
            arithmetic, e.g. a pandas Series or a NumPy array.

    Returns:
        ``(row - row.min()) / (row.max() - row.min())``. When every value is
        identical the range is zero, and an all-zero result with the same
        shape as ``row`` is returned instead of dividing by zero.
    """
    lowest = row.min()
    value_range = row.max() - lowest
    # A constant row has zero range; dividing would turn every element into
    # NaN. Only scalar ranges are tested, so inputs whose min/max are
    # themselves array-like keep the plain element-wise division.
    if np.ndim(value_range) == 0 and value_range == 0:
        return row - lowest
    return (row - lowest) / value_range
    
def get_loader(code_name, N=60, rate_columns=['open', 'high', 'low', 'close'], batch_size=16, train_rate=0.9):
    """Build pre-batched train/test datasets of sliding price windows for one stock.

    Each sample is a window of ``N`` consecutive rows of ``rate_columns``,
    z-score normalised with the mean and standard deviation of that window
    (all columns pooled). The label is a relative change of the ``close``
    column following the window.

    Args:
        code_name: Stock code forwarded to ``DataSource.get_data``.
        N: Number of consecutive rows per window.
        rate_columns: Column names used as input features. The returned
            frame must also contain a ``'close'`` column for the label.
        batch_size: Number of samples packed into each dataset item.
        train_rate: Fraction of the batches assigned to the first split.

    Returns:
        ``(train, test)``: the two ``torch.utils.data.Subset`` objects from
        ``random_split`` (fixed seed 0). Every item is already a whole batch:
        ``x`` of shape ``(batch_size, N, len(rate_columns))`` and ``y`` of
        shape ``(batch_size, 1)``. Both splits are empty when fewer than
        ``batch_size`` samples are available.
    """
    dt = DataSource.get_data(code_name)
    n_features = len(rate_columns)

    # One column per (day offset, price field), offset-major, so each row
    # later reshapes directly into (N, n_features).
    windows = pd.concat(
        [dt[col].shift(-offset).rename(f'{col}_{offset}')
         for offset in range(N)
         for col in rate_columns],
        axis=1)

    # Normalise every row (= one window) with its own mean and std.
    row_mean = windows.mean(axis=1)
    row_std = windows.std(axis=1)
    normalized = windows.sub(row_mean, axis=0).div(row_std, axis=0)

    # The trailing N+1 rows have no complete window and/or no label.
    x_price = normalized.to_numpy()[:-(N + 1)]

    # Label: relative change of close between offsets N and N+1. It is built
    # from local Series so the frame returned by DataSource.get_data is not
    # modified.
    # NOTE(review): the window covers offsets 0..N-1, so the label starts one
    # day after the window ends -- confirm this gap is intended.
    close = dt['close']
    close_base = close.shift(-N)
    close_next = close.shift(-(N + 1))
    y_close = ((close_next - close_base) / close_base).to_numpy()[:-(N + 1)]

    # Keep the most recent rows that fill whole batches. The start index is
    # explicit because a negative slice of length zero (`x[-0:]`) would
    # return the entire array instead of an empty one.
    n_batches = x_price.shape[0] // batch_size
    start = x_price.shape[0] - n_batches * batch_size
    x_price = x_price[start:].reshape(n_batches, batch_size, N, n_features)
    y_close = y_close[start:].reshape(n_batches, batch_size, 1)

    dataset = Data.TensorDataset(
        torch.FloatTensor(x_price),
        torch.FloatTensor(y_close))

    # NOTE(review): neighbouring rows are overlapping windows, so randomly
    # assigned batches share price data between the two splits.
    n_train = int(train_rate * len(dataset))
    train_loader, test_loader = Data.random_split(
        dataset,
        lengths=[n_train, len(dataset) - n_train],
        generator=torch.Generator().manual_seed(0))
    return train_loader, test_loader

In [9]:
# 一个gru网络
class GRUModel(nn.Module):
    """GRU encoder followed by a three-layer fully connected head.

    The GRU runs over a batch-first input sequence; only the output at the
    last time step is passed through two hidden linear layers with LeakyReLU
    activations and a final linear projection to ``output_size``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.relu1 = nn.LeakyReLU()
        self.relu2 = nn.LeakyReLU()

    def forward(self, x):
        """Return the head's output for the last time step of ``x``."""
        sequence_out, _ = self.gru(x)
        # Only the final time step feeds the fully connected head.
        last_step = sequence_out[:, -1, :]
        hidden = self.relu1(self.fc(last_step))
        hidden = self.relu2(self.fc2(hidden))
        return self.fc3(hidden)

In [10]:
# Build the model, loss and optimiser, then train and evaluate on the first
# stock code.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = GRUModel(input_size, hidden_size, num_layers, output_size).to(device)
criterion = torch.nn.L1Loss()  # absolute-error loss
optimizer = optim.Adam(net.parameters(), lr=lr)

# rate_columns is passed explicitly so the number of features in the data
# always matches the input_size the network was built with (input_size is
# derived from the same list).
# NOTE(review): the window length used here (20) differs from the configured
# N / seq_len (60) -- confirm which one is intended.
train_loader, test_loader = get_loader(
    zz500[0],
    N=20,
    rate_columns=rate_columns,
    batch_size=batch_size,
    train_rate=0.9)
Utils.do_train(100, train_loader, net, optimizer, criterion)
# Then evaluate on the test split.
Utils.do_test(test_loader, net, criterion)
print('finish')

1, avg labels: 0.01473 avg loss:0.01575, max loss: 0.04490, min loss : 0.00658
2, avg labels: 0.01473 avg loss:0.01487, max loss: 0.04471, min loss : 0.00642
3, avg labels: 0.01473 avg loss:0.01483, max loss: 0.04474, min loss : 0.00639
4, avg labels: 0.01473 avg loss:0.01482, max loss: 0.04471, min loss : 0.00641
5, avg labels: 0.01473 avg loss:0.01482, max loss: 0.04470, min loss : 0.00639
6, avg labels: 0.01473 avg loss:0.01484, max loss: 0.04471, min loss : 0.00641
7, avg labels: 0.01473 avg loss:0.01483, max loss: 0.04472, min loss : 0.00642
8, avg labels: 0.01473 avg loss:0.01483, max loss: 0.04472, min loss : 0.00640
9, avg labels: 0.01473 avg loss:0.01483, max loss: 0.04472, min loss : 0.00641
10, avg labels: 0.01473 avg loss:0.01480, max loss: 0.04472, min loss : 0.00643
11, avg labels: 0.01473 avg loss:0.01483, max loss: 0.04471, min loss : 0.00645
12, avg labels: 0.01473 avg loss:0.01480, max loss: 0.04468, min loss : 0.00642
13, avg labels: 0.01473 avg loss:0.01482, max los

可以看到，模型的预测结果倾向于 0：训练几轮之后平均损失就稳定在约 0.0148，与日志中的 avg labels（0.01473）几乎相等。也就是说，在 L1 损失下，模型只是通过输出接近 0 的值来使损失函数最小，并没有学到有效的涨跌规律。