In [145]:
# 这是用RNN模型将sin的输入转换为cos的输出
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt


In [146]:
TIME_STEP = 10                               # rnn 定义的时间步数
INPUT_SIZE = 1                              # 因为输入的是1个数字，也就是1个feature
LR = 0.01                                   # learning rate
HIDDEN_SIZE = 64                            # 隐藏层的特征数量
torch.manual_seed(1)                        # 随机数种子

<torch._C.Generator at 0x1124a2e28>

In [147]:

class RNN(nn.Module):                                   # hidden = np.tanh(np.dot(self.W_hh, hidden) + np.dot(self.W_xh, x))
    def __init__(self):
        super().__init__()
        self.rnn = nn.RNN(                              # 自己定义一个RNN神经网络
            input_size = INPUT_SIZE,                    # 输入 x 的feature 维度
            hidden_size = HIDDEN_SIZE,                  # 隐状态 hidden_state 中的feature维度
            num_layers = 5,                             # RNN 的层数
            nonlinearity='relu',                        # 指定激活函数 [‘tanh’ | ’relu’]. 默认: ‘tanh’
            bias = True,                                # 如果是 False , 那么 RNN 层就不会使用偏置权重 b_ih 和 b_hh, 默认: True
            batch_first = True,                         # 如果 True, 输入Tensor的shape应该是(batch, seq, features),并且输出也是一样.
            dropout = 0,                                # 如果值非零, 那么除了最后一层外, 其它层的输出都会套上一个 dropout 层
            bidirectional = False                       # 如果 True , 将会变成一个双向 RNN, 默认为 False
        )
        self.out = nn.Linear(HIDDEN_SIZE, 1)            # 定义一个输出层，这是RNN的最后输出，只用输出output_vector

    def forward(self, x, h_state):                      # 这就是RNN每次输入的参数x和h
        # x (batch, time_step, input_size)              # 这是RNN的ｘ的维度            (批量, 序列长度, 输入的特征维度）
        # h_state (n_layers, batch, hidden_size)        # 这是hidden_state的维度       (层数×方向, 批量, 输出的特征维度）/*方向：单向是１；双向是２*/
        # r_out (batch, time_step, hidden_size)         # 这是网络实际输出的r_out的维度 (批量，序列长度，输出的特征维度X方向）
        r_out, h_state = self.rnn(x, h_state)           # RNN每次输入x, hidden_state; 输出r_out, hidden_state;
        outs = []
        for time_step in range(r_out.size(1)):
            outs.append(self.out(r_out[:, time_step, :]))

        return torch.stack(outs, dim=1), h_state        # RNN的forward输出了output_vector, hidden_state

In [148]:
rnn = RNN()
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR, weight_decay=1e-6)    # 定义了优化函数ADAM对RNN的参数反向优化，定义了 learning rate 和 权重衰减率
loss_func = nn.MSELoss()                                                    # 定义了模型的损失函数用 mean-square-error

In [150]:

h_state = None                                                          # 起始时输入给RNN的hidden_state就是None

for step in range(2000):                                                 # 计算２００次。相当于２００个顺序的时间片数据丢进去计算
    start, end = step*np.pi, (step+1)*np.pi                             # 设计一小段数据起始点
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32)        # 生成一小段数据
    x_np = np.sin(steps)                                                # 这就是用来输入的数据
    y_np = np.cos(steps)                                                # 这就是需要被预测的数据
    
    # x = Variable(torch.from_numpy(x_np[np.newaxis, :, np.newaxis]))     # x_np[np.newaxis, :, np.nexaxis]这句将x_np从1维(n)转化成3维(1,n,1);然后成为输入变量x
    # y = Variable(torch.from_numpy(y_np[np.newaxis, :, np.newaxis]))     # y_np[np.newaxis, :, np.nexaxis]这句将y_np从1维(n)转化成3维(1,n,1);然后成为真实值 y
    # print(x.size())
    # print(y.size())
    
    prediction, h_state = rnn(x, h_state)                               # 这就是一次RNN模型forward()得出来的结果
    h_state = Variable(h_state.data)                                    # 把h_state(tensor)中的数据取出来,给下一轮的rnn作为数据输入

    loss = loss_func(prediction, y)                                     # 用损失函数计算误差
    optimizer.zero_grad()                                               # 清空优化器梯度
    loss.backward()                                                     # 反向传播求解梯度
    optimizer.step()                                                    # 更新权重的参数
    
    if step%20==0:
        print("step:{}  loss:{:.9f}".format(step, loss.data.float()))

#     plt.ion()                                                           # 把实际图和预测图动态打印出来
#     plt.plot(steps, y_np, color='b')
#     plt.plot(steps, np.squeeze(prediction.data.numpy()), color='r')
#     plt.show()
#     plt.pause(0.30)

step:0  loss:0.616753757
step:20  loss:0.599726737
step:40  loss:0.598618388
step:60  loss:0.614574015
step:80  loss:0.597458482
step:100  loss:0.600382805
step:120  loss:0.597702026
step:140  loss:0.937314034
step:160  loss:0.600224555
step:180  loss:0.599694431
step:200  loss:0.590930045
step:220  loss:0.094163097
step:240  loss:0.582085848
step:260  loss:0.578835607
step:280  loss:0.589287341
step:300  loss:0.729984403
step:320  loss:0.597867250
step:340  loss:0.536698222
step:360  loss:0.505166292
step:380  loss:0.534887910
step:400  loss:35.706615448
step:420  loss:0.609800935
step:440  loss:0.600955725
step:460  loss:0.599595129
step:480  loss:0.600089133
step:500  loss:0.600345314
step:520  loss:0.602997720
step:540  loss:0.602400184
step:560  loss:0.601933479
step:580  loss:0.601537406
step:600  loss:0.601230443
step:620  loss:0.600997031
step:640  loss:0.600822091
step:660  loss:0.600693882
step:680  loss:0.600604296
step:700  loss:0.600492239
step:720  loss:2.285447359
step:7