### More RNN

In [None]:
import torch

def default_device():
    """Return the preferred torch device: CUDA first, then MPS, else CPU."""
    if torch.cuda.is_available():
        backend = 'cuda'
    elif torch.backends.mps.is_available():
        backend = 'mps'
    else:
        backend = 'cpu'
    return torch.device(backend)


# Device selected once, when this cell runs.
device = default_device()

In [None]:
from torch import nn
from tqdm import *

class DRNN(nn.Module):
    """Multi-layer (deep) RNN followed by a linear layer applied at every time step.

    Args:
        input_size: Number of features per time step of the input.
        output_size: Number of features per time step of the output.
        hidden_size: Size of the hidden state of each recurrent layer.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # With batch_first=True tensors are laid out as (batch, seq, feature);
        # otherwise they would be (seq, batch, feature).
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a (batch, seq, input_size) tensor to (batch, seq, output_size)."""
        # Zero initial hidden state (a plain RNN has no cell state). It is
        # allocated with the input's device and dtype so the module also works
        # after .to(device) / .double().
        state = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        # The final hidden state is not needed; only the per-step outputs are.
        output, _ = self.rnn(x, state)
        return self.linear(output)

In [None]:
# Network structure: list every learnable parameter of the DRNN with its shape.
model = DRNN(16, 16, 64, 2)
for name, parameters in model.named_parameters():
    print(f'{name} : {parameters.shape}')

In [None]:
class BRNN(nn.Module):
    """Bidirectional multi-layer RNN followed by a linear layer at every time step.

    Args:
        input_size: Number of features per time step of the input.
        output_size: Number of features per time step of the output.
        hidden_size: Size of the hidden state of each direction of each layer.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # bidirectional=True runs a forward and a backward pass over the sequence.
        self.rnn = nn.RNN(input_size, hidden_size, num_layers,
                          batch_first=True, bidirectional=True)
        # Both directions are concatenated, so the head sees 2 * hidden_size features.
        self.linear = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        """Map a (batch, seq, input_size) tensor to (batch, seq, output_size)."""
        # Zero initial hidden state: one slice per layer and per direction.
        # Allocated with the input's device and dtype so the module also works
        # after .to(device) / .double().
        state = torch.zeros(
            self.num_layers * 2, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        output, _ = self.rnn(x, state)
        return self.linear(output)

In [None]:
# Network structure: list every learnable parameter of the BRNN with its shape.
model = BRNN(16, 16, 64, 2)
for name, parameters in model.named_parameters():
    print(f'{name} : {parameters.shape}')

In [None]:
class LSTM(nn.Module):
    """Stacked LSTM whose per-step outputs are projected by a linear layer.

    Args:
        input_size: Number of features per time step of the input.
        output_size: Number of features per time step of the output.
        hidden_size: Size of the hidden state of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size, self.num_layers = hidden_size, num_layers
        # batch_first=True: tensors are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the linear projection of the LSTM output at every time step."""
        # The LSTM returns (outputs, (h_n, c_n)); only the outputs are used.
        hidden_seq = self.lstm(x)[0]
        return self.linear(hidden_seq)

In [None]:
# Network structure: list every learnable parameter of the LSTM with its shape.
model = LSTM(16, 16, 64, 2)
for name, parameters in model.named_parameters():
    print(f'{name} : {parameters.shape}')

In [None]:
class GRU(nn.Module):
    """Stacked GRU whose per-step outputs are projected by a linear layer.

    Args:
        input_size: Number of features per time step of the input.
        output_size: Number of features per time step of the output.
        hidden_size: Size of the hidden state of each GRU layer.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size, self.num_layers = hidden_size, num_layers
        # batch_first=True: tensors are laid out as (batch, seq, feature).
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the linear projection of the GRU output at every time step."""
        # The GRU returns (outputs, h_n); only the outputs are used.
        hidden_seq = self.gru(x)[0]
        return self.linear(hidden_seq)

In [None]:
# Network structure: list every learnable parameter of the GRU with its shape.
model = GRU(16, 16, 64, 2)
for name, parameters in model.named_parameters():
    print(f'{name} : {parameters.shape}')

In [None]:
import pandas_datareader as pdr
# Download the daily '^DJI' quotes from the 'stooq' data source.
# NOTE(review): the Stooq reader appears to return rows newest-first; confirm
# against the pandas-datareader docs. Sorting the index ascending guarantees
# chronological order for the windowing code that consumes dji['Close'], and
# is a no-op if the rows are already ascending.
dji = pdr.DataReader('^DJI', 'stooq').sort_index()
dji.head()

In [None]:
import matplotlib.pyplot as plt
# Plot the 'Close' column against the DataFrame index.
plt.plot(dji.loc[:, 'Close'])
plt.show()

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset

num = len(dji)                             # total number of samples
x = torch.tensor(dji['Close'].to_list())   # closing prices as a 1-D tensor

# Standardize the series: subtract the mean and divide by the standard deviation.
x = (x - x.mean()) / x.std()

seq_len = 16      # length of each input window
batch_size = 16   # mini-batch size

# Row j of X_feature is x[j : j + seq_len]; row j of Y_label is the same window
# shifted one step ahead, i.e. the next value for every position of the window.
# Both matrices have num - seq_len rows and seq_len columns.
X_feature = torch.stack(
    [x[i: num - seq_len + i] for i in range(seq_len)], dim=1)
Y_label = torch.stack(
    [x[i + 1: num - seq_len + i + 1] for i in range(seq_len)], dim=1)

# Data loader over (window, target) pairs; the features get a trailing axis of
# size 1 so each time step carries a single feature.
train_dataset = TensorDataset(X_feature.unsqueeze(2), Y_label)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Hyperparameters
input_size = 1     # one feature per time step
output_size = 1    # one predicted value per time step
num_hiddens = 64
n_layers = 2
lr = 0.001


# Build the model, the loss and the optimizer.
model = DRNN(input_size, output_size, hidden_size=num_hiddens, num_layers=n_layers)
# reduction='none' keeps the element-wise losses; the training code reduces them.
criterion = nn.MSELoss(reduction='none')
trainer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
# Number of training epochs
num_epochs = 20
rnn_loss_history = []  # one plain Python float per epoch

for epoch in tqdm(range(num_epochs)):
    # Mini-batch training
    for X, Y in train_loader:
        trainer.zero_grad()
        # Drop only the trailing feature axis so predictions line up with Y
        # for any batch size (a bare squeeze() would also drop a batch of 1).
        y_pred = model(X).squeeze(-1)
        loss = criterion(y_pred, Y)
        loss.sum().backward()
        trainer.step()
    # Report the loss.
    # NOTE(review): this pass iterates train_loader, so the value printed as
    # "Validation loss" is measured on the training data.
    with torch.no_grad():
        total_loss = 0.0
        for X, Y in train_loader:
            y_pred = model(X).squeeze(-1)
            loss = criterion(y_pred, Y)
            # .item() turns the batch mean into a float so the history holds
            # numbers rather than 0-dim tensors.
            total_loss += loss.mean().item()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch+1}: Validation loss = {avg_loss:.4f}')
        rnn_loss_history.append(avg_loss)
    

In [None]:
import matplotlib.pyplot as plt
# Plot the loss recorded after each epoch (x axis: 0-based epoch index).
plt.plot(range(len(rnn_loss_history)), rnn_loss_history, label='RNN_loss')
plt.legend()
plt.show()

In [None]:
# Run the model on every window without tracking gradients, and drop the
# trailing feature axis so rnn_preds has one row per window and one column per
# time step.
with torch.no_grad():
    rnn_preds = model(X_feature.unsqueeze(2)).squeeze(-1)
time = torch.arange(1, num + 1, dtype=torch.float32)  # time axis

plt.plot(time[:num-seq_len], x[seq_len:num], label='dji')
# The output at the last position of a window is the forecast for the value
# that immediately follows that window.
plt.plot(time[:num-seq_len], rnn_preds[:, seq_len-1], label='RNN_preds')
plt.legend()
plt.show()

In [None]:
# Hyperparameters
input_size = 1
output_size = 1
num_hiddens = 64
n_layers = 2
lr = 0.001


# Build one network per architecture, in the same order as model_name.
model_name = ['DRNN', 'BRNN', 'LSTM', 'GRU']
models = [
    architecture(input_size, output_size, num_hiddens, n_layers)
    for architecture in (DRNN, BRNN, LSTM, GRU)
]
drnn, brnn, lstm, gru = models

# One Adam optimizer per network, aligned index-by-index with models.
opts = [torch.optim.Adam(network.parameters(), lr) for network in models]
# reduction='none' keeps the element-wise losses; the training code reduces them.
criterion = nn.MSELoss(reduction='none')

num_epochs = 20
rnn_loss_history = []
for epoch in tqdm(range(num_epochs)):
    # Mini-batch training: every batch updates each network in turn.
    for X, Y in train_loader:
        for net, optimizer in zip(models, opts):
            # Each network must be stepped by the optimizer that owns its
            # parameters; stepping any other optimizer leaves it untrained.
            optimizer.zero_grad()
            # Drop only the trailing feature axis so predictions line up with Y.
            y_pred = net(X).squeeze(-1)
            loss = criterion(y_pred, Y)
            loss.sum().backward()
            optimizer.step()

In [None]:
# For each trained network, print the signed sum of (actual - forecast) over
# the whole series.
for name, net in zip(model_name, models):
    with torch.no_grad():
        rnn_preds = net(X_feature.unsqueeze(2))
    # Take the forecast at the last step of every window as a 1-D tensor so it
    # lines up element-wise with x[seq_len:num]. Keeping the trailing size-1
    # axis would broadcast the subtraction into a square matrix.
    last_step_preds = rnn_preds[:, seq_len-1, 0]
    bias = torch.sum(x[seq_len:num] - last_step_preds)
    print('{} bias : {}'.format(name, str(bias)))