In [1]:
import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

from tqdm import tqdm_notebook
from sklearn.preprocessing import MinMaxScaler


In [3]:
data_dir = "./"
print(os.listdir(data_dir))

['使用LSTM和GRU进行数值预测.ipynb', '.ipynb_checkpoints', 'PJM_Load_hourly.csv', 'NI_hourly.csv', 'FE_hourly.csv', 'EKPC_hourly.csv', 'PJMW_hourly.csv', 'PJME_hourly.csv', 'est_hourly.paruqet', 'DEOK_hourly.csv', 'DUQ_hourly.csv', 'DOM_hourly.csv', 'COMED_hourly.csv', 'DAYTON_hourly.csv', 'AEP_hourly.csv', 'pjm_hourly_est.csv']


In [4]:

pd.read_csv('AEP_hourly.csv').head()

Unnamed: 0,Datetime,AEP_MW
0,2004-12-31 01:00:00,13478.0
1,2004-12-31 02:00:00,12865.0
2,2004-12-31 03:00:00,12577.0
3,2004-12-31 04:00:00,12517.0
4,2004-12-31 05:00:00,12670.0


- 我们共有12个.csv文件，其中包含小时能源趋势数据（'est_hourly.paruqet'和'pjm_hourly_est.csv' 这两个文件没啥用）
- 在接下来的步骤中，我们将按照以下顺序读取这些文件并预处理这些数据：

- 获取每个单独时间步的时间数据并将其归纳为：

  - 一天中的小时，即 0-23
  - 一周中的天数，即 1-7
  - 月份，即 1-12
  - 年中的天数，即 1-365

- 将数据缩放到0和1之间的值
  - 当特征在相对相似的范围内和/或接近正态分布时，算法往往表现更好或收敛更快
  - 缩放保留了原始分布的形状，不会降低异常值的重要性

- 将数据分组为模型输入的序列，并存储相应的标签：

  - 序列长度或回溯期是模型用来进行预测的历史数据点的数量
  - 标签将是输入序列中最后一个数据点之后的下一个时间点的数据
- 将输入和标签分割为训练和测试集

In [5]:
# 缩放器对象将存储在此字典中，以便在评估期间可以对模型的输出测试数据进行重新缩放
label_scalers = {}

train_x = []
test_x = {}
test_y = {}

# 使用tqdm_notebook遍历数据目录中的文件
for file in tqdm_notebook(os.listdir(data_dir)):
# 跳过我们不使用的文件
  if file[-4:] != ".csv" or file == "pjm_hourly_est.csv":
    continue
  # 将csv文件存储在Pandas DataFrame中
  df = pd.read_csv(data_dir + file, parse_dates=[0])
  # 将时间数据处理为合适的输入格式
  df['hour'] = df.apply(lambda x: x['Datetime'].hour, axis=1)
  df['dayofweek'] = df.apply(lambda x: x['Datetime'].dayofweek, axis=1)
  df['month'] = df.apply(lambda x: x['Datetime'].month, axis=1)
  df['dayofyear'] = df.apply(lambda x: x['Datetime'].dayofyear, axis=1)
  df = df.sort_values("Datetime").drop("Datetime", axis=1)

  # 缩放输入数据
  sc = MinMaxScaler()
  label_sc = MinMaxScaler()
  data = sc.fit_transform(df.values)
  # 获取标签（使用数据）的缩放，以便在评估期间可以将输出重新缩放为实际值
  label_sc.fit(df.iloc[:, 0].values.reshape(-1, 1))
  label_scalers[file] = label_sc

  # 定义回溯期并拆分输入/标签
  lookback = 90
  inputs = np.zeros((len(data) - lookback, lookback, df.shape[1]))
  labels = np.zeros(len(data) - lookback)

  for i in range(lookback, len(data)):
      inputs[i - lookback] = data[i - lookback:i]
      labels[i - lookback] = data[i, 0]
  inputs = inputs.reshape(-1, lookback, df.shape[1])
  labels = labels.reshape(-1, 1)

  # 将数据拆分为训练/测试部分，并将来自不同文件的所有数据合并到单个数组中
  test_portion = int(0.1 * len(inputs))
  if len(train_x) == 0:
      train_x = inputs[:-test_portion]
      train_y = labels[:-test_portion]
  else:
      train_x = np.concatenate((train_x, inputs[:-test_portion]))
      train_y = np.concatenate((train_y, labels[:-test_portion]))
  test_x[file] = (inputs[-test_portion:])
  test_y[file] = (labels[-test_portion:])




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for file in tqdm_notebook(os.listdir(data_dir)):


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




In [6]:
pd.read_csv('AEP_hourly.csv').head()

Unnamed: 0,Datetime,AEP_MW
0,2004-12-31 01:00:00,13478.0
1,2004-12-31 02:00:00,12865.0
2,2004-12-31 03:00:00,12577.0
3,2004-12-31 04:00:00,12517.0
4,2004-12-31 05:00:00,12670.0


我们共有980,185个训练数据序列

为了提高训练速度，可以分批处理数据，这样模型就不需要频繁地更新权重

In [7]:
batch_size = 1024

train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=True)

In [9]:
# torch.cuda.is_available() 检查并返回一个布尔值True，表示是否有可用的GPU，否则返回False
is_cuda = torch.cuda.is_available()

# 如果我们有可用的GPU，我们将把设备设置为GPU。稍后我们将在代码中使用这个设备变量
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
    
print(device)

cuda


定义GRU和LSTM模型的结构。
- 两种模型具有相同的结构，唯一的区别在于循环层（GRU/LSTM）以及隐藏状态的初始化。
- LSTM的隐藏状态是一个包含单元状态和隐藏状态的元组，而GRU只有一个隐藏状态

In [10]:
class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        # 定义GRU层
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        # 定义全连接层
        self.fc = nn.Linear(hidden_dim, output_dim)
        # 定义ReLU激活函数
        self.relu = nn.ReLU()
        
    def forward(self, x, h):
        # 前向传播，GRU层输出结果和隐藏状态
        out, h = self.gru(x, h)
        # 将GRU层的输出传入全连接层
        out = self.fc(self.relu(out[:,-1]))
        return out, h
    
    def init_hidden(self, batch_size):
        # 初始化隐藏状态
        weight = next(self.parameters()).data
        hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device)
        return hidden

class LSTMNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        super(LSTMNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        # 定义LSTM层
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        # 定义全连接层
        self.fc = nn.Linear(hidden_dim, output_dim)
        # 定义ReLU激活函数
        self.relu = nn.ReLU()
        
    def forward(self, x, h):
        # 前向传播，LSTM层输出结果和隐藏状态
        out, h = self.lstm(x, h)
        # 将LSTM层的输出传入全连接层
        out = self.fc(self.relu(out[:,-1]))
        return out, h
    
    def init_hidden(self, batch_size):
        # 初始化隐藏状态和细胞状态
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
        return hidden


- 下面的函数定义了训练过程，这样可以对两个模型进行重复操作。
- 两个模型在隐藏状态的维数、层数、训练周期数和学习率方面都是相同的，并在完全相同的数据集上进行训练和测试

- 为了比较两个模型的性能，我们还将跟踪模型的训练时间，并最终比较两个模型在测试集上的最终准确率。
- 作为准确度衡量标准，使用对称平均绝对百分比误差（sMAPE）来评估模型
- sMAPE是预测值与实际值之间的绝对差值之和除以预测值与实际值的平均值，从而得到一个百分比来衡量误差

In [None]:
def train(train_loader, learn_rate, hidden_dim=256, EPOCHS=5, model_type="GRU"):
    # 获取输入数据的维度
    input_dim = next(iter(train_loader))[0].shape[2]
    output_dim = 1
    n_layers = 2

    # 选择使用GRU还是LSTM模型
    if model_type == "GRU":
        model = GRUNet(input_dim, hidden_dim, output_dim, n_layers)
    else:
        model = LSTMNet(input_dim, hidden_dim, output_dim, n_layers)
    model.to(device)
    
    # 定义损失函数和优化器
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
    
    model.train()
    print("Starting Training of {} model".format(model_type))
    epoch_times = []

    # 开始训练模型
    for epoch in range(1,EPOCHS+1):
        start_time = time.process_time()
        h = model.init_hidden(batch_size)
        avg_loss = 0.
        counter = 0
        for x, label in train_loader:
            counter += 1
            if model_type == "GRU":
                h = h.data
            else:
                h = tuple([e.data for e in h])
            model.zero_grad()
            
            # 前向传播
            out, h = model(x.to(device).float(), h)
            # 计算损失
            loss = criterion(out, label.to(device).float())
            # 反向传播
            loss.backward()
            # 更新参数
            optimizer.step()
            avg_loss += loss.item()
            if counter % 200 == 0:
                print("Epoch {}......Step: {}/{}....... Average Loss for Epoch: {}".format(epoch, counter, len(train_loader), avg_loss/counter))
        current_time = time.process_time()
        print("Epoch {}/{} Done, Total Loss: {}".format(epoch, EPOCHS, avg_loss/len(train_loader)))
        print("Time Elapsed for Epoch: {} seconds".format(str(current_time-start_time)))
        epoch_times.append(current_time-start_time)
    print("Total Training Time: {} seconds".format(str(sum(epoch_times))))
    return model

def evaluate(model, test_x, test_y, label_scalers):
    model.eval()
    outputs = []
    targets = []
    start_time = time.process_time()

    # 评估模型
    for i in test_x.keys():
        inp = torch.from_numpy(np.array(test_x[i]))
        labs = torch.from_numpy(np.array(test_y[i]))
        h = model.init_hidden(inp.shape[0])
        out, h = model(inp.to(device).float(), h)
        outputs.append(label_scalers[i].inverse_transform(out.cpu().detach().numpy()).reshape(-1))
        targets.append(label_scalers[i].inverse_transform(labs.numpy()).reshape(-1))
    print("Evaluation Time: {}".format(str(time.process_time()-start_time)))
    sMAPE = 0

    # 计算sMAPE
    for i in range(len(outputs)):
        sMAPE += np.mean(abs(outputs[i]-targets[i])/(targets[i]+outputs[i])/2)/len(outputs)
    print("sMAPE: {}%".format(sMAPE*100))
    return outputs, targets, sMAPE


In [14]:
lr = 0.001
gru_model = train(train_loader, lr, model_type="GRU")

Starting Training of GRU model
Epoch 1......Step: 200/957....... Average Loss for Epoch: 0.005567154412856326
Epoch 1......Step: 400/957....... Average Loss for Epoch: 0.0031107321189483626
Epoch 1......Step: 600/957....... Average Loss for Epoch: 0.0022110680652743515
Epoch 1......Step: 800/957....... Average Loss for Epoch: 0.0017360910079878523
Epoch 1/5 Done, Total Loss: 0.001492845536226795
Time Elapsed for Epoch: 207.51063615600003 seconds
Epoch 2......Step: 200/957....... Average Loss for Epoch: 0.00021200361974479164
Epoch 2......Step: 400/957....... Average Loss for Epoch: 0.00020321043080912205
Epoch 2......Step: 600/957....... Average Loss for Epoch: 0.000197148703809944
Epoch 2......Step: 800/957....... Average Loss for Epoch: 0.00019122636471365694
Epoch 2/5 Done, Total Loss: 0.0001862149163698651
Time Elapsed for Epoch: 206.89833430600004 seconds
Epoch 3......Step: 200/957....... Average Loss for Epoch: 0.000149296151685121
Epoch 3......Step: 400/957....... Average Loss f

In [None]:
lstm_model = train(train_loader, lr, model_type="LSTM")

In [None]:
gru_outputs, targets, gru_sMAPE = evaluate(gru_model, test_x, test_y, label_scalers)

In [None]:
lstm_outputs, targets, lstm_sMAPE = evaluate(lstm_model, test_x, test_y, label_scalers)

- 尽管LSTM模型可能产生较小的误差，并在性能准确性方面略胜于GRU模型，但这种差异并不显著，因此无法得出结论。
- 比较这两种模型的测试，但总体上并没有一个明确结果说明LSTM和GRU的孰优孰劣