In [1]:
1

1

In [2]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import LSTM, GRU, LSTMCell
import os

# One complete optimisation step
def train_once(model, criterion, opt, x, y_true):
    """Run a single forward / backward / parameter-update pass on one batch.

    :param model: instantiated model; called as ``model(x)``
    :param criterion: loss callable, invoked as ``criterion(prediction, y_true)``
    :param opt: optimizer exposing ``zero_grad`` and ``step``
    :param x: input batch fed to the model
    :param y_true: targets for the batch
    :return: ``(prediction as a NumPy array, loss as a Python float)``
    """
    # Gradients are reset to None instead of being zero-filled.
    opt.zero_grad(set_to_none=True)

    prediction = model(x)
    batch_loss = criterion(prediction, y_true)

    batch_loss.backward()
    opt.step()

    # Detach from the graph and move to host memory before converting to NumPy.
    prediction_np = prediction.detach().cpu().numpy()
    return prediction_np, batch_loss.item()


def test_once(model, criterion, x, y_true):
    """进行一次测试，阻止计算图追踪,节省内存，加快速度
    :param model:
    :param criterion:
    :param opt:
    :param x:
    :param y_true:
    """
    with torch.no_grad():
        y_pred = model(x)
        loss = criterion(y_pred, y_true)
        return y_pred.detach().cpu().numpy(), loss.item()


def plotloss(train_loss_list, test_loss_list, is_loss=True):
    """Plot two curves on one figure, save it under ``./pic`` and display it.

    With ``is_loss=True`` the two sequences are drawn as train/test loss per
    epoch and saved to ``./pic/loss.png``. With ``is_loss=False`` they are drawn
    as predicted vs. true values and saved to ``./pic/pred_true.png``.

    :param train_loss_list: first series (train loss, or predictions)
    :param test_loss_list: second series (test loss, or true values)
    :param is_loss: selects legend labels, axis labels and output file name
    """
    fig = plt.figure(figsize=(10, 7))
    plt.plot(train_loss_list, color='red', label='Train_loss' if is_loss else 'pred_values')
    plt.plot(test_loss_list, color='blue', label='Test_loss' if is_loss else 'true_values')
    # The prediction plot is indexed by sample, not by epoch.
    plt.xlabel('Epochs' if is_loss else 'Sample index')
    plt.ylabel('loss' if is_loss else 'value')
    plt.legend()

    os.makedirs('./pic', exist_ok=True)
    # Save BEFORE show(): once show() has released the figure, savefig would
    # write an empty canvas. The prediction plot now also lands inside ./pic
    # (the original path './picpred_true.png' was missing the separator).
    fig.savefig('./pic/loss.png' if is_loss else './pic/pred_true.png', dpi=400)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)



# Early-stopping helper
class EarlyStopping():
    """
    Signal a stop once the monitored loss has failed to improve by more than
    ``tol`` for ``patience`` consecutive calls.
    """
    def __init__(self, patiende=5, tol=0.000005):
        """
        :param patiende: number of consecutive non-improving calls after which
            stopping is signalled (the keyword spelling is kept for existing callers)
        :param tol: minimum decrease versus the best loss seen so far that
            counts as an improvement
        """
        self.patience = patiende
        self.tol = tol
        self.counter = 0          # consecutive calls without improvement
        self.lowest_loss = None   # best loss seen so far; None until the first call
        self.early_stop = False

    def __call__(self, val_loss):
        """Record ``val_loss`` and return True once patience is exhausted.

        :param val_loss: loss value for the current epoch
        :return: the (sticky) early-stop flag
        """
        # Compare against None explicitly: a legitimate best loss of 0.0 is falsy
        # and would otherwise be overwritten by the next, worse, value.
        if self.lowest_loss is None:
            self.lowest_loss = val_loss
        elif self.lowest_loss - val_loss > self.tol:
            self.counter = 0
            self.lowest_loss = val_loss
        else:
            # Plain `else` so that a NaN loss (every comparison False) also
            # counts as "no improvement" instead of being silently ignored.
            self.counter += 1
            print(f"\t NOTICE: Early stopping counter {self.counter} of {self.patience}")
            if self.counter >= self.patience:
                print("\t NOTICE: Early Stopping Actived")
                self.early_stop = True

        return self.early_stop


# %%
# Training and evaluation driver
def fit(model, batch_train, batch_test, criterion, optimizer, scheduler, epochs, patiende=5, tol=0.000005,
        save_model_path='./model/', model_name='lstm'):
    """
    Train ``model`` for up to ``epochs`` epochs, evaluating after every epoch.

    Per epoch: run ``train_once`` over ``batch_train``, run ``test_once`` over
    ``batch_test``, print the sample-weighted mean losses and the test RMSE,
    step the scheduler with the test loss, checkpoint the weights whenever the
    test RMSE improves, and consult ``EarlyStopping`` on the test loss.
    Afterwards two figures are produced through ``plotloss``.

    :param model: module to train
    :param batch_train: iterable of ``(x, y)`` batches exposing ``.dataset``
        (presumably a ``DataLoader`` - confirm with the caller)
    :param batch_test: same contract as ``batch_train``, used for evaluation
    :param criterion: loss callable; the weighting below assumes it returns a
        per-batch mean - TODO confirm
    :param optimizer: optimizer handed to ``train_once``
    :param scheduler: scheduler whose ``step`` accepts the test loss
    :param epochs: maximum number of epochs
    :param patiende: patience forwarded to ``EarlyStopping``
    :param tol: tolerance forwarded to ``EarlyStopping``
    :param save_model_path: directory receiving ``best_<epoch><model_name>.pt``
    :param model_name: suffix used in the checkpoint file name
    :return: None
    """
    # makedirs copes with nested paths and with an already-existing directory.
    os.makedirs(save_model_path, exist_ok=True)
    all_train_num = len(batch_train.dataset)
    all_test_num = len(batch_test.dataset)
    print("开始训练..............................")
    print("\tall_train_num:", all_train_num, "\tall_test_num", all_test_num)
    train_loss_list = []
    test_loss_list = []
    early_stopping = EarlyStopping(patiende, tol)
    best_score = None
    best_epoch = 0
    best_epoch_predictions, best_epoch_true_values = [], []
    plot_num = 2000  # at most this many points are kept for the prediction plot

    for epoch in range(1, epochs + 1):
        # ---- training pass ----
        model.train()
        train_num = 0
        loss_train = 0.0
        for x, y in batch_train:
            _, loss = train_once(model, criterion, optimizer, x, y)
            loss_train += loss * x.size(0)
            train_num += x.size(0)

        # Average over the samples actually seen; this differs from
        # len(dataset) when the loader drops the last incomplete batch.
        loss_train = loss_train / max(train_num, 1)
        train_loss_list.append(loss_train)

        # ---- evaluation pass ----
        model.eval()
        epoch_predictions = []
        epoch_true_values = []

        test_num = 0
        loss_test = 0.0
        for x, y in batch_test:
            y_pred, loss = test_once(model, criterion, x, y)
            loss_test += loss * x.size(0)
            test_num += x.size(0)
            epoch_predictions.append(y_pred)
            # detach/cpu first: Tensor.numpy() raises for CUDA or grad-tracking tensors.
            epoch_true_values.append(y.detach().cpu().numpy())

        epoch_predictions = np.concatenate(epoch_predictions, axis=0)
        epoch_true_values = np.concatenate(epoch_true_values, axis=0)
        rmse = np.sqrt(np.mean((epoch_predictions - epoch_true_values) ** 2))

        loss_test = loss_test / max(test_num, 1)
        test_loss_list.append(loss_test)
        # Per-epoch report.
        print(f"Epoch:{epoch}, Train_loss:{round(loss_train, 5)}, Test_loss:{round(loss_test, 5)}", "Rmse:", rmse)
        scheduler.step(loss_test)

        # Checkpoint on the best test RMSE. `is None` rather than truthiness,
        # so a perfect score of 0.0 is not treated as "no score yet".
        if best_score is None or rmse < best_score:
            best_epoch = epoch
            best_score = rmse
            torch.save(model.state_dict(), os.path.join(save_model_path, 'best_' + str(epoch) + model_name + '.pt'))
            best_epoch_predictions = list(epoch_predictions.reshape(-1))[:plot_num]
            best_epoch_true_values = list(epoch_true_values.reshape(-1))[:plot_num]

        if early_stopping(loss_test):
            break

    # Note: the value printed as "Best_loss" is the best test RMSE.
    print("\tBest_epoch", best_epoch, "\tBest_loss:", best_score)
    print("Done")

    print("bset_epoch_predictions:", best_epoch_predictions, "\nbset_epoch_true_values:", best_epoch_true_values)
    plotloss(best_epoch_predictions, best_epoch_true_values, is_loss=False)
    plotloss(train_loss_list, test_loss_list)



# Model validation metric
def MSE(Y_ture, Y_predict):
    """Return the square root of the summed squared error divided by ``Y_ture.shape[0]``.

    NOTE(review): despite its name this yields a root-mean-squared error; the
    name is kept because callers may reference it.

    :param Y_ture: ground-truth values (array-like exposing ``.shape``)
    :param Y_predict: predicted values, broadcast-compatible with ``Y_ture``
    """
    residual = Y_ture - Y_predict
    squared_error_sum = (residual ** 2).sum()
    mean_squared = squared_error_sum / Y_ture.shape[0]
    return mean_squared ** 0.5

In [52]:
import random
import numpy as np
from torchinfo import summary
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import LSTM, GRU, LSTMCell
# Optimizer and learning-rate scheduler imports
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch import optim
from tqdm import tqdm
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import os


# Data loading and feature engineering
def load_data(topn=None, path=None):
    """Load the wind-power CSV, engineer features and min-max scale every column.

    Steps: read the CSV, keep rows of station ``'f1'``, derive ``mooth`` (month)
    and ``hour`` from the timestamp column, drop rows whose power output is the
    literal string ``'<NULL>'``, add three derived columns (``v^3``, ``1/tmp``,
    ``new``), cast everything to float32 and rescale with ``MinMaxScaler``.

    :param topn: forwarded to ``pd.read_csv(nrows=...)``; the row limit is
        applied BEFORE the station filter, so fewer rows may be returned
    :param path: CSV file to read; defaults to the location the notebook used
    :return: DataFrame of scaled float32 features with a fresh 0..n-1 index
    """
    # Function-scope import, as in the original cell.
    from sklearn.preprocessing import MinMaxScaler

    if path is None:
        # The original first assigned the training-set CSV
        # ('.../A榜-训练集_海上风电预测_气象变量及实际功率数据.csv') and immediately
        # overwrote it with this path, so this is the file that was actually read.
        path = 'F:/learning/power_prediction/ans/test_0.047370373032452714.csv'

    raw = pd.read_csv(path, encoding='utf-8', nrows=topn)

    # .copy() so the column assignments below write to an independent frame
    # rather than to a filtered view (avoids SettingWithCopyWarning).
    station = raw[raw['站点编号'] == 'f1'].copy()
    station['时间'] = pd.to_datetime(station['时间'])
    station['mooth'] = station['时间'].dt.month
    station['hour'] = station['时间'].dt.hour

    cols = ['气压(Pa）', '相对湿度（%）', '云量', '10米风速（10m/s）', '10米风向（°)',
            '温度（K）', '辐照强度（J/m2）', '降水（m）', '100m风速（100m/s）', '100m风向（°)', '出力(MW)',
            'mooth', 'hour']
    station = station[station['出力(MW)'] != '<NULL>']
    feats = station[cols].copy()

    # Column-wise arithmetic instead of row-by-row .apply; same element-wise formulas.
    wind_100m = feats['100m风速（100m/s）']
    feats['v^3'] = wind_100m * wind_100m * wind_100m
    feats['1/tmp'] = 1 / (feats['温度（K）'] + 0.0001)  # small offset guards against division by zero
    feats['new'] = feats['v^3'] * feats['气压(Pa）'] * feats['1/tmp']

    feats = feats.astype('float32')
    print(feats.columns)

    # Rescale each column to [0, 1]; the scaler is fitted on this frame only and is not returned.
    scaled = MinMaxScaler().fit_transform(feats.values)
    return pd.DataFrame(scaled, columns=feats.columns.tolist())


def creat_dataset(data, seq_len=96, label=None):
    """Turn a feature frame into sliding windows for sequence-to-one regression.

    Window ``i`` holds rows ``i .. i+seq_len-1`` of every feature column (the
    label column is excluded when ``label`` is given); its target is the label
    value of row ``i+seq_len``. Only the temporal structure of the features is
    used, not the label's own history.

    :param data: DataFrame of features (and the label column, if any)
    :param seq_len: window length in rows
    :param label: name of the target column; when falsy all columns are
        features and no targets are produced
    :return: ``(windows, targets)`` tensors. ``windows`` has shape
        ``(n, seq_len, n_features)`` with ``n = max(len(data) - seq_len, 0)``;
        ``targets`` has shape ``(n, 1)``, or ``(n, 0)`` when ``label`` is falsy
    """
    feats = [feat for feat in data.columns if feat != label] if label else list(data.columns)
    print('总数：',len(data),'步长：',seq_len)

    # Extract the arrays once; slicing a NumPy array per window is far cheaper
    # than re-slicing the DataFrame and doing an iloc row lookup every iteration.
    feat_values = data[feats].to_numpy()
    n_windows = max(len(data) - seq_len, 0)

    windows = [feat_values[i:i + seq_len] for i in tqdm(range(n_windows))]
    if windows:
        window_array = np.stack(windows)
    else:
        # Keep a well-formed 3-D shape even when the frame is too short for one window.
        window_array = np.empty((0, seq_len, len(feats)), dtype=feat_values.dtype)

    if label:
        label_values = data[label].to_numpy()
        targets = torch.tensor(label_values[seq_len:seq_len + n_windows]).view(n_windows, 1)
    else:
        # Previously this path crashed: a list of empty lists cannot be viewed as (n, 1).
        targets = torch.empty((n_windows, 0))

    return torch.tensor(window_array), targets


def train_test_split(train_tensor, label_tensor, ratio=0.8):
    """Chronological (unshuffled) split of features and labels.

    The first ``int(len(train_tensor) * ratio)`` items form the training part,
    the remainder the test part.

    :param train_tensor: sliceable sequence of samples
    :param label_tensor: sliceable sequence of labels, aligned with the samples
    :param ratio: fraction of items assigned to the training part
    :return: ``(x_train, y_train, x_test, y_test)``
    """
    cut = int(len(train_tensor) * ratio)
    head, tail = slice(None, cut), slice(cut, None)
    return train_tensor[head], label_tensor[head], train_tensor[tail], label_tensor[tail]



In [54]:
# Smoke-test the loader: topn=100 limits the CSV read to its first 100 rows.
df = load_data(100)

Index(['气压(Pa）', '相对湿度（%）', '云量', '10米风速（10m/s）', '10米风向（°)', '温度（K）',
       '辐照强度（J/m2）', '降水（m）', '100m风速（100m/s）', '100m风向（°)', '出力(MW)', 'mooth',
       'hour', 'v^3', '1/tmp', 'new'],
      dtype='object')


In [33]:
import torch
from torch import nn
from torch.nn import LSTM, GRU, LSTMCell

# LSTM network architecture
# constructor arguments: input_size, hidden_size, num_layers, out_size
class LstmModel(nn.Module):
    """Stacked (optionally bidirectional) LSTM followed by dropout and a linear head.

    The prediction is computed from the LSTM output at the last time step only.
    """
    def __init__(self,input_size=12, hidden_size=50, num_layers=1, out_size=1, bidirectional=False, batch_first=True):
        """
        :param input_size: number of features per time step
        :param hidden_size: LSTM hidden width
        :param num_layers: number of stacked LSTM layers
        :param out_size: width of the linear output
        :param bidirectional: run the LSTM in both directions
        :param batch_first: whether inputs are ``(batch, seq, feat)`` (True) or
            ``(seq, batch, feat)`` (False)
        """
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.batch_first = batch_first
        # Kept as the number of directions (1 or 2), exactly as before.
        self.bidirectional = 2 if bidirectional else 1
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=batch_first, bidirectional=bidirectional)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(self.bidirectional * hidden_size, out_size)

    def forward(self, x):
        """Return a ``(batch, out_size)`` prediction for the input sequence ``x``."""
        # No explicit (h0, c0): nn.LSTM then starts from zeros created on x's
        # device and dtype. The previous code drew fresh torch.randn states on
        # the CPU for every call, which made eval-mode predictions
        # nondeterministic and broke as soon as x lived on another device.
        output, _ = self.lstm(x)
        # The time axis is dim 1 when batch_first, dim 0 otherwise.
        last_step = output[:, -1, :] if self.batch_first else output[-1]
        return self.fc(self.dropout(last_step))



# Two-layer network built from explicit LSTMCell steps
# constructor arguments: input_size, hidden_size1, hidden_size2, output_size
class LstmCellModel(nn.Module):
    """Two stacked ``nn.LSTMCell`` layers unrolled manually over the time axis.

    Expects ``x`` laid out as ``(batch, seq_len, input_size)`` and returns the
    linear projection of the second layer's hidden state after the last step.
    """
    def __init__(self, input_size=1, hidden_size1=100, hidden_size2=50, output_size=1, dropout=0.1):
        """
        :param input_size: number of features per time step
        :param hidden_size1: hidden width of the first cell
        :param hidden_size2: hidden width of the second cell
        :param output_size: width of the linear output
        :param dropout: dropout probability applied to the states after each cell
        """
        # Must run before any sub-module is assigned; without it nn.Module
        # raises as soon as self.lstm0 is set.
        super().__init__()
        self.input_size = input_size
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.output_size = output_size
        self.lstm0 = nn.LSTMCell(input_size, hidden_size1)
        self.lstm1 = nn.LSTMCell(hidden_size1, hidden_size2)
        self.fc = nn.Linear(hidden_size2, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return a ``(batch, output_size)`` prediction for ``x``."""
        batch_size, seq_len = x.size(0), x.size(1)
        # LSTMCell states are 2-D: (batch, hidden). Zero-initialised on x's
        # device/dtype (the original called the non-existent `torch.randon`
        # with an extra seq_len dimension).
        h_10 = x.new_zeros(batch_size, self.hidden_size1)
        c_10 = x.new_zeros(batch_size, self.hidden_size1)
        h_11 = x.new_zeros(batch_size, self.hidden_size2)
        c_11 = x.new_zeros(batch_size, self.hidden_size2)

        for t in range(seq_len):  # walk over the time steps
            h_10, c_10 = self.lstm0(x[:, t, :], (h_10, c_10))
            # NOTE(review): dropout is applied to the recurrent hidden AND cell
            # states, as in the original design - confirm this is intended.
            h_10, c_10 = self.dropout(h_10), self.dropout(c_10)
            h_11, c_11 = self.lstm1(h_10, (h_11, c_11))
            h_11, c_11 = self.dropout(h_11), self.dropout(c_11)

        # Only the final hidden state of the top layer feeds the output head.
        return self.fc(h_11)

In [34]:
import random
import numpy as np
from torchinfo import summary
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import LSTM, GRU, LSTMCell
# Optimizer and learning-rate scheduler imports
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch import optim
from tqdm import tqdm
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import os
from sklearn.preprocessing import MinMaxScaler

In [35]:
# Global random seeds (torch, Python's random, NumPy) for reproducibility
torch.manual_seed(1412)
random.seed(1412)
np.random.seed(1412)
# Device selection: CUDA when available, otherwise CPU.
# NOTE(review): nothing visible in this notebook moves the model or the batches to `device`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
# NOTE(review): input_size is overwritten in a later cell with len(df.columns) - 1.
input_size = 12
hidden_size = 256
num_layers = 2
output_size = 1
learning_rate = 0.1
weight_decay = 1e-4
num_epochs = 30
batch_size= 30

# Target column name and sliding-window length (rows per sample)
label = '出力(MW)'
seq_len = 96

In [55]:
# Reload the first 100 CSV rows and derive the model input width:
# every column except one (the label column is excluded from the features).
df = load_data(100)
input_size = len(df.columns) - 1

Index(['气压(Pa）', '相对湿度（%）', '云量', '10米风速（10m/s）', '10米风向（°)', '温度（K）',
       '辐照强度（J/m2）', '降水（m）', '100m风速（100m/s）', '100m风向（°)', '出力(MW)', 'mooth',
       'hour', 'v^3', '1/tmp', 'new'],
      dtype='object')


In [56]:
# Cut the frame into sliding windows of `seq_len` rows; each window's target is
# the `label` value of the row immediately after the window.
train_tensor, label_tensor = creat_dataset(df, seq_len, label=label)

总数： 100 步长： 96


100%|██████████| 4/4 [00:00<00:00, 445.60it/s]


In [57]:
train_tensor.shape,label_tensor.shape 

(torch.Size([4, 96, 15]), torch.Size([4, 1]))

In [58]:
label_tensor

tensor([[0.7703],
        [0.7806],
        [1.0000],
        [0.9557]])

In [47]:
# Build the LSTM regressor from the hyper-parameters above
# (bidirectional and batch_first keep their constructor defaults).
model = LstmModel(input_size, hidden_size, num_layers,output_size)

In [48]:
# Forward-pass smoke test on the windowed features.
output = model(train_tensor)

In [49]:
output.shape

torch.Size([4, 1])

In [29]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# Min-max normalise the data.
# NOTE(review): load_data() already returns a min-max-scaled frame, and this
# rebinds `df` to a NumPy array, which creat_dataset() cannot consume (it reads
# `data.columns`). Re-running cells after this one therefore needs a fresh `df`.
df = scaler.fit_transform(df.values)


array([[0.9933777 , 1.0000001 , 0.12672344, ..., 0.9452559 , 0.2447815 ,
        0.9429912 ],
       [1.        , 0.9971732 , 0.01498605, ..., 0.96645725, 0.33702087,
        0.9649913 ],
       [0.9906006 , 0.98988354, 0.04880779, ..., 0.9851136 , 0.41423035,
        0.98427457],
       ...,
       [0.61782837, 0.        , 0.276691  , ..., 0.64154255, 0.01063538,
        0.6375604 ],
       [0.6145325 , 0.0038619 , 0.63991886, ..., 0.6635642 , 0.01457214,
        0.65945977],
       [0.60543823, 0.04548311, 1.        , ..., 0.6851301 , 0.02381897,
        0.6809249 ]], dtype=float32)

In [28]:
df 

Unnamed: 0,气压(Pa）,相对湿度（%）,云量,10米风速（10m/s）,10米风向（°),温度（K）,辐照强度（J/m2）,降水（m）,100m风速（100m/s）,100m风向（°),出力(MW),mooth,hour,v^3,1/tmp,new
0,0.989288,0.704100,0.007812,0.501334,0.073666,0.936529,0.0,0.000051,0.465858,0.076415,0.388476,0.083333,0.000000,0.101102,0.962278,0.105563
1,0.989312,0.703176,0.000924,0.505687,0.065491,0.935858,0.0,0.000051,0.468700,0.068653,0.377673,0.083333,0.000000,0.102964,0.962967,0.107586
2,0.989278,0.700791,0.003009,0.509345,0.059848,0.935298,0.0,0.000051,0.471172,0.063179,0.365744,0.083333,0.000000,0.104602,0.963544,0.109360
3,0.989200,0.697440,0.011402,0.511720,0.056221,0.934832,0.0,0.000051,0.472722,0.059548,0.345937,0.083333,0.000000,0.105637,0.964025,0.110488
4,0.989088,0.693615,0.023438,0.512657,0.054131,0.934445,0.0,0.000051,0.473127,0.057338,0.352014,0.083333,0.043478,0.105909,0.964424,0.110806
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.987937,0.445897,0.000000,0.388932,0.015082,0.938075,0.0,0.000011,0.410481,0.028425,0.267387,0.083333,1.000000,0.069164,0.960692,0.071998
96,0.987951,0.397702,0.000000,0.390514,0.011599,0.938197,0.0,0.000000,0.416190,0.025516,0.257259,0.083333,0.000000,0.072090,0.960567,0.075035
97,0.987955,0.376978,0.017058,0.391743,0.008970,0.938235,0.0,0.000000,0.420657,0.023387,0.214720,0.083333,0.000000,0.074436,0.960528,0.077475
98,0.987944,0.378242,0.039451,0.392986,0.007096,0.938206,0.0,0.000000,0.424269,0.021904,0.269638,0.083333,0.000000,0.076370,0.960558,0.079489
