In [None]:
#PyTorch用的包
import torch
import torch.nn as nn
import torch.optim
from torch.autograd import Variable
import pandas as pd
import os
import torch.utils.data as DataSet


#绘图、计算用的程序包
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc

import numpy as np
#将图形直接显示出来
%matplotlib inline



from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# 导入程序所需要的程序包

In [None]:
# Read the monitoring report sheet: skip the first two rows and parse the
# timestamp column as datetimes.
raw_report = pd.read_excel(
    '10.xls',
    sheet_name="监测数据报表",
    skiprows=2,
    parse_dates=['监测时间'],
)

# Columns that are discarded (the original author describes them as NaN columns).
unused_columns = [
    '备注',
    '污水排口监控点排放量(吨)',
    'Unnamed: 6',
    'Unnamed: 7',
    'Unnamed: 8',
    'Unnamed: 9',
]

data = (
    raw_report
    .drop(columns=unused_columns)
    .drop(index=0)               # row labelled 0 — presumably a units/sub-header row, confirm
    .set_index(['监测时间'])      # index the frame by monitoring time
)

# Remove every timestamp whose pH column holds the text marker '停运'
# ("out of service") instead of a measurement.
is_stopped = data['PH值'] == '停运'
data = data.drop(index=data.loc[is_stopped].index)

In [None]:
# Plot display setup.
# Work around Chinese-text rendering problems: use the SimHei font and
# disable the unicode minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def plot_prediction(test_result,predict_result):
    """Draw the true series and the predicted series on the current figure.

    test_result: ground-truth values (drawn in red)
    predict_result: predicted values (drawn in blue)
    """
    # (values, line colour, legend label) for each curve, in drawing order.
    curves = (
        (test_result, 'red', 'water element true value'),
        (predict_result, 'blue', 'water element predicted value'),
    )
    for values, colour, caption in curves:
        plt.plot(values, color=colour, label=caption)

    plt.title("water element values")
    plt.xlabel("time")
    plt.ylabel("element values")
    plt.legend()
    plt.show()

In [None]:
# Cast the three measurement columns to float, one column at a time.
for column_name in ('化学需氧量(COD)(毫克/升)', '氨氮(毫克/升)', 'PH值'):
    data[column_name] = data[column_name].astype('float')

In [None]:
# Plot each measurement in two segments: everything up to 2020-06 and
# everything from 2020-07 onward (described by the original author as the
# train and test portions).
# NOTE(review): both segments of a column get the same legend label (the
# column name), so the legend does not tell the two segments apart.
date_segments = (slice(None, '2020-06'), slice('2020-07', None))

for column_name in ('PH值', '化学需氧量(COD)(毫克/升)', '氨氮(毫克/升)'):
    series = data[column_name]
    for segment in date_segments:
        series[segment].plot(figsize=(16,4), legend=True)

plt.title("water element values")

In [None]:
# Standardise every column of `data` with a StandardScaler.
# NOTE(review): the scaler is fitted on the full frame, so its statistics
# include rows that are later placed in the validation set.
scaler = StandardScaler()
train_set_scaled = scaler.fit(data).transform(data)

In [None]:
# Build the training and validation sets.
# Every sample pairs a window of `n_prev` consecutive rows (the input) with the
# row that immediately follows the window (the target).
SPLIT_SEED = 42  # fixed seed so the shuffle, and therefore the split, is reproducible
n_prev = 7*24  # sliding-window length: 168 rows (presumably one week of hourly readings — confirm)

n_windows = len(train_set_scaled) - n_prev
if n_windows <= 0:
    # Fail here with a clear message instead of producing empty data sets
    # that only break later during training.
    raise ValueError(
        "Need more than {} rows to build sliding windows, got {}".format(
            n_prev, len(train_set_scaled)))

# X[i] has shape (n_prev, n_features); Y[i] is the single row right after it.
X = [train_set_scaled[start:start + n_prev] for start in range(n_windows)]
Y = [train_set_scaled[start + n_prev] for start in range(n_windows)]

# Shuffle inputs and targets with the same permutation so the pairs stay aligned.
# NOTE(review): neighbouring windows overlap almost entirely, so a random split
# puts near-duplicates of training windows into the validation set; a
# chronological split would give a more honest validation loss.
rng = np.random.default_rng(SPLIT_SEED)
idx = rng.permutation(len(X))
X = [X[i] for i in idx]
Y = [Y[i] for i in idx]

# Hold out the first tenth of the shuffled samples for validation.
n_valid = len(X) // 10
validX, X = X[:n_valid], X[n_valid:]
validY, Y = Y[:n_valid], Y[n_valid:]

In [None]:

# Mini-batch loaders.
# Inputs and targets live in separate loaders that are later iterated in
# lock-step (zipped), so every loader keeps shuffle=False: the batches are
# served in order, not shuffled, and therefore stay aligned.
BATCH_SIZE = 24


def _sequential_loader(samples):
    """Wrap `samples` in a DataLoader that yields ordered batches of BATCH_SIZE items."""
    return torch.utils.data.DataLoader(dataset=samples,
                                       batch_size=BATCH_SIZE,
                                       shuffle=False)


# Validation inputs and targets.
test_data = _sequential_loader(validX)
test_label = _sequential_loader(validY)

# Training inputs and targets.
train_data = _sequential_loader(X)
train_label = _sequential_loader(Y)


# Hyper-parameter notes left by the original author:

### input_dim = 3   # input dimension
### hidden_dim = 128 # hidden-layer dimension
### layer_dim = 1   # number of hidden layers
### output_dim = 3   # output dimension

In [None]:
class LSTMNetwork(nn.Module):
    """Stacked LSTM followed by a linear layer that predicts one vector per sequence.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        out_size: number of values predicted for each sequence.
        n_layers: number of stacked LSTM layers (default 2).
    """

    def __init__(self, input_size, hidden_size, out_size, n_layers=2):
        super(LSTMNetwork, self).__init__()
        self.n_layers = n_layers

        self.hidden_size = hidden_size
        self.out_size = out_size
        # `n_layers` stacked LSTM layers; batch_first=True means inputs are
        # laid out as (batch, time, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, n_layers, batch_first = True)
        # Dropout with probability 0.2, applied to the LSTM outputs.
        self.dropout = nn.Dropout(0.2)
        # Fully connected layer mapping the hidden state to the prediction.
        self.fc = nn.Linear(hidden_size, out_size)
        # Log-softmax layer: not used by forward(); kept so that existing
        # code accessing the attribute keeps working.
        self.softmax = nn.LogSoftmax(dim = 1)

    def forward(self, input, hidden=None):
        """Run the network on a batch of sequences.

        Args:
            input: tensor of shape (batch_size, time_steps, input_size).
            hidden: optional initial state in the format returned by
                initHidden(), i.e. a list whose first element is the
                (h_0, c_0) tuple. When None, nn.LSTM starts from zeros.

        Returns:
            Tensor of shape (batch_size, out_size).
        """
        # Previously `hidden[0]` was evaluated unconditionally, so calling the
        # model with the default hidden=None raised a TypeError.
        state = None if hidden is None else hidden[0]
        # output: (batch_size, time_steps, hidden_size)
        output, _ = self.lstm(input, state)
        output = self.dropout(output)
        # Keep only the last time step: (batch_size, hidden_size)
        output = output[:, -1, ...]
        # Project to the prediction: (batch_size, out_size)
        out = self.fc(output)
        return out

    def initHidden(self, batch_size):
        """Create an all-zero initial LSTM state for `batch_size` sequences.

        Returns:
            A one-element list holding the (hidden, cell) tuple; each tensor has
            shape (n_layers, batch_size, hidden_size) and lives on the same
            device, with the same dtype, as the model parameters.
        """
        # Match the parameters so the state stays valid after .to(device)/.double().
        reference = next(self.parameters())
        shape = (self.n_layers, batch_size, self.hidden_size)
        # The zero state is a constant, not something being optimised, so it
        # does not need to track gradients.
        hidden1 = torch.zeros(shape, dtype=reference.dtype, device=reference.device)
        cell1 = torch.zeros(shape, dtype=reference.dtype, device=reference.device)
        return [(hidden1, cell1)]

In [None]:
# Build the LSTM (3 input features, 128 hidden units, 3 outputs) and train it
# with plain SGD on the mean-squared error.
lstm = LSTMNetwork(3,128,3)
optimizer = torch.optim.SGD(lstm.parameters(), lr=0.001)
criterion = nn.MSELoss()  # built once instead of once per batch
num_epochs = 50
# Not filled in by this cell; kept because cells outside this view may refer to them.
train_losses = []
test_losses = []
# One [mean train loss, mean validation loss, validation accuracy] entry per evaluated epoch.
records = []

for epoch in range(num_epochs):
    lstm.train()  # training mode: dropout active
    train_loss = []

    # The input and label loaders are iterated in lock-step.
    # The loop variables deliberately avoid the name `data`, which the notebook
    # already uses for the source DataFrame.
    for batch_x, batch_y in zip(train_data, train_label):
        # The loaders may yield float64 batches; the model parameters are float32.
        x = torch.as_tensor(batch_x, dtype=torch.float32)
        y = torch.as_tensor(batch_y, dtype=torch.float32)
        # Sized from the batch itself because the last batch can be smaller.
        init_hidden = lstm.initHidden(len(x))

        optimizer.zero_grad()
        outputs = lstm(x, init_hidden)
        loss = criterion(outputs, y)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()

    if epoch % 2==0:
        # Evaluate on the validation set every second epoch.
        lstm.eval()  # evaluation mode: dropout disabled
        test_loss = []
        n_correct = 0
        n_total = 0

        with torch.no_grad():  # no gradients are needed for evaluation
            for batch_x, batch_y in zip(test_data, test_label):
                x = torch.as_tensor(batch_x, dtype=torch.float32)
                y = torch.as_tensor(batch_y, dtype=torch.float32)
                init_hidden = lstm.initHidden(len(x))
                outputs = lstm(x, init_hidden)
                test_loss.append(criterion(outputs, y).item())

                # "Accuracy": share of predicted values equal to the target
                # after rounding to 4 decimals. Counts are accumulated over all
                # batches; previously only the last batch was reported.
                matches = torch.round(outputs * 1e4) == torch.round(y * 1e4)
                n_correct += int(matches.sum())
                n_total += matches.numel()

        rights = n_correct / n_total if n_total else float('nan')

        print('第{}轮, 训练Loss:{:.2f}, 校验Loss:{:.2f}, 校验准确度:{:.2f}'.format(epoch, 
                                                                    np.mean(train_loss),
                                                                    np.mean(test_loss),
                                                                    rights
                                                               ))
        records.append([np.mean(train_loss), np.mean(test_loss), rights])

In [None]:
# Serialise the whole trained model object (not just its state_dict) to disk.
checkpoint_path = 'minst_conv_checkpoint'
torch.save(lstm, checkpoint_path)