In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 设置中文显示
plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei']
plt.rcParams['axes.unicode_minus'] = False

import math
import sklearn.metrics as skm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense,RepeatVector,TimeDistributed


In [2]:

def split_dataset(data, train_start=1702, train_end=30982, test_end=36718, steps_per_day=24):
    '''
    Split an hourly series into day-sized training and test blocks.

    Parameters
    ----------
    data : array-like
        Observations ordered in time along the first axis (one row per hour).
    train_start, train_end : int
        Row range ``[train_start, train_end)`` used for training.
    test_end : int
        The test partition is the row range ``[train_end, test_end)``.
    steps_per_day : int
        Number of consecutive rows grouped into one block (24 hourly rows per day).

    Returns
    -------
    train, test : np.ndarray
        Copies of the two partitions reshaped to
        ``(n_days, steps_per_day, *data.shape[1:])``.

    Raises
    ------
    ValueError
        If either partition does not contain a whole number of days.
    '''
    # Use the argument that was passed in; the previous version silently read a
    # module-level `dataset` variable and ignored `data`.
    data = np.asarray(data)

    def _to_days(block, label):
        # Group consecutive rows into blocks of `steps_per_day` rows each.
        if len(block) % steps_per_day != 0:
            raise ValueError(
                '%s partition has %d rows, which is not a multiple of %d'
                % (label, len(block), steps_per_day))
        n_days = len(block) // steps_per_day
        # np.array(...) copies, so callers can modify the result without
        # touching the input array.
        return np.array(block.reshape((n_days, steps_per_day) + block.shape[1:]))

    train = _to_days(data[train_start:train_end], 'train')
    test = _to_days(data[train_end:test_end], 'test')
    return train, test


def evaluate_forecasts(actual, predicted):
    '''
    Compute per-step and overall error metrics for multi-step forecasts.

    Parameters
    ----------
    actual : array-like, shape (n_samples, n_steps)
        Observed values.
    predicted : array-like, shape (n_samples, n_steps) or (n_samples, n_steps, 1)
        Forecast values. A trailing axis of length 1 (as produced by the
        TimeDistributed(Dense(1)) output stacked in evaluate_model) is dropped.

    Returns
    -------
    score : float
        RMSE over every sample and step.
    scores : list of float
        RMSE for each forecast step (column).
    MAE : list of float
        Mean absolute error for each forecast step (column).

    Raises
    ------
    ValueError
        If the two arrays do not have the same shape after dropping the
        trailing axis.
    '''
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    # Drop the trailing feature axis so the overall score is a plain float
    # rather than a 1-element array fed to math.sqrt.
    if predicted.ndim == actual.ndim + 1 and predicted.shape[-1] == 1:
        predicted = predicted[..., 0]
    if actual.shape != predicted.shape:
        raise ValueError('shape mismatch: actual %s vs predicted %s'
                         % (actual.shape, predicted.shape))

    errors = actual - predicted
    squared = errors ** 2

    # Per-step metrics: reduce over the sample axis.
    scores = np.sqrt(squared.mean(axis=0)).tolist()
    MAE = np.abs(errors).mean(axis=0).tolist()

    # Overall RMSE across every sample and step.
    score = float(np.sqrt(squared.mean()))
    print('actual.shape[0]:{}, actual.shape[1]:{}'.format(actual.shape[0], actual.shape[1]))
    return score, scores, MAE

def summarize_scores(name, score, scores,MAE):
    '''
    Print a one-line summary of the model's scores, then the MAE values.

    The first line shows the model name, the overall score with three decimals
    and the per-step scores with one decimal each; it is followed by a blank
    line. MAE is then printed as-is.
    '''
    formatted = []
    for step_score in scores:
        formatted.append('%.1f' % step_score)
    joined = ', '.join(formatted)
    print('%s: [%.3f] %s\n' % (name, score, joined))
    print(MAE)
    
def sliding_window(train, sw_width=24, n_out=24, in_start=0):
    '''
    Build supervised (input, target) samples with a stride-1 sliding window.

    Parameters
    ----------
    train : np.ndarray, shape (n_blocks, block_len, n_features)
        Blocked series; it is flattened back to one continuous sequence.
    sw_width : int
        Number of time steps in each input window.
    n_out : int
        Number of time steps in each target window (immediately after the input).
    in_start : int
        Index of the first input window in the flattened sequence.

    Returns
    -------
    X : np.ndarray, shape (n_samples, sw_width, 1)
        Input windows taken from column 0.
    y : np.ndarray, shape (n_samples, n_out)
        Target windows taken from column 0.
        When the sequence is too short for a single sample, both arrays are
        returned empty but with the shapes above (n_samples == 0).
    '''
    # Flatten the blocked samples into one continuous sequence.
    data = train.reshape((train.shape[0] * train.shape[1], train.shape[2]))
    X, y = [], []

    # A window is complete when out_end <= len(data). The previous strict
    # comparison dropped the last valid sample, whose target ends exactly at
    # the end of the sequence.
    last_start = len(data) - sw_width - n_out
    for start in range(in_start, last_start + 1):
        in_end = start + sw_width
        out_end = in_end + n_out
        window = data[start:in_end, 0]
        X.append(window.reshape((len(window), 1)))
        y.append(data[in_end:out_end, 0])

    if not X:
        # Keep the documented rank so callers can still read .shape[1] / .shape[2].
        return (np.empty((0, sw_width, 1), dtype=data.dtype),
                np.empty((0, n_out), dtype=data.dtype))
    return np.array(X), np.array(y)

def lstm_model(train, sw_width, in_start=0, verbose_set=0, epochs_num=20, batch_size_set=4):
    '''
    Build and fit an encoder-decoder LSTM for multi-step forecasting.

    Parameters
    ----------
    train : np.ndarray
        Blocked training data, passed to sliding_window to build samples.
    sw_width : int
        Length of the input window (number of encoder time steps).
    in_start : int
        Index of the first input window, forwarded to sliding_window.
    verbose_set : int
        Keras verbosity level used for fit().
    epochs_num : int
        Number of training epochs.
    batch_size_set : int
        Mini-batch size.

    Returns
    -------
    The fitted Sequential model.
    '''
    # Forward the caller's in_start; it used to be overridden with a literal 0.
    train_x, train_y = sliding_window(train, sw_width, in_start=in_start)

    n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]

    # The decoder emits one value per output step, so targets need a trailing
    # feature axis: [samples, n_outputs, 1].
    train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

    model = Sequential()
    # Encoder: compress the input window into a single 200-unit state vector.
    model.add(LSTM(200, activation='relu',
                   input_shape=(n_timesteps, n_features)))
    # Repeat the encoded vector once per output step to feed the decoder.
    model.add(RepeatVector(n_outputs))
    # Decoder: produce a hidden state for every output step.
    model.add(LSTM(200, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(100, activation='relu')))
    model.add(TimeDistributed(Dense(1)))

    # 'accuracy' is a classification metric and carries no information for an
    # MSE regression target, so only the loss is tracked.
    model.compile(loss='mse', optimizer='adam')
    # summary() prints the table itself and returns None; wrapping it in
    # print() emitted a stray "None" line.
    model.summary()

    model.fit(train_x, train_y,
              epochs=epochs_num, batch_size=batch_size_set, verbose=verbose_set)
    return model


def forecast(model, pred_seq, sw_width):
    '''
    Predict the next output sequence from the tail of the observed history.

    pred_seq is a sequence of equally shaped 2-D blocks. The blocks are stacked
    and flattened along time, the last sw_width values of column 0 are shaped
    into a single-sample batch [1, sw_width, 1], and the model's prediction
    for that one sample is returned.
    '''
    history = np.array(pred_seq)
    dims = history.shape
    flat = history.reshape((dims[0] * dims[1], dims[2]))

    # Most recent sw_width observations of the first column.
    window = flat[-sw_width:, 0]
    batch = window.reshape((1, len(window), 1))

    # predict() returns one row per sample; the batch holds exactly one.
    return model.predict(batch, verbose=0)[0]

def evaluate_model(model, train, test, sd_width):
    '''
    Walk-forward evaluation of a fitted model.

    For every block in test, a forecast is made from the history seen so far,
    then the true block is appended to the history before the next forecast.
    Returns the (score, scores, MAE) triple computed by evaluate_forecasts on
    column 0 of test against the stacked predictions.
    '''
    history_fore = list(train)
    predictions = []
    for observed in test:
        # Forecast first, then reveal the real observation for the next step.
        predictions.append(forecast(model, history_fore, sd_width))
        history_fore.append(observed)

    stacked = np.array(predictions)
    score, scores, MAE = evaluate_forecasts(test[:, :, 0], stacked)
    return score, scores, MAE

def model_plot(score, scores, days, name):
    '''
    Draw the per-step RMSE curve.

    scores is plotted against days with circle markers and labelled with
    name in the legend. score is accepted but not used by the plot.
    '''
    fig, ax = plt.subplots(figsize=(8,6), dpi=150)
    ax.plot(days, scores, marker='o', label=name)
    ax.grid(linestyle='--', alpha=0.5)
    ax.set_ylabel(r'$RMSE$', size=15)
    ax.set_title('Enco-Deco_LSTM ',  size=18)
    ax.legend()
    plt.show()
    
# def main_run(dataset, sw_width, days, name, in_start, verbose, epochs, batch_size):
#     '''
#     主函数：数据处理、模型训练流程
#     '''
#     # 划分训练集和测试集
#     train, test = split_dataset(dataset.values)
#     # 训练模型
#     model = lstm_model(train, sw_width, in_start, verbose_set=0, epochs_num=20, batch_size_set=4)
#     # 计算RMSE
#     score, scores = evaluate_model(model, train, test, sw_width)
#     # 打印分数
#     summarize_scores(name, score, scores)
#     # 绘图
#     model_plot(score, scores, days, name)

    
if __name__ == '__main__':
    # Load the hourly table, indexed by its 'date' column.
    # `infer_datetime_format` was dropped: it is deprecated in pandas 2.x and
    # had no effect here because no column is parsed as a date.
    dataset = pd.read_csv('./LD_MT200_hour.csv', header=0,
                          low_memory=False, engine='c', index_col=['date'])
    values = dataset.values.astype('float32')
    # ELE_SUM is the row-wise sum of value columns 1 through 6.
    dataset['ELE_SUM'] = (values[:,1] + values[:,2] + values[:,3] + values[:,4] + values[:,5] + values[:,6])

    # Day-sized training and test blocks.
    train, test = split_dataset(dataset.values)

    name = 'en-de-lstm'

    sliding_window_width = 24
    input_sequence_start = 0

    # Training hyper-parameters. These are the values the run actually used;
    # the earlier declarations (70 epochs, batch size 16) were never passed to
    # lstm_model, which was called with literal 20 and 4.
    epochs_num = 20
    batch_size_set = 4
    verbose_set = 0

    model = lstm_model(train, sliding_window_width, input_sequence_start,
                       verbose_set=verbose_set, epochs_num=epochs_num,
                       batch_size_set=batch_size_set)
    score, scores, MAE = evaluate_model(model, train, test, sliding_window_width)

    # NOTE: a commented-out snippet that embedded a database password and built
    # an SQL statement by string concatenation was removed. If results need to
    # be persisted, read credentials from the environment and use a
    # parameterized query.

    summarize_scores(name, score, scores, MAE)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 200)               161600    
                                                                 
 repeat_vector (RepeatVector  (None, 24, 200)          0         
 )                                                               
                                                                 
 lstm_1 (LSTM)               (None, 24, 200)           320800    
                                                                 
 time_distributed (TimeDistr  (None, 24, 100)          20100     
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 24, 1)            101       
 tributed)                                                       
                                                        