# In [None]:  (Jupyter notebook cell marker left over from export)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# Load only the column at index 1 of the CSV; this is the series being modelled.
dataframe = pd.read_csv(r'confirmdatahk.csv', usecols=[1], engine='python')
# Convert the raw values to float32.
dataset = dataframe.values.astype('float32')

# Rescale the series into the [0, 1] range.
# NOTE(review): the scaler is fitted on the full series, i.e. including the
# rows that later become the test split -- confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# Split into training and test sets: the first 80% of the rows are used for
# training, the remaining rows for testing (order is preserved).
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

class LstmModel:
    """Turn a single-column series into sliding-window samples.

    Each sample pairs ``look_back`` consecutive values (the input window)
    with the value that immediately follows them (the target).
    """

    def __init__(self, list, look_back):
        """Store the series and the window length.

        Args:
            list: 2-D array-like of shape (n_rows, n_columns); only column 0
                is read. The name shadows the builtin but is kept because
                callers pass it by keyword.
            look_back: number of consecutive values in each input window.
        """
        self.list = list
        self.look_back = look_back

    def create_dataset(self):
        """Build the (inputs, targets) arrays for the stored series.

        Returns:
            A tuple ``(dataX, dataY)`` where ``dataX`` has shape
            ``(n_samples, look_back)`` and ``dataY`` has shape
            ``(n_samples,)``, with ``n_samples = len(list) - look_back``.
            When the series is too short to form any window, empty arrays
            with those shapes (``n_samples == 0``) are returned.
        """
        series = np.asarray(self.list)
        # Use the instance's own window length rather than a module-level
        # name, so every instance honours the value it was constructed with.
        look_back = self.look_back
        n_samples = len(series) - look_back

        if n_samples <= 0:
            # Keep the 2-D shape so callers can still read ``shape[1]``.
            return (np.empty((0, look_back), dtype=series.dtype),
                    np.empty((0,), dtype=series.dtype))

        dataX = [series[i:i + look_back, 0] for i in range(n_samples)]
        dataY = [series[i + look_back, 0] for i in range(n_samples)]
        return np.array(dataX), np.array(dataY)
        
# Number of past values fed to the model for each prediction. Defined once
# here because the plotting offsets further down must use the same value as
# the windowing step.
look_back = 3

# Build (window -> next value) samples for each split.
train1 = LstmModel(list=train, look_back=look_back)
trainX, trainY = train1.create_dataset()
test1 = LstmModel(list=test, look_back=look_back)
testX, testY = test1.create_dataset()

# Add a trailing axis of size 1 so the inputs are 3-D:
# (samples, time steps, features).
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

# Model training
model = Sequential()
model.add(LSTM(3, input_shape=(None, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1)

# Predict on both splits
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# Undo the scaling so predictions and targets are in the original units.
# The 1-D targets are wrapped in a list, giving a (1, n_samples) array, which
# is why row 0 is selected when scoring below.
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])

# Compute RMSE to evaluate the fit (train) and the forecast (test)
trainScore = np.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = np.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
print('Test Score: %.2f RMSE' % (testScore))

# Place the predictions on the full-length time axis; positions without a
# prediction stay NaN so nothing is drawn there.
# The first `look_back` rows of each split have no prediction because they
# are only used as input for the first window.
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict) + look_back, :] = trainPredict
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
# The train split spans len(trainPredict) + look_back rows, and the test
# predictions start a further look_back rows after that.
testPredictPlot[len(trainPredict) + (look_back * 2):len(dataset), :] = testPredict

# Plot
plt.rcParams['font.sans-serif'] = 'KaiTi'  # font used so the Chinese labels render
# plt.subplots returns a (figure, axes) pair; unpack it so `fig` is the figure.
fig, ax = plt.subplots(1, 1, figsize=(10, 7), dpi=100)
plt.plot(scaler.inverse_transform(dataset), color='blue', label="实际值")
plt.plot(trainPredictPlot, color='green', linestyle='--', label='拟合值')
plt.plot(testPredictPlot, linestyle='--', color='red', label='预测值')
plt.legend()
plt.title("LSTM模型下香港2022年COVID-19确诊人数拟合与预测图")
plt.show()