# 机器学习作业 流感和新冠数据预测

## 流感数据 

In [45]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from scipy import optimize
import warnings
warnings.filterwarnings("ignore")

# Load the ILI (influenza-like illness) surveillance table.
ilinet_full = pd.read_csv('/ILINet.csv')

# Keep only the first 1305 rows: the trailing, incomplete season is discarded so
# that every remaining season is a full cycle.
ilinet_full_seasons = ilinet_full.iloc[0:1305]

# Case counts for every retained week, and the week numbers of the first 52 rows.
data = ilinet_full_seasons['ILITOTAL'].values
weeks = ilinet_full_seasons['WEEK'].values[:52]


In [46]:

import numpy as np
# Index labels of the rows whose WEEK is 39 / 40; week 40 marks the start of a season.
end_weeks = ilinet_full_seasons.index[ilinet_full_seasons['WEEK'].eq(39)].tolist()
start_weeks = ilinet_full_seasons.index[ilinet_full_seasons['WEEK'].eq(40)].tolist()

# Some years have 53 weeks and some 52; exactly 52 weeks are taken from every
# season start so that all rows of the matrix have the same length.
# NOTE(review): the index labels are used as positions into `data`, which
# assumes the default 0..n-1 index — confirm.
seasons = np.vstack([data[start:start + 52] for start in start_weeks])

## 新冠数据


In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the COVID-19 training table. `read_csv` already returns a DataFrame, so
# no extra `pd.DataFrame(...)` wrapper is needed.
data2 = pd.read_csv('/train.csv')
# Preview only the first rows instead of dumping the whole frame.
data2.head(10)

Unnamed: 0,Id,County,Province_State,Country_Region,Population,Weight,Date,Target,TargetValue
0,1,,,Afghanistan,27657145,0.058359,2020-01-23,ConfirmedCases,0
1,2,,,Afghanistan,27657145,0.583587,2020-01-23,Fatalities,0
2,3,,,Afghanistan,27657145,0.058359,2020-01-24,ConfirmedCases,0
3,4,,,Afghanistan,27657145,0.583587,2020-01-24,Fatalities,0
4,5,,,Afghanistan,27657145,0.058359,2020-01-25,ConfirmedCases,0
5,6,,,Afghanistan,27657145,0.583587,2020-01-25,Fatalities,0
6,7,,,Afghanistan,27657145,0.058359,2020-01-26,ConfirmedCases,0
7,8,,,Afghanistan,27657145,0.583587,2020-01-26,Fatalities,0
8,9,,,Afghanistan,27657145,0.058359,2020-01-27,ConfirmedCases,0
9,10,,,Afghanistan,27657145,0.583587,2020-01-27,Fatalities,0


In [48]:
# All rows reported for the United States (any target, any administrative level).
US_data = data2.loc[data2['Country_Region'] == 'US']

# Confirmed-case rows whose Population equals 324141489.
# NOTE(review): presumably these are the nation-level rows — confirm. Below,
# this frame is only used as the source of the shared Date column.
is_confirmed = US_data['Target'] == 'ConfirmedCases'
has_reference_population = US_data['Population'] == 324141489
US_data_cases = US_data.loc[is_confirmed & has_reference_population]


In [49]:
# States whose series are used for training. The order matters: later cells
# address individual states by their position in this list.
states_train =['Alabama','Texas','Alaska', 'Arizona', 'Arkansas', 
       'Colorado', 'Connecticut', 'Delaware', 'District of Columbia',
       'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois',
       'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
       'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
       'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
       'New Hampshire', 'New Jersey', 'New Mexico',
       'North Carolina', 'North Dakota', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Rhode Island', 'South Carolina',
       'South Dakota', 'Tennessee',  'Utah', 'Vermont','Virginia', 'Washington', 'West Virginia',
       'Wisconsin', 'Wyoming']
states_test=['California','New York', 'Ohio']# held-out states: the other states are used to predict these three
# One column per training state plus a shared Date column taken from US_data_cases.
df_train = pd.DataFrame(columns =['Date']+states_train)#columns = states
df_train["Date"] = US_data_cases["Date"].values

# Same layout for the three held-out states.
df_test = pd.DataFrame(columns =['Date']+states_test)#columns = states
df_test["Date"] = US_data_cases["Date"].values

In [50]:
# Build the training matrix: one min-max normalised confirmed-case series per
# training state, stacked row by row in the order of `states_train`.
train_data_as_array = []
for state in states_train:
    state_data = US_data.loc[US_data['Province_State'] == state]
    # The largest Population value among the state's rows selects the rows kept below.
    state_pop = state_data['Population'].max()
    wanted = (state_data['Target'] == 'ConfirmedCases') & (state_data['Population'] == state_pop)
    state_data_cases = state_data.loc[wanted]

    case_values = state_data_cases['TargetValue']
    min_cases, max_cases = case_values.min(), case_values.max()
    # Rescale the series to [0, 1] and store it in the state's column.
    df_train[state] = (case_values.values - min_cases) / (max_cases - min_cases)
    train_data_as_array.append(df_train[state].values)
train_data_as_array = np.vstack(train_data_as_array)

In [51]:
# Build the test matrix: one min-max normalised confirmed-case series per
# held-out state, stacked row by row in the order of `states_test`.
test_data_as_array = []
for state in states_test:
    state_data = US_data.loc[US_data['Province_State'] == state]
    # The largest Population value among the state's rows selects the rows kept below.
    state_pop = state_data['Population'].max()
    wanted = (state_data['Target'] == 'ConfirmedCases') & (state_data['Population'] == state_pop)
    state_data_cases = state_data.loc[wanted]

    case_values = state_data_cases['TargetValue']
    min_cases, max_cases = case_values.min(), case_values.max()
    # Rescale the series to [0, 1] and store it in the state's column.
    df_test[state] = (case_values.values - min_cases) / (max_cases - min_cases)
    test_data_as_array.append(df_test[state].values)
test_data_as_array = np.vstack(test_data_as_array)

# Seq2Seq实现对新冠数据45天的预测

In [52]:
import torch
from sklearn.preprocessing import MinMaxScaler

# Scale the flu season matrix into [-1, 1] and flatten it into one long tensor.
scaler = MinMaxScaler(feature_range=(-1, 1))
# Extremes of every season except the last one.
maxval = seasons[0:-1].max()
minval = seasons[0:-1].min()
# NOTE(review): the scaler is fitted on ALL seasons, including the last one that
# is removed from `train_data` below, so the held-out season influences the
# scaling — confirm this is intended.
scaleddata = scaler.fit_transform(seasons.reshape(-1, 1)) # scale the data
full_data = torch.FloatTensor(scaleddata).view(-1)
train_data = full_data[:-52] # drop the last season (52 weeks)

In [53]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")


# Encoder-decoder (seq2seq) LSTM graph used for the COVID series.
latent_dim = 50 # LSTM hidden units
dropout = .20 

# Encoder: reads a univariate sequence of any length and exposes only its final states.
encoder_inputs = Input(shape=(None, 1)) 
encoder = LSTM(latent_dim, dropout=dropout, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)

# The hidden and cell states are what gets handed to the decoder.
encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(None, 1)) 

# Decoder: starts from the encoder states and returns an output for every time step.
decoder_lstm = LSTM(latent_dim, dropout=dropout, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,initial_state=encoder_states)

# One linear unit maps each decoder step to a single predicted value.
decoder_dense = Dense(1) 
decoder_outputs = decoder_dense(decoder_outputs)

model_covid = Model([encoder_inputs, decoder_inputs], decoder_outputs) # build the training model

In [54]:
def create_encoder_target_pairs(data, target_len=45):
    """Split each series into an encoder part and a trailing target part.

    Whether ``data`` is one series or several is decided from its number of
    dimensions, not from its length, so a single series of any length and a
    stack of any number of series are both handled.

    Args:
        data: A 1-D series, or a 2-D array-like with one series per row.
        target_len: Number of trailing time steps that form the target
            (default 45, the forecast horizon used throughout the notebook).

    Returns:
        ``[e_data, t_data]``: two 2-D arrays with one row per series. ``e_data``
        holds everything except the last ``target_len`` steps; ``t_data`` holds
        the last ``target_len`` steps.

    Raises:
        ValueError: If ``data`` is not 1-D or 2-D, or if ``target_len`` is not
            strictly between 0 and the series length.
    """
    series = np.asarray(data)
    if series.ndim == 1:
        # A single series becomes a one-row batch so the output is always 2-D.
        series = series[np.newaxis, :]
    elif series.ndim != 2:
        raise ValueError("data must be 1-D or 2-D, got %d dimensions" % series.ndim)

    if not 0 < target_len < series.shape[1]:
        raise ValueError(
            "target_len must be between 1 and %d, got %r" % (series.shape[1] - 1, target_len)
        )

    e_data = series[:, :-target_len].copy()
    t_data = series[:, -target_len:].copy()
    return [e_data, t_data]

In [55]:
# Encoder / target split for every training state at once.
e_data, t_data = create_encoder_target_pairs(train_data_as_array)

In [56]:
def transform_series_encode(series_array):
    """Give a batch of series a trailing feature axis and report per-series means.

    No scaling is applied to the values; they are only reshaped.

    Args:
        series_array: Array with one series per row.

    Returns:
        A tuple ``(series_3d, series_mean)``: the input reshaped to
        ``(rows, steps, 1)`` and a column vector holding each row's mean.
    """
    per_series_mean = series_array.mean(axis=1).reshape(-1, 1)
    n_series, n_steps = series_array.shape[:2]
    series_3d = series_array.reshape((n_series, n_steps, 1))
    return series_3d, per_series_mean

def transform_series_decode(series_array, encode_series_mean):
    """Shape target series like the encoder input, with a trailing feature axis.

    The encoder means are not subtracted: a zero column with one entry per
    encoder series is subtracted instead. That subtraction still broadcasts, so
    a 1-D ``series_array`` is repeated once per encoder series.

    Args:
        series_array: Target series (array or nested list).
        encode_series_mean: Per-series encoder means; only its length is used.

    Returns:
        Array of shape ``(rows, steps, 1)``.
    """
    zero_offset = np.zeros([len(encode_series_mean), 1])
    shifted = series_array - zero_offset
    n_series, n_steps = shifted.shape[:2]
    return shifted.reshape((n_series, n_steps, 1))

In [57]:
# Train the COVID seq2seq model on the training states.
batch_size = 2**11
epochs = 100

# Encoder input: the leading part of every training series, shaped (rows, steps, 1).
encoder_input_data = e_data
encoder_input_data, encode_series_mean = transform_series_encode(encoder_input_data)

# Decoder target: the trailing part of every training series.
decoder_target_data = t_data
                                            
decoder_target_data = transform_series_decode(decoder_target_data, encode_series_mean)

# Teacher forcing: the decoder input is the target shifted one step to the
# right, with the first step seeded by the last encoder value.
decoder_input_data = np.zeros(decoder_target_data.shape)
decoder_input_data[:,1:,0] = decoder_target_data[:,:-1,0]
decoder_input_data[:,0,0] = encoder_input_data[:,-1,0]

model_covid.compile(Adam(), loss='mean_absolute_error') # Adam optimiser, MAE loss
# 20% of the samples are held out for validation; the trailing ';' hides the cell output.
history = model_covid.fit([encoder_input_data, decoder_input_data], decoder_target_data,
                     batch_size=batch_size,
                     epochs=epochs,
                     validation_split=0.2,verbose=0);

In [58]:
# Training and validation loss curves of the most recent fit.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])

plt.title('Loss Over Time')
plt.xlabel('Epoch')
plt.ylabel('Mean Absolute Error Loss')
plt.legend(['Train', 'Valid'])
plt.show()

In [59]:
# Stand-alone inference models built from the layers defined above.

# Encoder: maps an input sequence to its final [hidden, cell] states.
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder states are fed in explicitly so decoding can proceed one step at a time.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Re-use the same decoder LSTM and dense layers, now starting from the supplied states.
# Note: this rebinds decoder_outputs, state_h and state_c.
decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]

decoder_outputs = decoder_dense(decoder_outputs)
# Decoder: (previous value, states) -> (next value, updated states).
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_outputs] + decoder_states)

def decode_sequence(input_seq,pred_steps):
    """Generate ``pred_steps`` values autoregressively from an encoded sequence.

    The encoder states of ``input_seq`` initialise the decoder. The decoder is
    seeded with the last value of the first input sequence, and every
    prediction is fed back as the next decoder input.

    Args:
        input_seq: Encoder input indexed as ``[sample, step, feature]``.
        pred_steps: Number of steps to generate.

    Returns:
        Array of shape ``(1, pred_steps, 1)`` holding the generated values.
    """
    states_value = encoder_model.predict(input_seq)

    # Seed the decoder with the last observed value.
    next_input = np.full((1, 1, 1), input_seq[0, -1, 0], dtype=float)
    decoded_seq = np.zeros((1, pred_steps, 1))

    for step in range(pred_steps):
        output, h, c = decoder_model.predict([next_input] + states_value)
        step_value = output[0, 0, 0]
        decoded_seq[0, step, 0] = step_value

        # Feed the prediction and the updated states back into the decoder.
        next_input = np.full((1, 1, 1), step_value, dtype=float)
        states_value = [h, c]

    return decoded_seq

In [60]:
def get_cumulative_data(data):
    """Return the running total of a series as a column vector.

    Args:
        data: Sequence of per-step values (a flat sequence or an ``(n, 1)`` column).

    Returns:
        Float array of shape ``(len(data), 1)`` whose i-th entry is the sum of
        ``data[0..i]``. An empty input gives an empty ``(0, 1)`` array.

    Raises:
        ValueError: If ``data`` holds more than one value per step.
    """
    values = np.asarray(data, dtype=float)
    if values.size != len(values):
        raise ValueError("data must contain exactly one value per step")
    # np.cumsum accumulates left to right, like the explicit running-sum loop it replaces.
    return np.cumsum(values.reshape(-1, 1), axis=0)

def get_cumulative_data_preds(edata, tdata):
    """Continue the running total of ``edata`` through the values in ``tdata``.

    Args:
        edata: Per-step values that come before the predictions.
        tdata: Per-step predicted values.

    Returns:
        Float array of shape ``(len(tdata), 1)``; entry i is the total of all of
        ``edata`` plus ``tdata[0..i]``. An empty ``edata`` contributes a zero
        offset, and an empty ``tdata`` gives an empty ``(0, 1)`` array.
    """
    observed = np.asarray(edata, dtype=float).reshape(-1)
    predicted = np.asarray(tdata, dtype=float).reshape(-1)
    # Accumulate over the joined series (same left-to-right order as summing the
    # history first), then keep only the part that covers the predictions.
    running_total = np.cumsum(np.concatenate([observed, predicted]))
    return running_total[observed.size:].reshape(-1, 1)
def get_rolling_average(edata, tdata, numsteps):
    """Roll a ``numsteps``-step forecast across the target window and average it.

    For each position in the target series, the encoder is given the history
    plus the true target values revealed so far. The model forecasts
    ``numsteps`` values, and the mean of that forecast is recorded.

    Args:
        edata: 2-D array of encoder series; row 0 is the history that is used.
        tdata: 2-D array of target series; row 0 is the series that is used.
        numsteps: Forecast horizon and rolling-window width.

    Returns:
        A tuple ``(rolling_preds, target, rolling_mean)``:
        ``rolling_preds`` is the last observed value followed by one averaged
        forecast per target step; ``target`` is the last observed value
        followed by the true target values; ``rolling_mean`` is the
        ``numsteps``-wide rolling mean of ``target`` as a pandas Series (its
        first ``numsteps - 1`` entries are NaN).
    """
    history = edata[0]
    actual = tdata[0]
    last_observed = edata[-1][-1]

    # Collect in a list and convert once, rather than growing an array per step.
    forecast_means = []
    for n_revealed in range(len(actual)):
        window = np.array([np.append(history, actual[0:n_revealed])])
        encoder_input_data, _ = transform_series_encode(window)
        forecast = decode_sequence(encoder_input_data, numsteps)
        forecast_means.append(np.mean(forecast))

    rolling_preds = np.append([last_observed], forecast_means)
    target = np.append([last_observed], actual)
    rolling_mean = pd.Series(target).rolling(window=numsteps).mean()
    return rolling_preds, target, rolling_mean

In [61]:
def predict_and_plot(e_data, t_data, enc_tail_len=None):
    """Forecast one series and plot history, target and prediction together.

    Args:
        e_data: 2-D array holding the encoder series (one row).
        t_data: 2-D array holding the matching target series (one row).
        enc_tail_len: Number of trailing encoder points to draw. ``None`` (the
            default) draws the whole encoder series that was passed in.

    Returns:
        Column vector of shape ``(len(t_data[0]), 1)`` with the predictions.
    """
    # Resolve the default at call time, from the argument rather than from a
    # notebook global captured when the function was defined.
    if enc_tail_len is None:
        enc_tail_len = len(e_data[0])
    pred_steps = len(t_data[0])

    encoder_input_data, encode_series_mean = transform_series_encode(e_data)
    decoder_target_data = transform_series_decode(t_data, encode_series_mean)

    pred_series = decode_sequence(encoder_input_data, pred_steps).reshape(-1, 1)
    encode_series = encoder_input_data.reshape(-1, 1)

    # Prepend the last observed value so both curves join the history visually.
    target_series = np.concatenate([encode_series[-1:], decoder_target_data.reshape(-1, 1)])
    encode_series_tail = encode_series[-enc_tail_len:]
    x_encode = encode_series_tail.shape[0]
    preds = np.concatenate([encode_series_tail[-1:], pred_series])

    forecast_x = range(x_encode, x_encode + pred_steps + 1)

    plt.figure(figsize=(20, 6))
    plt.plot(range(1, x_encode + 1), encode_series_tail, color='black', label='Encoding Series')
    plt.plot(forecast_x, target_series, color='green', label='Target Series')
    plt.plot(forecast_x, preds, color='red', linestyle='--', label='Predictions')
    # One legend built from the line labels, so no entry is dropped.
    plt.legend()
    return pred_series


In [62]:
# California is row 0 of the held-out test matrix.
e_data, t_data = create_encoder_target_pairs(test_data_as_array[0])
preds = predict_and_plot(e_data, t_data)
plt.suptitle("California")
plt.show()

In [63]:
# New York is row 1 of the held-out test matrix.
e_data, t_data = create_encoder_target_pairs(test_data_as_array[1])
preds = predict_and_plot(e_data, t_data)
plt.suptitle('New York')
plt.show()

In [64]:
# Ohio is row 2 of the held-out test matrix.
e_data, t_data = create_encoder_target_pairs(test_data_as_array[2])
preds = predict_and_plot(e_data, t_data)
plt.suptitle('Ohio')
plt.show()

# 加1： 新冠数据 Seq2Seq 预测与7日移动平均对比（原标题：流感数据LSTM和Seq2Seq对比）

In [65]:
def predict_and_plot(e_data, t_data, enc_tail_len=None):
    """Forecast one series and plot it next to its 7-day rolling averages.

    The left panel shows the encoder history, the true target and the
    full-horizon prediction. The right panel shows the target, its 7-day
    moving average and the averaged rolling 7-day forecasts.

    Args:
        e_data: 2-D array holding the encoder series (one row).
        t_data: 2-D array holding the matching target series (one row).
        enc_tail_len: Number of trailing encoder points to draw. ``None`` (the
            default) draws the whole encoder series that was passed in.

    Returns:
        Column vector of shape ``(len(t_data[0]), 1)`` with the predictions.
    """
    # Resolve the default at call time, from the argument rather than from a
    # notebook global captured when the function was defined.
    if enc_tail_len is None:
        enc_tail_len = len(e_data[0])
    pred_steps = len(t_data[0])

    encoder_input_data, encode_series_mean = transform_series_encode(e_data)
    decoder_target_data = transform_series_decode(t_data, encode_series_mean)

    pred_series = decode_sequence(encoder_input_data, pred_steps).reshape(-1, 1)
    encode_series = encoder_input_data.reshape(-1, 1)

    # Prepend the last observed value so both curves join the history visually.
    target_series = np.concatenate([encode_series[-1:], decoder_target_data.reshape(-1, 1)])
    encode_series_tail = encode_series[-enc_tail_len:]
    x_encode = encode_series_tail.shape[0]
    preds = np.concatenate([encode_series_tail[-1:], pred_series])

    history_x = range(1, x_encode + 1)
    forecast_x = range(x_encode, x_encode + pred_steps + 1)

    plt.figure(figsize=(20, 6))

    # Left panel: full-horizon prediction against the target.
    plt.subplot(1, 2, 1)
    plt.plot(forecast_x, target_series, color='green', label='Target Series')
    plt.plot(forecast_x, preds, color='red', linestyle='--', label='Predictions')
    plt.plot(history_x, encode_series_tail, color='black')
    plt.legend()

    # Right panel: rolling 7-day forecasts against the 7-day moving average.
    preds_rolling, target, rolling_mean = get_rolling_average(e_data, t_data, 7)
    plt.subplot(1, 2, 2)
    plt.plot(forecast_x, target, color='green', label='Target Series')
    plt.plot(forecast_x, preds_rolling, color='red', linestyle='--',
             label='7-Day Moving Average Predictions')
    plt.plot(forecast_x, rolling_mean, color='m', label='7-Day Moving Average Data')
    plt.plot(history_x, encode_series_tail, color='black')
    plt.legend()
    return pred_series


In [66]:
# California is row 0 of the held-out test matrix.
e_data, t_data = create_encoder_target_pairs(test_data_as_array[0])
preds = predict_and_plot(e_data, t_data)
plt.suptitle("California")
plt.show()

In [67]:
# New York is row 1 of the held-out test matrix.
e_data, t_data = create_encoder_target_pairs(test_data_as_array[1])
preds = predict_and_plot(e_data, t_data)
plt.suptitle('New York')
plt.show()

In [68]:
# Ohio is row 2 of the held-out test matrix.
e_data, t_data = create_encoder_target_pairs(test_data_as_array[2])
preds = predict_and_plot(e_data, t_data)
plt.suptitle('Ohio')
plt.show()

# 加2：流感 ILI 数据 Seq2Seq训练和预测

In [69]:
# Flu (ILI) data
# Min-max normalise the whole season matrix to [0, 1] with its global extremes.
# NOTE(review): the extremes are taken over all seasons, including the ones
# later used as targets — confirm this is intended.
normalized_seasons=(seasons-seasons.min())/(seasons.max()-seasons.min())

from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")


# Encoder-decoder (seq2seq) LSTM graph for the flu series. The assignments below
# rebind the layer and tensor names used by the COVID model.
latent_dim = 50 # LSTM hidden units
dropout = .20 

# Encoder: reads a univariate sequence of any length and exposes only its final states.
encoder_inputs = Input(shape=(None, 1)) 
encoder = LSTM(latent_dim, dropout=dropout, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)

# The hidden and cell states are what gets handed to the decoder.
encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(None, 1)) 

# Decoder: starts from the encoder states and returns an output for every time step.
decoder_lstm = LSTM(latent_dim, dropout=dropout, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,initial_state=encoder_states)

# One linear unit maps each decoder step to a single predicted value.
decoder_dense = Dense(1) 
decoder_outputs = decoder_dense(decoder_outputs)

model_flu = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [70]:
# Flu training data: every season except the last two is an encoder input.
# (The training cell below repeats these same four statements.)
encoder_input_data = normalized_seasons[0:-2]
encoder_input_data, encode_series_mean = transform_series_encode(encoder_input_data)

# The second-to-last season is the target. It is a single 1-D row, and
# transform_series_decode broadcasts it against one zero per encoder series, so
# every encoder season ends up paired with this same target season.
# NOTE(review): confirm that a shared target (rather than "the following
# season" per sample) is intended.
decoder_target_data = normalized_seasons[-2]
decoder_target_data = transform_series_decode(decoder_target_data, encode_series_mean)

In [72]:
# Train the flu seq2seq model.
batch_size = 2**11
epochs = 100

# Encoder input: every season except the last two, shaped (rows, steps, 1).
encoder_input_data = normalized_seasons[0:-2]
encoder_input_data, encode_series_mean = transform_series_encode(encoder_input_data)

# Decoder target: the second-to-last season, broadcast by
# transform_series_decode to one copy per encoder season.
decoder_target_data = normalized_seasons[-2] 
                                            
decoder_target_data = transform_series_decode(decoder_target_data, encode_series_mean)

# Teacher forcing: the decoder input is the target shifted one step to the
# right, with the first step seeded by the last encoder value.
decoder_input_data = np.zeros(decoder_target_data.shape)
decoder_input_data[:,1:,0] = decoder_target_data[:,:-1,0]
decoder_input_data[:,0,0] = encoder_input_data[:,-1,0]

model_flu.compile(Adam(), loss='mean_absolute_error')
history = model_flu.fit([encoder_input_data, decoder_input_data], decoder_target_data,
                     batch_size=batch_size,
                     epochs=epochs,
                     validation_split=0.2,verbose=0); # fit the model on the flu data

In [73]:
# Training and validation loss curves of the most recent fit.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])

plt.title('Loss Over Time')
plt.xlabel('Epoch')
plt.ylabel('Mean Absolute Error Loss')
plt.legend(['Train', 'Valid'])
plt.show()

In [74]:
# Stand-alone inference models built from the flu model's layers.

# Encoder: maps an input sequence to its final [hidden, cell] states.
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder states are fed in explicitly so decoding can proceed one step at a time.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Re-use the same decoder LSTM and dense layers, now starting from the supplied states.
# Note: this rebinds decoder_outputs, state_h and state_c.
decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]

decoder_outputs = decoder_dense(decoder_outputs)
# Decoder: (previous value, states) -> (next value, updated states).
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_outputs] + decoder_states)

def decode_sequence(input_seq, pred_steps=None):
    """Generate ``pred_steps`` values autoregressively from an encoded sequence.

    The encoder states of ``input_seq`` initialise the decoder. The decoder is
    seeded with the last value of the first input sequence, and every
    prediction is fed back as the next decoder input.

    Args:
        input_seq: Encoder input indexed as ``[sample, step, feature]``.
        pred_steps: Number of steps to generate. When omitted, the notebook
            global ``pred_steps`` is used, which keeps one-argument calls
            working; passing it explicitly keeps two-argument callers working.

    Returns:
        Array of shape ``(1, pred_steps, 1)`` holding the generated values.

    Raises:
        NameError: If ``pred_steps`` is omitted and no global ``pred_steps`` exists.
    """
    if pred_steps is None:
        # The parameter shadows the global of the same name, so look it up explicitly.
        try:
            pred_steps = globals()['pred_steps']
        except KeyError:
            raise NameError("name 'pred_steps' is not defined") from None

    states_value = encoder_model.predict(input_seq)

    # Seed the decoder with the last observed value.
    target_seq = np.zeros((1, 1, 1))
    target_seq[0, 0, 0] = input_seq[0, -1, 0]

    decoded_seq = np.zeros((1, pred_steps, 1))

    for i in range(pred_steps):
        output, h, c = decoder_model.predict([target_seq] + states_value)
        decoded_seq[0, i, 0] = output[0, 0, 0]

        # Feed the prediction and the updated states back into the decoder.
        target_seq = np.zeros((1, 1, 1))
        target_seq[0, 0, 0] = output[0, 0, 0]
        states_value = [h, c]

    return decoded_seq

In [75]:
# Flu evaluation data: every season except the last is an encoder input.
encoder_input_data = normalized_seasons[0:-1]
encoder_input_data, encode_series_mean = transform_series_encode(encoder_input_data)

# The last season is the target; transform_series_decode broadcasts this single
# 1-D row to one copy per encoder season.
decoder_target_data = normalized_seasons[-1]
decoder_target_data = transform_series_decode(decoder_target_data, encode_series_mean)

In [78]:
# Forecast horizon for the flu model: one 52-week season.
pred_steps=52
def predict_and_plot(encoder_input_data, decoder_target_data, sample_ind, enc_tail_len=51):
    """Forecast one flu season for a chosen sample and plot it against the target.

    Args:
        encoder_input_data: Encoder inputs indexed as ``[sample, step, feature]``.
        decoder_target_data: Targets; row ``sample_ind`` is the series to compare with.
        sample_ind: Index of the sample to encode and to compare against.
        enc_tail_len: Number of trailing encoder points to draw.

    Returns:
        Column vector of shape ``(pred_steps, 1)`` with the predictions.
    """
    # Encode only the requested sample (keeping the batch axis), so that the
    # decoder seed, the encoder states and the plotted history all belong to
    # the same series.
    encode_series = encoder_input_data[sample_ind][np.newaxis]
    pred_series = decode_sequence(encode_series)

    encode_series = encode_series.reshape(-1, 1)
    pred_series = pred_series.reshape(-1, 1)
    target_series = decoder_target_data[sample_ind, :].reshape(-1, 1)
    # Prepend the last observed value so both curves join the history visually.
    target_series = np.concatenate([encode_series[-1:], target_series])
    encode_series_tail = encode_series[-enc_tail_len:]
    x_encode = encode_series_tail.shape[0]

    preds = np.zeros((pred_steps + 1, 1))
    preds[0] = encode_series_tail[-1]
    preds[1:] = pred_series

    forecast_x = range(x_encode, x_encode + pred_steps + 1)

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, x_encode + 1), encode_series_tail, color='black')
    plt.plot(forecast_x, target_series, color='green')
    plt.plot(forecast_x, preds, color='red', linestyle='--')

    plt.title('Seq2Seq FLU')
    plt.xlabel('Time in Weeks')
    plt.ylabel('Normalized Weekly Case Counts')
    plt.legend(['Encoding Series', 'Target Series', 'Predictions'])
    return pred_series
# Encode season 23 and compare the forecast with target row 23.
preds= predict_and_plot(encoder_input_data, decoder_target_data, 23)

# 加3： ILI 训练， COVID预测

In [79]:
# Few-shot set: encoder/target pairs for all three held-out states at once.
e_data, t_data = create_encoder_target_pairs(test_data_as_array)

In [80]:
# Prepare the three-state COVID data for fine-tuning the flu-trained model.
# NOTE(review): batch_size and epochs are set here, but the fit call in the
# next cell passes a literal epochs=100 and no batch_size.
batch_size = 2**11
epochs = 100

# Encoder input: the leading part of each series, shaped (rows, steps, 1).
encoder_input_data = e_data
encoder_input_data, encode_series_mean = transform_series_encode(encoder_input_data)

# Decoder target: the trailing part of each series.
decoder_target_data = t_data                   
decoder_target_data = transform_series_decode(decoder_target_data, encode_series_mean)

# Teacher forcing: the decoder input is the target shifted one step to the
# right, with the first step seeded by the last encoder value.
decoder_input_data = np.zeros(decoder_target_data.shape)
decoder_input_data[:,1:,0] = decoder_target_data[:,:-1,0]
decoder_input_data[:,0,0] = encoder_input_data[:,-1,0]

In [81]:
# Continue training the model that was already fitted on flu data, now on the
# COVID series. No validation split is used, so only a training loss is recorded.
history = model_flu.fit([encoder_input_data, decoder_input_data], decoder_target_data, epochs = 100,verbose=0);# the model was trained on flu data; now train it on COVID data

In [82]:
# Training loss of the fine-tuning run (this fit has no validation curve).
training_loss = history.history['loss']
plt.plot(training_loss)

plt.title('Loss Over Time')
plt.xlabel('Epoch')
plt.ylabel('Mean Absolute Error Loss')
plt.legend(['Train'])
plt.show()

In [83]:
# Rebuild the stand-alone inference models from the fine-tuned layers.

# Encoder: maps an input sequence to its final [hidden, cell] states.
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder states are fed in explicitly so decoding can proceed one step at a time.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Re-use the same decoder LSTM and dense layers, now starting from the supplied states.
# Note: this rebinds decoder_outputs, state_h and state_c.
decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]

decoder_outputs = decoder_dense(decoder_outputs)
# Decoder: (previous value, states) -> (next value, updated states).
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_outputs] + decoder_states)

def decode_sequence(input_seq,pred_steps):
    """Generate ``pred_steps`` values autoregressively from an encoded sequence.

    The encoder states of ``input_seq`` initialise the decoder. The decoder is
    seeded with the last value of the first input sequence, and every
    prediction is fed back as the next decoder input.

    Args:
        input_seq: Encoder input indexed as ``[sample, step, feature]``.
        pred_steps: Number of steps to generate.

    Returns:
        Array of shape ``(1, pred_steps, 1)`` holding the generated values.
    """
    states_value = encoder_model.predict(input_seq)

    # Seed the decoder with the last observed value.
    next_input = np.full((1, 1, 1), input_seq[0, -1, 0], dtype=float)
    decoded_seq = np.zeros((1, pred_steps, 1))

    for step in range(pred_steps):
        output, h, c = decoder_model.predict([next_input] + states_value)
        step_value = output[0, 0, 0]
        decoded_seq[0, step, 0] = step_value

        # Feed the prediction and the updated states back into the decoder.
        next_input = np.full((1, 1, 1), step_value, dtype=float)
        states_value = [h, c]

    return decoded_seq

In [85]:
# Rolling 7-day forecast for the training state at position 31.
e_data, t_data = create_encoder_target_pairs(train_data_as_array[31])
preds_rolling, target, rolling_mean = get_rolling_average(e_data, t_data, 7)

x_start = len(e_data[0])  # the forecast window begins where the history ends
target_x = range(x_start, x_start + len(target))

plt.plot(e_data[0], color='black')
plt.plot(range(x_start, x_start + len(preds_rolling)), preds_rolling,
         color='red', linestyle='--', label='Predictions')
plt.plot(target_x, target, color='green', label='Data')
plt.plot(target_x, rolling_mean, color='m', label='7-Day Moving Average Data')

plt.title(states_train[31])
plt.legend()
plt.show()

In [86]:
# Rolling 7-day forecast for the training state at position 25.
e_data, t_data = create_encoder_target_pairs(train_data_as_array[25])
preds, target, rolling_mean = get_rolling_average(e_data, t_data, 7)

x_start = len(e_data[0])  # the forecast window begins where the history ends
target_x = range(x_start, x_start + len(target))

plt.plot(e_data[0], color='black')
plt.plot(range(x_start, x_start + len(preds)), preds,
         color='red', linestyle='--', label='Predictions')
plt.plot(target_x, target, color='green', label='Data')
plt.plot(target_x, rolling_mean, color='m', label='7-Day Moving Average Data')

plt.title(states_train[25])
plt.legend()
plt.show()

In [87]:
# Rolling 7-day forecast for the training state at position 30.
e_data, t_data = create_encoder_target_pairs(train_data_as_array[30])
preds, target, rolling_mean = get_rolling_average(e_data, t_data, 7)

x_start = len(e_data[0])  # the forecast window begins where the history ends
target_x = range(x_start, x_start + len(target))

plt.plot(e_data[0], color='black')
plt.plot(range(x_start, x_start + len(preds)), preds,
         color='red', linestyle='--', label='Predictions')
plt.plot(target_x, target, color='green', label='Data')
plt.plot(target_x, rolling_mean, color='m', label='7-Day Moving Average Data')
plt.title(states_train[30])
plt.legend()
plt.show()

# 参考资料：


1. Su, K., & Shlizerman, E. (2019). Dimension Reduction Approach for Interpretability of Sequence to Sequence Recurrent Neural Networks. arXiv preprint arXiv:1905.12176.
2. Su, K., Liu, X., & Shlizerman, E. (2020). Predict & cluster: Unsupervised skeleton based action recognition. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (pp. 9631-9640).
3. https://stackabuse.com/time-series-prediction-using-lstm-with-pytorch-in-python/
4. https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
5. https://github.com/JEddy92/TimeSeries_Seq2Seq/blob/master/notebooks/TS_Seq2Seq_Intro.ipynb
6. https://www.kaggle.com/c/covid19-global-forecasting-week-5/data?select=test.csv
7. https://machinelearningmastery.com/moving-average-smoothing-for-time-series-forecasting-python/
8. https://wallethub.com/edu/states-with-the-fewest-coronavirus-restrictions/73818/
9. https://towardsdatascience.com/which-models-to-use-for-epidemic-prediction-25b22932c4ca
