In [18]:
import pandas as pd
import numpy as np
pd.options.plotting.backend = 'plotly'

my_state = 'Wisconsin'

In [19]:
train = pd.read_csv('train.csv')
states = train['Province_State'].unique()
state_dfs_raw = {state: train[train['Province_State'] == state] for state in states}
state_means = {}
state_stds = {}
state_dfs = {}
for s, state_df_raw in state_dfs_raw.items():
    state_df = state_df_raw.drop(columns=['Province_State', 'ID'])
    state_df['Date'] = pd.to_datetime(state_df['Date'], format='%m-%d-%Y')
    state_df = state_df.set_index('Date')

    mean, std = state_df.mean(), state_df.std()

    state_df = (state_df - mean) / std

    state_means[s] = mean
    state_stds[s] = std
    state_dfs[s] = state_df

In [20]:
state_dfs_raw[my_state].drop(columns=['Province_State', 'ID']).describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Confirmed,142.0,31293.323944,21962.523387,3341.0,12579.0,24943.5,49269.5,75603.0
Deaths,142.0,688.323944,275.18127,144.0,454.5,744.5,892.75,1122.0
Recovered,124.0,27633.58871,19163.485718,3352.0,11569.0,22838.0,42558.75,67234.0
Active,142.0,6474.260563,1934.629206,3186.0,4995.75,6008.0,7990.5,10012.0
Incident_Rate,142.0,540.668113,373.744125,64.565739,216.043668,428.40331,846.20109,1298.478
People_Tested,142.0,539974.521127,396252.468872,39257.0,153460.0,488427.5,875446.75,1253422.0
People_Hospitalized,138.0,3215.014493,1363.153386,974.0,2023.0,3190.0,4313.5,5684.0
Mortality_Rate,142.0,2.91941,1.106977,1.484068,1.812017,2.98481,3.622746,5.275676
Testing_Rate,142.0,9310.003589,6764.184474,758.652263,2635.667489,8388.716815,15035.75223,21527.46
Hospitalization_Rate,138.0,14.19364,5.946485,7.77161,9.244395,13.102896,16.512879,29.50774


In [21]:
column_names = state_dfs[my_state].columns
state_dfs[my_state]

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,Testing_Rate,Hospitalization_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-04-12,-1.272728,-1.978056,,-1.693999,-1.273873,-1.263633,-1.643993,1.256284,-1.264210,2.515655
2020-04-13,-1.268767,-1.941716,,-1.654198,-1.269374,-1.261260,-1.630055,1.420993,-1.261525,2.484440
2020-04-14,-1.262984,-1.883573,,-1.596823,-1.262807,-1.257841,-1.588973,1.682590,-1.257653,2.575319
2020-04-15,-1.255426,-1.836331,,-1.517738,-1.254224,-1.254068,-1.558163,1.805479,-1.253382,2.543761
2020-04-16,-1.248414,-1.785456,,-1.445373,-1.246261,-1.249520,-1.536155,1.955291,-1.248234,2.478000
...,...,...,...,...,...,...,...,...,...,...
2020-08-27,1.905276,1.535991,1.922741,0.554494,1.914340,1.732541,1.811231,-1.265035,1.737835,-1.079971
2020-08-28,1.943660,1.543259,1.963704,0.583440,1.953079,1.755700,,-1.278229,1.761136,
2020-08-29,1.980951,1.565063,2.005972,0.584990,1.990715,1.777895,,-1.285864,1.783468,
2020-08-30,2.005401,1.575965,2.038534,0.538470,2.015392,1.790847,,-1.291899,1.796498,


In [23]:
state_dfs[my_state].interpolate().plot()

In [24]:
state_dfs[my_state].plot()

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

# input is (n_samples, n_timesteps, n_features)
# output is (n_samples, n_features)

timesteps = 14
features = state_dfs[my_state].columns.size

model = Sequential()
model.add(LSTM(20, activation='relu', input_shape=(timesteps, features)))
model.add(Dense(features))
model.compile(optimizer='adam', loss='mse')

In [11]:
my_state_data = state_dfs[my_state].values

X_windows = (
    np.expand_dims(np.arange(timesteps), 0) +
    np.expand_dims(np.arange(my_state_data.shape[0] - timesteps), 0).T
)
y_indices = np.arange(timesteps, my_state_data.shape[0])

X = np.nan_to_num(my_state_data[X_windows])
y = np.nan_to_num(my_state_data[y_indices])

split = int(X.shape[0] * 0.8)

X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

In [12]:
model.fit(X_train, y_train, epochs=3000, verbose=1, validation_data=(X_test, y_test))

oss: 0.1500
Epoch 2811/3000
Epoch 2812/3000
Epoch 2813/3000
Epoch 2814/3000
Epoch 2815/3000
Epoch 2816/3000
Epoch 2817/3000
Epoch 2818/3000
Epoch 2819/3000
Epoch 2820/3000
Epoch 2821/3000
Epoch 2822/3000
Epoch 2823/3000
Epoch 2824/3000
Epoch 2825/3000
Epoch 2826/3000
Epoch 2827/3000
Epoch 2828/3000
Epoch 2829/3000
Epoch 2830/3000
Epoch 2831/3000
Epoch 2832/3000
Epoch 2833/3000
Epoch 2834/3000
Epoch 2835/3000
Epoch 2836/3000
Epoch 2837/3000
Epoch 2838/3000
Epoch 2839/3000
Epoch 2840/3000
Epoch 2841/3000
Epoch 2842/3000
Epoch 2843/3000
Epoch 2844/3000
Epoch 2845/3000
Epoch 2846/3000
Epoch 2847/3000
Epoch 2848/3000
Epoch 2849/3000
Epoch 2850/3000
Epoch 2851/3000
Epoch 2852/3000
Epoch 2853/3000
Epoch 2854/3000
Epoch 2855/3000
Epoch 2856/3000
Epoch 2857/3000
Epoch 2858/3000
Epoch 2859/3000
Epoch 2860/3000
Epoch 2861/3000
Epoch 2862/3000
Epoch 2863/3000
Epoch 2864/3000
Epoch 2865/3000
Epoch 2866/3000
Epoch 2867/3000
Epoch 2868/3000
Epoch 2869/3000
Epoch 2870/3000
Epoch 2871/3000
Epoch 2872/3

<tensorflow.python.keras.callbacks.History at 0x7fc8a41710a0>

In [13]:
df_pred = pd.DataFrame(model.predict(X_test), columns=column_names + '_hat')
df_true = pd.DataFrame(y_test, columns=column_names)

pd.concat([df_pred, df_true], axis=1).plot()

In [14]:
df = pd.DataFrame()
df['yhat'] = model.predict(X_test)[:, 0]
df['y'] = y_test[:, 0]

df.plot()

In [15]:
y_train

array([[-1.1556652 , -1.512908  ,  0.        , ...,  1.51891379,
        -1.19024538,  1.58686432],
       [-1.14797027, -1.48020228,  0.        , ...,  1.53710622,
        -1.18382856,  1.52620561],
       [-1.13849959, -1.41115689,  0.        , ...,  1.67196296,
        -1.17688033,  1.50641834],
       ...,
       [ 1.09434948,  0.94728852,  0.87987183, ..., -1.08781393,
         1.0886832 , -0.94862899],
       [ 1.12749686,  0.99089613,  0.92542722, ..., -1.08859849,
         1.13484406, -0.9520079 ],
       [ 1.16774724,  1.02360185,  0.97526158, ..., -1.09836336,
         1.17820683, -0.96158508]])