In [None]:
import covidcast
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

%cd ../code
from model import Model

%cd ../notebooks

In [None]:
#Load data
# Both signals are requested at state level over the same date window,
# so the two frames cover the same period.
start_date, end_date = datetime(2020, 10, 15), datetime(2020, 12, 15)

# Signal that becomes the model input X in the processing cell.
X_df = covidcast.signal(
    'fb-survey', 'smoothed_cli',
    start_date, end_date,
    geo_type='state',
)

# Signal that becomes the fit target Y in the processing cell.
Y_df = covidcast.signal(
    'indicator-combination', 'confirmed_7dav_incidence_num',
    start_date, end_date,
    geo_type='state',
)

In [None]:
#Process data for use in Tensorflow
states = ['ak', 'al', 'ar', 'az', 'ca', 'co', 'ct', 'de', 'fl', 'ga',
       'hi', 'ia', 'id', 'il', 'in', 'ks', 'ky', 'la', 'ma', 'md', 'me',
       'mi', 'mn', 'mo', 'ms', 'mt', 'nc', 'nd', 'ne', 'nh', 'nj', 'nm',
       'nv', 'ny', 'oh', 'ok', 'or', 'pa', 'ri', 'sc', 'sd', 'tn',
       'tx', 'ut', 'va', 'vt', 'wa', 'wi', 'wv', 'wy']


def signal_to_tensor(signal_df, geo_values):
    """Convert a long-format signal frame into a float32 tensor.

    Parameters
    ----------
    signal_df : pandas.DataFrame
        Frame with 'geo_value', 'time_value' and 'value' columns.
    geo_values : list of str
        Geo codes to keep; their order defines the order of the last axis.

    Returns
    -------
    tf.Tensor
        float32 tensor of shape (1, number of time values, number of geo_values).
    """
    rows = signal_df[signal_df['geo_value'].isin(geo_values)]

    # Pivot on the actual date rather than a per-geo running counter, so a geo
    # with a missing day gets a NaN on that date instead of having its later
    # values silently shifted onto the wrong dates.
    wide = pd.pivot_table(rows, index='geo_value', columns='time_value', values='value')

    # Make the row order follow `geo_values` explicitly; later cells index the
    # geo axis by position in `states`. A geo absent from the data becomes a
    # NaN row rather than shifting every geo after it.
    wide = wide.reindex(index=geo_values)

    series = tf.transpose(wide.values, perm=[1,0]) #shape=(time values, geo_values)
    series = tf.expand_dims(series, axis=0) #shape=(1, time values, geo_values)
    return tf.cast(series, dtype=tf.float32)


X = signal_to_tensor(X_df, states)
Y = signal_to_tensor(Y_df, states)

In [None]:
#Initialize model and parameters

#You can apply constraints directly to layers by setting
#the kernel_constraints parameter for a specific layer
#
#https://www.tensorflow.org/api_docs/python/tf/keras/constraints

# p is passed to Model and also sets the p-1 left padding applied to the
# inputs in the training and evaluation cells.
# NOTE(review): presumably the window/lag length used by Model -- confirm in ../code/model.py
p = 30

# X is laid out as (1, time values, geo_values): the pivoted (geo, time) matrix
# is transposed and then given a leading axis. The geo axis is therefore
# axis 2; axis 1 is the number of time steps.
m = X.shape[2] #number of geo_values

kernel_constraint = tf.keras.constraints.NonNeg()
lr = 1

model = Model(
    p = p,
    m = m,
    kernel_constraint = kernel_constraint
)

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
optimizer = tf.keras.optimizers.Adam(learning_rate = lr)

def MSE(y_true, y_pred):
    """Return the mean squared difference between y_true and y_pred.

    The mean is taken over every element, so the result is a scalar tensor.
    """
    return tf.reduce_mean((y_true-y_pred)**2)

loss_object = MSE

In [None]:
#Training
def pad_time_axis(inputs):
    """Prepend p-1 zero time steps to a rank-3 (batch, time, geo) tensor.

    tf.pad needs exactly one [before, after] pair per tensor dimension, so a
    rank-3 input takes three pairs; only the time axis (axis 1) is padded,
    and only at the front.
    NOTE(review): presumably this lets Model emit one output per original time
    step -- confirm against Model.
    """
    return tf.pad(
        inputs,
        paddings=[[0, 0], [p-1, 0], [0, 0]],
    )


@tf.function
def train_step(X,Y):
    """Apply one optimizer update using the loss over the full series.

    Parameters
    ----------
    X : tf.Tensor
        Unpadded model input; it is left-padded along the time axis here.
    Y : tf.Tensor
        Target values compared against the model output.
    """
    X_padded = pad_time_axis(X)

    with tf.GradientTape() as tape:
        Y_hat = model(X_padded, training=True)
        loss = loss_object(Y, Y_hat)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(
        zip(gradients, model.trainable_variables))


EPOCHS = 1000
MODEL_PATH = "model.ckpt"
best_loss = float('inf')

# X does not change between epochs, so the padded copy used to score the
# updated weights is built once instead of on every iteration.
X_padded = pad_time_axis(X)

for epoch in range(EPOCHS):
    train_step(X,Y)

    # Re-score after the update so the saved weights and the recorded loss
    # refer to the same parameters.
    # NOTE(review): this forward pass keeps training=True as before; if Model
    # contains layers that behave differently at inference, consider
    # training=False here.
    Y_hat = model(X_padded, training=True)
    loss = float(loss_object(Y, Y_hat)) # plain float keeps the log readable

    if loss < best_loss:
        best_loss = loss
        model.save_weights(MODEL_PATH)

    print("Epoch: {}, Current Loss: {}, Best Loss: {}".format(epoch, loss, best_loss))

In [None]:
#Evaluation
model.load_weights(MODEL_PATH).expect_partial() #load best weights from training

dates = X_df['time_value'].sort_values().unique()

# Left-pad the time axis (axis 1) with p-1 zeros, one [before, after] pair
# per dimension of the rank-3 input.
X_padded = tf.pad(
    X,
    paddings=[[0, 0], [p-1, 0], [0, 0]],
)
Y_hat = model(X_padded)


def state_series(signal_df, state):
    """Return the 'value' column for one state, ordered by 'time_value'.

    Sorting explicitly keeps the series aligned with the sorted `dates` axis
    instead of depending on the row order the data was returned in.
    """
    rows = signal_df[signal_df['geo_value'] == state]
    return rows.sort_values('time_value')['value']


for i, state in enumerate(states):
    x = state_series(X_df, state)
    y = state_series(Y_df, state)
    y_hat = Y_hat[0,:,i] # fitted series for the i-th state on the last axis

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20,5))

    # Left panel: the input signal.
    ax[0].plot(dates, x, label='smoothed_cli')
    ax[0].legend()
    ax[0].set_title('{}'.format(state))

    # Right panel: the target signal with the model fit overlaid.
    ax[1].plot(dates, y, label='indicator-combination')
    ax[1].plot(dates, y_hat, label='fitted curve', linestyle='--')
    ax[1].legend()
    ax[1].set_title('{}'.format(state))

    plt.show()
    plt.close(fig) # release the figure; one is created per state