In [None]:
import os
import sys
import urllib.request

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input

from scipy import stats
import statsmodels.api as sm
from statsmodels.tsa.stattools import grangercausalitytests

import stellargraph as sg

In [None]:
# COVID data set hosted in the scalation/data GitHub repository; the CSV
# column at position 1 is used as the row index.
DATA_URL = "https://raw.githubusercontent.com/scalation/data/master/COVID/CLEANED_35_Updated.csv"
csv_data = pd.read_csv(DATA_URL, index_col=1)

In [None]:
# Names of the two columns that serve as graph nodes / model series.
params = ["deathIncrease", "hospitalizedIncrease"]
# Keep only those columns and drop the first 44 rows.
# NOTE(review): the reason for the 44-row offset is not visible here — confirm.
x = csv_data.loc[:, params].iloc[44:]

In [None]:

# Build the square adjacency matrix over `params`: 1 on the diagonal and the
# Pearson correlation coefficient between the two series everywhere else.
# (An earlier variant weighted edges with the ssr_ftest statistic of
# grangercausalitytests at lag 10 instead of the correlation.)
#
# The rows are collected in a plain list and turned into a DataFrame once;
# DataFrame.append, which the previous version relied on, no longer exists in
# pandas >= 2.0.
adj_rows = []
for i in params:
    vals = []
    for j in params:
        if i == j:
            vals.append(1.0)
        else:
            # pearsonr returns (coefficient, p-value); only the coefficient is used.
            vals.append(stats.pearsonr(x[i], x[j])[0])
    adj_rows.append(vals)

adj = pd.DataFrame(adj_rows, index=params, columns=params)
adj

In [None]:
# Flip the frame so each row is one series (node) and each column one time step.
covid_data = x.T

In [None]:
# Rows of covid_data are the nodes, columns are the time steps.
num_nodes = covid_data.shape[0]
time_len = covid_data.shape[1]
print("No. of states:", num_nodes, "\nNo of timesteps:", time_len)

covid_data.head()

In [None]:
def train_test_split(data, train_portion):
    """Cut a (series x time) frame into train / validation / test arrays along time.

    Args:
        data: DataFrame whose columns are consecutive time steps.
        train_portion: fraction of the time axis covered by training plus
            validation; 0.1 is subtracted from it to size the training split.

    Returns:
        Tuple ``(train_data, val_data, test_data)`` of NumPy arrays. Training
        takes the first ``int(T * (train_portion - 0.1))`` columns, validation
        the next ``int(T * 0.1) + 1`` columns, and test the remainder, where
        ``T`` is the number of columns in ``data``.
    """
    n_steps = data.shape[1]
    train_end = int(n_steps * (train_portion - 0.1))
    val_end = train_end + int(n_steps * 0.1) + 1

    # Consecutive, non-overlapping column ranges in chronological order.
    spans = (
        slice(None, train_end),
        slice(train_end, val_end),
        slice(val_end, None),
    )
    train_data, val_data, test_data = (np.array(data.iloc[:, span]) for span in spans)
    return train_data, val_data, test_data

In [None]:
# Passed to train_test_split as train_portion; that function subtracts 0.1
# from it to size the training split and gives roughly 0.1 of the time steps
# to validation.
train_rate = 0.6277

In [None]:
# Chronological split, followed by a shape report for each part.
train_data, val_data, test_data = train_test_split(covid_data, train_rate)
for label, split in (
    ("Train data: ", train_data),
    ("Val data: ", val_data),
    ("Test data: ", test_data),
):
    print(label, split.shape)


In [None]:
def scale_data(train_data, val_data, test_data):
    """Min-max scale the three splits with the training split's extremes.

    The minimum and maximum are taken over the whole ``train_data`` array
    (all rows together) and reused for validation and test, so values outside
    the training range map outside [0, 1].

    Returns:
        Tuple ``(train_scaled, val_scaled, test_scaled)``.
    """
    lower = train_data.min()
    upper = train_data.max()
    span = upper - lower

    def _rescale(block):
        # Same affine map for every split.
        return (block - lower) / span

    return _rescale(train_data), _rescale(val_data), _rescale(test_data)

In [None]:
# Min-max scale every split using the min/max of the training split only.
train_scaled, val_scaled, test_scaled = scale_data(train_data, val_data, test_data)

In [None]:
# Window parameters consumed by sequence_data_preparation and GCN_LSTM.
seq_len = 10  # number of consecutive time steps in each input window
pre_len = 1  # the target is the value pre_len steps after the window ends

In [None]:
   
def sequence_data_preparation(seq_len, pre_len, train_data, val_data, test_data):
    """Turn each (nodes x time) split into sliding-window inputs and targets.

    Every window spans ``seq_len + pre_len`` consecutive columns. The first
    ``seq_len`` columns form the input and the window's last column is the
    target, i.e. the value ``pre_len`` steps after the end of the input.

    Args:
        seq_len: number of time steps in each input window.
        pre_len: distance (in steps) from the end of the input to the target.
        train_data, val_data, test_data: 2-D arrays shaped (nodes, time).

    Returns:
        ``(trainX, trainY, valX, valY, testX, testY)``. Each ``X`` has shape
        (windows, nodes, seq_len) and each ``Y`` shape (windows, nodes). A
        split with fewer than ``seq_len + pre_len`` columns yields empty
        arrays of shape (0,).
    """
    window = seq_len + pre_len

    def _make_windows(series):
        # One window per start offset that still leaves a full window.
        n_windows = series.shape[1] - int(window - 1)
        inputs, targets = [], []
        for start in range(n_windows):
            chunk = series[:, start : start + window]
            inputs.append(chunk[:, :seq_len])
            targets.append(chunk[:, -1])
        return np.array(inputs), np.array(targets)

    trainX, trainY = _make_windows(train_data)
    valX, valY = _make_windows(val_data)
    testX, testY = _make_windows(test_data)

    return trainX, trainY, valX, valY, testX, testY

In [None]:
from stellargraph.layer import GCN_LSTM

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100``. The division is by
    ``y_true`` itself, so zeros in ``y_true`` lead to inf/nan results.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    relative_error = np.abs((actual - forecast) / actual)
    return np.mean(relative_error) * 100

In [None]:
def s_mean_absolute_percentage_error(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Computes ``mean(|y_true - y_pred| / (|y_true| + |y_pred|)) * 200``. A
    position where both values are zero divides 0 by 0 and yields nan.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    abs_error = np.abs(actual - forecast)
    magnitude = np.abs(actual) + np.abs(forecast)
    return np.mean(abs_error / magnitude) * 200

In [None]:
# Build a GCN_LSTM over the correlation-based adjacency matrix, wrap its
# input/output tensors in a Keras Model and print the layer summary.
# NOTE(review): this cell uses lstm_layer_sizes=[150] while the training loop
# further down builds its models with [50], so the summary shown here is not
# the architecture that gets trained — confirm which size is intended.
gcn_lstm = GCN_LSTM(
        seq_len=seq_len,
        adj=adj,
        gc_layer_sizes=[32, 16],
        gc_activations=["relu", "relu"],
        lstm_layer_sizes=[150],
        lstm_activations=["tanh"],
    )
x_input, x_output = gcn_lstm.in_out_tensors()
model = Model(inputs=x_input, outputs=x_output)
model.summary()

In [None]:
# Train one GCN-LSTM per forecast horizon (pre_len = 1..14) and print the
# sMAPE of the test predictions for the first node (column 0, i.e. params[0])
# after mapping values back to their original scale.
seq_len = 10

# The scaling statistics are the same for every horizon, so compute them once
# instead of on every iteration.
max_deaths = train_data.max()
min_deaths = train_data.min()
value_range = max_deaths - min_deaths

# sMAPE per horizon, kept so the scores can be inspected or plotted afterwards.
smape_by_horizon = {}

for pre_len in range(1, 15):
    # Fourteen models are built in this loop; drop the global Keras state left
    # by the previous one so memory use does not keep growing.
    keras.backend.clear_session()

    trainX, trainY, valX, valY, testX, testY = sequence_data_preparation(
        seq_len, pre_len, train_scaled, val_scaled, test_scaled
    )

    gcn_lstm = GCN_LSTM(
        seq_len=seq_len,
        adj=adj,
        gc_layer_sizes=[32, 16],
        gc_activations=["relu", "relu"],
        lstm_layer_sizes=[50],
        lstm_activations=["tanh"],
    )
    x_input, x_output = gcn_lstm.in_out_tensors()
    model = Model(inputs=x_input, outputs=x_output)
    model.compile(optimizer="adam", loss="mae", metrics=['mse'])
    history = model.fit(
        trainX,
        trainY,
        epochs=150,
        batch_size=10,
        verbose=0,
        validation_data=(valX, valY),
    )
    ythat = model.predict(trainX)
    yhat = model.predict(testX)

    # Invert the min-max scaling applied by scale_data.
    train_rescref = trainY * value_range + min_deaths
    test_rescref = testY * value_range + min_deaths

    train_rescpred = ythat * value_range + min_deaths
    test_rescpred = yhat * value_range + min_deaths

    pred = test_rescpred[:, 0]
    true = test_rescref[:, 0]
    smape_by_horizon[pre_len] = s_mean_absolute_percentage_error(true, pred)
    print(smape_by_horizon[pre_len])