In [None]:
import os
import sys
import urllib.request

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input

In [None]:
import stellargraph as sg

In [None]:
# Load the state-by-time matrix (per the file name) and the graph adjacency matrix.
# Both paths use forward slashes: a raw-string path with backslashes only
# resolves on Windows, whereas "/" is accepted on every platform.
adj_path = "dataset/adj/adj1.csv"
x = pd.read_csv("dataset/state_X_time_matrix.csv", index_col=0)
y = pd.read_csv(adj_path, index_col=0)
# Keep the adjacency as a plain 2-D ndarray; NumPy discourages the np.matrix
# class, and the cells below only read its shape and pass it to the model.
sensor_dist_adj = y.to_numpy()

In [None]:
# Discard the first 70 columns of the raw frame, in place.
# NOTE(review): not idempotent - every re-run of this cell removes another
# 70 columns from `x`; reload the CSV before running it again.
leading_columns = x.columns[list(range(70))]
x.drop(columns=leading_columns, inplace=True)

In [None]:
# Order the rows by their index label; sort_index() returns a new frame,
# so `x` itself is left untouched by this step.
state_data = x.sort_index()

In [None]:
# Remove the rows labelled 72-76.
# NOTE(review): presumably these rows have no counterpart in the adjacency
# matrix - confirm against the data set.
# errors="ignore" keeps the cell re-runnable: once the labels are gone a
# second run is a no-op instead of raising KeyError. The bare
# `state_data.head()` that used to precede this line was dropped because a
# notebook only renders the final expression of a cell, so it showed nothing.
state_data = state_data.drop([72, 73, 74, 75, 76], errors="ignore")

In [None]:
# Rows of the frame are the graph nodes, columns are the timesteps.
num_nodes, time_len = state_data.shape
shape_report = ("No. of states:", num_nodes, "\nNo of timesteps:", time_len)
print(*shape_report)

# Preview the first rows of the cleaned frame.
state_data.head()

In [None]:
def train_test_split(data, train_portion):
    """Split a (rows x timesteps) frame chronologically along its columns.

    Args:
        data: pandas DataFrame whose columns are ordered in time.
        train_portion: fraction of the columns assigned to the training part;
            the cut index is ``int(n_columns * train_portion)``.

    Returns:
        ``(train_data, test_data)`` as NumPy arrays: the leading columns up to
        the cut index, and every remaining column.
    """
    split_at = int(data.shape[1] * train_portion)
    leading = data.iloc[:, :split_at]
    trailing = data.iloc[:, split_at:]
    return np.array(leading), np.array(trailing)

In [None]:
# Fraction of the time axis used for training; the remaining columns
# become the test set when this is passed to train_test_split().
train_rate = 0.6

In [None]:
# Chronological split of the node-by-time frame, then a quick shape report.
train_data, test_data = train_test_split(state_data, train_rate)
for label, array in (
    ("Train data: ", train_data),
    ("Test data: ", test_data),
    ("Adj data: ", sensor_dist_adj),
):
    print(label, array.shape)

In [None]:
def scale_data(train_data, test_data):
    """Min-max scale both splits using statistics of the training split only.

    The minimum and maximum are taken over the whole training array (a single
    global pair, not per row), so the test split never influences the scaling.

    Args:
        train_data: NumPy array used to derive the minimum and the range.
        test_data: NumPy array scaled with the training minimum and range.

    Returns:
        ``(train_scaled, test_scaled)``. Training values land in [0, 1]; test
        values may fall outside that interval.
    """
    min_deaths = train_data.min()
    value_range = train_data.max() - min_deaths
    if value_range == 0:
        # Constant training data: dividing by a zero range would turn every
        # value into NaN/inf. Fall back to a unit range so the data is only
        # shifted, the same convention scikit-learn's MinMaxScaler uses.
        value_range = 1
    train_scaled = (train_data - min_deaths) / value_range
    test_scaled = (test_data - min_deaths) / value_range
    return train_scaled, test_scaled

In [None]:
# Scale both splits with the min/max of the training split.
train_scaled, test_scaled = scale_data(train_data, test_data)

In [None]:
# Number of consecutive past timesteps fed to the model per sample.
seq_len = 10
# How many steps after the end of the input window the target lies.
pre_len = 1

In [None]:
def sequence_data_preparation(seq_len, pre_len, train_data, test_data):
    """Cut sliding windows out of two (nodes x timesteps) arrays.

    Every window spans ``seq_len + pre_len`` consecutive timesteps. Its first
    ``seq_len`` columns form the model input and its last column is the
    target, i.e. the value ``pre_len`` steps after the end of the input.

    Args:
        seq_len: number of timesteps in each input window.
        pre_len: prediction horizon in timesteps.
        train_data: 2-D array (nodes x timesteps) for training.
        test_data: 2-D array (nodes x timesteps) for testing.

    Returns:
        ``(trainX, trainY, testX, testY)`` as NumPy arrays; X arrays are
        (samples, nodes, seq_len) and Y arrays are (samples, nodes).
    """
    span = seq_len + pre_len

    def _slide(series):
        # One chunk per admissible start index, so the final chunk ends
        # exactly on the last available timestep.
        n_starts = series.shape[1] - int(seq_len + pre_len - 1)
        chunks = [series[:, start : start + span] for start in range(n_starts)]
        inputs = np.array([chunk[:, :seq_len] for chunk in chunks])
        targets = np.array([chunk[:, -1] for chunk in chunks])
        return inputs, targets

    trainX, trainY = _slide(train_data)
    testX, testY = _slide(test_data)
    return trainX, trainY, testX, testY

In [None]:
# Display the unscaled training array (NumPy abbreviates large arrays).
train_data

In [None]:
# Turn the scaled series into (input window, target) samples for both splits.
trainX, trainY, testX, testY = sequence_data_preparation(
    seq_len=seq_len,
    pre_len=pre_len,
    train_data=train_scaled,
    test_data=test_scaled,
)
# Report shapes in the order: train inputs, train targets, test inputs, test targets.
for windowed in (trainX, trainY, testX, testY):
    print(windowed.shape)

## StellarGraph Graph Convolution and LSTM model

In [None]:
from stellargraph.layer import GCN_LSTM

In [None]:
# Configure the graph-convolution + LSTM model on the fixed adjacency matrix.
# NOTE(review): layer sizes and activations are hard-coded here; no tuning
# or justification is visible in this notebook.
gcn_lstm = GCN_LSTM(
    seq_len=seq_len,  # same window length used to build trainX / testX
    adj=sensor_dist_adj,
    gc_layer_sizes=[32, 8],
    gc_activations=["relu", "relu"],
    lstm_layer_sizes=[150],
    lstm_activations=["tanh"],
)

In [None]:
# Obtain the input/output tensors that the Keras Model below is built from.
x_input, x_output = gcn_lstm.in_out_tensors()

In [None]:
# Wrap the tensors in a Keras functional Model so it can be compiled and fit.
model = Model(inputs=x_input, outputs=x_output)

In [None]:
# Optimise mean absolute error with Adam; mean squared error is tracked as an extra metric.
model.compile(optimizer="adam", loss="mae", metrics=['mse'])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# Train on the windowed training samples. No validation data is passed, so
# `history.history` only holds training metrics (there is no "val_loss").
# NOTE(review): no random seed is set anywhere above, so results will vary
# between runs.
history = model.fit(
    trainX,
    trainY,
    epochs=250,
    batch_size=10,
    verbose=0,  # silence per-epoch progress output
)

In [None]:
# Report the final training loss next to a loss on the held-out test windows.
# fit() above is called without validation data, so there is normally no
# "val_loss" in the history; in that case the model is evaluated on the test
# windows directly, so the "Test loss:" label is always followed by a value.
final_metrics = history.history
if "val_loss" in final_metrics:
    test_loss = final_metrics["val_loss"][-1]
else:
    # evaluate() returns [loss, *metrics] when metrics are compiled in and a
    # bare scalar otherwise; atleast_1d handles both shapes.
    test_loss = np.atleast_1d(model.evaluate(testX, testY, verbose=0))[0]

print(
    "Train loss: ",
    final_metrics["loss"][-1],
    "\nTest loss:",
    test_loss,
)

In [None]:
# Plot the training curves recorded in `history` with StellarGraph's helper.
sg.utils.plot_history(history)

In [None]:
# Model predictions (still in scaled units) for the training windows ...
ythat = model.predict(trainX)
# ... and for the held-out test windows.
yhat = model.predict(testX)

## Rescale values

Rescale the predicted values to the original value range of the timeseries.

In [None]:
# Invert the min-max scaling using the statistics of the unscaled training split.
max_deaths = train_data.max()
min_deaths = train_data.min()
deaths_range = max_deaths - min_deaths

# Reference targets and model predictions, back in the original units.
train_rescref, test_rescref, train_rescpred, test_rescpred = (
    np.array((scaled * deaths_range) + min_deaths)
    for scaled in (trainY, testY, ythat, yhat)
)

In [None]:
# Sanity-check the shapes and the first rescaled test sample. Each value is
# printed explicitly because a notebook cell only renders its final bare
# expression, so a stack of bare expressions would show just the last one.
print(testX.shape)
print(test_rescpred.shape)
print(test_rescref[0])
print(test_rescpred[0])

### Naive prediction benchmark (using latest observed value)

In [None]:
## Naive prediction benchmark (using latest observed value)

# The naive forecast for every window is simply its last observed input value.
testnpred = np.array(testX)[:, :, -1]
# Undo the min-max scaling with the same inverse transform that is applied to
# the model outputs. Multiplying by max_deaths alone is only correct when
# min_deaths == 0 and would otherwise bias the benchmark.
testnpredc = testnpred * (max_deaths - min_deaths) + min_deaths

In [None]:
## Performance measures

seg_mael = []  # per-node mean absolute error of the model
seg_masel = []  # per-node mean absolute scaled error (model MAE / naive MAE)
seg_nmael = []  # per-node mean absolute error of the naive benchmark

# The rescaled arrays are (samples, nodes), so their transpose is indexed by
# node. Iterate over the node axis: testX.shape[-1] is the window length
# (seq_len), which would score only the first seq_len nodes.
for j in range(test_rescref.shape[1]):

    seg_mael.append(
        np.mean(np.abs(test_rescref.T[j] - test_rescpred.T[j]))
    )  # Mean Absolute Error for NN
    seg_nmael.append(
        np.mean(np.abs(test_rescref.T[j] - testnpredc.T[j]))
    )  # Mean Absolute Error for naive prediction
    if seg_nmael[-1] != 0:
        seg_masel.append(
            seg_mael[-1] / seg_nmael[-1]
        )  # Ratio of the two: Mean Absolute Scaled Error
    else:
        # The ratio is undefined when the naive forecast is perfect.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        seg_masel.append(np.nan)

print("Total (ave) MAE for NN: " + str(np.mean(np.array(seg_mael))))
print("Total (ave) MAE for naive prediction: " + str(np.mean(np.array(seg_nmael))))

#### Plot of actual and predicted total deaths (summed over all nodes)

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Entries whose true value is exactly zero are excluded, because the
    percentage error is undefined there and would otherwise turn the whole
    result into inf/NaN.

    Args:
        y_true: array-like of reference values.
        y_pred: array-like of predictions, broadcastable against ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the entries with a
        non-zero true value, or NaN when no such entry exists.
    """
    y_true, y_pred = np.broadcast_arrays(np.asarray(y_true), np.asarray(y_pred))
    defined = y_true != 0
    if not defined.any():
        return np.nan
    relative_error = (y_true[defined] - y_pred[defined]) / y_true[defined]
    return np.mean(np.abs(relative_error)) * 100

In [None]:
# Aggregate over all nodes: total predicted and total true deaths for each
# test sample (one sample per timestep of the test period).
pred_sum = [pred_row.sum() for pred_row in test_rescpred]
true_sum = [true_row.sum() for true_row in test_rescref]

# Keep the score under its own name instead of `x`, which holds the raw input
# DataFrame; overwriting it breaks any re-run of the preprocessing cells in a
# live kernel. The value is printed so the result is actually visible.
total_mape = mean_absolute_percentage_error(true_sum, pred_sum)
print("MAPE of total deaths (%):", total_mape)

In [None]:
## Visualisation of the full test period: predicted vs. true totals
fig1, ax1 = plt.subplots(figsize=(15, 8))
a_pred, a_true = pred_sum, true_sum
ax1.plot(a_pred, "r-", label="prediction")
ax1.plot(a_true, "b-", label="true")
ax1.set_xlabel("time")
ax1.set_ylabel("Deaths")
ax1.legend(loc="best", fontsize=10)
plt.show()