# Imports & dependencies

In [1]:
# Note: I took the dataset taxi_data.h5 from Baidu

In [2]:
# Date: Feb-22th-2023
# Description: This source code creates hetergogenous graphs for taxi deamnd and supply prediciton 
# (with 3 edge types) and then uses them to train and test with GNN and LSTM models for predicting the demand 
# and suply values
# by: Mahmoud Nazzal

In [3]:
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch_geometric.transforms as T
from torch_geometric.nn import HeteroConv, GCNConv, SAGEConv, GATConv, Linear
import torch.optim as optim
import networkx as nx
from torch_geometric.data import HeteroData
# # resources:
# 1.https://pytorch-geometric.readthedocs.io/en/2.0.0/notes/heterogeneous.html?highlight=HeteroGNN#using-the-heterogenous-convolution-wrapper 
# 2.https://levelup.gitconnected.com/forecasting-walmart-quarterly-revenue-pytorch-lstm-example-b4e4b20862a7

In [4]:
# this routine converts adjacency matrices into adjacency lists
def A_to_edge_index(A):
    G=nx.from_numpy_matrix(A)
    edge_index=list(G.edges)
    z=torch.tensor(np.transpose(edge_index))
    return z

In [5]:
# if a GPU is aviaable, use it, o.w., use the CPU
cuda_device = 0
if torch.cuda.is_available():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.cuda.set_device(cuda_device)

# Reading the data

In [6]:
import pandas as pd
import numpy as np
# 1. load the data from the pytorch files:
# adj=torch.load('adj.pt')
# adj2=adj[2]
adj_0=torch.load('adj_0.pt')
adj_1=torch.load('adj_1.pt')
adj_2=torch.load('adj_2.pt')

# adj_0=adj_0[0:10, 0:10]
# adj_1=adj_1[0:10, 0:10]
# adj_2=adj_2[0:10, 0:10]

# 2. convert the adj  matrixes to adjacency lists
edge_list0=A_to_edge_index(adj_0)
edge_list1=A_to_edge_index(adj_1)
edge_list2=A_to_edge_index(adj_2)
# 3. load the training and testing node feature matrices
data0=torch.load('data0.pt')
data1=torch.load('data1.pt')
X_train_new=torch.load('X_train_new.pt')
X_test_new=torch.load('X_test_new.pt')
# print(X_train_new.shape)
# print(X_test_new.shape)

In [7]:
print(adj_0)

[[1.         0.16438753 0.         ... 0.         0.         0.        ]
 [0.16438753 1.         0.         ... 0.         0.         0.        ]
 [0.         0.         1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         0.92816226 0.89872131]
 [0.         0.         0.         ... 0.92816226 1.         0.98325835]
 [0.         0.         0.         ... 0.89872131 0.98325835 1.        ]]


In [8]:
# X_train_new=X_train_new[0:10,:,:]
# X_test_new=X_test_new[0:10,:,:]

print(X_train_new.shape)
print(X_test_new.shape)

torch.Size([266, 216, 180])
torch.Size([266, 216, 180])


In [9]:
import numpy.matlib


In [10]:
from scipy.linalg import block_diag

In [11]:
# obtain data for the cars in the first 10 regions
arrz=[]
for kk in range(100):
    arr = np.empty((0,216))
    arr_tru=np.empty((0,216))
    t_s=kk
    for i in range(40):
        n_nodes_in_reg=np.sum(data1[t_s, i])

        message_nodes_in_reg=X_train_new[i,:, t_s]
        X_temp=np.matlib.repmat(message_nodes_in_reg, n_nodes_in_reg, 1)
        arr = np.vstack((arr, X_temp))

        message_nodes_in_reg_tru=X_train_new[i,:,t_s+1]
        X_temp2=np.matlib.repmat(message_nodes_in_reg_tru, n_nodes_in_reg, 1)
        arr_tru = np.vstack((arr_tru, X_temp2))

    # X=np.asarray(X)   
#     print(arr.shape)
    arrz.append(arr.shape[0])
#     print(arr_tru.shape)   

In [12]:
# print(arrz)
# # hist()

In [13]:
# import matplotlib.pyplot as plt
# import numpy as np


# hist_array, bin_array = np.histogram(np.array(arrz))
# # Set some configurations for the chart
# # plt.figure(figsize=[10, 5])
# plt.xlim(min(bin_array), max(bin_array))
# # plt.grid(axis='y', alpha=0.75)
# plt.xlabel('Edge Values', fontsize=20)
# plt.ylabel('Histogram Values', fontsize=20)
# plt.title('Histogram Chart', fontsize=25)

# # Create the chart
# # plt.bar(bin_array[:-1], hist_array, width=0.5, color='blue')
# # Display the chart
# plt.show()

In [14]:
# obtain adj info for the cars in the first 10 regions
t_s=0
n_nodes_in_reg_list=[]
A = np.empty((0,0))
for i in range(40):
        n_nodes_in_reg=np.sum(data1[t_s, i])
        block=np.matlib.repmat(adj_0[i,i], n_nodes_in_reg, n_nodes_in_reg);
        A=block_diag(A, block)
A=A.astype(float)       
print(A.shape)

edge_list0=A_to_edge_index(A)
print(edge_list0.shape)

(1916, 1916)
torch.Size([2, 77583])


In [15]:
# print(A[0:10,0:500])

In [16]:
def get_graph_per_ts(t_s):
# obtain data for the cars in the first 10 regions
    arr = np.empty((0,216))
    arr_tru=np.empty((0,216))
#     t_s=0
    for i in range(40):
        n_nodes_in_reg=np.sum(data1[t_s, i])

        message_nodes_in_reg=X_train_new[i,:, t_s]
        X_temp=np.matlib.repmat(message_nodes_in_reg, n_nodes_in_reg, 1)
        arr = np.vstack((arr, X_temp))

        message_nodes_in_reg_tru=X_train_new[i,:,t_s+1]
        X_temp2=np.matlib.repmat(message_nodes_in_reg_tru, n_nodes_in_reg, 1)
        arr_tru = np.vstack((arr_tru, X_temp2))

    # X=np.asarray(X)   
#     print(arr.shape)
#     print(arr_tru.shape)  


    # obtain adj info for the cars in the first 10 regions
#     t_s=0
    n_nodes_in_reg_list=[]
    A0 = np.empty((0,0))
    A1 = np.empty((0,0))
    A2 = np.empty((0,0))
    for i in range(40):
            n_nodes_in_reg=np.sum(data1[t_s, i])
            block0=np.matlib.repmat(adj_0[i,i], n_nodes_in_reg, n_nodes_in_reg);
            A0=block_diag(A0, block0)
            
            block1=np.matlib.repmat(adj_0[i,i], n_nodes_in_reg, n_nodes_in_reg);
            A1=block_diag(A1, block1)
            
            block2=np.matlib.repmat(adj_0[i,i], n_nodes_in_reg, n_nodes_in_reg);
            A2=block_diag(A2, block2)
    
#     A0=A0.astype(float) 
#     print(A.shape)

    edge_list0=A_to_edge_index(A0)
    edge_list1=A_to_edge_index(A1)
    edge_list2=A_to_edge_index(A2)
#     print(edge_list0.shape)
#     print(edge_list1.shape)
#     print(edge_list2.shape)
    data = HeteroData()
    
    
    arr=torch.from_numpy(arr)
    mean1, std1 =arr.mean(axis=0), arr.std(axis=0)
    data['taxi'].x =  (arr - mean1) / std1
    
    
    arr_tru=torch.from_numpy(arr_tru)
    mean2, std2 =arr_tru.mean(axis=0), arr_tru.std(axis=0)
    data['taxi'].y =  (arr_tru - mean2) / std2
    
    
#     data['taxi'].x= arr
    data['taxi', 'near', 'taxi'].edge_index = edge_list0
    data['taxi', 'connected', 'taxi'].edge_index = edge_list1
    data['taxi', 'OD_similar', 'taxi'].edge_index = edge_list2
    temp=np.array([True])
    temp2=np.tile(temp, 266)
    data['taxi'].test_mask=torch.from_numpy(temp2)
#     data['taxi'].y=torch.from_numpy(arr_tru)
    return data

In [17]:
data=get_graph_per_ts(0)

In [18]:
print(data.edge_index_dict)

{('taxi', 'near', 'taxi'): tensor([[   0,    0,    0,  ..., 1914, 1914, 1915],
        [   0,    1,    2,  ..., 1914, 1915, 1915]]), ('taxi', 'connected', 'taxi'): tensor([[   0,    0,    0,  ..., 1914, 1914, 1915],
        [   0,    1,    2,  ..., 1914, 1915, 1915]]), ('taxi', 'OD_similar', 'taxi'): tensor([[   0,    0,    0,  ..., 1914, 1914, 1915],
        [   0,    1,    2,  ..., 1914, 1915, 1915]])}


In [19]:
# # Creating a Heterogeneous Graph (with three edge types), as a "data" object
# from torch_geometric.data import HeteroData
# data = HeteroData()
# data['taxi'].x= arr 
# data['taxi', 'near', 'taxi'].edge_index = edge_list0
# data['taxi', 'connected', 'taxi'].edge_index = edge_list0
# data['taxi', 'OD_similar', 'taxi'].edge_index = edge_list0
# temp=np.array([True])
# temp2=np.tile(temp, 266)
# data['taxi'].test_mask=torch.from_numpy(temp2)

In [20]:
# def bring_ts_data():
#     # obtain data for the cars in the first 10 regions
#     arr = np.empty((0,216))
#     arr_tru=np.empty((0,216))
#     t_s=6
#     for i in range(20):
#         n_nodes_in_reg=np.sum(data1[t_s, i])

#         message_nodes_in_reg=X_train_new[i,:, t_s]
#         X_temp=np.matlib.repmat(message_nodes_in_reg, n_nodes_in_reg, 1)
#         arr = np.vstack((arr, X_temp))

#         message_nodes_in_reg_tru=X_train_new[i,:,t_s+1]
#         X_temp2=np.matlib.repmat(message_nodes_in_reg_tru, n_nodes_in_reg, 1)
#         arr_tru = np.vstack((arr_tru, X_temp2))

#     # X=np.asarray(X)   
#     print(arr_tru.shape)
#     print(arr_tru.shape)   
    
#     return data

In [21]:
# # normalize the data
# mean0, std0 = X_train_new[:, :, 0].mean(axis=0), X_train_new[:, :, 0] .std(axis=0)
# X_train_new[:, :, 0]  = (X_train_new[:, :, 0]  - mean0) / std0
# X_train_new[:, :, 1] =  (X_train_new[:, :, 1]  - mean0) / std0

In [22]:
# verify the graph properties
print(data)

HeteroData(
  [1mtaxi[0m={
    x=[1916, 216],
    y=[1916, 216],
    test_mask=[266]
  },
  [1m(taxi, near, taxi)[0m={ edge_index=[2, 77583] },
  [1m(taxi, connected, taxi)[0m={ edge_index=[2, 77583] },
  [1m(taxi, OD_similar, taxi)[0m={ edge_index=[2, 77583] }
)


In [23]:
# extract the metadata from the "data" object
node_types, edge_types = data.metadata()
print("Node types:", node_types)
print("Edge types:", edge_types)

Node types: ['taxi']
Edge types: [('taxi', 'near', 'taxi'), ('taxi', 'connected', 'taxi'), ('taxi', 'OD_similar', 'taxi')]


## Initialize the GNN Model

In [24]:
class HeteroGNN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_layers):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = HeteroConv({
                ('taxi', 'near', 'taxi'): GCNConv(-1, hidden_channels),
                ('taxi', 'connected', 'taxi'): SAGEConv((-1, -1), hidden_channels),
                ('taxi', 'OD_similar', 'taxi'): GATConv((-1, -1), hidden_channels),
            }, aggr='sum')
            self.convs.append(conv)
        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict):
        for conv in self.convs:
            x_dict = conv(x_dict, edge_index_dict)
            x_dict = {key: x.relu() for key, x in x_dict.items()}
        return self.lin(x_dict['taxi'])

###  Initialize a sample of the GNN model

In [25]:
print(type(data['taxi'].x))

<class 'torch.Tensor'>


In [26]:
Model_GNN = HeteroGNN(hidden_channels=300, out_channels=216,
                  num_layers=3)
Model_GNN = Model_GNN.double()

Model_GNN=Model_GNN.to(device)
data=data.to(device)

with torch.no_grad():  # Initialize lazy modules.
    Model_GNN.eval()
    out = Model_GNN(data.x_dict, data.edge_index_dict)    
    
print(Model_GNN)# print a model summary
# print(out.shape)# print the shape of outputs with this initial model 

HeteroGNN(
  (convs): ModuleList(
    (0): HeteroConv(num_relations=3)
    (1): HeteroConv(num_relations=3)
    (2): HeteroConv(num_relations=3)
  )
  (lin): Linear(300, 216, bias=True)
)


In [27]:
print(out.shape)

torch.Size([1916, 216])


# Train the GNN Model

In [28]:
print(data['taxi'].x.shape[0])

1916


In [29]:
# # Obtain training samples:
# from torch_geometric.data import HeteroData
# data = HeteroData()
# data['taxi'].x= X_train_new[:, :, 0] 
# data['taxi', 'near', 'taxi'].edge_index = edge_list0
# data['taxi', 'connected', 'taxi'].edge_index = edge_list2
# data['taxi', 'OD_similar', 'taxi'].edge_index = edge_list2
temp=np.array([True])
temp2=np.tile(temp, data['taxi'].x.shape[0])
data['taxi'].train_mask=torch.from_numpy(temp2)
# # for k in np.arange(180)
# data['taxi'].y=X_train_new[:, :, 1] 

In [35]:
torch.cuda.set_device(1)
import torch.nn as nn
mse_loss = nn.MSELoss()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data=data.to(device)
Model_GNN=Model_GNN.to(device)

with torch.no_grad():  # Initialize lazy modules.
    out = Model_GNN(data.x_dict, data.edge_index_dict)

optimizer = torch.optim.Adam(Model_GNN.parameters(), lr=0.005, weight_decay=0.001)
           
def train(Model_GNN,data):
    Model_GNN.train()
    optimizer.zero_grad()
    out = Model_GNN(data.x_dict, data.edge_index_dict)
    mask = data['taxi'].train_mask
#     loss = F.cross_entropy(out[mask], data['domain_node'].y[mask])
    loss = mse_loss(out[mask], data['taxi'].y)
    loss.backward()
    optimizer.step()
    return float(loss)

# @torch.no_grad()
# def test(model,data):
#     model.eval()
#     pred = model(data.x_dict, data.edge_index_dict).argmax(dim=-1)
#     accs = []
#     for split in ['train_mask', 'val_mask', 'test_mask']:
#         mask = data[split]
#         acc = (pred[mask] == data['domain_node'].y[mask]).sum() / mask.sum()
#         accs.append(float(acc))
#     return accs

In [45]:
for epoch in range(1, 180):
    data=get_graph_per_ts(0)
    temp=np.array([True])
    temp2=np.tile(temp, data['taxi'].x.shape[0])
    data['taxi'].train_mask=torch.from_numpy(temp2)
    data=data.to(device)
    loss = train(Model_GNN, data)
    if epoch % 20 == 0:
        print(loss)

0.9994780793319415
0.9994780793319415
0.9994780793319415
0.9994780793319415
0.9994780793319415
0.9994780793319415


KeyboardInterrupt: 

In [None]:
# for epoch in range(1, 179):
#     data=get_graph_per_ts(1)
#     temp=np.array([True])
#     temp2=np.tile(temp, data['taxi'].x.shape[0])
#     data['taxi'].train_mask=torch.from_numpy(temp2)
#     data=data.to(device)
#     loss = train(Model_GNN, data)
#     if epoch % 10 == 0:
#         print(loss)

In [None]:
mse = nn.MSELoss()
lossl1 = nn.L1Loss()
from sklearn.metrics import mean_absolute_percentage_error

from sklearn.metrics import mean_squared_error
import math
from sklearn.metrics import mean_absolute_percentage_error


In [None]:
def get_graph_per_ts(t_s):
# obtain data for the cars in the first 10 regions
    arr = np.empty((0,216))
    arr_tru=np.empty((0,216))
#     t_s=0
    for i in range(40):
        n_nodes_in_reg=np.sum(data1[t_s, i])

        message_nodes_in_reg=X_train_new[i,:, t_s]
        X_temp=np.matlib.repmat(message_nodes_in_reg, n_nodes_in_reg, 1)
        arr = np.vstack((arr, X_temp))

        message_nodes_in_reg_tru=X_train_new[i,:,t_s+1]
        X_temp2=np.matlib.repmat(message_nodes_in_reg_tru, n_nodes_in_reg, 1)
        arr_tru = np.vstack((arr_tru, X_temp2))

    # X=np.asarray(X)   
#     print(arr.shape)
#     print(arr_tru.shape)  


    # obtain adj info for the cars in the first 10 regions
#     t_s=0
    n_nodes_in_reg_list=[]
    A0 = np.empty((0,0))
    A1 = np.empty((0,0))
    A2 = np.empty((0,0))
    for i in range(40):
            n_nodes_in_reg=np.sum(data1[t_s, i])
            block0=np.matlib.repmat(adj_0[i,i], n_nodes_in_reg, n_nodes_in_reg);
            A0=block_diag(A0, block0)
            
            block1=np.matlib.repmat(adj_0[i,i], n_nodes_in_reg, n_nodes_in_reg);
            A1=block_diag(A1, block1)
            
            block2=np.matlib.repmat(adj_0[i,i], n_nodes_in_reg, n_nodes_in_reg);
            A2=block_diag(A2, block2)
    
#     A0=A0.astype(float) 
#     print(A.shape)

    edge_list0=A_to_edge_index(A0)
    edge_list1=A_to_edge_index(A1)
    edge_list2=A_to_edge_index(A2)
#     print(edge_list0.shape)
#     print(edge_list1.shape)
#     print(edge_list2.shape)
    data = HeteroData()
    
    
    arr=torch.from_numpy(arr)
#     mean1, std1 =arr.mean(axis=0), arr.std(axis=0)
    data['taxi'].x =  (arr - 0) / 1
    
    
    arr_tru=torch.from_numpy(arr_tru)
#     mean2, std2 =arr_tru.mean(axis=0), arr_tru.std(axis=0)
    data['taxi'].y =  (arr_tru - 0) / 1
    
    
#     data['taxi'].x= arr
    data['taxi', 'near', 'taxi'].edge_index = edge_list0
    data['taxi', 'connected', 'taxi'].edge_index = edge_list1
    data['taxi', 'OD_similar', 'taxi'].edge_index = edge_list2
    temp=np.array([True])
    temp2=np.tile(temp, 266)
    data['taxi'].test_mask=torch.from_numpy(temp2)
#     data['taxi'].y=torch.from_numpy(arr_tru)
    return data

In [None]:



data=get_graph_per_ts(144)
temp=np.array([True])
temp2=np.tile(temp, data['taxi'].x.shape[0])
data['taxi'].test_mask=torch.from_numpy(temp2)
data=data.to(device)

In [None]:
mean1, std1 =data['taxi'].x.mean(axis=0), data['taxi'].x.std(axis=0)
data['taxi'].x =  (data['taxi'].x - mean1) / std1
# X_test_new[:, :, 1] =  (X_test_new[:, :, 1]  - mean1) / std1
# # test_array = (data_array[(num_train + num_val) :] - mean) / std

# data['taxi'].x= X_test_new[:, :, 0] 
# # for k in np.arange(180)
# data['taxi'].y=X_test_new[:, :, 1] 

In [None]:
with torch.no_grad():  # Initialize lazy modules.
#     Model_GNN.eval()
    out = Model_GNN(data.x_dict, data.edge_index_dict)  
    
    
# out[out<0]=0 
estim=out.cpu()
estim=(estim*std1.cpu()+mean1.cpu())

org=data['taxi'].y.cpu()
# org=(org+mean1)

print(estim)
print(org)
rmse=math.sqrt(mse(estim, org))
print("rmse", rmse)
print("mae", lossl1(estim, org))
print("mape", mean_absolute_percentage_error(estim, org))

In [None]:
# mean1, std1 = X_test_new[:, :, 0].mean(axis=0), X_test_new[:, :, 0] .std(axis=0)
# X_test_new[:, :, 0] =  (X_test_new[:, :, 0]  - mean1) / std1
# X_test_new[:, :, 1] =  (X_test_new[:, :, 1]  - mean1) / std1
# # test_array = (data_array[(num_train + num_val) :] - mean) / std

# data['taxi'].x= X_test_new[:, :, 0] 
# # for k in np.arange(180)
# data['taxi'].y=X_test_new[:, :, 1] 
data=data.to(device)
Model_GNN=Model_GNN.to(device)

In [None]:
print(data.x_dict)

In [None]:
with torch.no_grad():  # Initialize lazy modules.
    Model_GNN.eval()
    out = Model_GNN(data.x_dict, data.edge_index_dict)  

# out[out<0]=0 
# estim=out.cpu()
# estim=(estim+mean1)

# org=data['taxi'].y.cpu()
# org=(org+mean1)

# print(estim)
# print(org)
# rmse=math.sqrt(mse(estim, org))
# print("rmse", rmse)
# print("mae", lossl1(estim, org))
# print("mape", mean_absolute_percentage_error(estim, org))

In [None]:
with torch.no_grad():  # Initialize lazy modules.
    Model_GNN.eval()
    out = Model_GNN(data.x_dict, data.edge_index_dict)  

out[out<0]=0 
estim=out.cpu()
estim=(estim*std1.cpu()+mean1.cpu())

org=data['taxi'].y.cpu()
org=(org*std1+mean1)

print(estim)
print(org)
rmse=math.sqrt(mse(estim, org))
print(rmse)
print(mean_absolute_percentage_error(estim, org))

### Initialize LSTM model

In [None]:
# X_arr = [] 
# Y_arr = []



# for i in range(0,10):
#     list1 = []
#     for j in range(i,i+10):
#         list1.append(X_test_new[j, :, 0])
#     X.append(list1)
#     Y.append(X_test_new[:, :, 1])

In [None]:
# print(X_test_new[:, :, 0].shape)

In [None]:
# #train test split
# # X = np.array(X)
# # Y = np.array(Y)
# x_train = X_test_new[0:10,:,0]
# y_train = X_test_new[10:11,:,1]
# x_test = X_test_new[20:21,:,0]
# y_test = X_test_new[20:21,:,1]
# # y_train = Y[:360]
# # y_test = Y[360:]

In [None]:
# x_train=np.array(np.transpose(x_train))
# y_train=np.array(np.transpose(y_train))

In [None]:
# print(x_train.shape)
# print(y_train.shape)

In [None]:
# #dataset
# from torch.utils.data import Dataset

# class timeseries(Dataset):
#     def __init__(self,x,y):
#         self.x = torch.tensor(x,dtype=torch.float32)
#         self.y = torch.tensor(y,dtype=torch.float32)
#         self.len = x.shape[0]

#     def __getitem__(self,idx):
#         return self.x[idx],self.y[idx]
  
#     def __len__(self):
#         return self.len

# dataset = timeseries(x_train,y_train)
# #dataloader
# from torch.utils.data import DataLoader 
# train_loader = DataLoader(dataset,shuffle=True,batch_size=1)

In [None]:
# #neural network
# from torch import nn

# class neural_network(nn.Module):
#     def __init__(self):
#         super(neural_network,self).__init__()
#         self.lstm = nn.LSTM(input_size=216,hidden_size=5)
#         self.fc1 = nn.Linear(in_features=5,out_features=216)

#     def forward(self,x):
#         output,_status = self.lstm(x)
#         output = output[:,-1,:]
#         output = self.fc1(torch.relu(output))
#         return output

# model = neural_network()

In [None]:
# # optimizer , loss
# criterion = torch.nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
# epochs = 100

In [None]:
# print(data[:][0].shape)

In [None]:
# #training loop
# for i in range(epochs):
#     for j,data in enumerate(train_loader):
#         y_pred = model(data[:][0].view(-1,1,1)).reshape(-1)
#         loss = criterion(y_pred,data[:][1])
#         loss.backward()
#         optimizer.step()
#     if i%50 == 0:
#         print(i,"th iteration : ",loss)

In [None]:
# class LSTM(nn.Module):
#     """
#     input_size - will be 1 in this example since we have only 1 predictor (a sequence of previous values)
#     hidden_size - Can be chosen to dictate how much hidden "long term memory" the network will have
#     output_size - This will be equal to the prediciton_periods input to get_x_y_pairs
#     """
#     def __init__(self, input_size, hidden_size, output_size):
#         super(LSTM, self).__init__()
#         self.hidden_size = hidden_size
        
#         self.lstm = nn.LSTM(input_size, hidden_size)
        
#         self.linear = nn.Linear(hidden_size, output_size)
        
#     def forward(self, x, hidden=None):
#         if hidden==None:
#             self.hidden = (torch.zeros(1,1,self.hidden_size),
#                            torch.zeros(1,1,self.hidden_size))
#         else:
#             self.hidden = hidden
            
#         """
#         inputs need to be in the right shape as defined in documentation
#         - https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
        
#         lstm_out - will contain the hidden states from all times in the sequence
#         self.hidden - will contain the current hidden state and cell state
#         """
#         lstm_out, self.hidden = self.lstm(x.view(len(x),1,-1), 
#                                           self.hidden)
        
#         predictions = self.linear(lstm_out.view(len(x), -1))
        
#         return predictions[-1], self.hidden

In [None]:
# model = LSTM(input_size=1, hidden_size=50, output_size=1)
# criterion = nn.MSELoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# print(x_train.shape)
# print(y_train.shape)

In [None]:
# print(y.shape)

In [None]:
# epochs = 5
# model.train()
# for epoch in range(epochs+1):
#     for x,y in zip(x_train, y_train):
#         y_hat, _ = model(x, None)
#         optimizer.zero_grad()
#         loss = criterion(y_hat, y)
#         loss.backward()
#         optimizer.step()

# Train the LSTM Model

In [None]:
# model_LSTM = LSTM(input_size=1, hidden_size=50, output_size=12*9)
# # criterion = nn.MSELoss()
# # optimizer = optim.Adam(model_LSTM.parameters(), lr=0.001)

In [None]:
# # take the message from the GMM model:
# with torch.no_grad():  # Initialize lazy modules.
#      out_GNN = Model_GNN(data.x_dict, data.edge_index_dict)
# print(out_GNN.shape)
# GT=data['taxi'].y
# print(GT.shape)

In [None]:
# device = torch.device('cpu')

In [None]:
# print(device)

# Performance evaluation

# Test with the GNN model

# Fully decentralized

In [None]:
import copy

In [None]:

# data.x_dict=data.x_dict+10*np.random.randn((266,216))    
aa=data.x_dict['taxi']
noise = 5+1*np.random.normal(0, 1, aa.shape)
bb=aa.cpu()
data.x_dict['taxi']=bb
print(data.x_dict['taxi']-aa)

with torch.no_grad():  # Initialize lazy modules.
#     Model_GNN.eval()
    out = Model_GNN(data.x_dict, data.edge_index_dict)  
    
    
out[out<0]=0 
estim=out.cpu()
estim=(estim+mean1)

org=data['taxi'].y.cpu()+0*noise
org=(org+mean1)

print(estim)
print(org)
rmse=math.sqrt(mse(estim, org))
print(rmse)
print(lossl1(estim, org))
print(mean_absolute_percentage_error(estim, org))

In [None]:
print(aa.shape)