<h1>
<center>Dynamic Graph Convolution Neural Networks </center>
</h1>

## General Setup

<font size="3"> 
Packages import and system configurations. 
</font>

In [None]:
#Data
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import pickle
import shutil
from numpy import *
#Graph Counstruction
import torch
import tensorflow as tf
from torch_geometric_temporal.signal import StaticGraphTemporalSignal,DynamicGraphTemporalSignal
import torch
import torchmetrics

import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import A3TGCN2,TGCN2,DCRNN,MPNNLSTM,A3TGCN,GCLSTM,TGCN
from torch.nn import Linear
from torch.nn import ReLU
import torch.nn as nn
from torch.nn.init import kaiming_uniform_
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

<font size="3"> 
Define necessary paths. 
</font>

In [None]:
# Root folder that holds the pre-processed (intermediate) graph arrays.
data_intermediate_path = 'io/input/data_intermediate/'

# Training split
train_node_features_path = f'{data_intermediate_path}node_features/train/'
train_node_labels_path = f'{data_intermediate_path}node_labels/train/'
train_edges_path = f'{data_intermediate_path}edges/train/'
train_edge_weights_path = f'{data_intermediate_path}edge_weights/train/'

# Validation split
val_node_features_path = f'{data_intermediate_path}node_features/val/'
val_node_labels_path = f'{data_intermediate_path}node_labels/val/'
val_edges_path = f'{data_intermediate_path}edges/val/'
val_edge_weights_path = f'{data_intermediate_path}edge_weights/val/'

# Test split
test_node_features_path = f'{data_intermediate_path}node_features/test/'
test_node_labels_path = f'{data_intermediate_path}node_labels/test/'
test_edges_path = f'{data_intermediate_path}edges/test/'
test_edge_weights_path = f'{data_intermediate_path}edge_weights/test/'

# NOTE(review): not referenced by any other cell visible in this notebook.
chunk_size = 100_000

## Core Functionality

<font size="3"> 
Load NumPy arrays that were saved in chunks.
</font>

In [None]:
def load_object(path):
    """Rebuild a single array from the ``chunk_<i>.npy`` files stored in ``path``.

    The number of chunks is the count of files in ``path`` whose name starts
    with ``chunk_`` and ends with ``.npy``; they are then read in index order
    (``chunk_0.npy``, ``chunk_1.npy``, ...) and concatenated along axis 0.

    Args:
        path: Directory containing the chunk files. A trailing path separator
            is optional.

    Returns:
        numpy.ndarray: All chunks concatenated along the first axis.

    Raises:
        ValueError: If ``path`` contains no ``chunk_*.npy`` file.
        FileNotFoundError: If ``path`` does not exist or the chunk indices are
            not contiguous starting at 0.
    """
    chunk_names = [f for f in os.listdir(path) if f.startswith('chunk_') and f.endswith('.npy')]
    if not chunk_names:
        raise ValueError(f"No 'chunk_*.npy' files found in {path!r}")

    # NOTE(security): allow_pickle=True can execute arbitrary code while
    # unpickling; only load chunk files that come from a trusted source.
    chunks = [
        # os.path.join works whether or not `path` ends with a separator.
        np.load(os.path.join(path, f"chunk_{i}.npy"), allow_pickle=True)
        for i in range(len(chunk_names))
    ]
    return np.concatenate(chunks, axis=0)

<font size="3"> 
Load all graph information and print shapes
</font>

In [None]:
# Load node features, node labels, edge indices and edge weights for each split.
train_node_features = load_object(train_node_features_path)
train_node_labels = load_object(train_node_labels_path)
train_edges = load_object(train_edges_path)
train_edge_weights = load_object(train_edge_weights_path)

val_node_features = load_object(val_node_features_path)
val_node_labels = load_object(val_node_labels_path)
val_edges = load_object(val_edges_path)
val_edge_weights = load_object(val_edge_weights_path)

test_node_features = load_object(test_node_features_path)
test_node_labels = load_object(test_node_labels_path)
test_edges = load_object(test_edges_path)
test_edge_weights = load_object(test_edge_weights_path)

# Report the shape of every loaded array. The labels previously ended in
# "Train" for all splits (copy-paste slip); they now say "Shape".
print('Train Node Features Shape', train_node_features.shape)
print('Train Node Labels Shape', train_node_labels.shape)
print('Train Edges Shape', train_edges.shape)
print('Train Edge Weights Shape', train_edge_weights.shape)
print('\n')
print('Validation Node Features Shape', val_node_features.shape)
print('Validation Node Labels Shape', val_node_labels.shape)
print('Validation Edges Shape', val_edges.shape)
print('Validation Edge Weights Shape', val_edge_weights.shape)
print('\n')
print('Test Node Features Shape', test_node_features.shape)
print('Test Node Labels Shape', test_node_labels.shape)
print('Test Edges Shape', test_edges.shape)
print('Test Edge Weights Shape', test_edge_weights.shape)

## Convert Graph info to loaders

<font size="3"> 
Zero out edge weights that fall below the threshold and exclude self-edges.
<br>
<br>
This is used to tailor the edges to our problem.
</font>

In [None]:
edge_threshold = 0.3


def _prune_and_square(weights, threshold):
    """Zero weights equal to 1 or below ``threshold`` (in place), then square.

    The input array is modified in place by the two masking steps; the squared
    result is returned as a new array.
    """
    weights[weights == 1] = 0  # We exclude self edges
    weights[weights < threshold] = 0
    return weights * weights


# NOTE: each name is rebound to its processed version, so running this cell a
# second time would threshold and square the already-squared weights again.
train_edge_weights = _prune_and_square(train_edge_weights, edge_threshold)
val_edge_weights = _prune_and_square(val_edge_weights, edge_threshold)
test_edge_weights = _prune_and_square(test_edge_weights, edge_threshold)

<font size="3"> 
This is a custom loader that we use so that features, targets, edges, and edge weights are all served by the same loader.
</font>

In [None]:
def dynamic_data_loader(graph,batch_size):
    """Wrap a temporal graph signal in a ``DataLoader`` of stacked tensors.

    Args:
        graph: Object exposing ``features``, ``targets``, ``edge_indices`` and
            ``edge_weights`` attributes; each one is converted with
            ``np.array`` before being turned into a tensor.
        batch_size: Number of snapshots per batch.

    Returns:
        torch.utils.data.DataLoader: Yields
        ``(features, edge_indices, edge_weights, targets)`` tuples. Features,
        edge weights and targets are float tensors; edge indices are long
        tensors. Batches keep the dataset order (no shuffling).
    """
    def _to_tensor(values, tensor_type):
        # np.array() copies, so the tensors never alias the caller's arrays.
        return torch.from_numpy(np.array(values)).type(tensor_type)

    dataset = torch.utils.data.TensorDataset(
        _to_tensor(graph.features, torch.FloatTensor),
        _to_tensor(graph.edge_indices, torch.LongTensor),
        _to_tensor(graph.edge_weights, torch.FloatTensor),
        _to_tensor(graph.targets, torch.FloatTensor),
    )

    def _stack_snapshots(samples):
        # `samples` is a list of (features, edges, edge_attr, targets) items.
        features, edges, edge_attr, targets = zip(*samples)
        return (torch.stack(features), torch.stack(edges),
                torch.stack(edge_attr), torch.stack(targets))

    # The collate function must be passed to the constructor: DataLoader keeps
    # it as an instance attribute, so a `collate_fn` method defined on a
    # subclass is shadowed and never called.
    # drop_last=False: the final batch may hold fewer than `batch_size` items.
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, drop_last=False,
                                       collate_fn=_stack_snapshots)

<font size="3"> 
We use this function to apply our loader
</font>

In [None]:
def graph_data_loader(batch_size,train_edges,train_edge_weights,train_node_features,train_node_labels,
                      test_edges,test_edge_weights,test_node_features,test_node_labels):
    """Build the training and evaluation loaders from raw graph arrays.

    Each group of arrays is wrapped in a ``DynamicGraphTemporalSignal`` and
    then handed to ``dynamic_data_loader`` with the same ``batch_size``.

    Returns:
        tuple: ``(train_loader, test_loader)``.
    """
    train_signal = DynamicGraphTemporalSignal(
        edge_indices=train_edges,
        edge_weights=train_edge_weights,
        features=train_node_features,
        targets=train_node_labels,
    )
    eval_signal = DynamicGraphTemporalSignal(
        edge_indices=test_edges,
        edge_weights=test_edge_weights,
        features=test_node_features,
        targets=test_node_labels,
    )
    return (dynamic_data_loader(train_signal, batch_size),
            dynamic_data_loader(eval_signal, batch_size))


## Graph Neural Networks Model

<font size="3"> 
Graph neural networks architecture 
</font>

In [None]:
class TemporalGNN(torch.nn.Module):
    """A3TGCN2 recurrent graph layer followed by a small fully connected head.

    The recurrent layer emits 64 channels, which are reduced through linear
    layers of width 32 and 16 (each followed by ReLU) to ``periods`` outputs.

    Args:
        node_features: Passed to ``A3TGCN2`` as ``in_channels``.
        periods: Passed to ``A3TGCN2`` as ``periods``; also the output width.
        batch_size: Passed to ``A3TGCN2`` as ``batch_size``.
    """

    def __init__(self, node_features, periods, batch_size):
        super().__init__()

        # Recurrent graph layer
        self.tgnn = A3TGCN2(in_channels=node_features, out_channels=64,
                            periods=periods, batch_size=batch_size)

        # Fully connected head: 64 -> 32 -> 16 -> periods
        self.hidden2 = Linear(in_features=64, out_features=32)
        self.act2 = ReLU()
        self.hidden3 = Linear(in_features=32, out_features=16)
        self.act3 = ReLU()
        self.linear = Linear(in_features=16, out_features=periods)

    def forward(self, x, edge_index, edge_weight):
        """Run the recurrent layer and the dense head; return the head output."""
        recurrent_out = F.relu(self.tgnn(x, edge_index, edge_weight))
        hidden = self.act2(self.hidden2(recurrent_out))
        hidden = self.act3(self.hidden3(hidden))
        return self.linear(hidden)

<font size="3"> 
Use the model to predict on a data loader and calculate evaluation metrics from the predictions.
</font>

In [None]:
def calculate_evalaution_metrics(model,loader,loss_fn):
    """Evaluate ``model`` on every batch of ``loader`` and average the metrics.

    The model is switched to eval mode (and left in it). For each batch the
    model receives the batch features together with the LAST edge-index and
    edge-weight entries of that batch.

    Args:
        model: Module called as ``model(features, edge_index, edge_weight)``.
        loader: Iterable of ``(features, edge_indices, edge_weights, targets)``.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a tensor.

    Returns:
        tuple: ``(loss, mae, r2, rmse)`` as Python floats, each the unweighted
        mean of the per-batch values.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.eval()
    loss_list, mae_list, r2_list, rmse_list = [], [], [], []

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for snapshot in loader:
            # NOTE(review): evaluation uses the last graph of the batch ([-1])
            # whereas model_training uses the first ([0]) - confirm intent.
            y_hat = model(snapshot[0], snapshot[1][-1], snapshot[2][-1])
            y_true = snapshot[3]

            loss_list.append(loss_fn(y_hat, y_true).item())

            error = y_hat - y_true
            mae_list.append(torch.mean(torch.abs(error)).item())

            # r2_score expects (preds, target). R2 is not symmetric, so the
            # model output must be first and the labels second.
            r2 = torchmetrics.functional.r2_score(y_hat.reshape(-1), y_true.reshape(-1))
            r2_list.append(r2.item())

            rmse_list.append(torch.sqrt(torch.mean(torch.pow(error, 2))).item())

    if not loss_list:
        raise ValueError("loader produced no batches; cannot compute metrics")

    loss = sum(loss_list) / len(loss_list)
    mae = sum(mae_list) / len(mae_list)
    r2 = sum(r2_list) / len(r2_list)
    rmse = sum(rmse_list) / len(rmse_list)
    return loss,mae,r2,rmse

## Training

In [None]:
def model_training(node_features,periods,batch_size,lr,epochs,train_loader,val_loader,loss_fn):
    """Train a fresh ``TemporalGNN`` and record train/eval metrics per epoch.

    Args:
        node_features, periods, batch_size: Forwarded to ``TemporalGNN``.
        lr: Learning rate of the RMSprop optimizer.
        epochs: Number of passes over ``train_loader``.
        train_loader: Iterable of ``(features, edge_indices, edge_weights,
            targets)`` batches; the FIRST edge-index / edge-weight entry of
            each batch is fed to the model.
        val_loader: Loader passed to ``calculate_evalaution_metrics`` after
            every epoch.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a tensor.

    Returns:
        tuple: ``(model, metrics)`` where ``metrics`` maps ``train_*_ls`` and
        ``eval_*_ls`` keys (loss, mae, r2, rmse) to per-epoch lists.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model = TemporalGNN(node_features=node_features, periods=periods, batch_size=batch_size)
    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)

    train_loss_ls,train_mae_ls,train_r2_ls,train_rmse_ls = [],[],[],[]
    val_loss_ls,val_mae_ls,val_r2_ls,val_rmse_ls = [],[],[],[]
    for epoch in range(epochs):
        # The per-epoch evaluation below leaves the model in eval mode, so
        # training mode has to be re-enabled at the start of every epoch.
        model.train()
        loss_list, mae_list ,r2_list, rmse_list = [], [], [], []
        for snapshot in tqdm(train_loader):
            optimizer.zero_grad()
            y_hat = model(snapshot[0], snapshot[1][0], snapshot[2][0])
            y_true = snapshot[3]
            loss = loss_fn(y_hat, y_true)
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())

            # Everything below is only for metrics reporting, not optimisation.
            with torch.no_grad():
                error = y_hat - y_true
                mae_list.append(torch.mean(torch.abs(error)).item())
                # r2_score expects (preds, target): model output first.
                r2 = torchmetrics.functional.r2_score(y_hat.reshape(-1), y_true.reshape(-1))
                r2_list.append(r2.item())
                rmse_list.append(torch.sqrt(torch.mean(torch.pow(error, 2))).item())

        if not loss_list:
            raise ValueError("train_loader produced no batches; cannot train")

        train_loss = sum(loss_list) / len(loss_list)
        train_mae = sum(mae_list) / len(mae_list)
        train_r2 = sum(r2_list) / len(r2_list)
        train_rmse = sum(rmse_list) / len(rmse_list)
        val_loss,val_mae,val_r2,val_rmse = calculate_evalaution_metrics(model,val_loader,loss_fn)
        print("Epoch {}, Train || MSE: {:.7f}, MAE: {:.7f}, R2: {:.7f}, RMSE: {:.7f}".format(epoch+1,train_loss,train_mae,train_r2,train_rmse))
        print("Epoch {}, Evaluation || MSE: {:.7f}, MAE: {:.7f}, R2: {:.7f}, RMSE: {:.7f}".format(epoch+1,val_loss,val_mae,val_r2,val_rmse))
        train_loss_ls.append(train_loss)
        train_mae_ls.append(train_mae)
        train_r2_ls.append(train_r2)
        train_rmse_ls.append(train_rmse)
        val_loss_ls.append(val_loss)
        val_mae_ls.append(val_mae)
        val_r2_ls.append(val_r2)
        val_rmse_ls.append(val_rmse)
    metrics = {'train_loss_ls': train_loss_ls,'train_mae_ls': train_mae_ls,'train_r2_ls': train_r2_ls,
               'train_rmse_ls': train_rmse_ls,'eval_loss_ls': val_loss_ls,'eval_mae_ls': val_mae_ls,
               'eval_r2_ls': val_r2_ls,'eval_rmse_ls': val_rmse_ls}
    return model,metrics

## Plot results

In [None]:
def smooth_curve(points, factor=0):
    """Exponentially smooth a sequence of values.

    The first value is kept as is; every later value becomes
    ``previous_smoothed * factor + value * (1 - factor)``. With the default
    ``factor=0`` the values are returned unchanged (as a new list).
    """
    smoothed = []
    for index, value in enumerate(points):
        if index == 0:
            smoothed.append(value)
        else:
            smoothed.append(smoothed[-1] * factor + value * (1 - factor))
    return smoothed

In [None]:
def plot_single_metric(metric,metric_label,set_label):
    """Plot one per-epoch metric curve and show the figure.

    Args:
        metric: Sequence of per-epoch values; passed through ``smooth_curve``
            with its default factor before plotting.
        metric_label: Name of the metric; used for the y-axis and the title.
        set_label: Legend label of the curve (e.g. the data split name).
    """
    smooth_metric = smooth_curve(metric)
    epoch_axis = range(1, len(smooth_metric) + 1)  # epochs are shown 1-based
    plt.plot(epoch_axis, smooth_metric, label=set_label)
    # The title follows the plotted metric instead of always saying "Loss".
    plt.title(f'{metric_label} during Epochs')
    plt.xlabel('Epochs')
    plt.ylabel(metric_label)
    plt.legend()
    plt.show()

In [None]:
def plot_all_metrics_train_val(metrics,evaluation_mode):
    """Draw a 2x2 grid comparing train and evaluation curves for four metrics.

    Panels, in row-major order: MSE, MAE, R2, RMSE. Each panel shows the
    ``train_*_ls`` curve labelled 'Train' and the ``eval_*_ls`` curve labelled
    with ``evaluation_mode``.

    Args:
        metrics: Dict holding the ``train_*_ls`` / ``eval_*_ls`` lists for
            loss, mae, r2 and rmse.
        evaluation_mode: Legend label for the evaluation curves.
    """
    # (panel title, key of the train series, key of the evaluation series)
    panels = (
        ('MSE', 'train_loss_ls', 'eval_loss_ls'),
        ('MAE', 'train_mae_ls', 'eval_mae_ls'),
        ('R2', 'train_r2_ls', 'eval_r2_ls'),
        ('RMSE', 'train_rmse_ls', 'eval_rmse_ls'),
    )
    # Resolve every series before creating the figure.
    curves = [
        (smooth_curve(metrics[train_key]), smooth_curve(metrics[eval_key]))
        for _, train_key, eval_key in panels
    ]

    fig, axs = plt.subplots(nrows=2, ncols=2,figsize=(12, 8))
    # axs.flat walks the grid row by row: top-left, top-right, bottom-left, bottom-right.
    for ax, (title, _, _), (train_curve, eval_curve) in zip(axs.flat, panels, curves):
        ax.plot(range(1, len(train_curve) + 1), train_curve, label='Train')
        ax.plot(range(1, len(eval_curve) + 1), eval_curve, label=evaluation_mode)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(title)
        ax.set_title(title)
        ax.legend()

    # Adjust the spacing between subplots, then render.
    plt.tight_layout()
    plt.show()

## Pipeline Execution

### Hyperparameter Optimization using Validation set

In [None]:
# Hyper-parameters for the run that is monitored on the validation split.
batch_size = 16
recurent_steps = batch_size  # handed to model_training as its `batch_size`
node_features = train_node_features.shape[2]
periods = 1
lr = 1e-3
epochs = 2
loss_fn = torch.nn.MSELoss()

# The validation arrays fill the "test" slots of graph_data_loader.
train_loader, val_loader = graph_data_loader(
    batch_size,
    train_edges, train_edge_weights, train_node_features, train_node_labels,
    val_edges, val_edge_weights, val_node_features, val_node_labels,
)

model, metrics_val = model_training(
    node_features=node_features,
    periods=periods,
    batch_size=recurent_steps,
    lr=lr,
    epochs=epochs,
    train_loader=train_loader,
    val_loader=val_loader,
    loss_fn=loss_fn,
)


In [None]:
# Train vs. validation curves for all four metrics, then validation MAE alone.
plot_all_metrics_train_val(metrics=metrics_val, evaluation_mode='Validation')
plot_single_metric(metric=metrics_val['eval_mae_ls'], metric_label='MAE', set_label='Validation')

### Model's Training

In [None]:
# Hyper-parameters for the final model (trained on train + validation data).
batch_size = 8
# This value is handed to model_training -> TemporalGNN -> A3TGCN2 as
# `batch_size`. The previous `batch_size/2` produced the float 4.0 where an
# integer size is required; it now equals the loader batch size, matching the
# validation run above.
# NOTE(review): presumably A3TGCN2's batch_size must equal the loader batch
# size - confirm against the library documentation.
recurent_steps = batch_size
node_features = train_node_features.shape[2]
periods = 1
lr = 0.001
epochs = 2
loss_fn = torch.nn.MSELoss()

# Merge the train and validation splits along the first axis.
train_edges_f = np.concatenate((train_edges, val_edges), axis=0)
train_edge_weights_f = np.concatenate((train_edge_weights, val_edge_weights), axis=0)
train_node_features_f = np.concatenate((train_node_features, val_node_features), axis=0)
train_node_labels_f = np.concatenate((train_node_labels, val_node_labels), axis=0)

In [None]:
# Build loaders for the merged (train + validation) data and for the test split.
train_loader, test_loader = graph_data_loader(
    batch_size,
    train_edges_f, train_edge_weights_f, train_node_features_f, train_node_labels_f,
    test_edges, test_edge_weights, test_node_features, test_node_labels,
)

# The test loader is passed as `val_loader`, so the per-epoch "Evaluation"
# metrics recorded in metrics_test are computed on the test split.
model, metrics_test = model_training(
    node_features=node_features,
    periods=periods,
    batch_size=recurent_steps,
    lr=lr,
    epochs=epochs,
    train_loader=train_loader,
    val_loader=test_loader,
    loss_fn=loss_fn,
)


### Evaluation on Test set

In [None]:
# Final metrics of the trained model on the test loader.
mse, mae, r2, rmse = calculate_evalaution_metrics(model=model, loader=test_loader, loss_fn=loss_fn)
print(
    "Evaluation on Test || MSE: {:.7f}, MAE: {:.7f}, R2: {:.7f}, RMSE: {:.7f}".format(
        mse, mae, r2, rmse
    )
)

In [None]:
# Train vs. test curves for all four metrics, then the test MAE on its own.
plot_all_metrics_train_val(metrics_test,'Test')
# model_training stores evaluation series under 'eval_*' keys; the previous
# 'val_mae_ls' key does not exist and raised KeyError.
plot_single_metric(metrics_test['eval_mae_ls'],'MAE','Test')