# Temporal GNN Model: Scratch
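Scratch notebook for prototyping a temporal GNN (GATv2-based spatial attention layers plus a work-in-progress temporal attention layer) and for experimenting with cosine-similarity-based edge labels.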

In [1]:
import torch
from torch_geometric_temporal.signal import temporal_signal_split
from torch_geometric.utils import is_undirected 

  from .autonotebook import tqdm as notebook_tqdm


### 1. Load the Temporal Graph Dataset
Before defining the GNN model, we load the dataset that was created in a previous tutorial and split it into a training and a test set according to a predefined ratio.

In [None]:
# Load the serialized temporal graph dataset from disk and print each element
# (presumably one graph snapshot per time step — confirm against the dataset).
# NOTE(review): torch.load unpickles the file, so only load files from a trusted source.
tg_dataset = torch.load('../tg_dataset.pt')
for i in tg_dataset:
    print(i)

In [None]:
# Split the dataset into a training and a test part, passing train_ratio=0.8 to temporal_signal_split.
train_dataset, test_dataset = temporal_signal_split(tg_dataset, train_ratio=0.8)

### 2. Define the GNN Model

In [2]:
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import DCRNN
from torch_geometric.nn.conv import GATv2Conv
from tqdm import tqdm

In [None]:
# Hyperparameters for the experiments below.
epochs = 1  # number of iterations of the outer training loop
lr = 0.01  # learning rate handed to torch.optim.Adam
optimizer = 'adam'  # NOTE(review): this name is later rebound to a torch.optim.Adam instance
n_heads_sal = 4  # presumably the head count of the spatial attention layers (SAL); not referenced in the visible code
n_heads_tal = 16  # presumably the head count of the temporal attention layer (TAL); not referenced in the visible code
mini_batch = 3  # not referenced in the visible code — TODO confirm intended use

In [None]:
class RecurrentGCN(torch.nn.Module):
    """A single DCRNN recurrent layer followed by a linear read-out.

    Args:
        node_features: Number of input features per node.
        hidden_channels: Output size of the DCRNN layer and input size of the
            linear layer. Defaults to 992, the value that used to be
            hard-coded in both places.
        filter_size: Third positional argument of ``DCRNN``. Defaults to 1,
            the previously hard-coded value.
    """

    def __init__(self, node_features, hidden_channels=992, filter_size=1):
        super().__init__()
        # Both layers share hidden_channels so their sizes cannot drift apart.
        self.recurrent = DCRNN(node_features, hidden_channels, filter_size)
        self.linear = torch.nn.Linear(hidden_channels, 1)

    def forward(self, x, edge_index, edge_weight):
        """Apply DCRNN, ReLU and the linear layer; the last output dimension has size 1."""
        h = self.recurrent(x, edge_index, edge_weight)
        h = F.relu(h)
        return self.linear(h)

### 3. Define a Custom Temporal Attention Layer

In [3]:
from torch_geometric.nn.conv import MessagePassing
from typing import Optional, Tuple, Union

import torch
from torch import Tensor
from torch.nn import Parameter, Sigmoid
from torch_sparse import SparseTensor, set_diag

import math

In [None]:
def scaled_dot_product(q, k, v, mask=None):
    """Compute scaled dot-product attention.

    Adapted from:
    https://uvadlc-notebooks.readthedocs.io/en/latest/tutorial_notebooks/tutorial6/Transformers_and_MHAttention.html

    Args:
        q: Query tensor; its last dimension is the key dimension ``d_k``.
        k: Key tensor; its last two dimensions are transposed for the product.
        v: Value tensor, multiplied by the attention weights.
        mask: Optional tensor; positions where ``mask == 0`` receive a very
            large negative logit and therefore (almost) zero weight.

    Returns:
        A tuple ``(values, attention)``: the attention-weighted values and the
        softmax-normalised attention weights.
    """
    scale = math.sqrt(q.size(-1))
    logits = torch.matmul(q, k.transpose(-2, -1)) / scale
    if mask is not None:
        logits = logits.masked_fill(mask == 0, -9e15)
    weights = F.softmax(logits, dim=-1)
    return torch.matmul(weights, v), weights

In [None]:
def apply_window():
    """Placeholder: no windowing logic is implemented yet; returns None."""
    return None

In [None]:
def positional_encoding():
    """Placeholder: no positional-encoding logic is implemented yet; returns None."""
    return None

In [None]:
class MultiheadAttentionLayer(torch.nn.Module):
    """Work-in-progress multi-head temporal attention layer.

    Implementation inspired by DySAT's temporal attention layer:
    https://github.com/aravindsankar28/DySAT/blob/master/models/DySAT/layers.py

    Still open (carried over from the original notes):
        * How to handle batches of data.
        * Projection matrices (Xavier-initialised ``Parameter`` tensors),
          the per-head matmuls and their combination (concat?), the mask,
          the softmax and the final multiplication.

    Args:
        in_channels: Number of input features (e.g. 2048 without the spatial
            attention layers).
        out_channels: Number of output features.
        n_heads: Number of attention heads.
        window_size: Size of the temporal window.
        positional_enc: If true, create one learnable positional encoding of
            shape ``(in_channels, window_size)`` per head.
        **kwargs: Forwarded to ``torch.nn.Module.__init__``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        n_heads: int = 1,
        window_size=3,
        positional_enc=True,
        **kwargs
    ):
        # Zero-argument super() cannot name the wrong class.
        super().__init__(**kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.n_heads = n_heads
        self.window_size = window_size

        if positional_enc:
            self.pos_enc = torch.nn.Parameter(
                torch.randn(n_heads, in_channels, window_size)
            )
        else:
            # Keep the attribute defined so forward() can test for it.
            self.register_parameter('pos_enc', None)

        # TODO: placeholder (empty tensor) for the query projection matrix.
        self.W_Q = torch.nn.Parameter()

        # self.reset_parameters()

    def _per_head_inputs(self, h):
        """Add each head's positional encoding to ``h`` and transpose the result.

        Assumes ``h`` is 2-D with shape ``(in_channels, window_size)`` (the
        original notes mention 500x3) — TODO confirm.

        Returns:
            Tensor with one transposed copy of ``h`` per head, stacked along a
            new first dimension.
        """
        per_head = []
        for i in range(self.n_heads):
            x_i = h if self.pos_enc is None else h + self.pos_enc[i]
            per_head.append(x_i.T)
        return torch.stack(per_head, dim=0)

    def forward(self, h, edge_index, edge_attr):
        """Temporal attention over ``h`` (not implemented yet).

        Only the positional-encoding step exists so far.

        Raises:
            NotImplementedError: Always, until the attention is implemented.
        """
        X = self._per_head_inputs(h)  # noqa: F841 - input for the missing attention step
        raise NotImplementedError(
            'MultiheadAttentionLayer.forward: attention computation is not implemented yet'
        )
        
    

In [None]:
W_Q = torch.nn.Parameter(torch.randn(16,500,3)) # n_heads,in_channels,window_size

In [None]:
W_Q.size()

In [None]:
W_Q[2].T

In [4]:
import numpy as np
np.array([[1,2],[0,0]]) + np.array([[1,2],[9,9]])

array([[2, 4],
       [9, 9]])

In [None]:
#from torch.nn import MultiheadAttention

In [None]:
class DyGLIP(torch.nn.Module):
    """Two stacked GATv2 spatial attention layers; temporal attention is still to be added."""

    def __init__(self,in_feat_size):
        super().__init__()
        # TODO: create the full edge list for bi-directional message passing;
        # so far the edges are only unidirectional.

        # Spatial attention layers. The second layer takes 128*4 inputs
        # because of the 4 attention heads of the first one.
        self.sal1 = GATv2Conv(in_channels=in_feat_size, out_channels=128, heads=4, edge_dim=1)
        self.sal2 = GATv2Conv(in_channels=128 * 4, out_channels=128, heads=4, edge_dim=1)

        # Temporal attention layers: TBD.

    def forward(self, x, edge_index, edge_attr):
        """Run ``x`` through both spatial attention layers and return the second layer's output."""
        # TODO: add a positional encoding to x first (row-wise one-hot vector).
        first_pass = self.sal1(x, edge_index, edge_attr)
        second_pass = self.sal2(first_pass, edge_index, edge_attr)
        # TODO: keep a list of the historic structural GAT features and use it
        # as input for the temporal GAT.
        return second_pass

In [None]:
# Instantiate the model with 2048 input features and create an Adam optimizer over its parameters.
model = DyGLIP(in_feat_size=2048)  # should be set to 2048
# NOTE: this rebinds `optimizer`, which was assigned the string 'adam' in the hyperparameter cell.
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

In [5]:
import numpy as np
from numpy.linalg import norm
# Toy feature matrix: four rows of eight consecutive integers each (1..32).
f1 = range(1,9)
f2 = range(9,17)
f3 = range(17,25)
f4 = range(25,33)
A = np.array([f1,f2,f3,f4]) / 1# 4 nodes with 8 features; dividing by 1 converts the integer array to floats
A

array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.],
       [ 9., 10., 11., 12., 13., 14., 15., 16.],
       [17., 18., 19., 20., 21., 22., 23., 24.],
       [25., 26., 27., 28., 29., 30., 31., 32.]])

In [None]:
norm(A,axis=1)

In [6]:
# Outer product of the row norms: scaling[i, j] = ||A[i]|| * ||A[j]||.
# This is the denominator of the cosine similarity between rows i and j.
scaling = np.outer(norm(A,axis=1), norm(A,axis=1))
scaling

array([[ 204.        ,  513.3887416 ,  833.31626649, 1155.05844008],
       [ 513.3887416 , 1292.        , 2097.13328141, 2906.83332856],
       [ 833.31626649, 2097.13328141, 3404.        , 4718.27934739],
       [1155.05844008, 2906.83332856, 4718.27934739, 6540.        ]])

In [7]:
E = np.array([[0,1,2,3],[1,3,3,0]])
E

array([[0, 1, 2, 3],
       [1, 3, 3, 0]])

In [8]:
from torch_geometric.utils import to_dense_adj,to_undirected
E = torch.tensor(E)
#K = to_dense_adj(E).clone().detach().numpy()
#K = np.reshape(K, (4,4))
#K
# As the printed output shows, to_undirected returns the edge index with the
# reverse of every edge added, plus the edge attributes repeated accordingly.
print(to_undirected(E,edge_attr=torch.Tensor([5000,6000,7000,8000]))) # label list
# Same call again, this time keeping the result: E becomes the undirected
# edge index and L the matching per-edge attributes.
E,L = to_undirected(E,edge_attr=torch.Tensor([5000,6000,7000,8000]))

(tensor([[0, 0, 1, 1, 2, 3, 3, 3],
        [1, 3, 0, 3, 3, 0, 1, 2]]), tensor([5000., 8000., 5000., 6000., 7000., 8000., 6000., 7000.]))


In [None]:
L

In [None]:
COS = A@A.T
COS

In [None]:
klklk = COS / scaling
klklk

In [None]:
E = E.T
E

In [9]:
for i in range(len(E)): 
    print(f'{i}. Edge between node {E[i][0]} and node {E[i][1]} has similarity value:')
    print(COS[E[i][0],E[i][1]])

0. Edge between node 0 and node 0 has similarity value:


NameError: name 'COS' is not defined

Tensor

In [10]:
A

array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.],
       [ 9., 10., 11., 12., 13., 14., 15., 16.],
       [17., 18., 19., 20., 21., 22., 23., 24.],
       [25., 26., 27., 28., 29., 30., 31., 32.]])

In [11]:
tmp_tensor = torch.tensor(A)
tmp_tensor

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12., 13., 14., 15., 16.],
        [17., 18., 19., 20., 21., 22., 23., 24.],
        [25., 26., 27., 28., 29., 30., 31., 32.]], dtype=torch.float64)

In [12]:
tmp_tensor.requires_grad = True
tmp_tensor

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12., 13., 14., 15., 16.],
        [17., 18., 19., 20., 21., 22., 23., 24.],
        [25., 26., 27., 28., 29., 30., 31., 32.]], dtype=torch.float64,
       requires_grad=True)

In [13]:
tensor = torch.tensor(A, requires_grad=True)
#tensor = torch.tensor(A)
tensor

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12., 13., 14., 15., 16.],
        [17., 18., 19., 20., 21., 22., 23., 24.],
        [25., 26., 27., 28., 29., 30., 31., 32.]], dtype=torch.float64,
       requires_grad=True)

In [14]:
norm_row = torch.linalg.norm(tensor,dim=1)
norm_row

tensor([14.2829, 35.9444, 58.3438, 80.8703], dtype=torch.float64,
       grad_fn=<CopyBackwards>)

In [15]:
alpha = torch.outer(norm_row,norm_row)
alpha

tensor([[ 204.0000,  513.3887,  833.3163, 1155.0584],
        [ 513.3887, 1292.0000, 2097.1333, 2906.8333],
        [ 833.3163, 2097.1333, 3404.0000, 4718.2793],
        [1155.0584, 2906.8333, 4718.2793, 6540.0000]], dtype=torch.float64,
       grad_fn=<MulBackward0>)

In [None]:
nominator = tensor @ tensor.T
nominator

In [None]:
torch.transpose(tensor,0,1)

In [16]:
# Take this.
nom = torch.matmul(tensor,torch.transpose(tensor,0,1))
nom

tensor([[ 204.,  492.,  780., 1068.],
        [ 492., 1292., 2092., 2892.],
        [ 780., 2092., 3404., 4716.],
        [1068., 2892., 4716., 6540.]], dtype=torch.float64,
       grad_fn=<MmBackward0>)

In [None]:
cos_distance = nominator / alpha
cos_distance

In [17]:
# NOTE: despite the variable name this is the cosine *similarity*
# (dot products divided by the product of the row norms); the diagonal is 1.
cosine_distance = nom / alpha
cosine_distance

tensor([[1.0000, 0.9583, 0.9360, 0.9246],
        [0.9583, 1.0000, 0.9976, 0.9949],
        [0.9360, 0.9976, 1.0000, 0.9995],
        [0.9246, 0.9949, 0.9995, 1.0000]], dtype=torch.float64,
       grad_fn=<DivBackward0>)

In [18]:
sig = Sigmoid()
scores = sig(cosine_distance)
scores

tensor([[0.7311, 0.7228, 0.7183, 0.7160],
        [0.7228, 0.7311, 0.7306, 0.7301],
        [0.7183, 0.7306, 0.7311, 0.7310],
        [0.7160, 0.7301, 0.7310, 0.7311]], dtype=torch.float64,
       grad_fn=<SigmoidBackward0>)

In [None]:
ones = torch.ones(scores.size())
zeros = torch.zeros(scores.size())
ones

In [None]:
# NOTE(review): both branches are `scores`, so this returns `scores` unchanged.
# Presumably torch.where(scores > 0.5, ones, zeros) was intended — confirm.
torch.where(scores >0.5,scores,scores)

In [23]:
# scores[0:2] selects rows 0-1, so indexing that with [1] addresses all of row 1:
# the whole row is set to 0.1 (see the output), not a single element.
scores[0:2][1] = 0.1
# Put one entry just above the 0.5 threshold (displayed as 0.5001).
scores[0][2] = 0.500100000000000001
scores

tensor([[0.7311, 0.7228, 0.5001, 0.7160],
        [0.1000, 0.1000, 0.1000, 0.1000],
        [0.7183, 0.7306, 0.7311, 0.7310],
        [0.7160, 0.7301, 0.7310, 0.7311]], dtype=torch.float64,
       grad_fn=<CopySlices>)

In [None]:
scores  = torch.round(scores.clone())
scores

In [19]:
E

tensor([[0, 0, 1, 1, 2, 3, 3, 3],
        [1, 3, 0, 3, 3, 0, 1, 2]])

In [21]:
E = E.T

In [30]:
# Look up the score of every edge (one row of E per edge) and stack the
# resulting scalars into a single 1-D tensor.
lisst = []
for i, (src, dst) in enumerate(E):
    print(f'{i}. Edge between node {src} and node {dst} has similarity value:')
    lisst.append(scores[src, dst])
labells = torch.stack(lisst, dim=0)
labells

0. Edge between node 0 and node 1 has similarity value:
1. Edge between node 0 and node 3 has similarity value:
2. Edge between node 1 and node 0 has similarity value:
3. Edge between node 1 and node 3 has similarity value:
4. Edge between node 2 and node 3 has similarity value:
5. Edge between node 3 and node 0 has similarity value:
6. Edge between node 3 and node 1 has similarity value:
7. Edge between node 3 and node 2 has similarity value:


tensor([0.7228, 0.7160, 0.1000, 0.1000, 0.7310, 0.7160, 0.7301, 0.7310],
       dtype=torch.float64, grad_fn=<StackBackward0>)

In [None]:
len(E)

In [None]:
x = torch.tensor([torch.tensor(1.0),torch.tensor(2.0),torch.tensor(3.0)],requires_grad=True)
x

In [None]:
torch.cat([x, x, x])

In [None]:
lisst

In [None]:
neki = []
for i in range(4):
    neki.append(torch.tensor(float(i),requires_grad=True))
torch.stack(neki, dim=0)

In [None]:
neki[0] 


In [None]:
mask = None  # TODO: unfinished cell; the bare `mask = ` was a syntax error. The mask is assigned in later cells.

In [None]:
# NOTE(review): both branches are `scores`, so `mask` is simply `scores` here;
# a boolean mask is built from the adjacency matrix in a later cell.
mask = torch.where(scores >0.5,scores,scores)

# Doc-example line pasted with its interpreter prompt; kept as a comment
# because the leading `>>>` is a syntax error inside a code cell.
# >>> torch.masked_select(x, mask)

In [28]:
scores

tensor([[0.7311, 0.7228, 0.5001, 0.7160],
        [0.1000, 0.1000, 0.1000, 0.1000],
        [0.7183, 0.7306, 0.7311, 0.7310],
        [0.7160, 0.7301, 0.7310, 0.7311]], dtype=torch.float64,
       grad_fn=<CopySlices>)

In [29]:
E

tensor([[0, 1],
        [0, 3],
        [1, 0],
        [1, 3],
        [2, 3],
        [3, 0],
        [3, 1],
        [3, 2]])

In [41]:
# Boolean adjacency matrix built from the edge index (E is transposed back
# to two rows first); True marks an existing edge. The output shows that
# to_dense_adj adds a leading dimension of size 1.
mask = to_dense_adj(E.T).type(torch.bool)
mask 

tensor([[[False,  True, False,  True],
         [ True, False, False,  True],
         [False, False, False,  True],
         [ True,  True,  True, False]]])

In [37]:
torch.masked_select(scores, mask)

tensor([0.7228, 0.7160, 0.1000, 0.1000, 0.7310, 0.7160, 0.7301, 0.7310],
       dtype=torch.float64, grad_fn=<MaskedSelectBackward0>)

In [None]:
def cos_dist_edge_embeddings(features, edge_list, verbose=True):
    """Label every edge by the thresholded cosine similarity of its end nodes.

    The pairwise cosine similarity of the feature rows is passed through a
    sigmoid and binarised at 0.5 (1 if the score is greater than 0.5, else 0).
    The label of an edge is the binarised score at ``[source, target]``.

    Args:
        features: 2-D array-like with one feature row per node.
        edge_list: Integer array or tensor of shape ``(2, num_edges)``; row 0
            holds the source nodes, row 1 the target nodes.
        verbose: If true, print the binarised score matrix and the labels.

    Returns:
        A float32 tensor of shape ``(num_edges,)`` containing 0.0 / 1.0.
        Rows with zero norm give NaN similarities, which do not exceed the
        threshold and are therefore labelled 0.
    """
    features = np.asarray(features, dtype=float)
    norm_row = norm(features, axis=1)
    scaling = np.outer(norm_row, norm_row)
    cos_sim = (features @ features.T) / scaling

    scores = torch.sigmoid(torch.as_tensor(cos_sim, dtype=torch.float32))
    binary_scores = (scores > 0.5).to(scores.dtype)

    # Vectorised lookup of the (source, target) entries.
    edges = torch.as_tensor(edge_list, dtype=torch.long)
    labels = binary_scores[edges[0], edges[1]]

    if verbose:
        print(f'Score matrix: \n{binary_scores}\n')
        print(f'Labels: {labels}')
    return labels

In [None]:
cos_dist_edge_embeddings(A,E)

In [None]:
# Work-in-progress training loop: it only runs the forward pass and prints shapes.
# NOTE(review): the loss, backward pass and optimizer step are commented out,
# so no parameters are updated yet.
model.train()

for epoch in tqdm(range(epochs)):
    cost = 0  # loss accumulator; unused while the loss code below is commented out
    for time, snapshot in enumerate(train_dataset):
        # The first element (time == 0) is skipped.
        if time > 0:
            # updated edge list to undirected
            # directed edge_list: snapshot.edge_index used for calculating the cosine distance
            # and loss (exploit symmetry)
            # for normal message passing we need the undirected graph, i.e. the edge list with 2* elements
            # NOTE(review): snapshot.edge_index is passed to the model as-is below;
            # no conversion to an undirected edge list happens in this loop.
            print(time)
            print(snapshot.edge_index)
            #y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
            pred = model(snapshot.x,snapshot.edge_index,snapshot.edge_attr)
            # Debug output: shapes of the edge index, the node features and the prediction.
            print(snapshot.edge_index.shape)
            print(snapshot.x.shape)
            print(pred.shape)
        #cost = cost + torch.mean((y_hat-snapshot.y)**2)
        #cost = cost/(time+1)
        #cost.backward()
        #optimizer.step()
        #optimizer.zero_grad()