In [1]:
import pathpyG as pp
from torch_geometric.utils import cumsum, coalesce, degree, sort_edge_index
import torch

from tqdm import tqdm

In [16]:
# Read the high-school contact data from the .tedges file and make it undirected.
t_sp = pp.TemporalGraph.from_csv('sociopatterns_highschool_2013.tedges').to_undirected()
print(t_sp)
# Number of distinct timestamps; the lifting loop runs once per distinct timestamp.
print(len(torch.unique(t_sp.data.t)))

Temporal Graph with 327 nodes, 11636 unique edges and 377016 events in [1385982080.0, 1386345600.0]

Graph attributes
	src		<class 'torch.Tensor'> -> torch.Size([377016])
	dst		<class 'torch.Tensor'> -> torch.Size([377016])
	t		<class 'torch.Tensor'> -> torch.Size([377016])

1157




In [2]:
# Toy temporal graph: each tuple is (source, target, timestamp).
t = pp.TemporalGraph.from_edge_list([
    (0, 1, 0),
    (0, 2, 0),
    (1, 2, 1),
    (1, 3, 1),
    (3, 4, 2),
])
print(t)

Temporal Graph with 5 nodes, 5 unique edges and 5 events in [0.0, 2.0]

Graph attributes
	src		<class 'torch.Tensor'> -> torch.Size([5])
	dst		<class 'torch.Tensor'> -> torch.Size([5])
	t		<class 'torch.Tensor'> -> torch.Size([5])





In [232]:
# Raw edge index and per-edge timestamps of the toy graph.
print(t.data.edge_index, t.data.t, sep='\n')
# Sorted distinct timestamps plus, for every edge, the position of its timestamp in that list.
unique_t, reverse_idx = torch.unique(t.data.t, sorted=True, return_inverse=True)
print(unique_t, reverse_idx, sep='\n')

tensor([[0, 0, 1, 1, 3],
        [1, 2, 2, 3, 4]])
tensor([0., 0., 1., 1., 2.])
tensor([0., 1., 2.])
tensor([0, 0, 1, 1, 2])


In [21]:
# old code with explosive memory usage due to computation of all second-order edges irrespective of time stamps
def lift_order_not_efficient(g: pp.TemporalGraph, delta=1):
    """Compute second-order edges by enumerating every pair of adjacent edges.

    Reference implementation with explosive memory usage: all pairs of edges
    (u, v), (v, w) that share the middle node v are materialised first, and the
    timestamps are only checked afterwards. Memory therefore grows with the
    number of such pairs, irrespective of ``delta``.

    Args:
        g: temporal graph; ``g.data.edge_index``, ``g.data.t`` and
            ``g.data.num_nodes`` are read.
        delta: maximum time difference between the first and the second edge
            of a pair.

    Returns:
        Tuple ``(count, ho_index)`` where ``ho_index`` has shape ``2 x count``.
        Each column ``(i, j)`` means that edge ``j`` continues edge ``i`` with
        ``0 < t_j - t_i <= delta``. NOTE: ``i`` and ``j`` are positions in the
        edge list as returned by ``sort_edge_index``, which may differ from
        the order of ``g.data.edge_index``.
    """
    # first-order edge index, sorted by source node so that the outgoing edges
    # of each node occupy one contiguous range of positions
    edge_index, timestamps = sort_edge_index(g.data.edge_index, g.data.t)

    # TODO: memory-efficient generation of temporal event DAG, where time-stamped edges are nodes
    outdegree = degree(edge_index[0], dtype=torch.long, num_nodes=g.data.num_nodes)
    # Map outdegree to each destination node to create an edge for each combination
    # of incoming and outgoing edges for each destination node
    outdegree_per_dst = outdegree[edge_index[1]]
    num_new_edges = outdegree_per_dst.sum()

    # Create sources of new higher-order edges: edge i is repeated once per
    # outgoing edge of its destination node. The outdegree ignores timestamps,
    # which is why this step blows up in memory.
    ho_edge_srcs = torch.repeat_interleave(outdegree_per_dst)
    ho_edge_src_t = timestamps[ho_edge_srcs]

    # Create destination nodes that start the indexing after the cumulative sum of the outdegree
    # of all previous nodes in the ordered sequence of nodes
    ptrs = cumsum(outdegree, dim=0)[:-1]
    ho_edge_dsts = torch.repeat_interleave(ptrs[edge_index[1]], outdegree_per_dst)
    # Offset of each candidate within the block of candidates of its source edge
    idx_correction = torch.arange(num_new_edges, dtype=torch.long, device=edge_index.device)
    idx_correction -= cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs]
    ho_edge_dsts += idx_correction

    ho_edge_dst_t = timestamps[ho_edge_dsts]

    # Keep only time-respecting pairs: the second edge must occur strictly
    # after the first one and at most delta later. Checking only the upper
    # bound would also keep pairs whose second edge happens earlier or at the
    # same time, which lift_order_efficient excludes.
    time_gap = ho_edge_dst_t - ho_edge_src_t
    time_respecting = (time_gap > 0) & (time_gap <= delta)

    ho_index = torch.stack([ho_edge_srcs[time_respecting], ho_edge_dsts[time_respecting]], dim=0)
    return ho_index.size(1), ho_index

In [10]:
# new memory-efficient code
def lift_order_efficient(g: pp.TemporalGraph, delta: int = 1):
    """Compute second-order (time-respecting) edges one timestamp at a time.

    For every distinct timestamp ``t`` the edges occurring at ``t`` are paired
    with the edges occurring in the window ``(t, t + delta]``, and a pair is
    kept when the first edge ends at the node where the second edge starts.
    Only the candidate pairs of one timestamp are held in memory at a time.

    Args:
        g: temporal graph; ``g.data.edge_index`` and ``g.data.t`` are read.
        delta: maximum time difference between the first and the second edge
            of a pair.

    Returns:
        Tuple ``(count, ho_index)`` where ``ho_index`` has shape ``2 x count``.
        Each column ``(i, j)`` holds positions in ``g.data.edge_index`` such
        that edge ``j`` continues edge ``i`` with ``0 < t_j - t_i <= delta``.
        If no such pair exists, ``count`` is 0 and ``ho_index`` is an empty
        ``2 x 0`` tensor.
    """
    # first-order edge index
    edge_index, timestamps = g.data.edge_index, g.data.t
    device = edge_index.device

    # position of every edge, used to translate boolean masks into edge indices
    indices = torch.arange(0, edge_index.size(1), device=device)

    unique_t = torch.unique(timestamps, sorted=True)
    second_order = []

    # lift order: find possible continuations for edges in each time stamp
    for i in tqdm(range(unique_t.size(0))):
        t_src = unique_t[i]

        # indices of all source edges that occur at timestamp t_src
        src_edge_idx = indices[timestamps == t_src]

        # indices of all edges that can continue edges at time t_src for given delta
        dst_edge_idx = indices[(timestamps > t_src) & (timestamps <= t_src + delta)]

        if src_edge_idx.size(0) == 0 or dst_edge_idx.size(0) == 0:
            continue

        # all (source edge, continuation edge) candidates, one pair per column
        pairs = torch.cartesian_prod(src_edge_idx, dst_edge_idx).t()
        # keep pairs where the destination of the first edge is the source of the second
        connected = edge_index[1, pairs[0]] == edge_index[0, pairs[1]]
        second_order.append(pairs[:, connected])

    # torch.cat raises on an empty list, e.g. for a graph without edges or when
    # no edge has a continuation within delta
    if not second_order:
        ho_index = torch.empty((2, 0), dtype=torch.long, device=device)
    else:
        ho_index = torch.cat(second_order, dim=1)
    return ho_index.size(1), ho_index

In [14]:
# Sanity check on the toy graph t: second-order edges for a maximum time gap of 1.
lift_order_efficient(t, delta=1)

100%|██████████| 3/3 [00:00<00:00, 3077.26it/s]


(3,
 tensor([[0, 0, 3],
         [2, 3, 4]]))

In [15]:
# Same toy input through the old implementation, for comparison with lift_order_efficient.
lift_order_not_efficient(t, delta=1)

(3,
 tensor([[0, 0, 3],
         [2, 3, 4]]))

In [17]:
# Run on the high-school contact graph with a maximum time gap of 300
# (presumably seconds, given the Unix-like timestamps — confirm).
lift_order_efficient(t_sp, delta=300)

100%|██████████| 1157/1157 [00:08<00:00, 135.27it/s]


(3693050,
 tensor([[     0,      0,      0,  ..., 376991, 376991, 376991],
         [   835,    885,    933,  ..., 376995, 377000, 377004]]))

In [22]:
# NOTE(review): no result was recorded for this call; presumably the old
# implementation exhausts memory on this graph (see the comment above
# lift_order_not_efficient) — confirm before re-running.
lift_order_not_efficient(t_sp, delta=300)

: 

In [259]:
# Step-by-step illustration on the toy graph t: pair every candidate first
# edge (positions 0 and 1) with every candidate second edge (positions 1 and 3).
#   edge 0 = 0->1
#   edge 1 = 0->2
#   edge 3 = 1->3
# pair (0, 3): 0->1 followed by 1->3  -> valid continuation
# pair (0, 1): 0->1 followed by 0->2  -> not a continuation
first_edge_ids = [0, 1]
x = torch.cartesian_prod(
    torch.tensor(first_edge_ids),
    torch.tensor([1, 3]),
).T
del first_edge_ids
print(x)

tensor([[0, 0, 1, 1],
        [1, 3, 1, 3]])


In [260]:
# (source, target) of the first edge of every candidate pair, one column per pair
src_edges = t.data.edge_index[:, x[0]]
print(src_edges)

tensor([[0, 0, 0, 0],
        [1, 1, 2, 2]])


In [261]:
# (source, target) of the second edge of every candidate pair, one column per pair
dst_edges = t.data.edge_index[:, x[1]]
print(dst_edges)

tensor([[0, 1, 0, 1],
        [2, 3, 2, 3]])


In [262]:
# select all candidate pairs where the first edge ends at the node where the second edge starts
x[:, src_edges[1] == dst_edges[0]]

tensor([[0],
        [3]])

  0%|          | 0/1157 [00:00<?, ?it/s]

100%|██████████| 1157/1157 [00:07<00:00, 148.83it/s]


(3693050,
 tensor([[     0,      0,      0,  ..., 376991, 376991, 376991],
         [   835,    885,    933,  ..., 376995, 377000, 377004]]))