In [1]:
import pathpyG as pp
from torch_geometric.utils import cumsum, coalesce, degree, sort_edge_index
import torch

from scipy.sparse.csgraph import bellman_ford, dijkstra
import numpy as np

from collections import defaultdict


from tqdm import tqdm

In [4]:
t_sp = pp.io.read_csv_temporal_graph('sociopatterns_highschool_2013.tedges', header=False).to_undirected()
print(t_sp)
print(torch.unique(t_sp.data.t).size(0))

Temporal Graph with 327 nodes, 11636 unique edges and 754032 events in [1385982080.0, 1386345600.0]

Graph attributes
	dst		<class 'torch.Tensor'> -> torch.Size([754032])
	src		<class 'torch.Tensor'> -> torch.Size([754032])
	t		<class 'torch.Tensor'> -> torch.Size([754032])

1157




In [5]:
t_ants = pp.io.read_csv_temporal_graph('../data/ants_2_2_val.tedges', header=False)
print(t_ants)

Temporal Graph with 68 nodes, 752 unique edges and 2090 events in [899.0, 1796.0]

Graph attributes
	dst		<class 'torch.Tensor'> -> torch.Size([2090])
	src		<class 'torch.Tensor'> -> torch.Size([2090])
	t		<class 'torch.Tensor'> -> torch.Size([2090])





In [3]:
c = pp.algorithms.centrality.temporal_closeness_centrality(t_ants, delta=60)
print(c)

100%|██████████| 594/594 [00:00<00:00, 5479.47it/s]

Created temporal event DAG with 1181 nodes and 4023 edges
[[ 0.  1. inf ... inf inf inf]
 [ 1.  0. inf ... inf inf inf]
 [ 5.  3.  0. ...  2. inf inf]
 ...
 [inf inf inf ...  0. inf inf]
 [inf inf inf ... inf  0. inf]
 [inf inf inf ...  1. inf  0.]]
{'JJJJ': 1399.0180458430464, 'WGG_': 1491.1753968253968, '_Y_B': 1461.7166666666667, 'HHHH': 996.0666666666666, 'WGRB': 1834.2047619047619, 'WYWY': 1540.441666666667, 'WY_G': 761.1371794871794, 'XXXX': 1670.8789682539682, 'LLLL': 1182.7095238095237, 'FFFF': 1062.2448773448773, 'WYG_': 1978.7333333333331, 'WW__': 1790.2027777777776, 'WRWB': 1743.196428571429, 'AAAA': 581.3047619047619, 'WGYW': 1155.8297619047619, 'WBYY': 968.8944444444444, '_R__': 880.7575396825396, 'WYBG': 1448.1039682539683, 'W__W': 1546.319877344877, 'RRRR': 924.1214285714285, 'WYRW': 1601.938095238095, 'WYYB': 865.6825396825396, 'WG_W': 1494.8178571428573, 'WRR_': 1195.2853174603176, 'W__G': 867.9182900432901, '_WRR': 622.8873015873016, 'WY_R': 1549.3750000000002, '_YYY'




In [47]:
tedges = [('a', 'b', 1), ('b', 'c', 5), ('c', 'd', 9), ('c', 'e', 9),
              ('c', 'f', 11), ('f', 'a', 13), ('a', 'g', 18), ('b', 'f', 21),
              ('a', 'g', 26), ('c', 'f', 27), ('h', 'f', 27), ('g', 'h', 28),
              ('a', 'c', 30), ('a', 'b', 31), ('c', 'h', 32), ('f', 'h', 33),
              ('b', 'i', 42), ('i', 'b', 42), ('c', 'i', 47), ('h', 'i', 50)]
t = pp.TemporalGraph.from_edge_list(tedges)
c = pp.algorithms.centrality.temporal_closeness_centrality(t, 5)
print(c)

100%|██████████| 17/17 [00:00<00:00, 5773.07it/s]

Created temporal event DAG with 38 nodes and 47 edges
(9, 38)
(9, 9)
[[ 0.  1.  1.  3.  3. inf  1.  2. inf]
 [inf  0.  1.  2.  2.  1. inf inf  1.]
 [ 2. inf  0.  1.  1.  1.  3.  1.  1.]
 [inf inf inf  0. inf inf inf inf inf]
 [inf inf inf inf  0. inf inf inf inf]
 [ 1. inf inf inf inf  0.  2.  1. inf]
 [inf inf inf inf inf inf  0.  1. inf]
 [inf inf inf inf inf  1. inf  0.  1.]
 [inf  1. inf inf inf inf inf inf  0.]]
{'a': 12.0, 'b': 16.0, 'c': 16.0, 'd': 14.666666666666666, 'e': 14.666666666666666, 'f': 24.0, 'g': 14.666666666666666, 'h': 28.0, 'i': 24.0}





In [49]:
t = pp.TemporalGraph.from_edge_list([(0,1,0), (0,2,0), (1,2,1), (1,3,1), (3,4,2), (1,4,3)])
print(t)

Temporal Graph with 5 nodes, 6 unique edges and 6 events in [0.0, 3.0]

Graph attributes
	dst		<class 'torch.Tensor'> -> torch.Size([6])
	t		<class 'torch.Tensor'> -> torch.Size([6])
	src		<class 'torch.Tensor'> -> torch.Size([6])





In [5]:
c = pp.algorithms.centrality.temporal_closeness_centrality(t, delta=1)
print(c)

100%|██████████| 4/4 [00:00<00:00, 262.99it/s]

Created temporal event DAG with 17 nodes and 15 edges
{0.0: 0.0, 1.0: 4.0, 2.0: 8.0, 3.0: 6.0, 4.0: 9.333333333333332}





In [2]:
# old code with explosive memory usage due to computation of all second-order edges irrespective of time stamps
def lift_order_not_efficient(g: pp.TemporalGraph, delta=1):
    # first-order edge index
    edge_index, timestamps = sort_edge_index(g.data.edge_index, g.data.t)
    node_sequence = torch.arange(g.data.num_nodes, device=edge_index.device).unsqueeze(1)
    print(edge_index)
    # second-order edge index with time-respective filtering
    null_model_edge_index = pp.MultiOrderModel.lift_order_edge_index(edge_index, num_nodes=node_sequence.size(0))    
    # Update node sequences
    node_sequence = torch.cat([node_sequence[edge_index[0]], node_sequence[edge_index[1]][:, -1:]], dim=1)
    # Remove non-time-respecting higher-order edges
    time_diff = timestamps[null_model_edge_index[1]] - timestamps[null_model_edge_index[0]]
    non_negative_mask = time_diff > 0
    delta_mask = time_diff <= delta
    time_respecting_mask = non_negative_mask & delta_mask
    edge_index = null_model_edge_index[:, time_respecting_mask]
    return edge_index

In [3]:
# new memory-efficient code
def lift_order_efficient(g: pp.TemporalGraph, delta: int = 1):

    # first-order edge index
    edge_index, timestamps = g.data.edge_index, g.data.t
    # print(edge_index)

    indices = torch.arange(0, edge_index.size(1), device=g.data.edge_index.device)

    unique_t = torch.unique(timestamps, sorted=True)
    second_order = []

    # lift order: find possible continuations for edges in each time stamp
    for i in tqdm(range(unique_t.size(0))):
        t = unique_t[i]
        #print('timestamp index ', i)
        #print('timestamp ', t)
        
        # find indices of all source edges that occur at unique timestamp t
        src_time_mask = (timestamps == t)
        src_edges = edge_index[:,src_time_mask]
        src_edge_idx = indices[src_time_mask]
        #print(src_edges)
        #print(src_edge_idx)

        # find indices of all edges that can possibly continue edges occurring at time t for the given delta
        dst_time_mask = (timestamps > t) & (timestamps <= t+delta)
        dst_edges = edge_index[:,dst_time_mask]        
        dst_edge_idx = indices[dst_time_mask]
        #print(dst_edges)
        #print(dst_edge_idx)

        if dst_edge_idx.size(0)>0 and src_edge_idx.size(0)>0:

            # compute second-order edges between src and dst idx for all edges where dst in src_edges matches src in dst_edges        
            x = torch.cartesian_prod(src_edge_idx, dst_edge_idx).t()
            src_edges = torch.index_select(edge_index, dim=1, index=x[0])
            dst_edges = torch.index_select(edge_index, dim=1, index=x[1])
            #print(src_edges)
            #print(dst_edges)
            ho_edge_index = x[:,torch.where(src_edges[1,:] == dst_edges[0,:])[0]]
            second_order.append(ho_edge_index)
            #print(ho_edge_index) 
            
            # #print('dst', dst)
            # src_mask = (edge_index[:,mask][0]==dst)
            # ctd = edge_index[:,mask][:,src_mask]
            # #print('continuations', ctd)
            # ctd_indices = torch.where(edge_index[:,mask][0]==dst)[0]        
            # #print('ctd indx', ctd_indices)
            # count += ctd_indices.size(0)
    ho_index = torch.cat(second_order, dim=1)    
    return ho_index

In [5]:
def fo_nodes(ho_edge, g):
    src_edge = ho_edge[0]
    dst_edge = ho_edge[1]
    return g.data.edge_index[:,src_edge][0], g.data.edge_index[:,dst_edge][0], g.data.edge_index[:,dst_edge][1]


def temporal_shortest_paths_all(g: pp.TemporalGraph, delta: int):
    # generate temporal event DAG
    edge_index = lift_order_efficient(g, delta)

    # Add indices of first-order nodes as src and dst of paths in TEG
    src_edges_src = g.data.edge_index[0,:] + g.data.edge_index.size(1)
    src_edges_dst = torch.arange(0, g.data.edge_index.size(1))    
    dst_edges_src = torch.arange(0, g.data.edge_index.size(1))
    dst_edges_dst = g.data.edge_index[1,:] + 2*g.data.edge_index.size(1)

    src_edges = torch.stack([src_edges_src, src_edges_dst])
    dst_edges = torch.stack([dst_edges_src, dst_edges_dst])
    edge_index = torch.cat([edge_index, src_edges, dst_edges], dim=1)

    event_graph = pp.Graph.from_edge_index(edge_index)
    
    # initialize distance matrix 
    dist = torch.full((g.n, event_graph.n), float("inf"), device=g.data.edge_index.device)

    # predecessor lists
    pred = defaultdict(lambda: defaultdict(list))

    # Fastest known single source SP in DAG (Cormen, Leiserson): single scan of edges in DAG
    # trick: index of second-order nodes = topological sorting of event DAG assuming that edges are given in chronological order    
    # scan second-order nodes in topological order and relax distances between first-order nodes

    # TODO: correct algorithm
    for src in tqdm(g.nodes):
        dist[g.mapping.to_idx(src), g.mapping.to_idx(src) + g.data.edge_index.size(1)] = 0
        for v in event_graph.nodes:
            for w in event_graph.successors(v):
                dist[g.mapping.to_idx(src), w] = min(dist[g.mapping.to_idx(src), w], dist[g.mapping.to_idx(src), v]+1)
    
    dist_fo = dist[:,2*g.m:] - 1
    dist_fo.fill_diagonal_(0)
    return dist_fo, pred


def temporal_shortest_paths(g: pp.TemporalGraph, delta: int):
    # generate temporal event DAG
    edge_index = lift_order_efficient(g, delta)    

    # Add indices of g.n first-order nodes as source nodes of paths in augmented TEG
    src_edges_src = g.m + g.data.edge_index[0,:]
    src_edges_dst = torch.arange(0, g.data.edge_index.size(1))

    # Add indices of g.n first-order nodes as target nodes of paths in augmented TEG
    dst_edges_src = torch.arange(0, g.data.edge_index.size(1))
    dst_edges_dst = g.m + g.n + g.data.edge_index[1,:]

    src_edges = torch.stack([src_edges_src, src_edges_dst])
    dst_edges = torch.stack([dst_edges_src, dst_edges_dst])
    edge_index = torch.cat([edge_index, src_edges, dst_edges], dim=1)

    event_graph = pp.Graph.from_edge_index(edge_index, num_nodes=g.m + 2 * g.n)
    m = event_graph.sparse_adj_matrix()
    print(m.shape)
    # compute shortest paths from all source nodes to all nodes 
    dist, pred = dijkstra(m, directed=True, indices = np.arange(g.m, g.m+g.n),  return_predecessors=True, unweighted=True)
    print(dist.shape)
    print(g.n + g.m)
    # we are only interested in target nodes, whose indices start at G.m + G.n
    dist_fo = dist[:,g.m+g.n:] - 1
    np.fill_diagonal(dist_fo, 0)
    pred_fo = pred[:,g.n+g.m:]
    return dist_fo, pred_fo


    
def temporal_closeness_centrality(g: pp.TemporalGraph, delta: int) -> dict:

    centralities = dict()
    dist, _ = temporal_shortest_paths(g, delta)
    for x in g.nodes:
        centralities[x] = sum((g.n - 1) / dist[np.arange(g.n)!=x, g.mapping.to_idx(x)])

    return centralities

In [6]:
dist, pred = temporal_shortest_paths(t_ants, delta=30)
print(dist.shape)
print(t_ants.n)
print(t_ants.m)

100%|██████████| 594/594 [00:00<00:00, 6304.91it/s]

(1181, 1181)
(68, 1181)
1113
(68, 68)
68
1045





In [11]:
idx[:,1]

NameError: name 'idx' is not defined

In [None]:
edge_index = lift_order_efficient(t)
print(edge_index)

In [50]:
print(t.data.edge_index)
dist, pred = temporal_shortest_paths(t, delta=1)

print(dist)
print(pred)

tensor([[0, 0, 1, 1, 3, 1],
        [1, 2, 2, 3, 4, 4]])


100%|██████████| 4/4 [00:00<00:00, 2955.30it/s]

(16, 16)
(5, 16)
11
[[ 0.  1.  1.  2.  3.]
 [inf  0.  1.  1.  1.]
 [inf inf  0. inf inf]
 [inf inf inf  0.  1.]
 [inf inf inf inf  0.]]
[[-9999     0     1     3     4]
 [-9999 -9999     2     3     5]
 [-9999 -9999 -9999 -9999 -9999]
 [-9999 -9999 -9999 -9999     4]
 [-9999 -9999 -9999 -9999 -9999]]





In [51]:
dist[:,4]

array([ 3.,  1., inf,  1.,  0.])

In [None]:
t.mapping.node_ids

In [None]:
print(temporal_closeness_centrality(t, delta=1))
print(t.n)

In [None]:
temporal_shortest_paths(t_sp, delta=3600)

In [None]:
edge_index[0,:]

In [None]:
t.data.edge_index[:,edge_index[0,:]][0]

In [None]:
t.data.edge_index[:,edge_index[1,:]][1]

In [None]:
#print(t.data.edge_index)
print(t_sp)
g = temporal_shortest_paths(t_sp, delta=300)

In [None]:
indeg = degree(g.data.edge_index[1])
roots = torch.where(indeg==0)[0]
print(roots)

In [None]:
def traverse(g, path):
    if g.get_successors(path[-1]).size(0) == 0:
        pass
    else:
        for w in g.successors(path[-1]):
            traverse(g, path + (w,))

In [None]:
i = 0
for x in roots:
    print(x)
    traverse(g, (x,))

In [None]:
ho_index = lift_order_not_efficient(t, delta=1)
print(ho_index)

In [None]:
ho_index = lift_order_efficient(t, delta=1)
print(ho_index)

In [None]:
print(t.data.edge_index)

In [None]:
node_sequence = torch.arange(t.data.num_nodes, device=t.data.edge_index.device).unsqueeze(1)
print(node_sequence)
node_sequence = torch.cat([node_sequence[t.data.edge_index[0]], node_sequence[t.data.edge_index[1]][:, -1:]], dim=1)
print(node_sequence)

In [None]:
lift_order_not_efficient(t_sp, delta=300)

In [None]:
lift_order_efficient(t_sp, delta=300)

In [None]:
lift_order_not_efficient(t_sp, delta=300)

In [None]:
x = torch.cartesian_prod(torch.tensor([0,1]), torch.tensor([1,3])).t()
# edge 0 = 0->1
# edge 1 = 1->2
# edge 2 = 0->1

# combination 0,1:     0->1, 1->2
# combination 0,2:     0->1, 0->1
print(x)

In [None]:
src_edges = torch.index_select(t.data.edge_index, dim=1, index=x[0])
print(src_edges)

In [None]:
dst_edges = torch.index_select(t.data.edge_index, dim=1, index=x[1])
print(dst_edges)

In [None]:
 #select all indices where 
torch.where(src_edges[1,:] == dst_edges[0,:])[0]
x[:,torch.where(src_edges[1,:] == dst_edges[0,:])[0]]