In [174]:
import time
from typing import Optional

from tqdm import trange
import torch
from torch import Tensor
from torch_geometric import EdgeIndex
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import MessagePassing
from torch_geometric.experimental import disable_dynamic_shapes
from torch_geometric.nn.aggr import Aggregation
from torch_geometric.transforms import LineGraph
from torch_geometric.utils import scatter, cumsum, coalesce, degree
from torch_geometric.utils import to_torch_sparse_tensor, to_torch_csc_tensor, to_torch_csr_tensor, to_torch_coo_tensor


import pathpyG as pp

In [160]:
def lift_order_edge_index(edge_index: EdgeIndex | torch.Tensor) -> torch.Tensor:
    N = edge_index.max().item() + 1
    if isinstance(edge_index, torch.Tensor):
        edge_index = EdgeIndex(edge_index, sparse_size=(N, N))
    edge_index = edge_index.sort_by("row")[0]
    # Compute A^2 to get all paths of length 2 with source and target nodes
    # A_2 is a tensor of shape (2, num_p) where num_p is the number of paths that have unique source and target nodes
    # A_2_counts is a tensor of shape (num_p,) that contains the number of paths that have the corresponding source and target node
    A_2, A_2_counts = edge_index @ edge_index
    # as many times as A_2_counts specifies
    # For that we first use the torch.cumsum function to get index pointers where the next source/target node starts
    # (Note that the cumsum from PyG appends a 0 at the beginning, so we need explicitly the torch.cumsum function)
    ptrs = torch.cumsum(A_2_counts, dim=0)
    # Then we use the bucketize function to get the repeated index pointers
    # We use the `ptrs` to specify the boundaries for each bucket
    # With a range of ints from 0 to the sum of all A_2_counts, we can then repeat each index as many times as specified in A_2_counts
    idx_range = torch.arange(A_2_counts.sum(), device=edge_index.device)
    expand_idx = torch.bucketize(idx_range, ptrs, right=True)
    # We use this index to expand the source and target nodes
    path_srcs = A_2[0][expand_idx]
    path_dsts = A_2[1][expand_idx]

    # Now only the center nodes are missing!
    # The center nodes are all the nodes that have at least one incoming and one outgoing edge
    # Thus we can compute the indegree and outdegree of each node 
    indegree = degree(edge_index[1], dtype=torch.long, num_nodes=N)
    outdegree = degree(edge_index[0], dtype=torch.long, num_nodes=N)
    # If the product of indegree and outdegree is not 0, the node is a center node
    in_out = indegree * outdegree
    center_nodes = in_out.nonzero().squeeze()
    # Next, we filter out all none center nodes from the the destination nodes of all edges
    # Note that the destinations keep the original order of the edge index
    # We also have each destination node repeated as many times as it has incoming edges
    filtered_dsts = edge_index[1][torch.isin(edge_index[1], center_nodes)]

    # Since in the end we need a higher order edge for each combination of source and destination nodes for each center node,
    # we also need to repeat each center node as many times as it has outgoing edges.
    # Note that our edge index is sorted by the source nodes and we are repeating the center nodes for each existing outgoing edge
    # so we can insert the repeated center nodes right before/after the existing destination nodes
    # For this we repeat the bucketize function from above but use the outdegree as the boundaries
    ptrs_center = torch.cumsum(outdegree[filtered_dsts], dim=0)
    expand_idx_center = torch.bucketize(idx_range, ptrs_center, right=True)
    path_centers = filtered_dsts[expand_idx_center]

    # Finally, we can stack the source, center and destination nodes to get the higher order edge index
    ho_srcs = torch.stack([path_srcs, path_centers], dim=1)
    ho_dsts = torch.stack([path_centers, path_dsts], dim=1)
    ho_edge_index = torch.stack([ho_srcs, ho_dsts], dim=0)
    return ho_edge_index

# DeBruijn Transformations using GNNs

In [17]:
class ConcatAggregation(Aggregation):
    
    def __init__(self):
        super().__init__()

    # Not sure how this aggregation works, only that it does
    # Inspired by the LSTMAggregation implementation in PyG:
    # https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/nn/aggr/lstm.html
    @disable_dynamic_shapes(required_args=['dim_size', 'max_num_elements'])
    def forward(
        self,
        x: Tensor,
        index: Optional[Tensor] = None,
        ptr: Optional[Tensor] = None,
        dim_size: Optional[int] = None,
        dim: int = -2,
        max_num_elements: Optional[int] = None,
    ) -> Tensor:

        # Concetenate all messages with padding value -1
        x, _ = self.to_dense_batch(x, index, ptr, dim_size, dim, max_num_elements=max_num_elements, fill_value=-1)
        return x


class DeBruijnTransform(MessagePassing):
    # freq_aggr is a string specifying how we should count each path in the network
    # This can either be `propagation` where every path is counted once
    # or `diffusion` where every path is counted depending on the number of outgoing edges
    # of the source node.
    def __init__(self, freq_aggr: str = "propagation"):
        super().__init__(aggr=ConcatAggregation(), flow="target_to_source")
        self.freq_aggr = freq_aggr

    def forward(self, node_idx, edge_index, edge_attr=None):
        # Sort edge_index because otherwise propagate will not work in combination with the ConcatAggregation
        edge_index = coalesce(edge_index, sort_by_row=True)
        # Set the dimension along which the node feature tensor is expected
        # This is the default value, but we need to set it explicitly here
        # because we change it later
        self.node_dim = -2
        # Update the node idx by passing the messages and aggregating them
        # In the message function, we concatenate the node idx of the source node
        # with the node idx of the target node
        # In the aggregation function, we concatenate all messages from the neighbors
        # The resulting feature for every node is a tensor of shape (max_degree, 2)
        # where max_degree is the maximum degree of the graph
        # If a node has less neighbors than max_degree, the remaining entries are filled with -1
        node_idx_set_higher_order = self.propagate(edge_index, node_idx=node_idx)
        # Since our node features changed from shape (N, 1) to (N, max_degree, 2)
        # we need to set the node_dim to -3
        self.node_dim = -3
        # We use the function that is used to update node features to create the higher order edges
        edge_index_higher_order, edge_attr_higher_order = self.edge_updater(
            edge_index, 
            node_idx=node_idx_set_higher_order,
            edge_attr=edge_attr
            )

        if edge_attr_higher_order is not None:
            return edge_index_higher_order, edge_attr_higher_order
        return edge_index_higher_order

    def message(self, node_idx_i, node_idx_j):
        # Concatenate the node idx of the source node with the node idx of the target node
        # The shape changes from (N, k) to (N, k+1) where k is the order of the nodes before
        return torch.cat([node_idx_i, node_idx_j[:, -1:]], dim=-1)
    
    def edge_update(self, node_idx_i, node_idx_j, edge_attr=None) -> tuple[Tensor, Optional[Tensor]]:
        # We take the higher order node idx sets that have been created for each node adjacent
        # to the edge (node_idx_i, node_idx_j) and repeat each node idx across different dimensions
        # so that we can compare them with each other
        #
        # Example:
        #
        #   node_idx_i = [[0, 3], [1, 3], [2, 3]]
        #   node_idx_j = [[3, 4], [2, 4], [-1, -1]]
        #
        #   strided_node_idx_i = [[
        #                           [0, 3],
        #                           [1, 3], 
        #                           [2, 3]
        #                         ],
        #                         [
        #                           [0, 3],
        #                           [1, 3],
        #                           [2, 3]
        #                         ],
        #                         [
        #                           [0, 3],
        #                           [1, 3],
        #                           [2, 3]
        #                         ]]
        #   strided_node_idx_j = [[
        #                           [3, 4],
        #                           [3, 4],
        #                           [3, 4]
        #                         ],
        #                         [
        #                           [2, 4],
        #                           [2, 4],
        #                           [2, 4]
        #                         ],
        #                         [
        #                           [-1, -1],
        #                           [-1, -1],
        #                           [-1, -1]
        #                         ]]
        strided_node_idx_i = node_idx_i.unsqueeze(1).expand(-1, node_idx_j.size(1), -1, -1)
        strided_node_idx_j = node_idx_j.unsqueeze(2).expand(-1, -1, node_idx_i.size(1), -1)
        # Only create an higher order edge if the target node idx of the first edge is equal to
        # the source node idx of the second edge
        edge_mask = (strided_node_idx_i[:, :, :, 1:] == strided_node_idx_j[:, :, :, :-1]).all(dim=-1)
        # Also, we need to remove the -1 padding values
        padd_mask = (
            (strided_node_idx_i[:, :, :] != -1).all(dim=-1) &
            (strided_node_idx_j[:, :, :] != -1).all(dim=-1)
        )
        # For the above, the following mask is:
        #
        #   mask = [[True, True, True],
        #           [False, False, False],
        #           [False, False, False]]
        mask = (edge_mask & padd_mask)
        # Concetenate the remaining higher order edges to create a new edge index
        higher_order_edges = torch.cat([strided_node_idx_i[mask].unsqueeze(0), strided_node_idx_j[mask].unsqueeze(0)], dim=0)
        
        if edge_attr is not None:
            # If edge attributes are given, we need to fit the shape to apply the same mask
            strided_edge_attr = edge_attr.unsqueeze(1).unsqueeze(1).expand(-1, node_idx_j.size(1), node_idx_i.size(1))
            # Apply the mask and use some way to combine the edge attributes of source and target node
            # For now, we just take the edge attribute of the source node
            if self.freq_aggr == "propagation":
                higher_order_edge_attr = strided_edge_attr[mask]
            elif self.freq_aggr == "diffusion":
                higher_order_edge_attr = strided_edge_attr[mask] / mask.sum(dim=(1,2), keepdim=True).expand(-1, node_idx_j.size(1), node_idx_i.size(1))[mask]
            else:
                raise ValueError(f"Unknown frequency aggregation method {self.freq_aggr}")
            return higher_order_edges, higher_order_edge_attr
        return higher_order_edges, None

## Toy Example

In [165]:
edge_index = torch.tensor([[0, 0, 1, 1, 3, 4, 1, 6, 5],
                           [1, 3, 2, 3, 4, 5, 6, 5, 7]])
node_idx = torch.arange(edge_index.max() + 1).reshape(-1, 1)
edge_index_2_fast = DeBruijnTransform()(node_idx, edge_index)
print(edge_index_2_fast)

tensor([[[0, 1],
         [0, 1],
         [0, 1],
         [0, 3],
         [1, 3],
         [1, 6],
         [3, 4],
         [4, 5],
         [6, 5]],

        [[1, 2],
         [1, 3],
         [1, 6],
         [3, 4],
         [3, 4],
         [6, 5],
         [4, 5],
         [5, 7],
         [5, 7]]])


In [166]:
edge_index_2 = pp.DAGData.lift_order_dag(edge_index.unsqueeze(-1))
print(edge_index_2)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA_gather)

In [167]:
edge_index_new = lift_order_edge_index(EdgeIndex(edge_index))
print(edge_index_new)

tensor([[[0, 1],
         [0, 1],
         [0, 1],
         [0, 3],
         [1, 3],
         [1, 6],
         [3, 4],
         [4, 5],
         [6, 5]],

        [[1, 2],
         [1, 3],
         [1, 6],
         [3, 4],
         [3, 4],
         [6, 5],
         [4, 5],
         [5, 7],
         [5, 7]]])


In [169]:
print((edge_index_2_fast == edge_index_new).all())

tensor(True)


### With Edge Weights

Depending on how you count each walk, you will get different statistics. We can choose the aggregation via `freq_aggr` to be either "propagation", i.e. each walk counts with its weight, or "diffusion" i.e. each walk is counted with the probability of a random walker starting at the first node to end up in the last. 

In [6]:
edge_index = torch.tensor([[0, 0, 1, 1, 3, 4, 1, 6, 5],
                           [1, 3, 2, 3, 4, 5, 6, 5, 7]])
node_idx = torch.arange(edge_index.max() + 1).reshape(-1, 1)
edge_attr = torch.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=torch.float32)
edge_index_2_fast, edge_attr_2 = DeBruijnTransform(freq_aggr="diffusion")(node_idx, edge_index, edge_attr)
print(edge_index_2_fast)

tensor([[[0, 1],
         [0, 1],
         [0, 1],
         [0, 3],
         [1, 3],
         [1, 6],
         [3, 4],
         [4, 5],
         [6, 5]],

        [[1, 2],
         [1, 3],
         [1, 6],
         [3, 4],
         [3, 4],
         [6, 5],
         [4, 5],
         [5, 7],
         [5, 7]]])


In [7]:
print(edge_attr_2)

tensor([0.3333, 0.3333, 0.3333, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000])


### 3rd order
Currently, the transformation works only with a standard edge index. The good thing is, you can still pass in the higher_order node_idx and then the output is directly a third order edge index.

In [8]:
# Transform the edge index since the DeBruijnTransform only works for the normal edge index
edges_2 = edge_index_2_fast.reshape(-1, 2)
uniques, inverse_idx = edges_2.unique(dim=0, return_inverse=True)
transformed_edge_index_2_fast = inverse_idx.reshape(2, -1)

In [9]:
edge_index_3_fast = DeBruijnTransform()(uniques, transformed_edge_index_2_fast)
print(edge_index_3_fast)

tensor([[[0, 1, 3],
         [0, 1, 6],
         [0, 3, 4],
         [1, 3, 4],
         [1, 6, 5],
         [3, 4, 5]],

        [[1, 3, 4],
         [1, 6, 5],
         [3, 4, 5],
         [3, 4, 5],
         [6, 5, 7],
         [4, 5, 7]]])


In [10]:
edge_index_3 = pp.DAGData.lift_order_dag(edge_index_2)
print(edge_index_3)

tensor([[[0, 1, 3],
         [0, 1, 6],
         [0, 3, 4],
         [1, 3, 4],
         [3, 4, 5],
         [1, 6, 5]],

        [[1, 3, 4],
         [1, 6, 5],
         [3, 4, 5],
         [3, 4, 5],
         [4, 5, 7],
         [6, 5, 7]]])


# Exponentionally Large DAG

In [22]:
layers = 5
branches = 15

edges = []
prev_layer_nodes = [0]
j = 1
for _ in trange(layers):
    layer_nodes = []
    for node in prev_layer_nodes:
        for _ in range(branches):
            layer_nodes.append(j)
            edges.append((f"{node}", f"{j}"))
            j+=1
    prev_layer_nodes = layer_nodes

dag = pp.Graph.from_edge_list(edges)
dag_edge_index = dag.data.edge_index.unsqueeze(-1)

node_idx = torch.arange(dag_edge_index.max().item() + 1).unsqueeze(-1)
node_idx_gpu = node_idx.cuda()
dag_edge_index_gpu = dag.data.edge_index.cuda()

  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 5/5 [00:00<00:00, 18.74it/s]


### Current implementation

In [12]:
%timeit pp.DAGData.lift_order_dag(dag_edge_index)

1min 9s ± 3.52 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Message Passing based implementation (CPU)

In [23]:
%timeit DeBruijnTransform()(node_idx, dag.data.edge_index)

717 ms ± 14.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Message Passing based implementation (GPU)

In [24]:
%timeit DeBruijnTransform()(node_idx_gpu, dag_edge_index_gpu)

83.1 ms ± 2.89 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### New implementation (CPU)

In [49]:
%timeit lift_order_edge_index(dag.data.edge_index)

116 ms ± 2.68 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [50]:
%timeit lift_order_edge_index(dag_edge_index_gpu)

98.9 ms ± 12.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [183]:
data_gpu = dag.data.to("cuda")

In [184]:
%timeit linegraph(data_gpu)

KeyboardInterrupt: 

In [182]:
linegraph = LineGraph()
%timeit linegraph(dag.data)

6.82 s ± 167 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
# print((dag_edge_index_order_2.sort(dim=1)[0] == dag_edge_index_order_2_fast.sort(dim=1)[0]).all())

## Many Walks

In [52]:
n_walks = 10000
walk_length = 1000

walks = [list(range(walk_length)) for _ in range(n_walks)]
orig_walk = pp.WalkData()
for walk in walks:
    orig_walk.add_walk_seq(walk)

path_list = list(orig_walk.paths.values())
path_freq_tensor = torch.tensor(list(orig_walk.path_freq.values()))
mapping = pp.IndexMap()
nested_walk = pp.WalkDataNested(path_list, path_freq=path_freq_tensor, mapping=mapping)

  self.paths = nested_tensor(paths, dtype=torch.long)


### Original Walk Implementation

In [5]:
%timeit orig_walk.edge_index_k_weighted(2)

37.5 s ± 7.25 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Nested Tensor Implementation (CPU)

In [6]:
pp.config['torch']["device"] = "cpu"
%timeit nested_walk.edge_index_k_weighted(2)

466 ms ± 20.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Nested Tensor (GPU)

In [53]:
pp.config['torch']["device"] = "cuda"
cuda_path_list = [path.cuda() for path in path_list]
cuda_path_freq = path_freq_tensor.cuda()
cuda_nested_walk = pp.WalkDataNested(cuda_path_list, path_freq=cuda_path_freq, mapping=mapping)
%timeit cuda_nested_walk.edge_index_k_weighted(2)

440 ms ± 24.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Message Passing Implementation (CPU + GPU)

In [54]:
# We create a list of Data objects where each Data object contains the edge index of a path (could also be a DAG in theory)
data_list = [Data(edge_index=path.long(), num_nodes=walk_length) for path in path_list]
# We use a dataloader from PyG to combine all the edge indices into a single graph with multiple disjoint subgraphs
# If two paths share a node, the node is duplicated in the resulting graph and the new higher order edges need to be aggregated afterwards
# Note that due to the `batch_size` parameter, we can also do computations on a set of paths that are too large to fit into memory at once
walk_graph = next(iter(DataLoader(data_list, batch_size=n_walks)))
edge_index = walk_graph.edge_index
node_idx = torch.arange(edge_index.max() + 1).unsqueeze(-1)

The following measures the time to do the De Bruijn graph transformation for the edge index that contains all paths as disjunct subgraphs. Since the aggregations afterwards are omitted, the runtimes are not exactly comparable to the above. See the next section (With Weights and the Aggregation) for a full `edge_index_k_weighted` transformation.

In [55]:
%timeit DeBruijnTransform()(node_idx, edge_index)

576 ms ± 76.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [56]:
cuda_edge_index = edge_index.cuda()
cuda_node_idx = node_idx.cuda()
%timeit DeBruijnTransform()(cuda_node_idx, cuda_edge_index)

71.2 ms ± 5.52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [58]:
%timeit lift_order_edge_index(edge_index)

2.24 s ± 26.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [57]:
%timeit lift_order_edge_index(cuda_edge_index)

128 ms ± 135 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### With Weights and the Aggregation

In [11]:
def edge_index_k_weighted(path_list, path_freq, aggregation="propagation", device="cuda"):
    data_list = [
        Data(
            edge_index=path.long(), 
            num_nodes=walk_length,
            edge_attr=torch.ones(path.size(1), dtype=torch.float32) * path_freq[i],
            node_idx=torch.arange(walk_length).unsqueeze(-1)
        ) for i, path in enumerate(path_list)
        ]
    walk_graph = next(iter(DataLoader(data_list, batch_size=n_walks, follow_batch=["node_idx"]))).to(device)
    edge_index = walk_graph.edge_index
    edge_attr = walk_graph.edge_attr
    node_idx = torch.arange(edge_index.max() + 1, device=device).unsqueeze(-1)
    edge_index_2, edge_attr_2 = DeBruijnTransform(aggregation)(node_idx, edge_index, edge_attr)
    orig_edge_index_2 = walk_graph.node_idx.squeeze()[edge_index_2]
    unique_edge_index_2, inverse_idx = orig_edge_index_2.unique(dim=1, return_inverse=True)
    edge_attr_2 = torch.zeros(unique_edge_index_2.size(1), device=device).index_add(0, inverse_idx, edge_attr_2)
    return unique_edge_index_2, edge_attr_2

In [12]:
%timeit edge_index_k_weighted(path_list, path_freq_tensor, device="cuda")

774 ms ± 48.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [185]:
edge_index = EdgeIndex(torch.tensor([
                                        [0, 0, 1, 0, 0, 2, 2, 3, 3, 4, 4],
                                        [1, 2, 2, 4, 6, 3, 4, 4, 5, 5, 6]
]), sparse_size=(7, 7))
edge_index = edge_index.sort_by("row")[0]
N = 7
A = edge_index.to_dense()

In [195]:
data = Data(edge_index=edge_index)
line_graph = LineGraph(force_directed=True)(data)
edge_index.T[line_graph.edge_index]

tensor([[[0, 1],
         [0, 2],
         [0, 2],
         [0, 4],
         [0, 4],
         [1, 2],
         [1, 2],
         [2, 3],
         [2, 3],
         [2, 4],
         [2, 4],
         [3, 4],
         [3, 4]],

        [[1, 2],
         [2, 3],
         [2, 4],
         [4, 5],
         [4, 6],
         [2, 3],
         [2, 4],
         [3, 4],
         [3, 5],
         [4, 5],
         [4, 6],
         [4, 5],
         [4, 6]]])

In [197]:
(lift_order_edge_index(edge_index) == edge_index.T[line_graph.edge_index]).all()

tensor(True)

In [186]:
edge_index

EdgeIndex([[0, 0, 0, 0, 1, 2, 2, 3, 3, 4, 4],
           [1, 2, 4, 6, 2, 3, 4, 4, 5, 5, 6]], sparse_size=(7, 7), nnz=11,
          sort_order=row)

In [187]:
# Fast dense implementation
(A * A.unsqueeze(2)).nonzero()

tensor([[0, 1, 2],
        [0, 2, 3],
        [0, 2, 4],
        [0, 4, 5],
        [0, 4, 6],
        [1, 2, 3],
        [1, 2, 4],
        [2, 3, 4],
        [2, 3, 5],
        [2, 4, 5],
        [2, 4, 6],
        [3, 4, 5],
        [3, 4, 6]])

In [161]:
from time import time
from torch_geometric.testing import get_random_edge_index

for i in range(1, 31):
    edge_index = get_random_edge_index(100*i, 100*i, 50*i).to("cuda")
    edge_index = coalesce(edge_index)
    num_nodes = 100*i

    PyG_times, MP_times, Indexing_times = [], [], []
    t = time()
    PyG_line_graph_data = LineGraph(force_directed=True)(Data(edge_index=edge_index, num_nodes=num_nodes))
    PyG_line_graph = edge_index.T[PyG_line_graph_data.edge_index]
    PyG_times.append(time() - t)

    t = time()
    MP_line_graph = DeBruijnTransform()(torch.arange(edge_index.max() + 1).unsqueeze(-1).to("cuda"), edge_index)
    MP_times.append(time() - t)

    t = time()
    Indexing_line_graph = lift_order_edge_index(edge_index)
    Indexing_times.append(time() - t)

    # check if the line graphs are equal
    if not (PyG_line_graph == MP_line_graph).all():
        print(f"Iteration {i}: Message Passing and PyG are equal")
        print((PyG_line_graph != MP_line_graph).nonzero())
        break
    if not (PyG_line_graph == Indexing_line_graph).all():
        print(f"Iteration {i}: Indexing and PyG are equal")
        print(f"Number of nodes: {num_nodes} vs {edge_index.max().item() + 1}")
        print(edge_index)
        print((PyG_line_graph != Indexing_line_graph).nonzero())
        print("######################")
        print(PyG_line_graph)
        print("######################")
        print(Indexing_line_graph)
        print("######################")
        edge_index_obj = EdgeIndex(edge_index, sparse_size=(num_nodes, num_nodes)).sort_by("row")[0]
        print(edge_index_obj @ edge_index_obj)
        break

Iteration 1: Indexing and PyG are equal
Number of nodes: 100 vs 99
tensor([[ 0,  2,  5,  5,  9, 11, 12, 14, 14, 16, 16, 19, 22, 23, 25, 27, 27, 30,
         30, 34, 34, 35, 35, 38, 39, 42, 43, 43, 46, 52, 57, 66, 67, 67, 68, 70,
         72, 73, 75, 77, 79, 80, 83, 86, 87, 88, 90, 93, 94, 98],
        [41, 85, 40, 58, 25, 12, 43, 27, 57,  5,  9, 48, 82, 34, 13, 34, 58, 26,
         44, 50, 60, 62, 83, 28, 27, 46, 45, 51, 11, 59,  3, 15, 39, 73, 81, 33,
         89,  2, 57,  7, 27,  6, 80, 77, 68, 58, 22, 90, 96, 47]],
       device='cuda:0')
tensor([[ 1,  4,  1],
        [ 1,  5,  1],
        [ 1,  6,  1],
        [ 1,  7,  1],
        [ 1,  8,  1],
        [ 1,  9,  1],
        [ 1, 19,  1],
        [ 1, 20,  1]], device='cuda:0')
######################
tensor([[[ 9, 25],
         [11, 12],
         [12, 43],
         [12, 43],
         [14, 27],
         [14, 27],
         [14, 57],
         [16,  5],
         [16,  5],
         [16,  9],
         [23, 34],
         [23, 34],
       

In [170]:
edge_index = torch.tensor([[0, 0, 1, 1, 2, 3, 3, 3, 4, 1, 6, 5],
                           [1, 3, 2, 3, 4, 2, 4, 7, 5, 6, 5, 7]])

edge_index = EdgeIndex(edge_index, sparse_size=(8, 8)).sort_by("row")[0]

In [171]:
index_edge = lift_order_edge_index(edge_index)
index_edge

tensor([[[0, 1],
         [0, 1],
         [0, 1],
         [0, 3],
         [0, 3],
         [0, 3],
         [1, 2],
         [1, 3],
         [1, 3],
         [1, 3],
         [1, 6],
         [2, 4],
         [3, 2],
         [3, 4],
         [4, 5],
         [6, 5]],

        [[1, 2],
         [1, 2],
         [1, 3],
         [3, 6],
         [3, 4],
         [3, 7],
         [2, 4],
         [3, 4],
         [3, 2],
         [3, 7],
         [6, 5],
         [4, 5],
         [2, 4],
         [4, 5],
         [5, 7],
         [5, 7]]])

In [172]:
line_edge = edge_index.T[LineGraph()(Data(edge_index=edge_index, num_nodes=8)).edge_index]
line_edge

tensor([[[0, 1],
         [0, 1],
         [0, 1],
         [0, 3],
         [0, 3],
         [0, 3],
         [1, 2],
         [1, 3],
         [1, 3],
         [1, 3],
         [1, 6],
         [2, 4],
         [3, 2],
         [3, 4],
         [4, 5],
         [6, 5]],

        [[1, 2],
         [1, 3],
         [1, 6],
         [3, 2],
         [3, 4],
         [3, 7],
         [2, 4],
         [3, 2],
         [3, 4],
         [3, 7],
         [6, 5],
         [4, 5],
         [2, 4],
         [4, 5],
         [5, 7],
         [5, 7]]])

In [173]:
index_edge == line_edge

tensor([[[ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True]],

        [[ True,  True],
         [ True, False],
         [ True, False],
         [ True, False],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True, False],
         [ True, False],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True],
         [ True,  True]]])

In [181]:
edge_index

EdgeIndex([[0, 0, 1, 1, 1, 2, 3, 3, 3, 4, 5, 6],
           [1, 3, 2, 3, 6, 4, 2, 4, 7, 5, 7, 5]], sparse_size=(8, 8), nnz=12,
          sort_order=row)

In [179]:
A = to_torch_csc_tensor(edge_index)

In [178]:
A @ A

tensor(indices=tensor([[0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 4, 6],
                       [2, 3, 4, 6, 7, 2, 4, 5, 7, 5, 4, 5, 7, 7]]),
       values=tensor([2., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1.]),
       size=(8, 8), nnz=14, layout=torch.sparse_coo)

In [180]:
A_2 = A @ A
A_2

tensor(crow_indices=tensor([ 0,  5,  9, 10, 12, 13, 13, 14, 14]),
       col_indices=tensor([2, 3, 6, 4, 7, 4, 2, 7, 5, 5, 4, 5, 7, 7]),
       values=tensor([2., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1.]),
       size=(8, 8), nnz=14, layout=torch.sparse_csr)

In [158]:
A_2.col_indices()

tensor([2, 3, 6, 4, 7, 4, 2, 7, 5, 5, 4, 5, 7, 7])