In [1]:
import pathpyG as pp
pp.config['torch']['device'] = 'cpu'

In [2]:
import torch
from torch_geometric.data import Data
from torch_geometric.utils import scatter

PahtpyG takes as input various types of data: 
- Paths: DAG class
- DAGs: DAG class 
- Edgelists: DAG e network
- Time stamped interactions (events)


These data can then be represented with different types of models: 
- Network: Graph class
- Higher-Order Network (as layers of a mon)
- Multi-Order network
- Temporal Network

The focus of the package is teh ability to represent, model, use, memory in interactions within statistical, machine learning, and deep learning methods. 

**Passing walks to DAG class**

Walks can be passed in different ways. 
The most intuitive ways is to pass them as tuples (iterables?)  to a DAGData object.
This approach requires a mapping from string ids to node indices (these mappings are handled by IndexMap). 
Such a mapping can be conveniently obtained intitializing a network object.
The network object represent the topological backbone traversed by the walk dynamics.

In [3]:
g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('a', 'c')])
dags = pp.DAGData(mapping = g.mapping)

dags.append_walk(('a', 'b', 'c', 'b'), weight=1.0)
dags.append_walk(('a', 'c'), weight = 2.0)
print(dags)

DAGData with 2 dags with total weight 3.0


otehrwise, we can independently initilize an EdgeIndex object

In [4]:
dags = pp.DAGData(pp.IndexMap(list("abc")))
dags.append_walk(('a', 'b', 'c', 'b'), weight=1.0)
dags.append_walk(('a', 'c'), weight = 2.0)
print(dags)

DAGData with 2 dags with total weight 3.0


Finally, we can pass walks as edge indices without specifying a mapping. 
Notice that for the node indices to represent a valid walk, all subsequent edges must be adjecent.
In the edge_index format, this means that i-th element of the target indices must be equal to i+1 element of the source indices. 
Intuitively, this represents the fact that the node receives the path as a target and then propagates the path as a source. 

In [5]:
dags = pp.DAGData()
dags.append_dag(torch.tensor([[1,2,3,4],[2,3,4,5]]), weight=1)
dags.append_dag(torch.tensor([[3,4,5,6],[4,5,6,7]]), weight=2)
print(dags)

DAGData with 2 dags with total weight 3


**Passing DAGs to DAG class**

Naturally, we can also pass DAGs to the DAG class. 
Both with and without IndexMap, the operation is now perfomed using the append_dag method. 
In a dag, we are no longer constrained to pass edge indices where the i-th element for the target is equal to the i+1 elemen fof the source.  
This is a consequence of the fact that DAGs have bifurcations while walks, by definition, cannot. 
The edge_index of a DAG represents source target intearctions in the dag. 
[[0,0][1,2]] represents the root node (at t_0) interacting with three other nodes at times t_1, t_2. 


Notice that the current implementation of the DAG class cannot represent DAGs with the same node appearing at different times. 
For example, in [[0,0,1],[1,2,2]] we are saying that the node with index 1 hits the same 2 that was hit by 0 (i.e., 2 as indegree 2). 
This representation, however, does not allow us to say that 2 hits 1 at a later time (leading to two copies of the node, both with indegree one)

In [6]:
dags = pp.DAGData()
dags.append_dag(torch.tensor([[0,0,1],[1,2,2]]))

In [7]:
dags = pp.DAGData(pp.IndexMap(list("abc")))
dags.append_dag(torch.tensor([[0,0,1],[1,2,2]]))
print(dags.dags)

[Data(edge_index=[2, 3], node_sequence=[3, 1], num_nodes=3, edge_weight=[3])]


**Passing Walks and DAGs**
Finally, we can pass both walks and dags at the same time

CURRELTY BUGGY (bug appears when training a multi order network)

In [8]:
# Example with mix of walks or dags
dags = pp.DAGData(mapping = g.mapping)

dags.append_dag(torch.tensor([[0,0,1],[1,2,2]]), weight=2)
dags.append_walk(('a', 'b', 'c'))
print(dags)

m = pp.MultiOrderModel.from_DAGs(dags, max_order=2)

DAGData with 2 dags with total weight 3.0


### Temporal network

In [9]:
torch.repeat_interleave(torch.tensor([2,3,4]))

tensor([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [10]:
import torch_geometric
# import torch_geometric.utils
we = torch_geometric.utils.cumsum(torch.tensor([1,1,1,1,1,1]), dim = 0)[:-1]
torch.repeat_interleave(we)

tensor([1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5])

### Multi-Order model
 

In [11]:
def compute_weighted_outdegrees(graph):
    # source_ids = graph.data.edge_index[0]
    # weighted_outdegree = torch.zeros(hgraphon_1.data.num_nodes, dtype=graph.data.edge_weight.dtype)
    # weighted_outdegree = weighted_outdegree.index_add_(
    #     dim = 0, 
    #     index = graph.data.edge_index[0], 
    #     source = graph.data.edge_weight,#[source_ids]
    #     )
    weighted_outdegree = scatter(graph.data.edge_weight, graph.data.edge_index[0], dim=0, dim_size=graph.data.num_nodes, reduce='sum')
    return weighted_outdegree

def compute_transition_probabilities(graph):
    weighted_outdegree = compute_weighted_outdegrees(graph)
    source_ids = graph.data.edge_index[0]
    return graph.data.edge_weight/ weighted_outdegree[source_ids]


In [203]:
dag_data = pp.DAGData(pp.IndexMap(list("01234")))

# dag_data.append_dag(torch.tensor([[0,2],[2,3]]), weight=20)
# dag_data.append_dag(torch.tensor([[1,2],[2,4]]), weight=20)
# print(dag_data)

dag_data.append_walk(list("0230230"), weight=30)
dag_data.append_walk(list("1241241"), weight=70)
dag_data.append_walk(list("0230241"), weight=1)

m = pp.MultiOrderModel.from_DAGs(dag_data, max_order=7)

hon_1 = m.layers[1]
hon_2 = m.layers[2]
hon_3 = m.layers[3]
hon_4 = m.layers[4]
hon_5 = m.layers[5]
hon_6 = m.layers[5]
hon_7 = m.layers[7]
print(hon_1.data.edge_weight)
print(hon_2.data.edge_weight)

t_1 = compute_transition_probabilities(hon_1)
t_2 = compute_transition_probabilities(hon_2)

print(t_1)
print(t_2)

tensor([ 62, 140,  61, 141,  61, 141])
tensor([ 61,   1, 140,  61, 141,  31,  70])
tensor([1.0000, 1.0000, 0.3020, 0.6980, 1.0000, 1.0000])
tensor([0.9839, 0.0161, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000])


In [184]:
hon_1.data.edge_weight

tensor([ 62, 140,  61, 141,  61, 141])

In [182]:
hon_6.data.edge_weight

tensor([30,  1, 30, 30,  1, 30])

In [13]:
dag_data.dags[0]

Data(edge_index=[2, 6], node_sequence=[7, 1], num_nodes=7, edge_weight=[6])

In [14]:
dag_data.dags[0].edge_index

tensor([[0, 1, 2, 3, 4, 5],
        [1, 2, 3, 4, 5, 6]])

In [15]:
dag_data.dags[0].edge_weight

tensor([30, 30, 30, 30, 30, 30])

In [16]:
hon_2.data.node_sequence

tensor([[0, 2],
        [1, 2],
        [2, 3],
        [2, 4],
        [3, 0],
        [4, 1]])

In [17]:
hon_2.data.edge_weight

tensor([61,  1, 60, 61, 61, 31, 30])

In [18]:
def generate_general_bipartite_edge_index(g_source, g_target) -> torch.Tensor:
    """Generate edge_index for bipartite graph connecting nodes of higher-order graphs with arbitrary (but different) orders."""
    order_source = g_source.data.node_sequence[0].shape[0]
    order_target = g_target.data.node_sequence[0].shape[0]
    assert order_source!= order_target, "Source and target must have different orders to generate bipartite indices"
    node_sequence_source = g_source.data.node_sequence
    node_sequence_target = g_target.data.node_sequence
    d_order = max(order_source, order_target) - min(order_source, order_target)
    if order_source>order_target:
        mask = torch.all(node_sequence_target[:,None] == node_sequence_source[:, d_order:], dim=-1).T
        bip_tensor = torch.nonzero(mask, as_tuple=False).T
    else:
        mask = torch.all(node_sequence_source[:, None] == node_sequence_target[:, :-d_order], dim=-1)
        bip_tensor = torch.nonzero(mask, as_tuple=False).T
    return bip_tensor
    

source > target

In [19]:
g_source = hon_3
g_target = hon_4
generate_general_bipartite_edge_index(g_source, g_target)

tensor([[0, 1, 2, 3, 4, 5, 5, 6],
        [0, 1, 2, 3, 4, 5, 6, 7]])

In [20]:
import numpy as np
class Lh_conv(torch_geometric.nn.MessagePassing):
    def __init__(self):
        super().__init__(aggr="sum", flow="source_to_target", node_dim = -1)
    def forward(self, x_source, source_to_target_edge_index, transition_probability):
        
        N = len(source_to_target_edge_index[0].unique())
        M = len(source_to_target_edge_index[1].unique())

        return self.propagate(
                    source_to_target_edge_index,
                    size = (N,M),
                    x = (x_source, None),
                    transition_probability = transition_probability
                    )

    def message(self, x_j, transition_probability):
        return x_j + np.log(transition_probability)

In [21]:
lh_conv = Lh_conv()
x_source = torch.tensor([np.log(1)]) # lh of path until now # here 1 cause considering the example of a path set{(0,0),(0,1)}, hence only have a deterministic 0th transition *->0
# x_target = None # these are gonna be genetaed
source_to_target_edge_index = torch.tensor([[0,0],[0,1]])
transition_probability = torch.tensor([.3,.7])
vec_log_lh = lh_conv(x_source, source_to_target_edge_index, transition_probability)
print(vec_log_lh.sum())

tensor(-1.5606, dtype=torch.float64)


In [22]:
torch.tensor([np.log(1)])

tensor([0.], dtype=torch.float64)

Gettin zeroth order

In [23]:
dag_data.dags

[Data(edge_index=[2, 6], node_sequence=[7, 1], num_nodes=7, edge_weight=[6]),
 Data(edge_index=[2, 6], node_sequence=[7, 1], num_nodes=7, edge_weight=[6]),
 Data(edge_index=[2, 6], node_sequence=[7, 1], num_nodes=7, edge_weight=[6])]

In [24]:
from torch_geometric.loader import DataLoader
dag_graph = next(iter(DataLoader(dag_data.dags, batch_size=len(dag_data.dags)))).to(pp.config["torch"]["device"])
edge_index = dag_graph.edge_index
node_sequence = dag_graph.node_sequence

In [166]:
hon_4.mapping.id_to_idx

{('0', '2', '3', '0'): 0,
 ('0', '2', '4', '1'): 1,
 ('1', '2', '4', '1'): 2,
 ('2', '3', '0', '2'): 3,
 ('2', '4', '1', '2'): 4,
 ('3', '0', '2', '3'): 5,
 ('3', '0', '2', '4'): 6,
 ('4', '1', '2', '4'): 7}

In [29]:
unique_nodes, counts = torch.unique(dag_graph.node_sequence, return_counts=True)

node_emission_probabilities = counts/counts.sum()
node_emission_probabilities

tensor([0.2381, 0.1905, 0.2857, 0.1429, 0.1429])

In [31]:
# teh source to target index should come from the observaations... 

n_nodes = len(unique_nodes)
lh_conv = Lh_conv()
x_source = torch.zeros(1)# ((n_nodes,1)) 
# x_target = None # these are gonna be genetaed
print("0-th")
print(node_emission_probabilities)
source_to_target_edge_index_zeroth = torch.stack([torch.zeros(n_nodes,dtype = int),unique_nodes])
vec_log_lh = lh_conv(x_source, source_to_target_edge_index_zeroth, node_emission_probabilities)
print(vec_log_lh)

print("1-th")
T = compute_transition_probabilities(hon_1)
print(T)
source_to_target_edge_index = generate_general_bipartite_edge_index(hon_1,hon_2)
vec_log_lh = lh_conv(vec_log_lh, source_to_target_edge_index, T)
print(vec_log_lh)

print("2-th")
T = compute_transition_probabilities(hon_2)
print(T)
source_to_target_edge_index = generate_general_bipartite_edge_index(hon_2,hon_3)
vec_log_lh = lh_conv(vec_log_lh, source_to_target_edge_index,T)
print(vec_log_lh)


print("3-th (statio)")
T = compute_transition_probabilities(hon_3)
print(T)
for i in range(5):
    source_to_target_edge_index = hon_3.data.edge_index
    vec_log_lh = lh_conv(vec_log_lh, source_to_target_edge_index,T)
    print(vec_log_lh)

0-th
tensor([0.2381, 0.1905, 0.2857, 0.1429, 0.1429])
tensor([-1.4351, -1.6582, -1.2528, -1.9459, -1.9459])
1-th
tensor([1.0000, 1.0000, 0.5000, 0.5000, 1.0000, 1.0000])
tensor([-1.4351, -1.6582, -1.9459, -1.9459, -1.9459, -1.9459])
2-th
tensor([0.9839, 0.0161, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000])
tensor([-1.4513, -5.5622, -1.6582, -1.9459, -1.9459, -1.9459, -1.9459])
3-th (statio)
tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9677, 0.0323, 1.0000])
tensor([-1.9787, -5.3799, -1.9459, -1.4513, -7.2204, -1.9459, -1.9459])
tensor([-1.9787, -5.3799, -1.9459, -1.9787, -7.3258, -1.4513, -7.2204])
tensor([-1.4841, -4.8853, -7.2204, -1.9787, -7.3258, -1.9787, -7.3258])
tensor([ -2.0115,  -5.4127,  -7.3258,  -1.4841, -12.1058,  -1.9787,  -7.3258])
tensor([ -2.0115,  -5.4127,  -7.3258,  -2.0115, -12.7385,  -1.4841, -12.1058])


In [210]:
list_node_seq_paths[0].numel()

7

In [214]:
list_node_seq_paths  =[dag_data.dags[i].node_sequence.T[0] for i in range(len(dag_data.dags))]
padded_paths = [torch.nn.functional.pad(x, pad=(0, 10 - x.numel()), mode='constant', value=-1) for x in list_node_seq_paths]
padded_paths = torch.stack(padded_paths)
padded_paths

tensor([[ 0,  2,  3,  0,  2,  3,  0, -1, -1, -1],
        [ 1,  2,  4,  1,  2,  4,  1, -1, -1, -1],
        [ 0,  2,  3,  0,  2,  4,  1, -1, -1, -1]])

In [215]:
max_order = 2
for i in range(0,padded_paths.shape[1]):
    source = padded_paths[:,max(0,i-max_order):i]
    target = padded_paths[:,max(0,i-max_order+1):i+1]
    print(source)
    print(target)
    print("-------------------------------------------")

tensor([], size=(3, 0), dtype=torch.int64)
tensor([[0],
        [1],
        [0]])
-------------------------------------------
tensor([[0],
        [1],
        [0]])
tensor([[0, 2],
        [1, 2],
        [0, 2]])
-------------------------------------------
tensor([[0, 2],
        [1, 2],
        [0, 2]])
tensor([[2, 3],
        [2, 4],
        [2, 3]])
-------------------------------------------
tensor([[2, 3],
        [2, 4],
        [2, 3]])
tensor([[3, 0],
        [4, 1],
        [3, 0]])
-------------------------------------------
tensor([[3, 0],
        [4, 1],
        [3, 0]])
tensor([[0, 2],
        [1, 2],
        [0, 2]])
-------------------------------------------
tensor([[0, 2],
        [1, 2],
        [0, 2]])
tensor([[2, 3],
        [2, 4],
        [2, 4]])
-------------------------------------------
tensor([[2, 3],
        [2, 4],
        [2, 4]])
tensor([[3, 0],
        [4, 1],
        [4, 1]])
-------------------------------------------
tensor([[3, 0],
        [4, 1]

In [35]:
# from torch_geometric.loader import DataLoader
# dag_graph = next(
#     iter(
#         DataLoader(
#             dag_data.dags, 
#             batch_size=len(dag_data.dags)))).to(pp.config["torch"]["device"]
#                                                 )
# assert dag_graph.edge_weight is not None
# print(dag_graph.edge_weight)

**starting from temporal network**

In [36]:
# stuff for degrees of freedom etc.
num_len_2_paths = hon_2.data.num_nodes
num_nonzero_outdegrees = len(hon_2.data.edge_index[0].unique())

In [37]:
from torch_geometric.utils import cumsum, coalesce, degree, sort_edge_index

In [38]:
tedges = [('0', '2', 1),('2', '3', 2), ('0', '2', 3), ('2', '3', 3), ('1', '2', 14), ('2', '4', 14), ('1', '2', 14),
              ('2', '4', 15)]#, ('1', '2', 5), ('2', '4', 6)]
t = pp.TemporalGraph.from_edge_list(tedges*10)

In [39]:
m = pp.MultiOrderModel.from_temporal_graph(t, max_order=2)

In [40]:
hon_1 = m.layers[1]
hon_2 = m.layers[2]
print(hon_1.data.edge_weight)
print(hon_2.data.edge_weight)

tensor([20., 20., 20., 20.])
tensor([100., 200.])


In [41]:
hon_1.data.edge_weight

tensor([20., 20., 20., 20.])

In [42]:
source_ids = hon_1.data.edge_index[0]
hon_1.data.edge_weight[source_ids]

tensor([20., 20., 20., 20.])

In [43]:
hon_1.data.edge_index[0]

tensor([0, 1, 1, 3])

In [44]:
weighted_outdegree = torch.zeros(hon_1.data.num_nodes)
weighted_outdegree = weighted_outdegree.index_add_(
    dim = 0, 
    index = hon_1.data.edge_index[0], 
    source = hon_1.data.edge_weight[source_ids]
    )
weighted_outdegree

tensor([20., 40.,  0., 20.,  0.])

In [45]:
transition_probabilities = hon_1.data.edge_weight[source_ids]/ weighted_outdegree[source_ids]
transition_probabilities

tensor([1.0000, 0.5000, 0.5000, 1.0000])

In [46]:
# this gives likelihood of all paths of lenght 2
pp.MultiOrderModel.aggregate_edge_weight(
    hon_2.data.edge_index,
    transition_probabilities,
    aggr="mul"
    )

# the we need the number of times each path has occurred

tensor([0.5000, 0.5000])

In [47]:
sort_edge_index(t.data.edge_index, t.data.t)

(tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4,
          4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1]]),
 tensor([ 1.,  1.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  1.,  1.,
          1.,  1.,  1.,  1.,  1.,  1.,  2.,  3.,  3.,  3.,  3.,  3.,  3.,  2.,
          3.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  3.,  3.,  2.,  3., 14., 14.,
         14., 14., 14., 15., 15., 15., 15., 15., 15., 15., 15., 15., 15., 14.,
         14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14.,
         14., 14., 14., 14., 14., 14., 14

In [48]:
node_sequence = torch.arange(t.data.num_nodes, device=edge_index.device).unsqueeze(1)
node_sequence

tensor([[0],
        [1],
        [2],
        [3],
        [4]])

In [49]:
# edge_weight = torch.ones(edge_index.size(1), device=edge_index.device)
edge_weight = g.data.edge_weight

In [50]:
l1 = pp.MultiOrderModel.aggregate_edge_index(
                edge_index=edge_index, node_sequence=node_sequence, edge_weight=edge_weight
)

IndexError: index 5 is out of bounds for dimension 0 with size 5

In [None]:

l1.data.edge_weight

tensor([1., 1., 1., 1.])

I cannot do the model selection on the temporal graph without the path extraction. 