In [1]:
import pathpyG as pp
pp.config['torch']['device'] = 'cpu'

In [2]:
import torch
from torch_geometric.data import Data

PahtpyG takes as input various types of data: 
- Paths: DAG class
- DAGs: DAG class 
- Edgelists: DAG e network
- Time stamped interactions (events)


These data can then be represented with different types of models: 
- Network: Graph class
- Higher-Order Network (as layers of a mon)
- Multi-Order network
- Temporal Network

The focus of the package is teh ability to represent, model, use, memory in interactions within statistical, machine learning, and deep learning methods. 

**Passing walks to DAG class**

Walks can be passed in different ways. 
The most intuitive ways is to pass them as tuples (iterables?)  to a DAGData object.
This approach requires a mapping from string ids to node indices (these mappings are handled by IndexMap). 
Such a mapping can be conveniently obtained intitializing a network object.
The network object represent the topological backbone traversed by the walk dynamics.

In [3]:
g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('a', 'c')])
dags = pp.DAGData(mapping = g.mapping)

dags.append_walk(('a', 'b', 'c', 'b'), weight=1.0)
dags.append_walk(('a', 'c'), weight = 2.0)
print(dags)

DAGData with 2 dags with total weight 3.0


otehrwise, we can independently initilize an EdgeIndex object

In [4]:
dags = pp.DAGData(pp.IndexMap(list("abc")))
dags.append_walk(('a', 'b', 'c', 'b'), weight=1.0)
dags.append_walk(('a', 'c'), weight = 2.0)
print(dags)

DAGData with 2 dags with total weight 3.0


Finally, we can pass walks as edge indices without specifying a mapping. 
Notice that for the node indices to represent a valid walk, all subsequent edges must be adjecent.
In the edge_index format, this means that i-th element of the target indices must be equal to i+1 element of the source indices. 
Intuitively, this represents the fact that the node receives the path as a target and then propagates the path as a source. 

In [5]:
dags = pp.DAGData()
dags.append_dag(torch.tensor([[1,2,3,4],[2,3,4,5]]), weight=1)
dags.append_dag(torch.tensor([[3,4,5,6],[4,5,6,7]]), weight=2)
print(dags)

DAGData with 2 dags with total weight 3


**Passing DAGs to DAG class**

Naturally, we can also pass DAGs to the DAG class. 
Both with and without IndexMap, the operation is now perfomed using the append_dag method. 
In a dag, we are no longer constrained to pass edge indices where the i-th element for the target is equal to the i+1 elemen fof the source.  
This is a consequence of the fact that DAGs have bifurcations while walks, by definition, cannot. 
The edge_index of a DAG represents source target intearctions in the dag. 
[[0,0][1,2]] represents the root node (at t_0) interacting with three other nodes at times t_1, t_2. 


Notice that the current implementation of the DAG class cannot represent DAGs with the same node appearing at different times. 
For example, in [[0,0,1],[1,2,2]] we are saying that the node with index 1 hits the same 2 that was hit by 0 (i.e., 2 as indegree 2). 
This representation, however, does not allow us to say that 2 hits 1 at a later time (leading to two copies of the node, both with indegree one)

In [6]:
dags = pp.DAGData()
dags.append_dag(torch.tensor([[0,0,1],[1,2,2]]))

In [7]:
dags = pp.DAGData(pp.IndexMap(list("abc")))
dags.append_dag(torch.tensor([[0,0,1],[1,2,2]]))
print(dags.dags)

[Data(edge_index=[2, 3], node_sequence=[3, 1], num_nodes=3, edge_weight=[3])]


**Passing Walks and DAGs**
Finally, we can pass both walks and dags at the same time

CURRELTY BUGGY (bug appears when training a multi order network)

In [8]:
# Example with mix of walks or dags
dags = pp.DAGData(mapping = g.mapping)

dags.append_dag(torch.tensor([[0,0,1],[1,2,2]]), weight=2)
# dags.append_walk(('a', 'b', 'c'))
print(dags)

m = pp.MultiOrderModel.from_DAGs(dags, max_order=2)

DAGData with 1 dags with total weight 2


### Temporal network

In [9]:
torch.repeat_interleave(torch.tensor([2,3,4]))

tensor([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [10]:
import torch_geometric
# import torch_geometric.utils
we = torch_geometric.utils.cumsum(torch.tensor([1,1,1,1,1,1]), dim = 0)[:-1]
torch.repeat_interleave(we)

tensor([1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5])

### Multi-Order model
 

In [29]:
dag_data = pp.DAGData(pp.IndexMap(list("01234")))

# dag_data.append_dag(torch.tensor([[0,2],[2,3]]), weight=20)
# dag_data.append_dag(torch.tensor([[1,2],[2,4]]), weight=20)
# print(dag_data)

dag_data.append_walk(list("023"), weight=20)
dag_data.append_walk(list("124"), weight=20)


m = pp.MultiOrderModel.from_DAGs(dag_data, max_order=2)

In [30]:
from torch_geometric.loader import DataLoader
dag_graph = next(
    iter(
        DataLoader(
            dag_data.dags, 
            batch_size=len(dag_data.dags)))).to(pp.config["torch"]["device"]
                                                )
assert dag_graph.edge_weight is not None

In [31]:
hon_1 = m.layers[1]
hon_2 = m.layers[2]
print(hon_1.data.edge_weight)
print(hon_2.data.edge_weight)

tensor([1., 1., 1., 1.])
tensor([1., 1.])


In [32]:
dag_graph

DataBatch(edge_index=[2, 4], node_sequence=[6, 1], num_nodes=6, edge_weight=[4], batch=[6], ptr=[3])

In [33]:
# weigth exist here but not passed to the hon...
dag_graph.edge_weight

tensor([20, 20, 20, 20])

**starting from temporal network**

In [16]:
# stuff for degrees of freedom etc.
num_len_2_paths = hon_2.data.num_nodes
num_nonzero_outdegrees = len(hon_2.data.edge_index[0].unique())

In [17]:
from torch_geometric.utils import cumsum, coalesce, degree, sort_edge_index

In [18]:
tedges = [('0', '2', 1),('2', '3', 2), ('0', '2', 3), ('2', '3', 3), ('1', '2', 14), ('2', '4', 14), ('1', '2', 14),
              ('2', '4', 15)]#, ('1', '2', 5), ('2', '4', 6)]
t = pp.TemporalGraph.from_edge_list(tedges*10)

In [19]:
m = pp.MultiOrderModel.from_temporal_graph(t, max_order=2)

In [20]:
hon_1 = m.layers[1]
hon_2 = m.layers[2]
print(hon_1.data.edge_weight)
print(hon_2.data.edge_weight)

tensor([20., 20., 20., 20.])
tensor([100., 200.])


In [21]:
hon_1.data.edge_weight

tensor([20., 20., 20., 20.])

In [22]:
source_ids = hon_1.data.edge_index[0]
hon_1.data.edge_weight[source_ids]

tensor([20., 20., 20., 20.])

In [23]:
hon_1.data.edge_index[0]

tensor([0, 1, 1, 3])

In [24]:
weighted_outdegree = torch.zeros(hon_1.data.num_nodes)
weighted_outdegree = weighted_outdegree.index_add_(
    dim = 0, 
    index = hon_1.data.edge_index[0], 
    source = hon_1.data.edge_weight[source_ids]
    )
weighted_outdegree

tensor([20., 40.,  0., 20.,  0.])

In [25]:
transition_probabilities = hon_1.data.edge_weight[source_ids]/ weighted_outdegree[source_ids]
transition_probabilities

tensor([1.0000, 0.5000, 0.5000, 1.0000])

In [26]:
# this gives likelihood of all paths of lenght 2
pp.MultiOrderModel.aggregate_edge_weight(
    hon_2.data.edge_index,
    transition_probabilities,
    aggr="mul"
    )

# the we need the number of times each path has occurred

tensor([0.5000, 0.5000])

In [27]:
sort_edge_index(t.data.edge_index, t.data.t)

(tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4,
          4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1]]),
 tensor([ 1.,  1.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  1.,  1.,
          1.,  1.,  1.,  1.,  1.,  1.,  2.,  3.,  3.,  3.,  3.,  3.,  3.,  2.,
          3.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  3.,  3.,  2.,  3., 14., 14.,
         14., 14., 14., 15., 15., 15., 15., 15., 15., 15., 15., 15., 15., 14.,
         14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14.,
         14., 14., 14., 14., 14., 14., 14

In [28]:
node_sequence = torch.arange(t.data.num_nodes, device=edge_index.device).unsqueeze(1)
node_sequence

NameError: name 'edge_index' is not defined

In [None]:
# edge_weight = torch.ones(edge_index.size(1), device=edge_index.device)
edge_weight = g.data.edge_weight

In [None]:
l1 = pp.MultiOrderModel.aggregate_edge_index(
                edge_index=edge_index, node_sequence=node_sequence, edge_weight=edge_weight
)

In [None]:

l1.data.edge_weight

tensor([1., 1., 1., 1.])

I cannot do the model selection on the temporal graph without the path extraction. 