Skip to content

Commit

Permalink
Enhancement/1238 no zip np array (#1253)
Browse files Browse the repository at this point in the history
This PR adds the following methods:

- `StellarGraph.edge_arrays`
- `StellarGraph.neighbor_arrays`
- `StellarGraph.in_node_arrays`
- `StellarGraph.out_node_arrays`

These return a tuple of np.arrays instead of a list of tuples, as the corresponding old methods without the `_arrays` suffix did. The old methods now simply wrap the new `_arrays` methods to preserve the API.

See #1238

* _transform edges now returns a tuple of numpy arrays

* edges now returns a tuple of numpy arrays

* fixes

* fix tests

* fix tests

* fix tests

* formatting

* update doc strings

* fix tests

* add compatibility for passing edgelists from edge splitter

* add in `edge_arrays` function

* add to_edges function in EdgeList

* added .*_arrays methods for neighbor methods

* docstring update

* docstring update

* docstring update

* docstring update

* backwards compatibility

* test new conversion methods

* Update tests/data/test_edge_splitter.py

Co-Authored-By: kevin <33508488+kjun9@users.noreply.github.com>

* Update tests/datasets/test_datasets.py

Co-Authored-By: kevin <33508488+kjun9@users.noreply.github.com>

* fix

* revert fullbatch tests back

* revert tests back

* revert demo

* removed edgelist

* Update stellargraph/core/graph.py

Co-authored-by: kevin <33508488+kjun9@users.noreply.github.com>

* doc string update

* removed redundant code

* merge fixes

* reverted files back

* reverted files back

* reverted files back

* docstring fixes

* fix typo

* samples use neighbor arrays

* benchmark uses neighbor arrays

Co-authored-by: kevin <33508488+kjun9@users.noreply.github.com>
  • Loading branch information
kieranricardo and kjun9 committed May 15, 2020
1 parent 9b81deb commit ef81955
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 66 deletions.
180 changes: 138 additions & 42 deletions stellargraph/core/graph.py
Expand Up @@ -556,6 +556,12 @@ def nodes(self, node_type=None, use_ilocs=False) -> Iterable[Any]:
return ilocs
return self._nodes.ids.from_iloc(ilocs)

def _to_edges(self, edge_arrs):
# Convert a 4-tuple of edge arrays into the list-of-tuples form returned by ``edges``.
# ``edges`` passes in the result of ``edge_arrays``: (sources, targets, types, weights).
# Only the first three entries are zipped together, and any entry that is None is left
# out, so with that input each edge is (source, target) or (source, target, type).
edges = list(zip(*(arr for arr in edge_arrs[:3] if arr is not None)))
# The fourth entry is not zipped into the edge tuples; when present it is returned
# next to the edge list as ``(edges, edge_arrs[3])``.
if edge_arrs[3] is not None:
return edges, edge_arrs[3]
return edges

def edges(
self, include_edge_type=False, include_edge_weight=False, use_ilocs=False
) -> Iterable[Any]:
Expand All @@ -572,31 +578,40 @@ def edges(
use_ilocs (bool): if True return :ref:`ilocs for nodes (and edge types) <iloc-explanation>`
Returns:
The graph edges. If edge weights are included then a tuple of (edges, weights)
The graph edges. If edge weights are included then a tuple of (edges, weights).
"""
# FIXME: these would be better returned as the 2 or 3 arrays directly, rather than tuple-ing
# (the same applies to all other instances of zip in this file)
sources = self._edges.sources
targets = self._edges.targets

if not use_ilocs:
sources = self._nodes.ids.from_iloc(sources)
targets = self._nodes.ids.from_iloc(targets)
edge_arrs = self.edge_arrays(
include_edge_type, include_edge_weight, use_ilocs=use_ilocs
)
return self._to_edges(edge_arrs)

if include_edge_type:
if use_ilocs:
types = self._edges.type_ilocs
else:
types = self._edges.type_of_iloc(slice(None))
def edge_arrays(
self, include_edge_type=False, include_edge_weight=False, use_ilocs=False
) -> tuple:
"""
Obtains the collection of edges in the graph as a tuple of arrays (sources, targets, types, weights).
``types`` and ``weights`` will be `None` if the optional parameters are not specified.
edges = list(zip(sources, targets, types))
else:
edges = list(zip(sources, targets))
Args:
include_edge_type (bool): A flag that indicates whether to return edge types.
include_edge_weight (bool): A flag that indicates whether to return edge weights.
use_ilocs (bool): if True return :ref:`ilocs for nodes (and edge types) <iloc-explanation>`
if include_edge_weight:
return edges, self._edges.weights
Returns:
A tuple containing 1D arrays of the source and target nodes (sources, targets, types, weights).
Setting include_edge_type and/or include_edge_weight to True will include arrays of edge types
and/or edge weights in this tuple, otherwise they will be set to ``None``.
"""
types = types = self._edges.type_ilocs if include_edge_type else None
weights = self._edges.weights if include_edge_weight else None
sources = self._edges.sources
targets = self._edges.targets

return edges
if not use_ilocs:
sources = self.node_ilocs_to_ids(sources)
targets = self.node_ilocs_to_ids(targets)
types = self._edges.type_of_iloc(slice(None)) if include_edge_type else None
return sources, targets, types, weights

def has_node(self, node: Any) -> bool:
"""
Expand Down Expand Up @@ -632,34 +647,38 @@ def _transform_edges(
if weights is not None:
weights = weights[correct_type]

# FIXME(#718): it would be better to return these as ndarrays, instead of (zipped) lists
if weights is not None:
return other_node, weights

return other_node

def _to_neighbors(self, neigh_arrs, include_edge_weight):
if include_edge_weight:
return [
NeighbourWithWeight(node, weight)
for node, weight in zip(other_node, weights)
NeighbourWithWeight(neigh, weight) for neigh, weight in zip(*neigh_arrs)
]
return list(neigh_arrs)

return list(other_node)

def neighbors(
def neighbor_arrays(
self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
) -> Iterable[Any]:
):
"""
Obtains the collection of neighbouring nodes connected
to the given node.
Obtains the collection of neighbouring nodes connected to the given node
as an array of node_ids. If `include_edge_weight` edge is `True` then
an array of edges weights is also returned in a tuple of `(neighbor_ids, edge_weights)`.
Args:
node (any): The node in question.
include_edge_weight (bool, default False): If True, each neighbour in the
output is a named tuple with fields `node` (the node ID) and `weight` (the edge weight)
include_edge_weight (bool, default False): If True an array of edge weights is also returned.
edge_types (list of hashable, optional): If provided, only traverse the graph
via the provided edge types when collecting neighbours.
use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
(and similarly `edge_types` is treated as a edge type ilocs) and the ilocs of each
neighbour is returned.
Returns:
iterable: The neighbouring nodes.
A numpy array of the neighboring nodes. If `include_edge_weight` is `True` then an array
of edge weights is also returned in a tuple `(neighbor_array, edge_weight_array)`
"""
if not use_ilocs:
node = self._nodes.ids.to_iloc([node])[0]
Expand All @@ -673,30 +692,55 @@ def neighbors(
other_node, edge_ilocs, include_edge_weight, edge_types, use_ilocs
)

def in_nodes(
def neighbors(
self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
) -> Iterable[Any]:
) -> Iterable[any]:
"""
Obtains the collection of neighbouring nodes connected
to the given node.
Args:
node (any): The node in question.
include_edge_weight (bool, default False): If True, each neighbour in the
output is a named tuple with fields `node` (the node ID) and `weight` (the edge weight)
edge_types (list of hashable, optional): If provided, only traverse the graph
via the provided edge types when collecting neighbours.
use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
(and similarly `edge_types` is treated as a edge type ilocs) and the ilocs of each
neighbour is returned.
Returns:
iterable: The neighboring nodes.
"""
neigh_arrs = self.neighbor_arrays(
node, include_edge_weight, edge_types, use_ilocs=use_ilocs
)
return self._to_neighbors(neigh_arrs, include_edge_weight)

def in_node_arrays(
self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
):
"""
Obtains the collection of neighbouring nodes with edges
directed to the given node. For an undirected graph,
neighbours are treated as both in-nodes and out-nodes.
Args:
node (any): The node in question.
include_edge_weight (bool, default False): If True, each neighbour in the
output is a named tuple with fields `node` (the node ID) and `weight` (the edge weight)
include_edge_weight (bool, default False): If True an array of edge weights is also returned.
edge_types (list of hashable, optional): If provided, only traverse the graph
via the provided edge types when collecting neighbours.
use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
(and similarly `edge_types` is treated as a edge type ilocs) and the ilocs of each
neighbour is returned.
Returns:
iterable: The neighbouring in-nodes.
A numpy array of the neighboring in-nodes. If `include_edge_weight` is `True` then an array
of edge weights is also returned in a tuple `(neighbor_array, edge_weight_array)`
"""
if not self.is_directed():
# all edges are both incoming and outgoing for undirected graphs
return self.neighbors(
return self.neighbor_arrays(
node,
include_edge_weight=include_edge_weight,
edge_types=edge_types,
Expand All @@ -712,12 +756,12 @@ def in_nodes(
source, edge_ilocs, include_edge_weight, edge_types, use_ilocs
)

def out_nodes(
def in_nodes(
self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
) -> Iterable[Any]:
"""
Obtains the collection of neighbouring nodes with edges
directed from the given node. For an undirected graph,
directed to the given node. For an undirected graph,
neighbours are treated as both in-nodes and out-nodes.
Args:
Expand All @@ -731,11 +775,37 @@ def out_nodes(
neighbour is returned.
Returns:
iterable: The neighbouring out-nodes.
iterable: The neighbouring in-nodes.
"""
neigh_arrs = self.in_node_arrays(
node, include_edge_weight, edge_types, use_ilocs=use_ilocs
)
return self._to_neighbors(neigh_arrs, include_edge_weight)

def out_node_arrays(
self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
):
"""
Obtains the collection of neighbouring nodes with edges
directed from the given node. For an undirected graph,
neighbours are treated as both in-nodes and out-nodes.
Args:
node (any): The node in question.
include_edge_weight (bool, default False): If True an array of edge weights is also returned.
edge_types (list of hashable, optional): If provided, only traverse the graph
via the provided edge types when collecting neighbours.
use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
(and similarly `edge_types` is treated as a edge type ilocs) and the ilocs of each
neighbour is returned.
Returns:
A numpy array of the neighboring out-nodes. If `include_edge_weight` is `True` then an array
of edge weights is also returned in a tuple `(neighbor_array, edge_weight_array)`
"""
if not self.is_directed():
# all edges are both incoming and outgoing for undirected graphs
return self.neighbors(
return self.neighbor_arrays(
node,
include_edge_weight=include_edge_weight,
edge_types=edge_types,
Expand All @@ -752,6 +822,32 @@ def out_nodes(
target, edge_ilocs, include_edge_weight, edge_types, use_ilocs
)

def out_nodes(
self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
) -> Iterable[Any]:
"""
Obtains the collection of neighbouring nodes with edges
directed from the given node. For an undirected graph,
neighbours are treated as both in-nodes and out-nodes.
Args:
node (any): The node in question.
include_edge_weight (bool, default False): If True, each neighbour in the
output is a named tuple with fields `node` (the node ID) and `weight` (the edge weight)
edge_types (list of hashable, optional): If provided, only traverse the graph
via the provided edge types when collecting neighbours.
use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
(and similarly `edge_types` is treated as edge type ilocs) and the ilocs of each
neighbour are returned.
Returns:
iterable: The neighbouring out-nodes.
"""
# Fetch the result in array form from ``out_node_arrays``, then convert it with
# ``_to_neighbors`` so this method keeps returning a list.
neigh_arrs = self.out_node_arrays(
node, include_edge_weight, edge_types, use_ilocs=use_ilocs
)
return self._to_neighbors(neigh_arrs, include_edge_weight)

def nodes_of_type(self, node_type=None):
"""
Get the nodes of the graph with the specified node types.
Expand Down
48 changes: 25 additions & 23 deletions stellargraph/data/explorer.py
Expand Up @@ -160,7 +160,7 @@ def _get_random_state(self, seed):
return rs

def neighbors(self, node):
return self.graph.neighbors(node, use_ilocs=True)
return self.graph.neighbor_arrays(node, use_ilocs=True)

def run(self, *args, **kwargs):
"""
Expand Down Expand Up @@ -280,8 +280,8 @@ def _walk(self, rs, start_node, length):
walk = [start_node]
current_node = start_node
for _ in range(length - 1):
neighbours = self.graph.neighbors(current_node, use_ilocs=True)
if not neighbours:
neighbours = self.graph.neighbor_arrays(current_node, use_ilocs=True)
if len(neighbours) == 0:
# dead end, so stop
break
else:
Expand Down Expand Up @@ -572,7 +572,9 @@ def run(self, nodes, *, n=None, length=None, metapaths=None, seed=None):
for d in range(length):
walk.append(current_node)
# d+1 can also be used to index metapath to retrieve the node type for the next step in the walk
neighbours = self.graph.neighbors(current_node, use_ilocs=True)
neighbours = self.graph.neighbor_arrays(
current_node, use_ilocs=True
)
# filter these by node type
neighbour_types = self.graph.node_type(
neighbours, use_ilocs=True
Expand Down Expand Up @@ -677,7 +679,9 @@ def run(self, nodes, n_size, n=1, seed=None):
continue

neighbours = (
self.neighbors(cur_node) if cur_node is not None else []
self.graph.neighbor_arrays(cur_node, use_ilocs=True)
if cur_node != -1
else []
)
if len(neighbours) == 0:
# Either node is unconnected or is in directed graph with no out-nodes.
Expand Down Expand Up @@ -890,14 +894,14 @@ def _sample_neighbours(self, rs, node, idx, size):
The fixed-length list of neighbouring nodes (or None values
if the neighbourhood is empty).
"""
if node is None:
if node == -1:
# Non-node, e.g. previously sampled from empty neighbourhood
return [-1] * size
neighbours = list(
self.graph.in_nodes(node, use_ilocs=True)
if idx == 0
else self.graph.out_nodes(node, use_ilocs=True)
)

if idx == 0:
neighbours = self.graph.in_node_arrays(node, use_ilocs=True)
else:
neighbours = self.graph.out_node_arrays(node, use_ilocs=True)
if len(neighbours) == 0:
# Sampling from empty neighbourhood
return [-1] * size
Expand Down Expand Up @@ -1054,7 +1058,7 @@ def run(
walks = []
num_cw_curr = 0

edges, times = self.graph.edges(include_edge_weight=True)
sources, targets, _, times = self.graph.edge_arrays(include_edge_weight=True)
edge_biases = self._temporal_biases(
times, None, bias_type=initial_edge_bias, is_forward=False,
)
Expand All @@ -1071,8 +1075,9 @@ def not_progressing_enough():

# loop runs until we have enough context windows in total
while num_cw_curr < num_cw:
first_edge_index = self._sample(len(edges), edge_biases, np_rs)
src, dst = edges[first_edge_index]
first_edge_index = self._sample(len(times), edge_biases, np_rs)
src = sources[first_edge_index]
dst = targets[first_edge_index]
t = times[first_edge_index]

remaining_length = num_cw - num_cw_curr + cw_size - 1
Expand Down Expand Up @@ -1125,18 +1130,15 @@ def _step(self, node, time, bias_type, np_rs):
Perform 1 temporal step from a node. Returns None if a dead-end is reached.
"""
neighbours, times = self.graph.neighbor_arrays(node, include_edge_weight=True)
neighbours = neighbours[times > time]
times = times[times > time]

neighbours = [
(neighbour, t)
for neighbour, t in self.graph.neighbors(node, include_edge_weight=True)
if t > time
]

if neighbours:
times = [t for _, t in neighbours]
if len(neighbours) > 0:
biases = self._temporal_biases(times, time, bias_type, is_forward=True)
chosen_neighbour_index = self._sample(len(neighbours), biases, np_rs)
next_node, next_time = neighbours[chosen_neighbour_index]
next_node = neighbours[chosen_neighbour_index]
next_time = times[chosen_neighbour_index]
return next_node, next_time
else:
return None
Expand Down
2 changes: 1 addition & 1 deletion tests/core/test_graph.py
Expand Up @@ -810,7 +810,7 @@ def test_benchmark_get_neighbours(benchmark, use_ilocs):
# get the neighbours of every node in the graph
def f():
for i in range(num_nodes):
sg.neighbors(i, use_ilocs=use_ilocs)
sg.neighbor_arrays(i, use_ilocs=use_ilocs)

benchmark(f)

Expand Down

0 comments on commit ef81955

Please sign in to comment.