Enhancement/1238 no zip np array (#1253)

This PR adds the following methods: - `StellarGraph.edge_arrays` - `StellarGraph.neighbor_arrays` - `StellarGraph.in_node_arrays` - `StellarGraph.out_node_arrays` These return NumPy arrays (`edge_arrays` returns a tuple of arrays; the neighbour methods return an array, or a tuple of arrays when edge weights are requested) instead of a list of tuples, as the corresponding old methods without the `_arrays` suffix did. The old methods now simply wrap the new `_arrays` methods to preserve the API. See #1238 * _transform_edges now returns a tuple of numpy arrays * edges now returns a tuple of numpy arrays * fixes * fix tests * fix tests * fix tests * formatting * update doc strings * fix tests * add compatibility for passing edgelists from edge splitter * add in `edge_arrays` function * add to_edges function in EdgeList * added .*_arrays methods for neighbor methods * docstring update * docstring update * docstring update * docstring update * backwards compatibility * test new conversion methods * Update tests/data/test_edge_splitter.py Co-Authored-By: kevin <33508488+kjun9@users.noreply.github.com> * Update tests/datasets/test_datasets.py Co-Authored-By: kevin <33508488+kjun9@users.noreply.github.com> * fix * revert fullbatch tests back * revert tests back * revert demo * removed edgelist * Update stellargraph/core/graph.py Co-authored-by: kevin <33508488+kjun9@users.noreply.github.com> * doc string update * removed redundant code * merge fixes * reverted files back * reverted files back * reverted files back * docstring fixes * fix typo * samples use neighbor arrays * benchmark uses neighbor arrays Co-authored-by: kevin <33508488+kjun9@users.noreply.github.com>
stellargraph · May 15, 2020 · ef81955 · ef81955
1 parent 9b81deb
commit ef81955
Show file tree

Hide file tree

Showing 3 changed files with 164 additions and 66 deletions.
diff --git a/stellargraph/core/graph.py b/stellargraph/core/graph.py
@@ -556,6 +556,12 @@ def nodes(self, node_type=None, use_ilocs=False) -> Iterable[Any]:
             return ilocs
         return self._nodes.ids.from_iloc(ilocs)
 
+    def _to_edges(self, edge_arrs):
+        edges = list(zip(*(arr for arr in edge_arrs[:3] if arr is not None)))
+        if edge_arrs[3] is not None:
+            return edges, edge_arrs[3]
+        return edges
+
     def edges(
         self, include_edge_type=False, include_edge_weight=False, use_ilocs=False
     ) -> Iterable[Any]:
@@ -572,31 +578,40 @@ def edges(
             use_ilocs (bool): if True return :ref:`ilocs for nodes (and edge types) <iloc-explanation>`
 
         Returns:
-            The graph edges. If edge weights are included then a tuple of (edges, weights)
+            The graph edges. If edge weights are included then a tuple of (edges, weights).
         """
-        # FIXME: these would be better returned as the 2 or 3 arrays directly, rather than tuple-ing
-        # (the same applies to all other instances of zip in this file)
-        sources = self._edges.sources
-        targets = self._edges.targets
-
-        if not use_ilocs:
-            sources = self._nodes.ids.from_iloc(sources)
-            targets = self._nodes.ids.from_iloc(targets)
+        edge_arrs = self.edge_arrays(
+            include_edge_type, include_edge_weight, use_ilocs=use_ilocs
+        )
+        return self._to_edges(edge_arrs)
 
-        if include_edge_type:
-            if use_ilocs:
-                types = self._edges.type_ilocs
-            else:
-                types = self._edges.type_of_iloc(slice(None))
+    def edge_arrays(
+        self, include_edge_type=False, include_edge_weight=False, use_ilocs=False
+    ) -> tuple:
+        """
+        Obtains the collection of edges in the graph as a tuple of arrays (sources, targets, types, weights).
+        ``types`` and ``weights`` will be `None` if the optional parameters are not specified.
 
-            edges = list(zip(sources, targets, types))
-        else:
-            edges = list(zip(sources, targets))
+        Args:
+            include_edge_type (bool): A flag that indicates whether to return edge types.
+            include_edge_weight (bool): A flag that indicates whether to return edge weights.
+            use_ilocs (bool): if True return :ref:`ilocs for nodes (and edge types) <iloc-explanation>`
 
-        if include_edge_weight:
-            return edges, self._edges.weights
+        Returns:
+            A tuple containing 1D arrays of the source and target nodes (sources, targets, types, weights).
+            Setting include_edge_type and/or include_edge_weight to True will include arrays of edge types
+            and/or edge weights in this tuple, otherwise they will be set to ``None``.
+        """
+        types = types = self._edges.type_ilocs if include_edge_type else None
+        weights = self._edges.weights if include_edge_weight else None
+        sources = self._edges.sources
+        targets = self._edges.targets
 
-        return edges
+        if not use_ilocs:
+            sources = self.node_ilocs_to_ids(sources)
+            targets = self.node_ilocs_to_ids(targets)
+            types = self._edges.type_of_iloc(slice(None)) if include_edge_type else None
+        return sources, targets, types, weights
 
     def has_node(self, node: Any) -> bool:
         """
@@ -632,34 +647,38 @@ def _transform_edges(
             if weights is not None:
                 weights = weights[correct_type]
 
-        # FIXME(#718): it would be better to return these as ndarrays, instead of (zipped) lists
         if weights is not None:
+            return other_node, weights
+
+        return other_node
+
+    def _to_neighbors(self, neigh_arrs, include_edge_weight):
+        if include_edge_weight:
             return [
-                NeighbourWithWeight(node, weight)
-                for node, weight in zip(other_node, weights)
+                NeighbourWithWeight(neigh, weight) for neigh, weight in zip(*neigh_arrs)
             ]
+        return list(neigh_arrs)
 
-        return list(other_node)
-
-    def neighbors(
+    def neighbor_arrays(
         self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
-    ) -> Iterable[Any]:
+    ):
         """
-        Obtains the collection of neighbouring nodes connected
-        to the given node.
+        Obtains the collection of neighbouring nodes connected to the given node
+        as an array of node_ids. If `include_edge_weight` is `True` then
+        an array of edge weights is also returned in a tuple of `(neighbor_ids, edge_weights)`.
 
         Args:
             node (any): The node in question.
-            include_edge_weight (bool, default False): If True, each neighbour in the
-                output is a named tuple with fields `node` (the node ID) and `weight` (the edge weight)
+            include_edge_weight (bool, default False): If True an array of edge weights is also returned.
             edge_types (list of hashable, optional): If provided, only traverse the graph
                 via the provided edge types when collecting neighbours.
             use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
                 (and similarly `edge_types` is treated as a edge type ilocs) and the ilocs of each
                 neighbour is returned.
 
         Returns:
-            iterable: The neighbouring nodes.
+            A numpy array of the neighboring nodes. If `include_edge_weight` is `True` then an array
+            of edge weights is also returned in a tuple `(neighbor_array, edge_weight_array)`
         """
         if not use_ilocs:
             node = self._nodes.ids.to_iloc([node])[0]
@@ -673,30 +692,55 @@ def neighbors(
             other_node, edge_ilocs, include_edge_weight, edge_types, use_ilocs
         )
 
-    def in_nodes(
+    def neighbors(
         self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
-    ) -> Iterable[Any]:
+    ) -> Iterable[any]:
+        """
+        Obtains the collection of neighbouring nodes connected
+        to the given node.
+
+        Args:
+            node (any): The node in question.
+            include_edge_weight (bool, default False): If True, each neighbour in the
+                output is a named tuple with fields `node` (the node ID) and `weight` (the edge weight)
+            edge_types (list of hashable, optional): If provided, only traverse the graph
+                via the provided edge types when collecting neighbours.
+            use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
+                (and similarly `edge_types` is treated as a edge type ilocs) and the ilocs of each
+                neighbour is returned.
+
+        Returns:
+            iterable: The neighboring nodes.
+        """
+        neigh_arrs = self.neighbor_arrays(
+            node, include_edge_weight, edge_types, use_ilocs=use_ilocs
+        )
+        return self._to_neighbors(neigh_arrs, include_edge_weight)
+
+    def in_node_arrays(
+        self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
+    ):
         """
         Obtains the collection of neighbouring nodes with edges
         directed to the given node. For an undirected graph,
         neighbours are treated as both in-nodes and out-nodes.
 
         Args:
             node (any): The node in question.
-            include_edge_weight (bool, default False): If True, each neighbour in the
-                output is a named tuple with fields `node` (the node ID) and `weight` (the edge weight)
+            include_edge_weight (bool, default False): If True an array of edge weights is also returned.
             edge_types (list of hashable, optional): If provided, only traverse the graph
                 via the provided edge types when collecting neighbours.
             use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
                 (and similarly `edge_types` is treated as a edge type ilocs) and the ilocs of each
                 neighbour is returned.
 
         Returns:
-            iterable: The neighbouring in-nodes.
+            A numpy array of the neighboring in-nodes. If `include_edge_weight` is `True` then an array
+            of edge weights is also returned in a tuple `(neighbor_array, edge_weight_array)`
         """
         if not self.is_directed():
             # all edges are both incoming and outgoing for undirected graphs
-            return self.neighbors(
+            return self.neighbor_arrays(
                 node,
                 include_edge_weight=include_edge_weight,
                 edge_types=edge_types,
@@ -712,12 +756,12 @@ def in_nodes(
             source, edge_ilocs, include_edge_weight, edge_types, use_ilocs
         )
 
-    def out_nodes(
+    def in_nodes(
         self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
     ) -> Iterable[Any]:
         """
         Obtains the collection of neighbouring nodes with edges
-        directed from the given node. For an undirected graph,
+        directed to the given node. For an undirected graph,
         neighbours are treated as both in-nodes and out-nodes.
 
         Args:
@@ -731,11 +775,37 @@ def out_nodes(
                 neighbour is returned.
 
         Returns:
-            iterable: The neighbouring out-nodes.
+            iterable: The neighbouring in-nodes.
+        """
+        neigh_arrs = self.in_node_arrays(
+            node, include_edge_weight, edge_types, use_ilocs=use_ilocs
+        )
+        return self._to_neighbors(neigh_arrs, include_edge_weight)
+
+    def out_node_arrays(
+        self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
+    ):
+        """
+        Obtains the collection of neighbouring nodes with edges
+        directed from the given node. For an undirected graph,
+        neighbours are treated as both in-nodes and out-nodes.
+
+        Args:
+            node (any): The node in question.
+            include_edge_weight (bool, default False): If True an array of edge weights is also returned.
+            edge_types (list of hashable, optional): If provided, only traverse the graph
+                via the provided edge types when collecting neighbours.
+            use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
+                (and similarly `edge_types` is treated as a edge type ilocs) and the ilocs of each
+                neighbour is returned.
+
+        Returns:
+            A numpy array of the neighboring out-nodes. If `include_edge_weight` is `True` then an array
+            of edge weights is also returned in a tuple `(neighbor_array, edge_weight_array)`
         """
         if not self.is_directed():
             # all edges are both incoming and outgoing for undirected graphs
-            return self.neighbors(
+            return self.neighbor_arrays(
                 node,
                 include_edge_weight=include_edge_weight,
                 edge_types=edge_types,
@@ -752,6 +822,32 @@ def out_nodes(
             target, edge_ilocs, include_edge_weight, edge_types, use_ilocs
         )
 
+    def out_nodes(
+        self, node: Any, include_edge_weight=False, edge_types=None, use_ilocs=False
+    ) -> Iterable[Any]:
+        """
+        Obtains the collection of neighbouring nodes with edges
+        directed from the given node. For an undirected graph,
+        neighbours are treated as both in-nodes and out-nodes.
+
+        Args:
+            node (any): The node in question.
+            include_edge_weight (bool, default False): If True, each neighbour in the
+                output is a named tuple with fields `node` (the node ID) and `weight` (the edge weight)
+            edge_types (list of hashable, optional): If provided, only traverse the graph
+                via the provided edge types when collecting neighbours.
+            use_ilocs (bool): if True `node` is treated as a :ref:`node iloc <iloc-explanation>`
+                (and similarly `edge_types` is treated as a edge type ilocs) and the ilocs of each
+                neighbour is returned.
+
+        Returns:
+            iterable: The neighbouring out-nodes.
+        """
+        neigh_arrs = self.out_node_arrays(
+            node, include_edge_weight, edge_types, use_ilocs=use_ilocs
+        )
+        return self._to_neighbors(neigh_arrs, include_edge_weight)
+
     def nodes_of_type(self, node_type=None):
         """
         Get the nodes of the graph with the specified node types.

diff --git a/stellargraph/data/explorer.py b/stellargraph/data/explorer.py
@@ -160,7 +160,7 @@ def _get_random_state(self, seed):
         return rs
 
     def neighbors(self, node):
-        return self.graph.neighbors(node, use_ilocs=True)
+        return self.graph.neighbor_arrays(node, use_ilocs=True)
 
     def run(self, *args, **kwargs):
         """
@@ -280,8 +280,8 @@ def _walk(self, rs, start_node, length):
         walk = [start_node]
         current_node = start_node
         for _ in range(length - 1):
-            neighbours = self.graph.neighbors(current_node, use_ilocs=True)
-            if not neighbours:
+            neighbours = self.graph.neighbor_arrays(current_node, use_ilocs=True)
+            if len(neighbours) == 0:
                 # dead end, so stop
                 break
             else:
@@ -572,7 +572,9 @@ def run(self, nodes, *, n=None, length=None, metapaths=None, seed=None):
                     for d in range(length):
                         walk.append(current_node)
                         # d+1 can also be used to index metapath to retrieve the node type for the next step in the walk
-                        neighbours = self.graph.neighbors(current_node, use_ilocs=True)
+                        neighbours = self.graph.neighbor_arrays(
+                            current_node, use_ilocs=True
+                        )
                         # filter these by node type
                         neighbour_types = self.graph.node_type(
                             neighbours, use_ilocs=True
@@ -677,7 +679,9 @@ def run(self, nodes, n_size, n=1, seed=None):
                         continue
 
                     neighbours = (
-                        self.neighbors(cur_node) if cur_node is not None else []
+                        self.graph.neighbor_arrays(cur_node, use_ilocs=True)
+                        if cur_node != -1
+                        else []
                     )
                     if len(neighbours) == 0:
                         # Either node is unconnected or is in directed graph with no out-nodes.
@@ -890,14 +894,14 @@ def _sample_neighbours(self, rs, node, idx, size):
             The fixed-length list of neighbouring nodes (or None values
             if the neighbourhood is empty).
         """
-        if node is None:
+        if node == -1:
             # Non-node, e.g. previously sampled from empty neighbourhood
             return [-1] * size
-        neighbours = list(
-            self.graph.in_nodes(node, use_ilocs=True)
-            if idx == 0
-            else self.graph.out_nodes(node, use_ilocs=True)
-        )
+
+        if idx == 0:
+            neighbours = self.graph.in_node_arrays(node, use_ilocs=True)
+        else:
+            neighbours = self.graph.out_node_arrays(node, use_ilocs=True)
         if len(neighbours) == 0:
             # Sampling from empty neighbourhood
             return [-1] * size
@@ -1054,7 +1058,7 @@ def run(
         walks = []
         num_cw_curr = 0
 
-        edges, times = self.graph.edges(include_edge_weight=True)
+        sources, targets, _, times = self.graph.edge_arrays(include_edge_weight=True)
         edge_biases = self._temporal_biases(
             times, None, bias_type=initial_edge_bias, is_forward=False,
         )
@@ -1071,8 +1075,9 @@ def not_progressing_enough():
 
         # loop runs until we have enough context windows in total
         while num_cw_curr < num_cw:
-            first_edge_index = self._sample(len(edges), edge_biases, np_rs)
-            src, dst = edges[first_edge_index]
+            first_edge_index = self._sample(len(times), edge_biases, np_rs)
+            src = sources[first_edge_index]
+            dst = targets[first_edge_index]
             t = times[first_edge_index]
 
             remaining_length = num_cw - num_cw_curr + cw_size - 1
@@ -1125,18 +1130,15 @@ def _step(self, node, time, bias_type, np_rs):
         Perform 1 temporal step from a node. Returns None if a dead-end is reached.
 
         """
+        neighbours, times = self.graph.neighbor_arrays(node, include_edge_weight=True)
+        neighbours = neighbours[times > time]
+        times = times[times > time]
 
-        neighbours = [
-            (neighbour, t)
-            for neighbour, t in self.graph.neighbors(node, include_edge_weight=True)
-            if t > time
-        ]
-
-        if neighbours:
-            times = [t for _, t in neighbours]
+        if len(neighbours) > 0:
             biases = self._temporal_biases(times, time, bias_type, is_forward=True)
             chosen_neighbour_index = self._sample(len(neighbours), biases, np_rs)
-            next_node, next_time = neighbours[chosen_neighbour_index]
+            next_node = neighbours[chosen_neighbour_index]
+            next_time = times[chosen_neighbour_index]
             return next_node, next_time
         else:
             return None

diff --git a/tests/core/test_graph.py b/tests/core/test_graph.py
@@ -810,7 +810,7 @@ def test_benchmark_get_neighbours(benchmark, use_ilocs):
     # get the neigbours of every node in the graph
     def f():
         for i in range(num_nodes):
-            sg.neighbors(i, use_ilocs=use_ilocs)
+            sg.neighbor_arrays(i, use_ilocs=use_ilocs)
 
     benchmark(f)