From 5ea6ef83102830a429cca9a88c55d87abacd404a Mon Sep 17 00:00:00 2001
From: Jim Kitchen
Date: Tue, 13 Oct 2020 14:46:00 -0500
Subject: [PATCH 1/7] Add several new abstract algorithms

---
 metagraph/algorithms/centrality.py    | 37 +++++++++++++++++
 metagraph/algorithms/clustering.py    | 12 ++++++
 metagraph/algorithms/flow.py          | 18 ++++++++-
 metagraph/algorithms/subgraph.py      | 58 ++++++++++++++++++++++++++-
 metagraph/algorithms/utility.py       |  5 +++
 metagraph/core/resolver.py            |  4 +-
 metagraph/core/typing.py              |  3 ++
 metagraph/plugins/numpy/algorithms.py |  6 +--
 8 files changed, 136 insertions(+), 7 deletions(-)

diff --git a/metagraph/algorithms/centrality.py b/metagraph/algorithms/centrality.py
index c7b286f2..b18d68ca 100644
--- a/metagraph/algorithms/centrality.py
+++ b/metagraph/algorithms/centrality.py
@@ -1,6 +1,16 @@
 import metagraph as mg
 from metagraph import abstract_algorithm
 from metagraph.types import Graph, NodeMap, NodeSet, NodeID
+from typing import Tuple
+
+
+@abstract_algorithm("centrality.degree")
+def degree_centrality(graph: Graph, normalize: bool = True) -> NodeMap:
+    """
+    Computes the degree of each node.
+    If normalize is True, the value is scaled by (# of nodes - 1)
+    """
+    pass # pragma: no cover
 
 
 @abstract_algorithm("centrality.betweenness")
@@ -31,3 +41,30 @@ def pagerank(
     tolerance: float = 1e-05,
 ) -> NodeMap:
     pass # pragma: no cover
+
+
+@abstract_algorithm("centrality.closeness")
+def closeness_centrality(
+    graph: Graph(edge_type="map", edge_dtype={"int", "float"}),
+    nodes: mg.Optional[NodeSet] = None,
+    normalize: bool = True,
+) -> NodeMap:
+    pass # pragma: no cover
+
+
+@abstract_algorithm("centrality.eigenvector")
+def eigenvector_centrality(
+    graph: Graph(edge_type="map", edge_dtype={"int", "float"}),
+) -> NodeMap:
+    pass # pragma: no cover
+
+
+@abstract_algorithm("centrality.hits")
+def hits_centrality(
+    graph: Graph(edge_type="map", edge_dtype={"int", "float"}, is_directed=True),
+    max_iter: int = 100,
+    tol: float = 1e-05,
+    normalize: bool = True,
+) -> Tuple[NodeMap, NodeMap]:
+    """Return (hubs, authority)"""
+    pass # pragma: no cover
diff --git a/metagraph/algorithms/clustering.py b/metagraph/algorithms/clustering.py
index 66ac7351..dbad67fc 100644
--- a/metagraph/algorithms/clustering.py
+++ b/metagraph/algorithms/clustering.py
@@ -26,7 +26,19 @@ def louvain_community_step(
     pass # pragma: no cover
 
 
+# TODO: why is this "cluster" instead of "clustering"?
 @abstract_algorithm("cluster.triangle_count")
 def triangle_count(graph: Graph(is_directed=False)) -> int:
     """Counts the number of unique triangles in an undirected graph"""
     pass # pragma: no cover
+
+
+@abstract_algorithm("clustering.coloring.greedy")
+def greedy_coloring(graph: Graph(is_directed=False)) -> Tuple[NodeMap, int]:
+    """
+    Attempts to find the minimum number of colors required to color the graph such that no connected
+    nodes have the same color. Color is simply represented as a value from 0..n
+
+    Returns color for each node and # of colors required
+    """
+    pass # pragma: no cover
diff --git a/metagraph/algorithms/flow.py b/metagraph/algorithms/flow.py
index 113ff7a9..513bae70 100644
--- a/metagraph/algorithms/flow.py
+++ b/metagraph/algorithms/flow.py
@@ -10,5 +10,21 @@ def max_flow(
     source_node: NodeID,
     target_node: NodeID,
 ) -> Tuple[float, Graph]:
-    """The returned graph is a graph whose edge weights represent the outward flow. It contains all the nodes of the input graph"""
+    """
+    Returns the maximum flow and a graph whose edge weights represent the flow.
+ It contains all the nodes of the input graph + """ + pass # pragma: no cover + + +@abstract_algorithm("flow.min_cut") +def min_cut( + graph: Graph(edge_type="map", edge_dtype={"int", "float"}), + source_node: NodeID, + target_node: NodeID, +) -> Tuple[float, Graph]: + """ + Returns the sum of the minimum cut weights and a graph containing only those edges + which are part of the minimum cut. + """ pass # pragma: no cover diff --git a/metagraph/algorithms/subgraph.py b/metagraph/algorithms/subgraph.py index b010a177..5cc6c952 100644 --- a/metagraph/algorithms/subgraph.py +++ b/metagraph/algorithms/subgraph.py @@ -1,5 +1,6 @@ +import metagraph as mg from metagraph import abstract_algorithm -from metagraph.types import NodeSet, Graph +from metagraph.types import NodeSet, Graph, NodeID @abstract_algorithm("subgraph.extract_subgraph") @@ -10,3 +11,58 @@ def extract_subgraph(graph: Graph, nodes: NodeSet) -> Graph: @abstract_algorithm("subgraph.k_core") def k_core(graph: Graph(is_directed=False), k: int) -> Graph: pass # pragma: no cover + + +@abstract_algorithm("subgraph.k_truss") +def k_truss(graph: Graph(is_directed=False), k: int) -> Graph: + pass # pragma: no cover + + +@abstract_algorithm("subgraph.maximal_independent_set") +def maximal_independent_set(graph: Graph) -> NodeSet: + pass # pragma: no cover + + +@abstract_algorithm("subgraph.subisomorphic") +def subisomorphic(graph: Graph, subgraph: Graph) -> bool: + pass # pragma: no cover + + +@abstract_algorithm("subgraph.sample.node_sampling") +def node_sampling(graph: Graph, p: float = 0.20) -> Graph: + pass # pragma: no cover + + +@abstract_algorithm("subgraph.sample.edge_sampling") +def edge_sampling(graph: Graph, p: float = 0.20) -> Graph: + pass # pragma: no cover + + +@abstract_algorithm("subgraph.sample.ties") +def totally_induced_edge_sampling(graph: Graph, p: float = 0.20) -> Graph: + """ + Totally Induced Edge Sampling method + https://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=2743&context=cstech + """ + pass # pragma: no cover + + +@abstract_algorithm("subgraph.sample.random_walk") +def random_walk_sampling( + graph: Graph, + num_steps: mg.Optional[int] = None, + num_nodes: mg.Optional[int] = None, + num_edges: mg.Optional[int] = None, + jump_probability: float = 0.15, + start_node: mg.Optional[NodeID] = None, +) -> Graph: + """ + Sample using random walks + + Sampling ends when number of steps, nodes, or edges are reached (first to occur if multiple are specified). + For each step, there is a jump_probability to reset the walk. + When resetting the walk, if start_node is specified, always reset to this node. If not specified, every reset + picks a new node in the graph at random. 
+ """ + # TODO: check that `num_*` variables aren't all `None` + pass # pragma: no cover diff --git a/metagraph/algorithms/utility.py b/metagraph/algorithms/utility.py index fba4cf54..81403994 100644 --- a/metagraph/algorithms/utility.py +++ b/metagraph/algorithms/utility.py @@ -98,3 +98,8 @@ def graph_collapse_by_label( aggregator: Callable[[Any, Any], Any], ) -> Graph: pass # pragma: no cover + + +@abstract_algorithm("util.graph.isomorphic") +def graph_isomorphic(g1: Graph, g2: Graph,) -> bool: + pass # pragma: no cover diff --git a/metagraph/core/resolver.py b/metagraph/core/resolver.py index 77fc9664..6492240b 100644 --- a/metagraph/core/resolver.py +++ b/metagraph/core/resolver.py @@ -440,9 +440,9 @@ def _check_abstract_type(self, abst_algo, obj, msg): # Non-abstract type class is assumed to be Python type return obj, False if isinstance(obj, mgtyping.Combo): - if obj.kind not in {"python", "abstract"}: + if obj.kind not in {"python", "abstract", "node_id"}: raise TypeError( - f"{abst_algo.func.__qualname__} {msg} may not have Concrete types not allowed in Union" + f"{abst_algo.func.__qualname__} {msg} may not have Concrete types in Union" ) return obj, False if isinstance(obj, AbstractType): diff --git a/metagraph/core/typing.py b/metagraph/core/typing.py index 0f19b0e8..88d5f119 100644 --- a/metagraph/core/typing.py +++ b/metagraph/core/typing.py @@ -4,6 +4,7 @@ ex. typing.Optional[MyAbstractType] works, but typing.Optional[MyAbstractType(some_prop=True)] fails """ from .plugin import AbstractType, ConcreteType, MetaWrapper +from ..types import NodeID class Combo: @@ -30,6 +31,8 @@ def __init__(self, types, *, optional=False, strict=None): this_kind = "abstract" elif isinstance(t, ConcreteType): this_kind = "concrete" + elif t is NodeID: + this_kind = "node_id" else: raise TypeError(f"type within Union or Optional may not be {type(t)}") diff --git a/metagraph/plugins/numpy/algorithms.py b/metagraph/plugins/numpy/algorithms.py index 67e5734e..2dc95519 100644 --- a/metagraph/plugins/numpy/algorithms.py +++ b/metagraph/plugins/numpy/algorithms.py @@ -66,11 +66,11 @@ def np_nodemap_select(x: NumpyNodeMap, nodes: NumpyNodeSet) -> NumpyNodeMap: ) present_value_positions = np.flatnonzero(selected_node_map.mask) positions_to_remove = np.setdiff1d( - present_value_positions, nodes.value, assume_unique=True + present_value_positions, nodes.node_set, assume_unique=True ) selected_node_map.mask[positions_to_remove] = False else: - if len(nodes_mask) == len(x.mask): + if len(nodes) == len(x.mask): nodes_mask = nodes.mask else: nodes_mask = nodes.mask.copy() @@ -104,7 +104,7 @@ def np_nodemap_apply(x: NumpyNodeMap, func: Callable[[Any], Any]) -> NumpyNodeMa if x.id2pos is not None: new_node_map = NumpyNodeMap(func_vectorized(x.value), node_ids=x.pos2id.copy()) elif x.mask is not None: - results = func_vectorized(new_node_map.value[new_node_map.mask]) + results = func_vectorized(x.value[x.mask]) new_data = np.empty_like(x.value, dtype=results.dtype) new_data[x.mask] = results new_node_map = NumpyNodeMap(new_data, mask=x.mask.copy()) From 5868c917bcdeda14209602ea88190eaca1714324 Mon Sep 17 00:00:00 2001 From: Jim Kitchen Date: Wed, 14 Oct 2020 17:47:56 -0500 Subject: [PATCH 2/7] Add more algorithms and tests --- metagraph/algorithms/utility.py | 2 +- metagraph/core/multiverify.py | 14 +++ metagraph/plugins/networkx/algorithms.py | 113 ++++++++++++++++-- metagraph/plugins/networkx/types.py | 8 +- ...nness_centrality.py => test_centrality.py} | 88 ++++++++++++++ 
metagraph/tests/algorithms/test_clustering.py | 54 ++++++++- .../tests/algorithms/test_densesparse.py | 36 ------ metagraph/tests/algorithms/test_embedding.py | 2 +- metagraph/tests/algorithms/test_flow.py | 46 +++++++ .../tests/algorithms/test_katz_centrality.py | 45 ------- metagraph/tests/algorithms/test_pagerank.py | 34 ------ metagraph/tests/algorithms/test_subgraph.py | 79 ++++++++++++ metagraph/tests/algorithms/test_traversal.py | 2 +- .../tests/algorithms/test_triangle_count.py | 32 ----- metagraph/tests/algorithms/test_utility.py | 22 ++++ 15 files changed, 417 insertions(+), 160 deletions(-) rename metagraph/tests/algorithms/{test_betweenness_centrality.py => test_centrality.py} (52%) delete mode 100644 metagraph/tests/algorithms/test_densesparse.py delete mode 100644 metagraph/tests/algorithms/test_katz_centrality.py delete mode 100644 metagraph/tests/algorithms/test_pagerank.py delete mode 100644 metagraph/tests/algorithms/test_triangle_count.py diff --git a/metagraph/algorithms/utility.py b/metagraph/algorithms/utility.py index 8a09b28f..b2d9ff88 100644 --- a/metagraph/algorithms/utility.py +++ b/metagraph/algorithms/utility.py @@ -111,7 +111,7 @@ def graph_collapse_by_label( @abstract_algorithm("util.graph.isomorphic") -def graph_isomorphic(g1: Graph, g2: Graph,) -> bool: +def graph_isomorphic(g1: Graph, g2: Graph) -> bool: pass # pragma: no cover diff --git a/metagraph/core/multiverify.py b/metagraph/core/multiverify.py index 3bd6f159..6e4e8d39 100644 --- a/metagraph/core/multiverify.py +++ b/metagraph/core/multiverify.py @@ -4,6 +4,14 @@ from metagraph import ConcreteType from metagraph.core.resolver import Resolver, Dispatcher, ExactDispatcher from dask import is_dask_collection +import warnings + +try: + import pytest + + has_pytest = True +except ImportError: + has_pytest = False class UnsatisfiableAlgorithmError(Exception): @@ -122,6 +130,12 @@ def compute(self, algo: Union[Dispatcher, AnyStr], *args, **kwargs): ) all_concrete_algos = set(self.resolver.concrete_algorithms[algo]) + if not all_concrete_algos: + msg = f"No concrete algorithms exist which implement {algo}" + if has_pytest: + pytest.skip(msg) + else: + warnings.warn(msg) # pragma: no cover plans = self.resolver.find_algorithm_solutions(algo, *args, **kwargs) # Check if any concrete algorithm failed to find a valid plan for plan in plans: diff --git a/metagraph/plugins/networkx/algorithms.py b/metagraph/plugins/networkx/algorithms.py index 27fb9ff1..395441f6 100644 --- a/metagraph/plugins/networkx/algorithms.py +++ b/metagraph/plugins/networkx/algorithms.py @@ -1,7 +1,7 @@ import metagraph as mg from metagraph import concrete_algorithm, NodeID from metagraph.plugins import has_networkx, has_community, has_pandas -from typing import Tuple, Iterable, Any, Callable +from typing import Tuple, Any, Callable if has_networkx: @@ -89,6 +89,25 @@ def nx_k_core(graph: NetworkXGraph, k: int) -> NetworkXGraph: edge_weight_label=graph.edge_weight_label, ) + if nx.__version__ >= "2.4": + + @concrete_algorithm("subgraph.k_truss") + def nx_k_truss(graph: NetworkXGraph, k: int) -> NetworkXGraph: + if nx.__version__ < "2.5": + # v2.4 uses `k` rather than `k-2` as everyone else uses + k -= 2 + k_truss_graph = nx.k_truss(graph.value, k) + return NetworkXGraph( + k_truss_graph, + node_weight_label=graph.node_weight_label, + edge_weight_label=graph.edge_weight_label, + ) + + @concrete_algorithm("subgraph.maximal_independent_set") + def maximal_independent_set(graph: NetworkXGraph) -> PythonNodeSet: + nodes = 
nx.maximal_independent_set(graph.value) + return PythonNodeSet(set(nodes)) + @concrete_algorithm("traversal.bellman_ford") def nx_bellman_ford( graph: NetworkXGraph, source_node: NodeID @@ -132,10 +151,7 @@ def nx_minimum_spanning_tree(graph: NetworkXGraph) -> NetworkXGraph: @concrete_algorithm("centrality.betweenness") def nx_betweenness_centrality( - graph: NetworkXGraph, - nodes: mg.Optional[PythonNodeSet], - normalize: bool, - # include_endpoints: bool, + graph: NetworkXGraph, nodes: mg.Optional[PythonNodeSet], normalize: bool, ) -> PythonNodeMap: if nodes is None: sources = targets = graph.value.nodes @@ -147,9 +163,28 @@ def nx_betweenness_centrality( targets=targets, normalized=normalize, weight=graph.edge_weight_label, - # endpoints=include_endpoints, ) - return PythonNodeMap(node_to_score_map,) + return PythonNodeMap(node_to_score_map) + + @concrete_algorithm("centrality.closeness") + def nx_closeness_centrality( + graph: NetworkXGraph, nodes: mg.Optional[PythonNodeSet], normalize: bool, + ) -> PythonNodeMap: + pass + + @concrete_algorithm("centrality.degree") + def nx_degree_centrality(graph: NetworkXGraph, normalize: bool,) -> PythonNodeMap: + pass + + @concrete_algorithm("centrality.eigenvector") + def nx_eigenvector_centrality(graph: NetworkXGraph,) -> PythonNodeMap: + pass + + @concrete_algorithm("centrality.hits") + def nx_hits_centrality( + graph: NetworkXGraph, max_iter: int, tol: float, normalize: bool, + ) -> Tuple[PythonNodeMap, PythonNodeMap]: + pass @concrete_algorithm("traversal.bfs_iter") def nx_breadth_first_search( @@ -191,6 +226,29 @@ def nx_max_flow( ) return (flow_value, flow_graph) + @concrete_algorithm("flow.min_cut") + def nx_min_cut( + graph: NetworkXGraph, source_node: NodeID, target_node: NodeID, + ) -> Tuple[float, NetworkXGraph]: + g = graph.value + flow_value, (reachable, non_reachable) = nx.minimum_cut( + g, source_node, target_node, capacity=graph.edge_weight_label + ) + # Build graph containing cut edges + nx_cut_graph = type(g)() + nx_cut_graph.add_nodes_from(g.nodes(data=True)) + for u, nbrs in ((n, g[n]) for n in reachable): + for v in nbrs: + if v in non_reachable: + edge_attrs = g.edges[u, v] + nx_cut_graph.add_edge(u, v, **edge_attrs) + cut_graph = NetworkXGraph( + nx_cut_graph, + node_weight_label=graph.node_weight_label, + edge_weight_label=graph.edge_weight_label, + ) + return flow_value, cut_graph + @concrete_algorithm("util.graph.aggregate_edges") def nx_graph_aggregate_edges( graph: NetworkXGraph, @@ -244,6 +302,47 @@ def nx_graph_assign_uniform_weight( result_nx_graph, graph.node_weight_label, graph.edge_weight_label ) + @concrete_algorithm("clustering.coloring.greedy") + def nx_greedy_coloring(graph: NetworkXGraph) -> Tuple[PythonNodeMap, int]: + colors = nx.greedy_color(graph.value) + unique_colors = set(colors.values()) + return PythonNodeMap(colors), len(unique_colors) + + @concrete_algorithm("subgraph.sample.node_sampling") + def nx_node_sampling(graph: NetworkXGraph, p: float) -> NetworkXGraph: + pass # pragma: no cover + + @concrete_algorithm("subgraph.sample.edge_sampling") + def nx_edge_sampling(graph: NetworkXGraph, p: float) -> NetworkXGraph: + pass # pragma: no cover + + @concrete_algorithm("subgraph.sample.ties") + def nx_ties(graph: NetworkXGraph, p: float) -> NetworkXGraph: + """ + Totally Induced Edge Sampling method + https://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=2743&context=cstech + """ + pass # pragma: no cover + + @concrete_algorithm("subgraph.sample.random_walk") + def nx_random_walk_sampling( + graph: 
NetworkXGraph, + num_steps: mg.Optional[int], + num_nodes: mg.Optional[int], + num_edges: mg.Optional[int], + jump_probability: float, + start_node: mg.Optional[NodeID], + ) -> NetworkXGraph: + """ + Sample using random walks + + Sampling ends when number of steps, nodes, or edges are reached (first to occur if multiple are specified). + For each step, there is a jump_probability to reset the walk. + When resetting the walk, if start_node is specified, always reset to this node. If not specified, every reset + picks a new node in the graph at random. + """ + pass # pragma: no cover + if has_networkx and has_community: import community as community_louvain diff --git a/metagraph/plugins/networkx/types.py b/metagraph/plugins/networkx/types.py index 580f5696..fcd7c4fd 100644 --- a/metagraph/plugins/networkx/types.py +++ b/metagraph/plugins/networkx/types.py @@ -121,8 +121,12 @@ def assert_equal( g1 = obj1.value g2 = obj2.value # Compare - assert g1.nodes() == g2.nodes(), f"{g1.nodes()} != {g2.nodes()}" - assert g1.edges() == g2.edges(), f"{g1.edges()} != {g2.edges()}" + assert ( + g1.nodes() == g2.nodes() + ), f"node mismatch: {g1.nodes()} != {g2.nodes()}" + assert ( + g1.edges() == g2.edges() + ), f"edge mismatch: {g1.edges()} != {g2.edges()}" if aprops1.get("node_type") == "map": for n, d1 in g1.nodes(data=True): diff --git a/metagraph/tests/algorithms/test_betweenness_centrality.py b/metagraph/tests/algorithms/test_centrality.py similarity index 52% rename from metagraph/tests/algorithms/test_betweenness_centrality.py rename to metagraph/tests/algorithms/test_centrality.py index 88e869f6..6dfb6850 100644 --- a/metagraph/tests/algorithms/test_betweenness_centrality.py +++ b/metagraph/tests/algorithms/test_centrality.py @@ -1,9 +1,14 @@ +import pytest from metagraph.tests.util import default_plugin_resolver import networkx as nx import numpy as np from . 
import MultiVerify +def test_degree_centrality(default_plugin_resolver): + pytest.xfail() + + def test_betweenness_centrality_single_hub(default_plugin_resolver): r""" 0 <--2-- 1 5 --10-> 6 @@ -96,3 +101,86 @@ def test_betweenness_centrality_multiple_hubs(default_plugin_resolver): MultiVerify(dpr).compute( "centrality.betweenness", graph, normalize=False, ).assert_equal(expected_answer) + + +def test_katz_centrality(default_plugin_resolver): + r""" + +-+ + ------> |1| ---------------------------- + | +-+ | + | | + | | | + | v | + V + +-+ <-- +-+ +-+ +-+ +-+ + |0| |2| <-- |3| --> |4| <-- |5| + +-+ --> +-+ +-+ +-+ +-+ + """ + dpr = default_plugin_resolver + networkx_graph_data = [ + (0, 1), + (0, 2), + (2, 0), + (1, 2), + (1, 5), + (3, 2), + (3, 4), + (5, 4), + ] + networkx_graph = nx.DiGraph() + networkx_graph.add_edges_from(networkx_graph_data) + data = { + 0: 0.4069549895218489, + 1: 0.40687482321632046, + 2: 0.41497162410274485, + 3: 0.40280527348222406, + 4: 0.410902066312543, + 5: 0.4068740216338262, + } + expected_val = dpr.wrappers.NodeMap.PythonNodeMap(data) + graph = dpr.wrappers.Graph.NetworkXGraph(networkx_graph) + MultiVerify(dpr).compute("centrality.katz", graph, tolerance=1e-7).assert_equal( + expected_val, rel_tol=1e-5 + ) + + +def test_pagerank_centrality(default_plugin_resolver): + r""" + +-+ + ------> |1| + | +-+ + | + | | + | v + + +-+ <-- +-+ +-+ + |0| |2| <-- |3| + +-+ --> +-+ +-+ + """ + dpr = default_plugin_resolver + networkx_graph_data = [(0, 1), (0, 2), (2, 0), (1, 2), (3, 2)] + networkx_graph = nx.DiGraph() + networkx_graph.add_edges_from(networkx_graph_data) + data = { + 0: 0.37252685132844066, + 1: 0.19582391181458728, + 2: 0.3941492368569718, + 3: 0.037500000000000006, + } + expected_val = dpr.wrappers.NodeMap.PythonNodeMap(data) + graph = dpr.wrappers.Graph.NetworkXGraph(networkx_graph) + MultiVerify(dpr).compute( + dpr.algos.centrality.pagerank, graph, tolerance=1e-7 + ).assert_equal(expected_val, rel_tol=1e-5) + + +def test_closeness_centrality(default_plugin_resolver): + pytest.xfail() + + +def test_eigenvector_centrality(default_plugin_resolver): + pytest.xfail() + + +def test_hits_centrality(default_plugin_resolver): + pytest.xfail() diff --git a/metagraph/tests/algorithms/test_clustering.py b/metagraph/tests/algorithms/test_clustering.py index 3bbda5a7..8f34622a 100644 --- a/metagraph/tests/algorithms/test_clustering.py +++ b/metagraph/tests/algorithms/test_clustering.py @@ -89,7 +89,35 @@ def cmp_func(x): ).normalize(PythonNodeMap.Type).custom_compare(cmp_func) -def test_louvain(default_plugin_resolver): +def test_triangle_count(default_plugin_resolver): + dpr = default_plugin_resolver + # Simple graph with 5 triangles + # 0 - 1 5 - 6 + # | X | | / + # 3 - 4 -- 2 - 7 + simple_graph_data = [ + [0, 1, 100], + [0, 3, 200], + [0, 4, 300], + [1, 3, 50], + [1, 4, 55], + [2, 4, 60], + [2, 5, 65], + [2, 6, 70], + [3, 4, 75], + [5, 6, 20], + [6, 7, 10], + ] + # Build simple graph with 5 triangles + simple_graph = nx.Graph() + simple_graph.add_weighted_edges_from(simple_graph_data) + # Convert to wrapper + graph = dpr.wrappers.Graph.NetworkXGraph(simple_graph) + + MultiVerify(dpr).compute("cluster.triangle_count", graph).assert_equal(5) + + +def test_louvain_step(default_plugin_resolver): r""" 0 ---2-- 1 5 --10-- 6 | / | | / @@ -167,3 +195,27 @@ def cmp_func(x): MultiVerify(dpr).compute("clustering.label_propagation_community", graph).normalize( PythonNodeMap ).custom_compare(cmp_func) + + +def test_greedy_coloring(default_plugin_resolver): + # 0 1 2 3 4 5 Node 
Coloring + # 0 - 1 - 1 - 1 0 0 + # 1 1 - 1 - - - 1 1 + # 2 - 1 - - 1 - 2 0 + # 3 1 - - - - 1 3 1 + # 4 - - 1 - - 1 4 1 + # 5 1 - - 1 1 - 5 2 + dpr = default_plugin_resolver + g = nx.Graph() + g.add_edges_from([(0, 1), (0, 3), (0, 5), (1, 2), (2, 4), (3, 5), (4, 5)]) + graph = dpr.wrappers.Graph.NetworkXGraph(g) + + def cmp_func(colors): + # Check that the triangle in the graph (0, 3, 5) all have different colors + assert {colors[0], colors[3], colors[5]} == {0, 1, 2} + + results = MultiVerify(dpr).compute("clustering.coloring.greedy", graph) + # Check number of colors required + results[1].assert_equal(3) + # Check coloring of triangle in the graph + results[0].normalize(dpr.types.NodeMap.PythonNodeMapType).custom_compare(cmp_func) diff --git a/metagraph/tests/algorithms/test_densesparse.py b/metagraph/tests/algorithms/test_densesparse.py deleted file mode 100644 index d450890f..00000000 --- a/metagraph/tests/algorithms/test_densesparse.py +++ /dev/null @@ -1,36 +0,0 @@ -import numpy as np - - -# TODO: update these once methods are available: fillna, sparsify - -# def test_vector_dense_to_sparse(default_plugin_resolver): -# dpr = default_plugin_resolver -# x = dpr.wrapper.DenseVector.NumpyVector(np.array([0, 4, 2, 3, 4, 99])) -# y = dpr.algo.casting.vector_dense_to_sparse(x, missing_value=4) -# assert y.nnz == 4 -# -# -# def test_vector_sparse_to_dense(default_plugin_resolver): -# dpr = default_plugin_resolver -# x = dpr.wrapper.SparseVector.NumpySparseVector( -# np.array([0, 4, 2, 3, 4, 99]), missing_value=4 -# ) -# y = dpr.algo.casting.vector_sparse_to_dense(x, fill_value=42) -# assert (y.value == np.array([0, 42, 2, 3, 42, 99])).all() -# -# -# def test_matrix_dense_to_sparse(default_plugin_resolver): -# dpr = default_plugin_resolver -# m = np.array([[1, 2, 3], [2, 5, 6], [7, 8, 2]]) -# x = dpr.wrapper.DenseMatrix.NumpyMatrix(m) -# y = dpr.algo.casting.matrix_dense_to_sparse(x, missing_value=2) -# assert y.nnz == 6 -# -# -# def test_matrix_sparse_to_dense(default_plugin_resolver): -# dpr = default_plugin_resolver -# m = np.array([[1, 2, 3, 4], [2, 5, 6, -4], [7, 8, 2, 2]]) -# x = dpr.wrapper.SparseMatrix.NumpySparseMatrix(m, missing_value=2) -# y = dpr.algo.casting.matrix_sparse_to_dense(x, fill_value=0) -# result = np.array([[1, 0, 3, 4], [0, 5, 6, -4], [7, 8, 0, 0]]) -# assert (y.value == result).all().all() diff --git a/metagraph/tests/algorithms/test_embedding.py b/metagraph/tests/algorithms/test_embedding.py index 8000e66d..61bbe472 100644 --- a/metagraph/tests/algorithms/test_embedding.py +++ b/metagraph/tests/algorithms/test_embedding.py @@ -59,7 +59,7 @@ def cmp_func(embedding): a_to_b = euclidean_dist(a_vector, b_vector) assert a_to_a_center < a_to_b - assert a_to_a_center < a_to_b + assert b_to_b_center < a_to_b embedding.normalize(dpr.types.NodeEmbedding.NumpyNodeEmbeddingType).custom_compare( cmp_func diff --git a/metagraph/tests/algorithms/test_flow.py b/metagraph/tests/algorithms/test_flow.py index 7c2cc988..55a84e68 100644 --- a/metagraph/tests/algorithms/test_flow.py +++ b/metagraph/tests/algorithms/test_flow.py @@ -72,3 +72,49 @@ def test_max_flow(default_plugin_resolver): "util.nodemap.select.core_python", in_edges, bottleneck_nodes ) in_bottleneck.assert_equal(expected_nodemap) + + +def test_min_cut(default_plugin_resolver): + r""" + 0 ---9-> 1 5 --1--> 6 + | ^ | ^ | / + | / | / | / + 10 2 3 1 5 11 + | _/ | / | / + v / v / v v + 3 --8--> 4 ---4-> 2 --6--> 7 + """ + dpr = default_plugin_resolver + source_node = 0 + target_node = 7 + ebunch = [ + (0, 1, 9), + (0, 3, 10), 
+ (1, 4, 3), + (2, 7, 6), + (3, 1, 2), + (3, 4, 8), + (4, 5, 1), + (4, 2, 4), + (5, 2, 5), + (5, 6, 1), + (6, 2, 11), + ] + nx_graph = nx.DiGraph() + nx_graph.add_weighted_edges_from(ebunch) + graph = dpr.wrappers.Graph.NetworkXGraph(nx_graph, edge_weight_label="weight") + + expected_flow_value = 5 + cut_edges = nx.DiGraph() + cut_edges.add_nodes_from(nx_graph.nodes) + cut_edges.add_weighted_edges_from([(4, 5, 1), (4, 2, 4)]) + expected_cut_edges = dpr.wrappers.Graph.NetworkXGraph(cut_edges) + + mv = MultiVerify(dpr) + results = mv.compute("flow.min_cut", graph, source_node, target_node) + + # Compare flow rate + results[0].assert_equal(expected_flow_value) + + # Compare cut graph + results[1].assert_equal(expected_cut_edges) diff --git a/metagraph/tests/algorithms/test_katz_centrality.py b/metagraph/tests/algorithms/test_katz_centrality.py deleted file mode 100644 index 9364986b..00000000 --- a/metagraph/tests/algorithms/test_katz_centrality.py +++ /dev/null @@ -1,45 +0,0 @@ -from metagraph.tests.util import default_plugin_resolver -import networkx as nx -import numpy as np -from . import MultiVerify - - -def test_katz_centrality(default_plugin_resolver): - r""" - +-+ - ------> |1| ---------------------------- - | +-+ | - | | - | | | - | v | - V - +-+ <-- +-+ +-+ +-+ +-+ - |0| |2| <-- |3| --> |4| <-- |5| - +-+ --> +-+ +-+ +-+ +-+ - """ - dpr = default_plugin_resolver - networkx_graph_data = [ - (0, 1), - (0, 2), - (2, 0), - (1, 2), - (1, 5), - (3, 2), - (3, 4), - (5, 4), - ] - networkx_graph = nx.DiGraph() - networkx_graph.add_edges_from(networkx_graph_data) - data = { - 0: 0.4069549895218489, - 1: 0.40687482321632046, - 2: 0.41497162410274485, - 3: 0.40280527348222406, - 4: 0.410902066312543, - 5: 0.4068740216338262, - } - expected_val = dpr.wrappers.NodeMap.PythonNodeMap(data) - graph = dpr.wrappers.Graph.NetworkXGraph(networkx_graph) - MultiVerify(dpr).compute("centrality.katz", graph, tolerance=1e-7).assert_equal( - expected_val, rel_tol=1e-5 - ) diff --git a/metagraph/tests/algorithms/test_pagerank.py b/metagraph/tests/algorithms/test_pagerank.py deleted file mode 100644 index f685e825..00000000 --- a/metagraph/tests/algorithms/test_pagerank.py +++ /dev/null @@ -1,34 +0,0 @@ -from metagraph.tests.util import default_plugin_resolver -import networkx as nx -import numpy as np -from . import MultiVerify - - -def test_pagerank(default_plugin_resolver): - r""" - +-+ - ------> |1| - | +-+ - | - | | - | v - - +-+ <-- +-+ +-+ - |0| |2| <-- |3| - +-+ --> +-+ +-+ - """ - dpr = default_plugin_resolver - networkx_graph_data = [(0, 1), (0, 2), (2, 0), (1, 2), (3, 2)] - networkx_graph = nx.DiGraph() - networkx_graph.add_edges_from(networkx_graph_data) - data = { - 0: 0.37252685132844066, - 1: 0.19582391181458728, - 2: 0.3941492368569718, - 3: 0.037500000000000006, - } - expected_val = dpr.wrappers.NodeMap.PythonNodeMap(data) - graph = dpr.wrappers.Graph.NetworkXGraph(networkx_graph) - MultiVerify(dpr).compute( - dpr.algos.centrality.pagerank, graph, tolerance=1e-7 - ).assert_equal(expected_val, rel_tol=1e-5) diff --git a/metagraph/tests/algorithms/test_subgraph.py b/metagraph/tests/algorithms/test_subgraph.py index 2f65fac8..343d41d1 100644 --- a/metagraph/tests/algorithms/test_subgraph.py +++ b/metagraph/tests/algorithms/test_subgraph.py @@ -1,3 +1,4 @@ +import pytest from metagraph.tests.util import default_plugin_resolver import networkx as nx from . 
import MultiVerify @@ -52,3 +53,81 @@ def test_k_core(default_plugin_resolver): graph = dpr.wrappers.Graph.NetworkXGraph(nx_graph) k_core_graph = dpr.wrappers.Graph.NetworkXGraph(nx_k_core_graph) MultiVerify(dpr).compute("subgraph.k_core", graph, k).assert_equal(k_core_graph) + + +def test_k_truss(default_plugin_resolver): + r""" + 0 ---- 1 ---- 2 + |\ /| / | + | \ / | / | + | \/ | / | + | /\ | / | + | / \ |/ | + 3 -----4 5 + """ + dpr = default_plugin_resolver + nx_graph = nx.Graph() + nx_graph.add_edges_from( + [(0, 1), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 4), (2, 5), (3, 4)] + ) + nx_3_truss_graph = nx.Graph( + [(0, 1), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 4), (3, 4)] + ) + nx_4_truss_graph = nx.Graph([(0, 1), (0, 3), (0, 4), (1, 3), (1, 4), (3, 4)]) + graph = dpr.wrappers.Graph.NetworkXGraph(nx_graph) + three_truss_graph = dpr.wrappers.Graph.NetworkXGraph(nx_3_truss_graph) + four_truss_graph = dpr.wrappers.Graph.NetworkXGraph(nx_4_truss_graph) + MultiVerify(dpr).compute("subgraph.k_truss", graph, 3).assert_equal( + three_truss_graph + ) + MultiVerify(dpr).compute("subgraph.k_truss", graph, 4).assert_equal( + four_truss_graph + ) + + +def test_maximial_independent_set(default_plugin_resolver): + dpr = default_plugin_resolver + g = nx.generators.classic.barbell_graph(5, 6) + graph = dpr.wrappers.Graph.NetworkXGraph(g) + + def cmp_func(nodeset): + # Verify that every node in the graph is either: + # 1. in the nodeset + # 2. directly connected to the nodeset + ns = nodeset.value + for node in g.nodes(): + if node in ns: + continue + for nbr in g.neighbors(node): + if nbr in ns: + break + else: + raise AssertionError(f"node {node} is independent of the set") + # Verify that nodes in the nodeset are not connected to each other + for node in ns: + for nbr in g.neighbors(node): + assert nbr not in ns, f"nodes {node} and {nbr} are connected" + + MultiVerify(dpr).compute("subgraph.maximal_independent_set", graph).normalize( + dpr.wrappers.NodeSet.PythonNodeSet + ).custom_compare(cmp_func) + + +def test_subisomorphic(default_plugin_resolver): + pytest.xfail() + + +def test_node_sampling(default_plugin_resolver): + pytest.xfail() + + +def test_edge_sampling(default_plugin_resolver): + pytest.xfail() + + +def test_totally_induced_edge_sampling(default_plugin_resolver): + pytest.xfail() + + +def test_random_walk_sampling(default_plugin_resolver): + pytest.xfail() diff --git a/metagraph/tests/algorithms/test_traversal.py b/metagraph/tests/algorithms/test_traversal.py index c626da70..21dfc890 100644 --- a/metagraph/tests/algorithms/test_traversal.py +++ b/metagraph/tests/algorithms/test_traversal.py @@ -197,7 +197,7 @@ def test_minimum_spanning_tree(default_plugin_resolver): ) -def test_minimum_spanning_tree_dicsconnected(default_plugin_resolver): +def test_minimum_spanning_tree_disconnected(default_plugin_resolver): r""" 0 ---2-- 1 5 --10-- 6 | / | | / diff --git a/metagraph/tests/algorithms/test_triangle_count.py b/metagraph/tests/algorithms/test_triangle_count.py deleted file mode 100644 index 5ddc0030..00000000 --- a/metagraph/tests/algorithms/test_triangle_count.py +++ /dev/null @@ -1,32 +0,0 @@ -from metagraph.tests.util import default_plugin_resolver -import networkx as nx -from . 
import MultiVerify - -# Simple graph with 5 triangles -# 0 - 1 5 - 6 -# | X | | / -# 3 - 4 -- 2 - 7 -simple_graph_data = [ - [0, 1, 100], - [0, 3, 200], - [0, 4, 300], - [1, 3, 50], - [1, 4, 55], - [2, 4, 60], - [2, 5, 65], - [2, 6, 70], - [3, 4, 75], - [5, 6, 20], - [6, 7, 10], -] - - -def test_triangle_count(default_plugin_resolver): - dpr = default_plugin_resolver - # Build simple graph with 5 triangles - simple_graph = nx.Graph() - simple_graph.add_weighted_edges_from(simple_graph_data) - # Convert to wrapper - graph = dpr.wrappers.Graph.NetworkXGraph(simple_graph) - - MultiVerify(dpr).compute("cluster.triangle_count", graph).assert_equal(5) diff --git a/metagraph/tests/algorithms/test_utility.py b/metagraph/tests/algorithms/test_utility.py index 161e3040..b064a116 100644 --- a/metagraph/tests/algorithms/test_utility.py +++ b/metagraph/tests/algorithms/test_utility.py @@ -1,3 +1,4 @@ +import pytest from metagraph.tests.util import default_plugin_resolver import networkx as nx import numpy as np @@ -495,3 +496,24 @@ def test_node_embedding_apply(default_plugin_resolver): embedding, dpr.wrappers.Vector.NumpyVector(np.array([9991, 9990])), ).assert_equal(dpr.wrappers.Matrix.NumpyMatrix(np.array([[3, 4, 5], [0, 1, 2]]))) + + +def test_isomorphic(default_plugin_resolver): + dpr = default_plugin_resolver + # 0 1 2 3 4 0 1 2 3 4 + # 0 1 1 - 1 - 2 -> 0 - - - 1 1 + # 1 - - 1 - - 4 -> 1 - - 1 - - + # 2 1 1 - - - 3 -> 2 1 - 1 - 1 + # 3 - 1 1 - - 0 -> 3 - - 1 - 1 + # 4 1 - - - - 1 -> 4 - - - 1 - + g1 = nx.DiGraph() + g1.add_edges_from( + [(0, 0), (0, 1), (0, 3), (1, 2), (2, 0), (2, 1), (3, 1), (3, 2), (4, 0)] + ) + g2 = nx.DiGraph() + g2.add_edges_from( + [(0, 3), (0, 4), (1, 2), (2, 0), (2, 2), (2, 4), (3, 2), (3, 4), (4, 3)] + ) + graph1 = dpr.wrappers.Graph.NetworkXGraph(g1) + graph2 = dpr.wrappers.Graph.NetworkXGraph(g2) + MultiVerify(dpr).compute("util.graph.isomorphic", graph1, graph2).assert_equal(True) From a60ca247f9213334d45fbe966a545f6260f751db Mon Sep 17 00:00:00 2001 From: Jim Kitchen Date: Thu, 15 Oct 2020 17:16:47 -0500 Subject: [PATCH 3/7] Add more centrality algorithms --- docs/community_resources/existing_plugins.rst | 4 +- docs/getting_started/installation.rst | 2 +- metagraph/algorithms/centrality.py | 10 --- metagraph/algorithms/utility.py | 7 ++ metagraph/plugins/networkx/algorithms.py | 47 +++++++++-- metagraph/tests/algorithms/test_centrality.py | 83 ++++++++++++++++--- metagraph/tests/algorithms/test_subgraph.py | 65 ++++++++++++++- metagraph/tests/algorithms/test_utility.py | 39 +++++++++ 8 files changed, 223 insertions(+), 34 deletions(-) diff --git a/docs/community_resources/existing_plugins.rst b/docs/community_resources/existing_plugins.rst index ee1f2872..24ba7a50 100644 --- a/docs/community_resources/existing_plugins.rst +++ b/docs/community_resources/existing_plugins.rst @@ -16,5 +16,5 @@ plugin, see the :ref:`plugin author guide`. 
 Plugins we know about
 ---------------------
 
-  - metagraph-cuda
-  - metagraph-igraph
+  - `metagraph-cuda `__
+  - `metagraph-igraph `__
diff --git a/docs/getting_started/installation.rst b/docs/getting_started/installation.rst
index 383b1e62..6c69fd56 100644
--- a/docs/getting_started/installation.rst
+++ b/docs/getting_started/installation.rst
@@ -18,7 +18,7 @@ Installing using conda
 
 ::
 
-    conda install -c conda-forge metagraph
+    conda install -c metagraph metagraph
 
 
 Installing from PyPI
diff --git a/metagraph/algorithms/centrality.py b/metagraph/algorithms/centrality.py
index b18d68ca..675bcceb 100644
--- a/metagraph/algorithms/centrality.py
+++ b/metagraph/algorithms/centrality.py
@@ -4,15 +4,6 @@
 from typing import Tuple
 
 
-@abstract_algorithm("centrality.degree")
-def degree_centrality(graph: Graph, normalize: bool = True) -> NodeMap:
-    """
-    Computes the degree of each node.
-    If normalize is True, the value is scaled by (# of nodes - 1)
-    """
-    pass # pragma: no cover
-
-
 @abstract_algorithm("centrality.betweenness")
 def betweenness_centrality(
     graph: Graph(edge_type="map", edge_dtype={"int", "float"}),
@@ -47,7 +38,6 @@ def pagerank(
 def closeness_centrality(
     graph: Graph(edge_type="map", edge_dtype={"int", "float"}),
     nodes: mg.Optional[NodeSet] = None,
-    normalize: bool = True,
 ) -> NodeMap:
     pass # pragma: no cover
diff --git a/metagraph/algorithms/utility.py b/metagraph/algorithms/utility.py
index b2d9ff88..ffe1cc35 100644
--- a/metagraph/algorithms/utility.py
+++ b/metagraph/algorithms/utility.py
@@ -56,6 +56,13 @@ def edgemap_from_edgeset(edgeset: EdgeSet, default_value: Any) -> EdgeMap:
     pass # pragma: no cover
 
 
+@abstract_algorithm("util.graph.degree")
+def graph_degree(
+    graph: Graph, in_edges: bool = False, out_edges: bool = True,
+) -> NodeMap:
+    pass # pragma: no cover
+
+
 @abstract_algorithm("util.graph.aggregate_edges")
 def graph_aggregate_edges(
     graph: Graph(edge_type="map"),
diff --git a/metagraph/plugins/networkx/algorithms.py b/metagraph/plugins/networkx/algorithms.py
index 395441f6..2d4988d5 100644
--- a/metagraph/plugins/networkx/algorithms.py
+++ b/metagraph/plugins/networkx/algorithms.py
@@ -168,23 +168,36 @@ def nx_betweenness_centrality(
 
     @concrete_algorithm("centrality.closeness")
     def nx_closeness_centrality(
-        graph: NetworkXGraph, nodes: mg.Optional[PythonNodeSet], normalize: bool,
+        graph: NetworkXGraph, nodes: mg.Optional[PythonNodeSet],
     ) -> PythonNodeMap:
-        pass
+        if nodes is None:
+            result = nx.closeness_centrality(
+                graph.value, distance=graph.edge_weight_label
+            )
+        else:
+            result = {
+                node: nx.closeness_centrality(
+                    graph.value, node, distance=graph.edge_weight_label
+                )
+                for node in nodes.value
+            }
+        return PythonNodeMap(result)
 
-    @concrete_algorithm("centrality.degree")
-    def nx_degree_centrality(graph: NetworkXGraph, normalize: bool,) -> PythonNodeMap:
-        pass
-
     @concrete_algorithm("centrality.eigenvector")
-    def nx_eigenvector_centrality(graph: NetworkXGraph,) -> PythonNodeMap:
-        pass
+    def nx_eigenvector_centrality(
+        graph: NetworkXGraph, maxiter: int = 100, tol: float = 1e-6
+    ) -> PythonNodeMap:
+        result = nx.eigenvector_centrality(
+            graph.value, maxiter, tol, weight=graph.edge_weight_label
+        )
+        return PythonNodeMap(result)
 
     @concrete_algorithm("centrality.hits")
     def nx_hits_centrality(
         graph: NetworkXGraph, max_iter: int, tol: float, normalize: bool,
     ) -> Tuple[PythonNodeMap, PythonNodeMap]:
-        pass
+        hubs, authority = nx.hits(graph.value, max_iter, tol, normalized=normalize)
+        return PythonNodeMap(hubs), PythonNodeMap(authority)
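For orientation, here is a minimal standalone sketch of the plain NetworkX calls that the three concrete algorithms above delegate to. It is illustrative only, not part of the patch; the toy graph and the "weight" edge label are assumptions.

import networkx as nx

# Small strongly connected digraph so the iterative methods converge
g = nx.DiGraph()
g.add_weighted_edges_from(
    [(0, 1, 2.0), (1, 2, 1.0), (2, 0, 3.0), (1, 3, 4.0), (3, 0, 1.0)]
)

# centrality.closeness -> nx.closeness_centrality; the edge weight label is
# passed as `distance` so shortest paths respect weights
closeness = nx.closeness_centrality(g, distance="weight")

# centrality.eigenvector -> nx.eigenvector_centrality
eigen = nx.eigenvector_centrality(g, max_iter=100, tol=1e-6, weight="weight")

# centrality.hits -> nx.hits, which returns the (hubs, authorities) pair
hubs, authorities = nx.hits(g, max_iter=100, tol=1e-8, normalized=True)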
@concrete_algorithm("traversal.bfs_iter") def nx_breadth_first_search( @@ -249,6 +262,22 @@ def nx_min_cut( ) return flow_value, cut_graph + @concrete_algorithm("util.graph.degree") + def nx_graph_degree( + graph: NetworkXGraph, in_edges: bool, out_edges: bool + ) -> PythonNodeMap: + if in_edges and out_edges: + ins = graph.value.in_degree() + outs = graph.value.out_degree() + d = {n: ins[n] + o for n, o in outs} + elif in_edges: + d = dict(graph.value.in_degree()) + elif out_edges: + d = dict(graph.value.out_degree()) + else: + d = {n: 0 for n in graph.value.nodes()} + return PythonNodeMap(d) + @concrete_algorithm("util.graph.aggregate_edges") def nx_graph_aggregate_edges( graph: NetworkXGraph, diff --git a/metagraph/tests/algorithms/test_centrality.py b/metagraph/tests/algorithms/test_centrality.py index 6dfb6850..34211591 100644 --- a/metagraph/tests/algorithms/test_centrality.py +++ b/metagraph/tests/algorithms/test_centrality.py @@ -3,13 +3,10 @@ import networkx as nx import numpy as np from . import MultiVerify +from metagraph.plugins.networkx.types import NetworkXGraph -def test_degree_centrality(default_plugin_resolver): - pytest.xfail() - - -def test_betweenness_centrality_single_hub(default_plugin_resolver): +def build_standard_graph(): r""" 0 <--2-- 1 5 --10-> 6 | ^ | ^ ^ / @@ -19,7 +16,6 @@ def test_betweenness_centrality_single_hub(default_plugin_resolver): v v / v 3 --8--> 4 <--4-- 2 --6--> 7 """ - dpr = default_plugin_resolver ebunch = [ (0, 3, 1), (1, 0, 2), @@ -35,7 +31,12 @@ def test_betweenness_centrality_single_hub(default_plugin_resolver): ] nx_graph = nx.DiGraph() nx_graph.add_weighted_edges_from(ebunch) - graph = dpr.wrappers.Graph.NetworkXGraph(nx_graph) + return NetworkXGraph(nx_graph) + + +def test_betweenness_centrality_single_hub(default_plugin_resolver): + dpr = default_plugin_resolver + graph = build_standard_graph() nodes = dpr.wrappers.NodeSet.PythonNodeSet({0, 1, 2, 3, 4, 5, 6, 7}) expected_answer_unwrapped = { 0: 1.0, @@ -175,12 +176,74 @@ def test_pagerank_centrality(default_plugin_resolver): def test_closeness_centrality(default_plugin_resolver): - pytest.xfail() + dpr = default_plugin_resolver + graph = build_standard_graph() + nodes = dpr.wrappers.NodeSet.PythonNodeSet({0, 1, 2, 3, 4, 5, 6, 7}) + expected = dpr.wrappers.NodeMap.PythonNodeMap( + { + 0: 0.051948051948051945, + 1: 0.03809523809523809, + 2: 0.02990033222591362, + 3: 0.14285714285714285, + 4: 0.08035714285714285, + 5: 0.06679035250463822, + 6: 0.04250295159386069, + 7: 0.03271028037383177, + } + ) + MultiVerify(dpr).compute("centrality.closeness", graph).assert_equal(expected) + MultiVerify(dpr).compute("centrality.closeness", graph, nodes).assert_equal( + expected + ) def test_eigenvector_centrality(default_plugin_resolver): - pytest.xfail() + dpr = default_plugin_resolver + graph = build_standard_graph() + expected = dpr.wrappers.NodeMap.PythonNodeMap( + { + 0: 3.718912841322492e-24, + 1: 4.4815545088477956e-24, + 2: 0.5668908376472616, + 3: 1.54302627577451e-24, + 4: 0.2304676227496987, + 5: 0.4988989293616064, + 6: 0.5070599863741589, + 7: 0.34570143412454807, + } + ) + MultiVerify(dpr).compute("centrality.eigenvector", graph).assert_equal( + expected, rel_tol=1e-3 + ) def test_hits_centrality(default_plugin_resolver): - pytest.xfail() + dpr = default_plugin_resolver + graph = build_standard_graph() + hubs = dpr.wrappers.NodeMap.PythonNodeMap( + { + 0: 1.0693502568464412e-135, + 1: 0.0940640958864079, + 2: 0.3219827031019462, + 3: 0.36559982252958123, + 4: 0.2183519269850825, + 5: 
1.069350256846441e-11, + 6: 1.451486288792823e-06, + 7: 0.0, + } + ) + authority = dpr.wrappers.NodeMap.PythonNodeMap( + { + 0: 0.014756025909040777, + 1: 0.2007333553742929, + 2: 1.5251309332182024e-06, + 3: 1.2359669426636484e-134, + 4: 0.35256375000871987, + 5: 0.2804151003457033, + 6: 1.2359669426636479e-11, + 7: 0.15153024321895017, + } + ) + MultiVerify(dpr).compute("centrality.hits", graph, tol=1e-06).assert_equal( + (hubs, authority), rel_tol=1e-3 + ) diff --git a/metagraph/tests/algorithms/test_subgraph.py b/metagraph/tests/algorithms/test_subgraph.py index 343d41d1..63f138ed 100644 --- a/metagraph/tests/algorithms/test_subgraph.py +++ b/metagraph/tests/algorithms/test_subgraph.py @@ -4,7 +4,7 @@ from . import MultiVerify -def test_extract_graph(default_plugin_resolver): +def test_extract_subgraph(default_plugin_resolver): r""" 0 ---2-- 1 5 --10-- 6 / | | / @@ -114,7 +114,68 @@ def cmp_func(nodeset): def test_subisomorphic(default_plugin_resolver): - pytest.xfail() + dpr = default_plugin_resolver + # 0 1 2 3 4 | 5 6 7 8 0 1 2 3 4 + # 0 1 1 - 1 - | - 1 - - 2 -> 0 - - - 1 1 + # 1 - - 1 - - | - - - - 4 -> 1 - - 1 - - + # 2 1 1 - - - | 1 - 1 1 3 -> 2 1 - 1 - 1 + # 3 - 1 1 - - | - - - 1 0 -> 3 - - 1 - 1 + # 4 1 - - - - | - 1 - 1 1 -> 4 - - - 1 - + # ------------- + # 5 - - 1 - 1 - - - - + # 6 - - - - 1 1 - 1 - + # 7 - 1 - - 1 - 1 1 - + # 8 - - 1 - - 1 1 - - + big_g = nx.DiGraph() + big_g.add_edges_from( + [ + (0, 0), + (0, 1), + (0, 3), + (0, 6), + (1, 2), + (2, 0), + (2, 1), + (2, 5), + (2, 7), + (2, 8), + (3, 1), + (3, 2), + (3, 8), + (4, 0), + (4, 6), + (4, 8), + (5, 2), + (5, 4), + (6, 4), + (6, 5), + (6, 7), + (7, 1), + (7, 4), + (7, 6), + (7, 7), + (8, 2), + (8, 5), + (8, 6), + ] + ) + g1 = nx.DiGraph() + g1.add_edges_from( + [(0, 0), (0, 1), (0, 3), (1, 2), (2, 0), (2, 1), (3, 1), (3, 2), (4, 0)] + ) + g2 = nx.DiGraph() + g2.add_edges_from( + [(0, 3), (0, 4), (1, 2), (2, 0), (2, 2), (2, 4), (3, 2), (3, 4), (4, 3)] + ) + big_graph = dpr.wrappers.Graph.NetworkXGraph(big_g) + graph1 = dpr.wrappers.Graph.NetworkXGraph(g1) + graph2 = dpr.wrappers.Graph.NetworkXGraph(g2) + MultiVerify(dpr).compute("subgraph.subisomorphic", big_graph, graph1).assert_equal( + True + ) + MultiVerify(dpr).compute("subgraph.subisomorphic", big_graph, graph2).assert_equal( + True + ) def test_node_sampling(default_plugin_resolver): diff --git a/metagraph/tests/algorithms/test_utility.py b/metagraph/tests/algorithms/test_utility.py index b064a116..a74c490a 100644 --- a/metagraph/tests/algorithms/test_utility.py +++ b/metagraph/tests/algorithms/test_utility.py @@ -101,6 +101,45 @@ def test_nodemap_reduce(default_plugin_resolver): ) +def test_graph_degree(default_plugin_resolver): + dpr = default_plugin_resolver + # 0 1 2 3 4 + # 0 - 5 - - 2 + # 1 - - 2 - - + # 2 1 - 4 - - + # 3 1 - - - 7 + # 4 6 - - - - + g = nx.DiGraph() + g.add_weighted_edges_from( + [ + (0, 1, 5), + (0, 4, 2), + (1, 2, 2), + (2, 0, 1), + (2, 2, 4), + (3, 0, 1), + (3, 4, 7), + (4, 0, 6), + ] + ) + graph = dpr.wrappers.Graph.NetworkXGraph(g) + e_out = dpr.wrappers.NodeMap.PythonNodeMap({0: 2, 1: 1, 2: 2, 3: 2, 4: 1}) + e_in = dpr.wrappers.NodeMap.PythonNodeMap({0: 3, 1: 1, 2: 2, 3: 0, 4: 2}) + e_all = dpr.wrappers.NodeMap.PythonNodeMap({0: 5, 1: 2, 2: 4, 3: 2, 4: 3}) + e_none = dpr.wrappers.NodeMap.PythonNodeMap({i: 0 for i in range(5)}) + mv = MultiVerify(dpr) + mv.compute("util.graph.degree", graph).assert_equal(e_out) + mv.compute("util.graph.degree", graph, in_edges=True, out_edges=False).assert_equal( + e_in + ) + 
mv.compute("util.graph.degree", graph, in_edges=True, out_edges=True).assert_equal( + e_all + ) + mv.compute( + "util.graph.degree", graph, in_edges=False, out_edges=False + ).assert_equal(e_none) + + def test_graph_aggregate_edges_directed(default_plugin_resolver): r""" 0 <--2-- 1 5 --10-> 6 From 61b69317b1db1cdee9e1cc727d6365b7b7ead6d2 Mon Sep 17 00:00:00 2001 From: Jim Kitchen Date: Fri, 16 Oct 2020 12:08:48 -0500 Subject: [PATCH 4/7] Add sampling functions and tests --- metagraph/plugins/networkx/algorithms.py | 135 +++++++++++++++++++- metagraph/tests/algorithms/test_subgraph.py | 112 +++++++++++++++- 2 files changed, 238 insertions(+), 9 deletions(-) diff --git a/metagraph/plugins/networkx/algorithms.py b/metagraph/plugins/networkx/algorithms.py index 2d4988d5..71128981 100644 --- a/metagraph/plugins/networkx/algorithms.py +++ b/metagraph/plugins/networkx/algorithms.py @@ -2,6 +2,7 @@ from metagraph import concrete_algorithm, NodeID from metagraph.plugins import has_networkx, has_community, has_pandas from typing import Tuple, Any, Callable +import random if has_networkx: @@ -339,11 +340,61 @@ def nx_greedy_coloring(graph: NetworkXGraph) -> Tuple[PythonNodeMap, int]: @concrete_algorithm("subgraph.sample.node_sampling") def nx_node_sampling(graph: NetworkXGraph, p: float) -> NetworkXGraph: - pass # pragma: no cover + if p <= 0 or p > 1: + raise ValueError(f"Probability `p` must be between 0 and 1, found {p}") + aprops = NetworkXGraph.Type.compute_abstract_properties( + graph, {"node_type", "edge_type"} + ) + g = graph.value + ns = set(n for n in g.nodes() if random.random() < p) + subgraph = type(g)() + if aprops["node_type"] == "map": + for n in ns: + subgraph.add_node(n, **g.nodes[n]) + else: + subgraph.add_nodes_from(ns) + # Add edges which exist between selected nodes + if aprops["edge_type"] == "map": + for n in ns: + for nbr in set(g.neighbors(n)) & ns: + subgraph.add_edge(n, nbr, **g[n][nbr]) + else: + for n in ns: + for nbr in set(g.neighbors(n)) & ns: + subgraph.add_edge(n, nbr) + return NetworkXGraph( + subgraph, + node_weight_label=graph.node_weight_label, + edge_weight_label=graph.edge_weight_label, + ) @concrete_algorithm("subgraph.sample.edge_sampling") def nx_edge_sampling(graph: NetworkXGraph, p: float) -> NetworkXGraph: - pass # pragma: no cover + if p <= 0 or p > 1: + raise ValueError(f"Probability `p` must be between 0 and 1, found {p}") + aprops = NetworkXGraph.Type.compute_abstract_properties( + graph, {"node_type", "edge_type"} + ) + g = graph.value + es = set(e for e in g.edges() if random.random() < p) + ns = set(src for src, dst in es) | set(dst for src, dst in es) + subgraph = type(g)() + if aprops["node_type"] == "map": + for n in ns: + subgraph.add_node(n, **g.nodes[n]) + else: + subgraph.add_nodes_from(ns) + # Add edges which exist between selected nodes + if aprops["edge_type"] == "map": + for e in es: + subgraph.add_edge(*e, **g.edges[e]) + else: + subgraph.add_edges_from(es) + return NetworkXGraph( + subgraph, + node_weight_label=graph.node_weight_label, + edge_weight_label=graph.edge_weight_label, + ) @concrete_algorithm("subgraph.sample.ties") def nx_ties(graph: NetworkXGraph, p: float) -> NetworkXGraph: @@ -351,7 +402,34 @@ def nx_ties(graph: NetworkXGraph, p: float) -> NetworkXGraph: Totally Induced Edge Sampling method https://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=2743&context=cstech """ - pass # pragma: no cover + if p <= 0 or p > 1: + raise ValueError(f"Probability `p` must be between 0 and 1, found {p}") + aprops = 
NetworkXGraph.Type.compute_abstract_properties( + graph, {"node_type", "edge_type"} + ) + g = graph.value + es = set(e for e in g.edges() if random.random() < p) + ns = set(src for src, dst in es) | set(dst for src, dst in es) + subgraph = type(g)() + if aprops["node_type"] == "map": + for n in ns: + subgraph.add_node(n, **g.nodes[n]) + else: + subgraph.add_nodes_from(ns) + # Add edges which exist between selected nodes + if aprops["edge_type"] == "map": + for n in ns: + for nbr in set(g.neighbors(n)) & ns: + subgraph.add_edge(n, nbr, **g[n][nbr]) + else: + for n in ns: + for nbr in set(g.neighbors(n)) & ns: + subgraph.add_edge(n, nbr) + return NetworkXGraph( + subgraph, + node_weight_label=graph.node_weight_label, + edge_weight_label=graph.edge_weight_label, + ) @concrete_algorithm("subgraph.sample.random_walk") def nx_random_walk_sampling( @@ -370,7 +448,56 @@ def nx_random_walk_sampling( When resetting the walk, if start_node is specified, always reset to this node. If not specified, every reset picks a new node in the graph at random. """ - pass # pragma: no cover + if jump_probability <= 0 or jump_probability > 1: + raise ValueError( + f"`jump_probability` must be between 0 and 1, found {jump_probability}" + ) + if num_steps is None and num_nodes is None and num_edges is None: + raise ValueError( + "Must specify at least one of num_steps, num_nodes, or num_edges" + ) + + g = graph.value + out_g = type(g)() + if start_node is None: + possible_nodes = list(g.nodes()) + current_node = random.choice(possible_nodes) + else: + current_node = start_node + if len(g[current_node]) == 0: + # We always start with this, and there are no out edges, so we will never finish + out_g.add_node(current_node, **g.nodes[current_node]) + return NetworkXGraph(out_g) + + out_g.add_node(current_node, **g.nodes[current_node]) + + counter = 0 + while True: + counter += 1 + + nbrs = list(g[current_node]) + if len(nbrs) > 0 and random.random() >= jump_probability: + prev_node = current_node + current_node = random.choice(nbrs) + out_g.add_node(current_node, **g.nodes[current_node]) + out_g.add_edge( + prev_node, current_node, **g.edges[prev_node, current_node] + ) + else: + # Reset + if start_node is None: + current_node = random.choice(possible_nodes) + else: + current_node = start_node + + if num_steps is not None and counter >= num_steps: + break + if num_nodes is not None and len(out_g) >= num_nodes: + break + if num_edges is not None and out_g.size() >= num_edges: + break + + return NetworkXGraph(out_g) if has_networkx and has_community: diff --git a/metagraph/tests/algorithms/test_subgraph.py b/metagraph/tests/algorithms/test_subgraph.py index 63f138ed..6496f6c1 100644 --- a/metagraph/tests/algorithms/test_subgraph.py +++ b/metagraph/tests/algorithms/test_subgraph.py @@ -179,16 +179,118 @@ def test_subisomorphic(default_plugin_resolver): def test_node_sampling(default_plugin_resolver): - pytest.xfail() + dpr = default_plugin_resolver + # Build a complete graph, then add a bunch of disconnected nodes + # Node Sampling should pick some of the disconnected nodes for the subgraph + g = nx.complete_graph(25) + g.add_nodes_from(range(25, 50)) + graph = dpr.wrappers.Graph.NetworkXGraph(g) + + def cmp_func(subgraph): + subg = subgraph.value + assert 0 < len(subg.nodes()) < len(g.nodes()), f"# nodes = {len(subg.nodes())}" + # Verify some of the isolated nodes were chosen + assert subg.nodes() & set(range(25, 50)), f"no isolated nodes found in subgraph" + # Verify edges from complete portion of the graph were added + 
complete_nodes = subg.nodes() & set(range(25)) + assert len(complete_nodes) > 0, f"no complete nodes found in subgraph" + for n in complete_nodes: + assert ( + len(subg[n]) == len(complete_nodes) - 1 + ) # definition of complete graph + + results = MultiVerify(dpr).compute("subgraph.sample.node_sampling", graph, 0.4) + results.normalize(dpr.wrappers.Graph.NetworkXGraph).custom_compare(cmp_func) def test_edge_sampling(default_plugin_resolver): - pytest.xfail() + dpr = default_plugin_resolver + # Build a complete graph, then add a bunch of disconnected nodes + # Edge Sampling should not pick any of the disconnected nodes for the subgraph + # For the nodes attached to chosen edges, additional edges should not be added to the subgraph + g = nx.complete_graph(25) + g.add_nodes_from(range(25, 50)) + graph = dpr.wrappers.Graph.NetworkXGraph(g) + + def cmp_func(subgraph): + subg = subgraph.value + assert 0 < len(subg.nodes()) < len(g.nodes()), f"# nodes = {len(subg.nodes())}" + # Verify none of the isolated nodes were chosen + assert not subg.nodes() & set( + range(25, 50) + ), f"isolated nodes found in subgraph" + # Verify not all edges from complete portion of the graph were added + possible_edges = len(subg.nodes()) - 1 + for n in subg.nodes(): + assert len(subg[n]) < possible_edges, f"all possible edges were added" + + results = MultiVerify(dpr).compute("subgraph.sample.edge_sampling", graph, 0.4) + results.normalize(dpr.wrappers.Graph.NetworkXGraph).custom_compare(cmp_func) def test_totally_induced_edge_sampling(default_plugin_resolver): - pytest.xfail() + dpr = default_plugin_resolver + # Build a complete graph, then add a bunch of disconnected nodes + # TIES should not pick any of the disconnected nodes for the subgraph + # For the nodes attached to chosen edges, all additional edges should be added to the subgraph + g = nx.complete_graph(25) + g.add_nodes_from(range(25, 50)) + graph = dpr.wrappers.Graph.NetworkXGraph(g) + + def cmp_func(subgraph): + subg = subgraph.value + assert 0 < len(subg.nodes()) < len(g.nodes()), f"# nodes = {len(subg.nodes())}" + # Verify none of the isolated nodes were chosen + assert not subg.nodes() & set( + range(25, 50) + ), f"isolated nodes found in subgraph" + # Verify all edges from complete portion of the graph were added + possible_edges = len(subg.nodes()) - 1 + for n in subg.nodes(): + assert len(subg[n]) == possible_edges, f"not all possible edges were added" + + results = MultiVerify(dpr).compute("subgraph.sample.ties", graph, 0.4) + results.normalize(dpr.wrappers.Graph.NetworkXGraph).custom_compare(cmp_func) -def test_random_walk_sampling(default_plugin_resolver): - pytest.xfail() +def test_random_walk_sampling_1(default_plugin_resolver): + dpr = default_plugin_resolver + # Build a long chain so random sampling has no randomness + g = nx.Graph() + for i in range(50): + g.add_edge(i, i + 1) + graph = dpr.wrappers.Graph.NetworkXGraph(g) + + def cmp_func(subgraph): + subg = subgraph.value + assert set(subg.nodes()) == set(range(21)) + + results = MultiVerify(dpr).compute( + "subgraph.sample.random_walk", + graph, + num_edges=20, + start_node=0, + jump_probability=0.015, + ) + results.normalize(dpr.wrappers.Graph.NetworkXGraph).custom_compare(cmp_func) + + +def test_random_walk_sampling_2(default_plugin_resolver): + dpr = default_plugin_resolver + # Build two disconnected components. Randomly sampling should never leave the starting component. 
+ # Keep going until all nodes in the starting component have been visited + g1 = nx.complete_graph(7) + g2 = nx.complete_graph(range(10, 17)) + g = nx.Graph() + g.update(g1) + g.update(g2) + graph = dpr.wrappers.Graph.NetworkXGraph(g) + + def cmp_func(subgraph): + subg = subgraph.value + assert set(subg.nodes()) == set(range(10, 17)) + + results = MultiVerify(dpr).compute( + "subgraph.sample.random_walk", graph, num_nodes=7, start_node=12 + ) + results.normalize(dpr.wrappers.Graph.NetworkXGraph).custom_compare(cmp_func) From 456eb50f0ff4836503d33e102ae2e8a42f7db10a Mon Sep 17 00:00:00 2001 From: Jim Kitchen Date: Fri, 16 Oct 2020 15:18:20 -0500 Subject: [PATCH 5/7] Update docs with new algorithms and types --- docs/user_guide/algorithm_list.rst | 100 ++++++++++++++++++- docs/user_guide/type_list.rst | 24 +++++ metagraph/tests/algorithms/test_embedding.py | 11 +- 3 files changed, 125 insertions(+), 10 deletions(-) diff --git a/docs/user_guide/algorithm_list.rst b/docs/user_guide/algorithm_list.rst index 25d430fa..e741514e 100644 --- a/docs/user_guide/algorithm_list.rst +++ b/docs/user_guide/algorithm_list.rst @@ -47,6 +47,14 @@ Graphs often have natural structure which can be discovered, allowing them to be This algorithms returns the total number of triangles in the graph. +.. py:function:: clustering.coloring.greedy(graph: Graph(is_directed=False)) -> Tuple[NodeMap, int] + + Attempts to find the minimum number of colors required to label the graph such that no connected nodes have the + same color. Color is represented as a value from 0..n. + + :rtype: (color for each node, number of unique colors) + + Traversal --------- @@ -123,6 +131,22 @@ Many algorithms assign a ranking or value to each vertex/node in the graph based This algorithm determines the importance of a given node in the network based on links between important nodes. +.. py:function:: centrality.closeness(graph: Graph(edge_type="map", edge_dtype={"int", "float"}), nodes: Optional[NodeSet] = None) -> NodeMap + + Calculates the closeness centrality metric, which estimates the average distance from a node to all other nodes. + A high closeness score indicates a small average distance to other nodes. + +.. py:function:: centrality.eigenvector(graph: Graph(edge_type="map", edge_dtype={"int", "float"})) -> NodeMap + + Calculates the eigenvector centrality, which estimates the importance of a node in the graph. + +.. py:function:: centrality.hits(graph: Graph(edge_type="map", edge_dtype={"int", "float"}), max_iter: int = 100, tol: float = 1e-05, normalize: bool = True) -> Tuple[NodeMap, NodeMap] + + Hyperlink-Induced Topic Search (HITS) centrality ranks nodes based on incoming and outgoing edges. + + :rtype: (hubs, authority) + + Subgraph -------- @@ -139,6 +163,49 @@ Graphs are often too large to handle, so a portion of the graph is extracted. Of This algorithm finds a maximal subgraph that contains nodes of at least degree *k*. +.. py:function:: subgraph.k_truss(graph: Graph(is_directed=False), k: int) -> Graph + + Finds the subgraph of edges with are part of k-2 triangles. + + +.. py:function:: subgraph.maximal_independent_set(graph: Graph) -> NodeSet + + Finds a maximal set of independent nodes, meaning the nodes in the set share no edges with each other + and no additional nodes in the graph can be added which satisfy this criteria. + + +.. py:function:: subgraph.subisomorphic(graph: Graph, subgraph: Graph) -> bool + + Indicates whether subgraph is an isomorphic subcomponent of graph. + + +.. 
+.. py:function:: subgraph.sample.node_sampling(graph: Graph, p: float = 0.20) -> Graph
+
+    Returns a subgraph created by randomly sampling nodes and including edges which exist between sampled
+    nodes in the original graph.
+
+
+.. py:function:: subgraph.sample.edge_sampling(graph: Graph, p: float = 0.20) -> Graph
+
+    Returns a subgraph created by randomly sampling edges and including both node endpoints.
+
+
+.. py:function:: subgraph.sample.ties(graph: Graph, p: float = 0.20) -> Graph
+
+    Totally Induced Edge Sampling extends edge sampling by also including any edges between the nodes
+    which exist in the original graph. See the `paper <https://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=2743&context=cstech>`__
+    for more details.
+
+
+.. py:function:: subgraph.sample.random_walk(graph: Graph, num_steps: Optional[int] = None, num_nodes: Optional[int] = None, num_edges: Optional[int] = None, jump_probability: float = 0.15, start_node: Optional[NodeID] = None) -> Graph
+
+    Samples the graph using a random walk. For each step, there is a jump_probability to reset the walk.
+    When resetting the walk, if the start_node is specified, it always returns to this node. Otherwise a random
+    node is chosen for each resetting. The sampling stops when any of num_steps, num_nodes, or num_edges is
+    reached.
+
+
 
 Bipartite
 ---------
 
@@ -158,7 +225,16 @@ Algorithms pertaining to the flow capacity of edges.
 
     Compute the maximum flow possible from source_node to target_node
 
-    :rtype: (max_flow_rate, compute_flow_graph)
+    :rtype: (max flow rate, computed flow graph)
+
+
+.. py:function:: flow.min_cut(graph: Graph(edge_type="map", edge_dtype={"int", "float"}), source_node: NodeID, target_node: NodeID) -> Tuple[float, Graph]
+
+    Compute the minimum cut separating source_node from target_node: the set of edges which disconnects
+    the two nodes with the minimum total edge weight.
+    By the max-flow min-cut theorem, this total equals the maximum flow.
+
+    :rtype: (sum of cut edge weights, graph containing cut edges)
 
 
 Utility
 -------
 
@@ -198,6 +274,10 @@ These algorithms are small utility functions which perform common operations nee
 
     Converts an EdgeSet into an EdgeMap by giving each edge a default value.
 
 
+.. py:function:: util.graph.degree(graph: Graph, in_edges: bool = False, out_edges: bool = True) -> NodeMap
+
+    Computes the degree of each node. ``in_edges`` and ``out_edges`` can be used to control which degree is computed.
+
 .. py:function:: util.graph.aggregate_edges(graph: Graph(edge_type="map"), func: Callable[[Any, Any], Any], initial_value: Any, in_edges: bool = False, out_edges: bool = True) -> NodeMap
 
     Aggregates the edge weights around a node, returning a single value per node.
@@ -225,3 +305,21 @@ These algorithms are small utility functions which perform common operations nee
 
     Collapse a Graph into a smaller Graph by combining clusters of nodes into a single node.
     ``labels`` indicates the node groupings. ``aggregator`` indicates how to combine edge weights.
+
+.. py:function:: util.graph.isomorphic(g1: Graph, g2: Graph) -> bool
+
+    Indicates whether ``g1`` and ``g2`` are isomorphic.
+
+.. py:function:: util.node_embedding.apply(embedding: NodeEmbedding, nodes: Vector) -> Matrix
+
+    Returns a dense matrix given an embedding and a vector of NodeIDs.
+
+
+Embedding
+---------
+
+Embeddings convert graph nodes or whole graphs into dense vector representations.
+
+.. py:function:: embedding.train.node2vec(graph: Graph, p: float, q: float, walks_per_node: int, walk_length: int, embedding_size: int, epochs: int, learning_rate: float) -> NodeEmbedding
+
+    Computes the node2vec embedding.
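The random-walk semantics above are subtle enough to warrant a sketch. The following is a
plain-NetworkX illustration of the documented behavior, not the implementation used by any plugin;
details such as whether a jump counts as a step are assumptions:

    import random
    import networkx as nx

    def random_walk_sample(g, num_steps=None, num_nodes=None, num_edges=None,
                           jump_probability=0.15, start_node=None):
        """Sketch of subgraph.sample.random_walk semantics (illustrative only)."""
        def reset():
            return start_node if start_node is not None else random.choice(list(g))

        node = reset()
        nodes, edges, steps = {node}, set(), 0

        def done():
            # First threshold reached wins; at least one must be provided
            return ((num_steps is not None and steps >= num_steps)
                    or (num_nodes is not None and len(nodes) >= num_nodes)
                    or (num_edges is not None and len(edges) >= num_edges))

        while not done():
            steps += 1  # assumption: a jump counts as a step
            if random.random() < jump_probability:
                node = reset()
            else:
                neighbors = list(g[node])
                if neighbors:
                    nxt = random.choice(neighbors)
                    edges.add(tuple(sorted((node, nxt))))
                    node = nxt
            nodes.add(node)

        sampled = nx.Graph()
        sampled.add_nodes_from(nodes)
        sampled.add_edges_from(edges)
        return sampled

    # On a 51-node chain starting at node 0, collecting 20 unique edges
    # must visit exactly nodes 0-20 (cf. the random walk test earlier in this series)
    chain = nx.path_graph(51)
    sample = random_walk_sample(chain, num_edges=20, start_node=0, jump_probability=0.015)
    assert set(sample.nodes()) == set(range(21))
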
diff --git a/docs/user_guide/type_list.rst b/docs/user_guide/type_list.rst
index ce915b82..c3873cfe 100644
--- a/docs/user_guide/type_list.rst
+++ b/docs/user_guide/type_list.rst
@@ -449,3 +449,27 @@ If any node has a weight, all nodes must have a weight.
 This includes nodes from both node sets 0 and 1.
 
 If any edge has a weight, all edges must have a weight.
+
+
+NodeEmbedding
+-------------
+
+Holds an embedding for each node, extracted from a graph.
+Conceptually, this can be thought of as a dense matrix with each row applying to a single NodeID.
+
+Abstract Properties:
+
+- matrix_dtype: ["str", "float", "int", "bool"]
+
+→ NumpyNodeEmbedding
+~~~~~~~~~~~~~~~~~~~~
+
+:ConcreteType: ``NumpyNodeEmbedding.Type``
+:value_type: ``NumpyNodeEmbedding``
+:data objects:
+    ``.matrix``: ``NumpyMatrix``
+
+    ``.nodes``: optional ``NumpyNodeMap``
+
+If ``nodes`` is None, the nodes are assumed to be fully sequential, corresponding to the height
+of the matrix.
diff --git a/metagraph/tests/algorithms/test_embedding.py b/metagraph/tests/algorithms/test_embedding.py
index 61bbe472..298abad4 100644
--- a/metagraph/tests/algorithms/test_embedding.py
+++ b/metagraph/tests/algorithms/test_embedding.py
@@ -7,7 +7,6 @@
 
 def test_node2vec(default_plugin_resolver):
     dpr = default_plugin_resolver
-
     # make uneven barbell graph
     a_nodes = np.arange(10)
     b_nodes = np.arange(80, 100)
@@ -18,16 +17,14 @@
         nx_graph.add_edge(node, node + 1)
     nx_graph.add_edge(50, 80)  # have non-consecutive node ids
     graph = dpr.wrappers.Graph.NetworkXGraph(nx_graph)
-
     mv = MultiVerify(dpr)
-
     p = 1.0
     q = 0.5
     walks_per_node = 8
     walk_length = 8
     embedding_size = 25
-    epochs = 10_000
-    learning_rate = 1e-3
+    epochs = 2
+    learning_rate = 5e-2
     embedding = mv.compute(
         "embedding.train.node2vec",
         graph,
         p,
         q,
         walks_per_node,
         walk_length,
         embedding_size,
         epochs=epochs,
         learning_rate=learning_rate,
     )
-
     euclidean_dist = lambda a, b: np.linalg.norm(a - b)
 
     def cmp_func(embedding):
         np_matrix = embedding.matrix.as_dense(copy=False)
         a_centroid = np_matrix[a_indices].mean(0)
         b_centroid = np_matrix[b_indices].mean(0)
-
         for a_index in a_indices:
             for b_index in b_indices:
                 a_vector = np_matrix[a_index]
                 b_vector = np_matrix[b_index]
-
                 a_to_a_center = euclidean_dist(a_vector, a_centroid)
                 b_to_b_center = euclidean_dist(b_vector, b_centroid)
                 a_to_b = euclidean_dist(a_vector, b_vector)
-
                 assert a_to_a_center < a_to_b
                 assert b_to_b_center < a_to_b

From 362f4a7f13e486d8c538ff3d97b18ce3614b61d2 Mon Sep 17 00:00:00 2001
From: Jim Kitchen
Date: Fri, 16 Oct 2020 18:33:44 -0500
Subject: [PATCH 6/7] Bugfix for translator

---
 metagraph/plugins/scipy/translators.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/metagraph/plugins/scipy/translators.py b/metagraph/plugins/scipy/translators.py
index e972713d..c9f81e23 100644
--- a/metagraph/plugins/scipy/translators.py
+++ b/metagraph/plugins/scipy/translators.py
@@ -104,7 +104,9 @@ def edgemap_from_graphblas(x: GrblasEdgeMap, **props) -> ScipyEdgeMap:
     gm = x.value[active_nodes, active_nodes].new()
     rows, cols, vals = gm.to_values()
     sm = ss.coo_matrix(
-        (vals, (rows, cols)), dtype=dtype_grblas_to_mg[x.value.dtype.name]
+        (vals, (rows, cols)),
+        dtype=dtype_grblas_to_mg[x.value.dtype.name],
+        shape=gm.shape,
     )
     return ScipyEdgeMap(sm, node_list=active_nodes)
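The ``shape=gm.shape`` fix above matters because ``scipy.sparse.coo_matrix`` infers the shape from
the largest row and column index when no shape is given, so trailing rows and columns that hold no
values are silently dropped. A minimal standalone illustration (not metagraph code):

    import numpy as np
    import scipy.sparse as ss

    rows = np.array([0, 1])
    cols = np.array([1, 0])
    vals = np.array([1.0, 2.0])

    # Without an explicit shape, the shape is inferred as (2, 2) from the indices...
    inferred = ss.coo_matrix((vals, (rows, cols)))
    assert inferred.shape == (2, 2)

    # ...even if the values logically came from a 4x4 matrix whose last
    # rows/columns are empty; passing shape= preserves them
    explicit = ss.coo_matrix((vals, (rows, cols)), shape=(4, 4))
    assert explicit.shape == (4, 4)
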
From f1718ffe2f5098dc8ab4aaff865dd22e6ae5e738 Mon Sep 17 00:00:00 2001
From: Jim Kitchen
Date: Thu, 22 Oct 2020 15:07:34 -0500
Subject: [PATCH 7/7] Updates based on PR feedback

---
 docs/user_guide/algorithm_list.rst            | 20 +++++++++----------
 metagraph/algorithms/centrality.py            |  6 ++++--
 metagraph/plugins/networkx/algorithms.py      |  8 ++++----
 metagraph/tests/algorithms/test_centrality.py | 12 +++++------
 4 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/docs/user_guide/algorithm_list.rst b/docs/user_guide/algorithm_list.rst
index e741514e..1cd92e56 100644
--- a/docs/user_guide/algorithm_list.rst
+++ b/docs/user_guide/algorithm_list.rst
@@ -12,7 +12,7 @@ Graphs often have natural structure which can be discovered, allowing them to be
 .. py:function:: clustering.connected_components(graph: Graph(is_directed=False)) -> NodeMap
 
-    The connected components algorithm groups nodes of an **undirected** graph into subgraphs where all subgraph nodes
+    The connected components algorithm groups nodes of an undirected graph into subgraphs where all subgraph nodes
     are reachable within a component.
 
     :rtype: a dense NodeMap where each node is assigned an integer indicating the component.
@@ -160,12 +160,12 @@ Graphs are often too large to handle, so a portion of the graph is extracted. Of
 .. py:function:: subgraph.k_core(graph: Graph(is_directed=False), k: int) -> Graph
 
-    This algorithm finds a maximal subgraph that contains nodes of at least degree *k*.
+    This algorithm finds a maximal subgraph that contains nodes of at least degree ``k``.
 
 
 .. py:function:: subgraph.k_truss(graph: Graph(is_directed=False), k: int) -> Graph
 
-    Finds the subgraph of edges with are part of k-2 triangles.
+    Finds the maximal subgraph whose edges each participate in at least ``k`` - 2 triangles.
 
 
 .. py:function:: subgraph.maximal_independent_set(graph: Graph) -> NodeSet
@@ -176,7 +176,7 @@ Graphs are often too large to handle, so a portion of the graph is extracted. Of
 .. py:function:: subgraph.subisomorphic(graph: Graph, subgraph: Graph) -> bool
 
-    Indicates whether subgraph is an isomorphic subcomponent of graph.
+    Indicates whether ``subgraph`` is an isomorphic subcomponent of ``graph``.
 
 
 .. py:function:: subgraph.sample.node_sampling(graph: Graph, p: float = 0.20) -> Graph
@@ -199,9 +199,9 @@ Graphs are often too large to handle, so a portion of the graph is extracted. Of
 .. py:function:: subgraph.sample.random_walk(graph: Graph, num_steps: Optional[int] = None, num_nodes: Optional[int] = None, num_edges: Optional[int] = None, jump_probability: float = 0.15, start_node: Optional[NodeID] = None) -> Graph
 
-    Samples the graph using a random walk. For each step, there is a jump_probability to reset the walk.
-    When resetting the walk, if the start_node is specified, it always returns to this node. Otherwise a random
-    node is chosen for each resetting. The sampling stops when any of num_steps, num_nodes, or num_edges is
+    Samples the graph using a random walk. For each step, there is a ``jump_probability`` to reset the walk.
+    When resetting the walk, if the ``start_node`` is specified, it always returns to this node. Otherwise a random
+    node is chosen at each reset. The sampling stops when any of ``num_steps``, ``num_nodes``, or ``num_edges`` is
     reached.
 
 
@@ -223,7 +223,7 @@ Algorithms pertaining to the flow capacity of edges.
 
 .. py:function:: flow.max_flow(graph: Graph(edge_type="map", edge_dtype={"int", "float"}), source_node: NodeID, target_node: NodeID) -> Tuple[float, Graph]
 
-    Compute the maximum flow possible from source_node to target_node
+    Compute the maximum flow possible from ``source_node`` to ``target_node``.
 
     :rtype: (max flow rate, computed flow graph)
 
@@ -244,7 +244,7 @@ These algorithms are small utility functions which perform common operations nee
 
 .. py:function:: util.nodeset.choose_random(x: NodeSet, k: int) -> NodeSet
 
-    Given a set of nodes, choose k random nodes (no duplicates).
+    Given a set of nodes, choose ``k`` random nodes (no duplicates).
 
 
 .. py:function:: util.nodeset.from_vector(x: Vector) -> NodeSet
@@ -322,4 +322,4 @@ Embeddings convert graph nodes or whole graphs into dense vector representations
 
-    Computes the node2vec embedding.
+    Computes the `node2vec `__ embedding.
diff --git a/metagraph/algorithms/centrality.py b/metagraph/algorithms/centrality.py
index 675bcceb..8515ebd5 100644
--- a/metagraph/algorithms/centrality.py
+++ b/metagraph/algorithms/centrality.py
@@ -45,6 +45,8 @@ def closeness_centrality(
 @abstract_algorithm("centrality.eigenvector")
 def eigenvector_centrality(
     graph: Graph(edge_type="map", edge_dtype={"int", "float"}),
+    maxiter: int = 50,
+    tolerance: float = 1e-05,
 ) -> NodeMap:
     pass  # pragma: no cover
 
@@ -52,8 +54,8 @@ def eigenvector_centrality(
 @abstract_algorithm("centrality.hits")
 def hits_centrality(
     graph: Graph(edge_type="map", edge_dtype={"int", "float"}, is_directed=True),
-    max_iter: int = 100,
-    tol: float = 1e-05,
+    maxiter: int = 50,
+    tolerance: float = 1e-05,
     normalize: bool = True,
 ) -> Tuple[NodeMap, NodeMap]:
     """Return (hubs, authority)"""
diff --git a/metagraph/plugins/networkx/algorithms.py b/metagraph/plugins/networkx/algorithms.py
index 71128981..5771b447 100644
--- a/metagraph/plugins/networkx/algorithms.py
+++ b/metagraph/plugins/networkx/algorithms.py
@@ -186,18 +186,18 @@ def nx_closeness_centrality(
 @concrete_algorithm("centrality.eigenvector")
 def nx_eigenvector_centrality(
-    graph: NetworkXGraph, maxiter: bool = 100, tol: float = 1e-6
+    graph: NetworkXGraph, maxiter: int, tolerance: float
 ) -> PythonNodeMap:
     result = nx.eigenvector_centrality(
-        graph.value, maxiter, tol, weight=graph.edge_weight_label
+        graph.value, maxiter, tolerance, weight=graph.edge_weight_label
     )
     return PythonNodeMap(result)
 
 
 @concrete_algorithm("centrality.hits")
 def nx_hits_centrality(
-    graph: NetworkXGraph, max_iter: int, tol: float, normalize: bool,
+    graph: NetworkXGraph, maxiter: int, tolerance: float, normalize: bool,
 ) -> Tuple[PythonNodeMap, PythonNodeMap]:
-    hubs, authority = nx.hits(graph.value, max_iter, tol, normalized=normalize)
+    hubs, authority = nx.hits(graph.value, maxiter, tolerance, normalized=normalize)
     return PythonNodeMap(hubs), PythonNodeMap(authority)
 
 
 @concrete_algorithm("traversal.bfs_iter")
diff --git a/metagraph/tests/algorithms/test_centrality.py b/metagraph/tests/algorithms/test_centrality.py
index 34211591..b76f4165 100644
--- a/metagraph/tests/algorithms/test_centrality.py
+++ b/metagraph/tests/algorithms/test_centrality.py
@@ -212,9 +212,9 @@ def test_eigenvector_centrality(default_plugin_resolver):
             7: 0.34570143412454807,
         }
     )
-    MultiVerify(dpr).compute("centrality.eigenvector", graph).assert_equal(
-        expected, rel_tol=1e-3
-    )
+    MultiVerify(dpr).compute(
+        "centrality.eigenvector", graph, tolerance=1e-06
+    ).assert_equal(expected, rel_tol=1e-3)
 
 
 def test_hits_centrality(default_plugin_resolver):
@@ -244,6 +244,6 @@ def test_hits_centrality(default_plugin_resolver):
             7: 0.15153024321895017,
         }
     )
-    MultiVerify(dpr).compute("centrality.hits", graph, tol=1e-06).assert_equal(
-        (hubs, authority), rel_tol=1e-3
-    )
+    MultiVerify(dpr).compute(
+        "centrality.hits", graph, maxiter=100, tolerance=1e-06
+    ).assert_equal((hubs, authority), rel_tol=1e-3)
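
For reference, the renamed ``maxiter``/``tolerance`` arguments are forwarded positionally to
NetworkX's ``max_iter`` and ``tol`` parameters in the concrete algorithms above. A standalone sanity
check of that call pattern, using an arbitrary small graph rather than the test fixture:

    import networkx as nx

    g = nx.DiGraph([(0, 1), (1, 2), (2, 0), (0, 2)])

    # nx.hits(G, max_iter, tol, nstart=None, normalized=True)
    hubs, authority = nx.hits(g, 100, 1e-06, normalized=True)

    # With normalized=True, the hub scores sum to 1
    assert abs(sum(hubs.values()) - 1.0) < 1e-9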