@not_implemented_for('directed')
@not_implemented_for('multigraph')
def asyn_fluidc(G, k, max_iter=100):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in
    [1]_. The algorithm is based on the simple idea of fluids interacting
    in an environment, expanding and pushing each other. Its initialization
    is random, so found communities may vary on different executions.

    The algorithm proceeds as follows. First each of the initial k communities
    is initialized in a random vertex in the graph. Then the algorithm iterates
    over all vertices in a random order, updating the community of each vertex
    based on its own community and the communities of its neighbours. This
    process is performed several times until convergence.
    At all times, each community has a total density of 1, which is equally
    distributed among the vertices it contains. If a vertex changes
    community, vertex densities of affected communities are adjusted
    immediately. When a complete iteration over all vertices is done, such that
    no vertex changes the community it belongs to, the algorithm has converged
    and returns.

    This is the original version of the algorithm described in [1]_.
    Unfortunately, it does not support weighted graphs yet.

    Parameters
    ----------
    G : Graph
        Connected graph on which communities are searched.

    k : integer
        The number of communities to be found. Must be positive and not
        larger than the number of nodes in `G`.

    max_iter : integer
        The maximum number of complete passes over the vertices. By
        default 100. At least one pass is always performed.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Raises
    ------
    NetworkXError
        If `k` is not an integer, if `k` is not greater than 0, if `G` is
        not connected, or if `k` is bigger than the number of nodes in `G`.

    Notes
    -----
    k variable is not an optional argument.

    Directed graphs and multigraphs are rejected by the
    ``not_implemented_for`` decorators applied to this function.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities can only be run on connected "
                            "Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")

    # Initialization: seed each of the k communities on one random vertex.
    max_density = 1.0
    vertices = list(G)
    random.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    com_to_numvertices = {com: 1 for com in communities.values()}
    density = {com: max_density for com in communities.values()}

    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order. The list is
        # rebuilt on every pass so the shuffle always starts from the
        # graph's own node order.
        vertices = list(G)
        random.shuffle(vertices)
        for vertex in vertices:
            # Updating rule: accumulate, per community, the density found
            # on the vertex itself and on its neighbours.
            com_counter = Counter()
            # Community labels are the integers 0..k-1, so None safely
            # marks a vertex that has not been reached by any fluid yet.
            own_com = communities.get(vertex)
            if own_com is not None:
                com_counter[own_com] += density[own_com]
            for v in G[vertex]:
                neighbour_com = communities.get(v)
                if neighbour_com is not None:
                    com_counter[neighbour_com] += density[neighbour_com]
            # No fluid has reached this neighbourhood yet: nothing to do.
            if not com_counter:
                continue
            # Candidates are all communities tied (within a small
            # tolerance) with the highest accumulated density.
            max_freq = max(com_counter.values())
            best_communities = [com for com, freq in com_counter.items()
                                if (max_freq - freq) < 0.0001]
            # If the current community is among the best, it is preserved.
            if own_com in best_communities:
                continue
            # The vertex changes community: flag non-convergence and pick
            # the new community at random among the candidates.
            cont = True
            new_com = random.choice(best_communities)
            # Update previous community status. A community with a single
            # vertex gives that vertex density 1.0, which no other
            # community can exceed (each community's densities sum to 1),
            # so the sole member never leaves and the count stays above 0.
            if own_com is not None:
                com_to_numvertices[own_com] -= 1
                density[own_com] = max_density / com_to_numvertices[own_com]
            # Update new community status
            communities[vertex] = new_com
            com_to_numvertices[new_com] += 1
            density[new_com] = max_density / com_to_numvertices[new_com]
        # If maximum iterations reached --> output actual results
        if iter_count >= max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
def five_clique_ring():
    """Check asyn_fluidc on a ring of five 4-cliques.

    Deliberately not named ``test_...`` so it is not collected
    automatically: the expected result relies on a fixed random seed, and
    seeded sequences are not stable across interpreter versions.
    """
    random.seed(9)
    test = Graph()

    members = 'abcd'
    clique_ids = range(1, 6)

    # Cliques c1..c5: every pair inside a clique, inserted in the order
    # a-b, a-c, a-d, b-c, b-d, c-d so node and adjacency order are stable.
    for cid in clique_ids:
        for pos, first in enumerate(members):
            for second in members[pos + 1:]:
                test.add_edge('%d%s' % (cid, first), '%d%s' % (cid, second))

    # connections: node 'a' of each clique links to node 'c' of the next
    # one, with the fifth clique wrapping around to the first.
    for cid in clique_ids:
        test.add_edge('%da' % cid, '%dc' % (cid % 5 + 1))

    # ground truth: one community per clique
    ground_truth = set(
        frozenset('%d%s' % (cid, member) for member in members)
        for cid in clique_ids
    )

    found = set(frozenset(c) for c in asyn_fluidc(test, 5))
    assert_equal(found, ground_truth)