Skip to content

Commit

Permalink
Fluid communities algorithm implementation (#2420)
Browse files Browse the repository at this point in the history
* Adding asyn_fluidc.py file containing fluid communities algorithm implementation

* Fixing some indentations in the docstring

* Adapting code to PEP8 format

* Tests for Fluid Communities algorithm

* Initial checks implemented & not_implemented_for decorator added

* import of asyn_fluidc added

* Import error fixed, random seed included to ensure test case from stochasticity

* Import error fixed & Authorship added

* Tests adapted due to minor error

* Import error fixed

* Fluid Communities added

* random.seed adapted to work same way on Python2.X and Python3.X

* Try a test seed for all versions

* Take five_clique_ring out of nosetests but leave code for it
  • Loading branch information
FerranPares authored and dschult committed Jun 23, 2017
1 parent ddef8c2 commit e43a7b0
Show file tree
Hide file tree
Showing 4 changed files with 272 additions and 0 deletions.
5 changes: 5 additions & 0 deletions doc/source/reference/release_2.0.rst
Expand Up @@ -115,3 +115,8 @@ API changes
Most of the shortest_path algorithms now raise a NodeNotFound exception
when a source or a target are not present in the graph.

* [`#2420 <https://github.com/networkx/networkx/pull/2420>`_]
New community detection algorithm provided. Fluid Communities is an asynchronous
algorithm based on the simple idea of fluids interacting in an environment,
expanding and pushing each other. The algorithm is completely described in
[`https://arxiv.org/pdf/1703.09307.pdf <https://arxiv.org/pdf/1703.09307.pdf>`_].
1 change: 1 addition & 0 deletions networkx/algorithms/community/__init__.py
@@ -1,4 +1,5 @@
from networkx.algorithms.community.asyn_lpa import *
from networkx.algorithms.community.asyn_fluidc import *
from networkx.algorithms.community.centrality import *
from networkx.algorithms.community.community_generators import *
from networkx.algorithms.community.kclique import *
Expand Down
146 changes: 146 additions & 0 deletions networkx/algorithms/community/asyn_fluidc.py
@@ -0,0 +1,146 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2017
# All rights reserved.
# BSD license.
# Author: Ferran Parés <ferran.pares@bsc.es>
"""Asynchronous Fluid Communities algorithm for community detection."""

from collections import Counter
import random
from networkx.exception import NetworkXError
from networkx.algorithms.components import is_connected
from networkx.utils import groups
from networkx.utils.decorators import not_implemented_for

# Names exported by ``from networkx.algorithms.community.asyn_fluidc import *``.
__all__ = ['asyn_fluidc']


@not_implemented_for('directed', 'multigraph')
def asyn_fluidc(G, k, max_iter=100):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in
    [1]_. The algorithm is based on the simple idea of fluids interacting
    in an environment, expanding and pushing each other. Its initialization
    is random, so found communities may vary on different executions.

    The algorithm proceeds as follows. First each of the initial k
    communities is initialized in a random vertex in the graph. Then the
    algorithm iterates over all vertices in a random order, updating the
    community of each vertex based on its own community and the communities
    of its neighbours. This process is performed several times until
    convergence.

    At all times, each community has a total density of 1, which is equally
    distributed among the vertices it contains. If a vertex changes of
    community, vertex densities of affected communities are adjusted
    immediately. When a complete iteration over all vertices is done, such
    that no vertex changes the community it belongs to, the algorithm has
    converged and returns.

    This is the original version of the algorithm described in [1]_.
    Unfortunately, it does not support weighted graphs yet.

    Parameters
    ----------
    G : Graph
        Connected graph to partition.

    k : integer
        The number of communities to be found.

    max_iter : integer
        The maximum number of complete passes over the vertices. By
        default 100. At least one pass is always performed.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Raises
    ------
    NetworkXError
        If `k` is not an integer, if `k` is not greater than 0, if `G` is
        not connected, or if `k` exceeds the number of nodes in `G`.

    Notes
    -----
    k variable is not an optional argument.

    If `max_iter` is reached before convergence, the assignment computed so
    far is returned; only vertices that already belong to a community are
    part of the result.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities can only be run on connected "
                            "Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")

    # Initialization: seed each of the k communities on one random vertex.
    max_density = 1.0
    vertices = list(G)
    random.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    # A freshly seeded community holds one vertex carrying the full density.
    com_to_numvertices = {com: 1 for com in communities.values()}
    density = {com: max_density for com in communities.values()}

    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        random.shuffle(vertices)
        for vertex in vertices:
            # None marks a vertex that no community has reached yet
            # (community ids are the integers 0..k-1, so 0 is a valid id).
            current_com = communities.get(vertex)

            # Updating rule: sum, per community, the density contributed by
            # the vertex itself (first) and then by each of its neighbours.
            com_counter = Counter()
            if current_com is not None:
                com_counter[current_com] += density[current_com]
            for v in G[vertex]:
                neighbour_com = communities.get(v)
                if neighbour_com is not None:
                    com_counter[neighbour_com] += density[neighbour_com]

            # No community touches this vertex yet: nothing to decide.
            if not com_counter:
                continue

            # Candidates are all communities within a small tolerance of the
            # highest accumulated density (guards against float round-off).
            max_freq = max(com_counter.values())
            best_communities = [com for com, freq in com_counter.items()
                                if (max_freq - freq) < 0.0001]

            # If actual vertex com in best communities, it is preserved
            if current_com in best_communities:
                continue

            # The vertex changes community: flag non-convergence and pick
            # the new community at random among the candidates.
            cont = True
            new_com = random.choice(best_communities)

            # Update previous community status
            if current_com is not None:
                com_to_numvertices[current_com] -= 1
                density[current_com] = (max_density /
                                        com_to_numvertices[current_com])

            # Update new community status
            communities[vertex] = new_com
            com_to_numvertices[new_com] += 1
            density[new_com] = max_density / com_to_numvertices[new_com]

        # If maximum iterations reached --> output actual results
        if iter_count >= max_iter:
            break

    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
120 changes: 120 additions & 0 deletions networkx/algorithms/community/tests/test_asyn_fluidc.py
@@ -0,0 +1,120 @@
from nose.tools import assert_equal, assert_in
from networkx import Graph
from networkx.algorithms.community.asyn_fluidc import *
import random

def test_single_node():
    # A graph with one vertex and k=1 can only yield that vertex as the
    # sole community.
    graph = Graph()
    graph.add_node('a')

    expected = {frozenset(['a'])}

    found = set()
    for community in asyn_fluidc(graph, 1):
        found.add(frozenset(community))

    assert_equal(found, expected)


def test_two_nodes():
    # Two connected vertices with k=2: both vertices are used as community
    # seeds, so each one is expected to form its own community.
    graph = Graph()
    graph.add_edge('a', 'b')

    expected = {frozenset(['a']), frozenset(['b'])}

    found = set(map(frozenset, asyn_fluidc(graph, 2)))

    assert_equal(found, expected)


def test_two_clique_communities():
    # Two triangles joined by the single edge c-d; the seed pins the
    # algorithm's random choices so the outcome is reproducible.
    random.seed(7)
    graph = Graph()

    # Edges are inserted one by one, in this exact order.
    edges = [
        # c1
        ('a', 'b'), ('a', 'c'), ('b', 'c'),
        # connection
        ('c', 'd'),
        # c2
        ('d', 'e'), ('d', 'f'), ('f', 'e'),
    ]
    for u, v in edges:
        graph.add_edge(u, v)

    # ground truth
    expected = {frozenset(['a', 'c', 'b']),
                frozenset(['e', 'd', 'f'])}

    found = set(map(frozenset, asyn_fluidc(graph, 2)))

    assert_equal(found, expected)


def five_clique_ring():
    """Ring of five 4-cliques, expecting one community per clique.

    Not auto-tested (not named test_...) due to cross-version seed issues.
    """
    random.seed(9)
    graph = Graph()

    # Edges are inserted one by one, in this exact order.
    edges = [
        # c1
        ('1a', '1b'), ('1a', '1c'), ('1a', '1d'),
        ('1b', '1c'), ('1b', '1d'), ('1c', '1d'),
        # c2
        ('2a', '2b'), ('2a', '2c'), ('2a', '2d'),
        ('2b', '2c'), ('2b', '2d'), ('2c', '2d'),
        # c3
        ('3a', '3b'), ('3a', '3c'), ('3a', '3d'),
        ('3b', '3c'), ('3b', '3d'), ('3c', '3d'),
        # c4
        ('4a', '4b'), ('4a', '4c'), ('4a', '4d'),
        ('4b', '4c'), ('4b', '4d'), ('4c', '4d'),
        # c5
        ('5a', '5b'), ('5a', '5c'), ('5a', '5d'),
        ('5b', '5c'), ('5b', '5d'), ('5c', '5d'),
        # connections
        ('1a', '2c'), ('2a', '3c'), ('3a', '4c'),
        ('4a', '5c'), ('5a', '1c'),
    ]
    for u, v in edges:
        graph.add_edge(u, v)

    # ground truth
    expected = {frozenset(['1a', '1b', '1c', '1d']),
                frozenset(['2a', '2b', '2c', '2d']),
                frozenset(['3a', '3b', '3c', '3d']),
                frozenset(['4a', '4b', '4c', '4d']),
                frozenset(['5a', '5b', '5c', '5d'])}

    found = set()
    for community in asyn_fluidc(graph, 5):
        found.add(frozenset(community))

    assert_equal(found, expected)

0 comments on commit e43a7b0

Please sign in to comment.