Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fluid communities algorithm implementation (#2420)
* Adding asyn_fluidc.py file containing fluid communities algorithm implementation * Fixing some indentations in the docstring * Adapting code to PEP8 format * Tests for Fluid Communities algorithm * Initial checks implemented & not_implemented_for decorator added * import of asyn_fluidc added * Import error fixed, random seed included to ensure test case from stochasticity * Import error fixed & Authorship added * Tests adapted due to minor error * Import error fixed * Fluid Communities added * random.seed adapted to work same way on Python2.X and Python3.X * Try a test seed for all versions * Take five_clique_ring out of nosetests but leave code for it
- Loading branch information
1 parent
ddef8c2
commit e43a7b0
Showing
4 changed files
with
272 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
# -*- coding: utf-8 -*- | ||
# Copyright (C) 2017 | ||
# All rights reserved. | ||
# BSD license. | ||
# Author: Ferran Parés <ferran.pares@bsc.es> | ||
"""Asynchronous Fluid Communities algorithm for community detection.""" | ||
|
||
from collections import Counter | ||
import random | ||
from networkx.exception import NetworkXError | ||
from networkx.algorithms.components import is_connected | ||
from networkx.utils import groups | ||
from networkx.utils.decorators import not_implemented_for | ||
|
||
__all__ = ['asyn_fluidc'] | ||
|
||
|
||
@not_implemented_for('directed', 'multigraph')
def asyn_fluidc(G, k, max_iter=100):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in
    [1]_. The algorithm is based on the simple idea of fluids interacting
    in an environment, expanding and pushing each other. Its initialization
    is random, so found communities may vary on different executions.

    The algorithm proceeds as follows. First each of the initial k
    communities is initialized in a random vertex in the graph. Then the
    algorithm iterates over all vertices in a random order, updating the
    community of each vertex based on its own community and the communities
    of its neighbours. This process is performed several times until
    convergence.

    At all times, each community has a total density of 1, which is equally
    distributed among the vertices it contains. If a vertex changes
    community, vertex densities of affected communities are adjusted
    immediately. When a complete iteration over all vertices is done, such
    that no vertex changes the community it belongs to, the algorithm has
    converged and returns.

    This is the original version of the algorithm described in [1]_.
    Unfortunately, it does not support weighted graphs yet.

    Parameters
    ----------
    G : Graph
        A connected graph. Directed graphs and multigraphs are rejected by
        the `not_implemented_for` decorator.

    k : integer
        The number of communities to be found. Must satisfy
        ``0 < k <= len(G)``.

    max_iter : integer
        The maximum number of complete passes over the vertices. By
        default 100. If the limit is reached before convergence, the
        communities found so far are returned.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Raises
    ------
    NetworkXError
        If `k` is not an integer, is not positive, exceeds the number of
        nodes in `G`, or if `G` is not connected.

    Notes
    -----
    k variable is not an optional argument.

    Randomness is drawn from the module-level `random` generator, so
    results can be made repeatable by calling `random.seed` beforehand.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        # Adjacent literals instead of a backslash inside the string, which
        # would splice the continuation line's leading text into the message.
        raise NetworkXError("Fluid Communities can only be run on connected "
                            "Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")
    # Initialization: seed each of the k communities on one random vertex.
    max_density = 1.0
    vertices = list(G)
    random.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for com in communities.values():
        com_to_numvertices[com] = 1
        density[com] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    # Stop on convergence or after exactly max_iter complete passes.
    while cont and iter_count < max_iter:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        random.shuffle(vertices)
        for vertex in vertices:
            # Updating rule: sum, per community, the densities contributed
            # by the vertex itself and by each of its neighbours.
            com_counter = Counter()
            # None marks a vertex that no community has reached yet
            # (community ids are the integers 0..k-1).
            current_com = communities.get(vertex)
            if current_com is not None:
                com_counter[current_com] += density[current_com]
            for v in G[vertex]:
                if v in communities:
                    com_counter[communities[v]] += density[communities[v]]
            # No community touches this vertex yet; nothing to decide.
            if not com_counter:
                continue
            # Communities whose summed density ties with the maximum
            # (within a small tolerance for floating point error).
            max_freq = max(com_counter.values())
            best_communities = [com for com, freq in com_counter.items()
                                if (max_freq - freq) < 0.0001]
            # If actual vertex com in best communities, it is preserved
            if current_com in best_communities:
                continue
            # Vertex community changes: set flag of non-convergence
            cont = True
            # Randomly chose a new community from candidates
            new_com = random.choice(best_communities)
            # Update previous community status
            if current_com is not None:
                com_to_numvertices[current_com] -= 1
                density[current_com] = (max_density /
                                        com_to_numvertices[current_com])
            # Update new community status
            communities[vertex] = new_com
            com_to_numvertices[new_com] += 1
            density[new_com] = max_density / com_to_numvertices[new_com]
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
120 changes: 120 additions & 0 deletions
120
networkx/algorithms/community/tests/test_asyn_fluidc.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
from nose.tools import assert_equal, assert_in | ||
from networkx import Graph | ||
from networkx.algorithms.community.asyn_fluidc import * | ||
import random | ||
|
||
def test_single_node():
    """A single-node graph with k=1 yields that node as the only community."""
    graph = Graph()
    graph.add_node('a')

    # The lone node must form the one and only community.
    expected = {frozenset(['a'])}

    found = set()
    for community in asyn_fluidc(graph, 1):
        found.add(frozenset(community))
    assert_equal(found, expected)
|
||
|
||
def test_two_nodes():
    """Two linked nodes with k=2 end up in one community each."""
    graph = Graph()
    graph.add_edge('a', 'b')

    # Each endpoint is expected to keep its own community.
    expected = {frozenset(['a']), frozenset(['b'])}

    found = set(map(frozenset, asyn_fluidc(graph, 2)))
    assert_equal(found, expected)
|
||
|
||
def test_two_clique_communities():
    """Two triangles joined by one edge are split into the two triangles."""
    # Fixed seed: the algorithm draws from the module-level random generator.
    random.seed(7)
    graph = Graph()

    # Edges are added one by one in a fixed order so that node and
    # adjacency ordering (and hence the seeded shuffle) stay the same.
    edges = [
        # first triangle
        ('a', 'b'), ('a', 'c'), ('b', 'c'),
        # bridge between the triangles
        ('c', 'd'),
        # second triangle
        ('d', 'e'), ('d', 'f'), ('f', 'e'),
    ]
    for u, v in edges:
        graph.add_edge(u, v)

    expected = {frozenset(['a', 'c', 'b']), frozenset(['e', 'd', 'f'])}

    found = set()
    for community in asyn_fluidc(graph, 2):
        found.add(frozenset(community))
    assert_equal(found, expected)
|
||
|
||
def five_clique_ring():
    """Not auto-tested (not named test_...) due to cross-version seed issues.

    Builds five 4-node cliques joined in a ring by single edges and checks
    that asking for five communities returns the five cliques.
    """
    random.seed(9)
    graph = Graph()

    clique_ids = '12345'
    members = 'abcd'

    # Cliques c1..c5: every unordered pair within a clique, generated in
    # the order (a,b), (a,c), (a,d), (b,c), (b,d), (c,d).
    for cid in clique_ids:
        for pos, first in enumerate(members):
            for second in members[pos + 1:]:
                graph.add_edge(cid + first, cid + second)

    # Ring connections: node 'a' of each clique links to node 'c' of the
    # next clique, wrapping from clique 5 back to clique 1.
    for idx, cid in enumerate(clique_ids):
        next_cid = clique_ids[(idx + 1) % len(clique_ids)]
        graph.add_edge(cid + 'a', next_cid + 'c')

    # Ground truth: one community per clique.
    expected = {frozenset(cid + member for member in members)
                for cid in clique_ids}

    found = {frozenset(community) for community in asyn_fluidc(graph, 5)}
    assert_equal(found, expected)
|