Skip to content

Commit

Permalink
Modularity partitions (#41)
Browse files Browse the repository at this point in the history
* Commit of current status prior to actually updating the tests.  The current tests are less tests and more an exploration of why our modularity calculation differed from the python-louvain modularity calculation

* Updating tests and documentation

* Wanted to make sure we handled disconnected nodes appropriately

* Added release notes

* Restricting us to the 2.x versions of Sphinx, 3.0 breaks us

Co-authored-by: Dwayne Pryce <dwpryce@microsoft.com>
  • Loading branch information
Dwayne Pryce and Dwayne Pryce committed May 6, 2020
1 parent f1b4362 commit da04407
Show file tree
Hide file tree
Showing 8 changed files with 159,360 additions and 14 deletions.
2 changes: 2 additions & 0 deletions docs/release_notes.md
@@ -1,4 +1,6 @@
# Release Notes
## 0.1.3
- Added `modularity` and `modularity_components` functions, and deprecated `q_score`.
## 0.1.2
- Rename `self_loop_augmentation` to `diagonal_augmentation` and use weighted degree to perform calculation instead of degree only.
- Fix bug when getting the length of edges when performing graph augmentations.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -52,7 +52,7 @@ def handle_version() -> str:
'pytest',
'flake8',
'mypy',
'sphinx',
'sphinx>=2.4.4,<3.0.0',
'sphinx-rtd-theme',
'testfixtures',
'recommonmark'
Expand Down
77 changes: 68 additions & 9 deletions tests/partition/test_modularity.py
Expand Up @@ -4,8 +4,24 @@
import networkx as nx
import numpy as np
from topologic import PartitionedGraph
from topologic.partition import q_score
from topologic.partition import modularity, modularity_components, q_score
import unittest
from typing import Dict
import community # python-louvain module

from tests.utils import data_file


def _modularity_graph() -> nx.Graph:
    """Build the small weighted graph shared by the modularity tests.

    The graph consists of the path a - b - c (edge weights 4.0 and 3.0) and
    the separate edge e - f (weight 5.0), for a total edge weight of 12.0.
    """
    weighted_edges = [
        ("a", "b", 4.0),
        ("b", "c", 3.0),
        ("e", "f", 5.0),
    ]
    fixture = nx.Graph()
    # Stores each third tuple element under the default "weight" attribute.
    fixture.add_weighted_edges_from(weighted_edges)
    return fixture


# Community assignment used with the graph from _modularity_graph(): vertices
# a, b and c are placed in community 0, vertices e and f in community 1.
_PARTITIONS: Dict[str, int] = {'a': 0, 'b': 0, 'c': 0, 'e': 1, 'f': 1}


class TestModularity(unittest.TestCase):
Expand All @@ -14,13 +30,56 @@ def test_wrong_type(self):
q_score("foo")

def test_q_score(self):
# Checks that the (deprecated) q_score of the partitioned fixture graph is a
# float approximately equal to 0.48611111111111105.
# NOTE(review): this span looks like an unmarked diff with both the removed
# and the added lines present: the inline graph construction duplicates what
# _modularity_graph() builds, the literal partition duplicates _PARTITIONS,
# and q_score plus its two assertions appear twice. Confirm which variant is
# intended before relying on this text as runnable code.
graph = nx.Graph()
graph.add_edge("a", "b", weight=4.0)
graph.add_edge("b", "c", weight=3.0)
graph.add_edge("e", "f", weight=5.0)
graph = _modularity_graph()

partition = {'a': 0, 'b': 0, 'c': 0, 'e': 1, 'f': 1}
partition = _PARTITIONS
part_graph = PartitionedGraph(graph, partition)
# NOTE(review): the local name `modularity` below shadows the `modularity`
# function imported from topologic.partition for the rest of this method.
modularity = q_score(part_graph)
self.assertIsInstance(modularity, float)
np.testing.assert_almost_equal(0.48611111111111105, modularity)
modularity_value = q_score(part_graph)
self.assertIsInstance(modularity_value, float)
np.testing.assert_almost_equal(0.48611111111111105, modularity_value)

def test_modularity(self):
    """Check `modularity` on the shared fixture against a hand-computed value.

    Total edge weight of the fixture is 4 + 3 + 5 = 12, so 2m = 24.
    Community 0 (a, b, c): weighted degree sum 14, in-community degree 14.
    Community 1 (e, f): weighted degree sum 10, in-community degree 10.

    Component 0: (14 / 24) - (1.0 * (14 / 24) ** 2)
                 = 0.5833333333333334 - 0.34027777777777785
                 = 0.24305555555555552
    Component 1: (10 / 24) - (1.0 * (10 / 24) ** 2)
                 = 0.4166666666666667 - 0.17361111111111113
                 = 0.24305555555555555

    The expected modularity is the sum of the two components.
    """
    expected = 0.48611111111111105
    fixture_graph = _modularity_graph()
    actual = modularity(fixture_graph, _PARTITIONS)

    np.testing.assert_almost_equal(expected, actual)

def test_modularity_components(self):
    """Compare summed modularity components with python-louvain's modularity.

    Loads a weighted edge list and a vertex -> community mapping from the
    test data files, adds one isolated vertex in a community of its own, and
    checks that:
      * every community id present in the mapping gets a component,
      * the isolated vertex's community contributes exactly 0,
      * the components sum to python-louvain's modularity (3 decimals).
    """
    graph = nx.Graph()
    with open(data_file("large-graph.csv"), "r") as edges_io:
        for row in edges_io:
            source, target, raw_weight = row.strip().split(",")
            # An edge listed more than once accumulates its weights.
            existing = graph.get_edge_data(source, target, {"weight": 0})
            graph.add_edge(source, target, weight=float(raw_weight) + existing["weight"])

    with open(data_file("large-graph-partitions.csv"), "r") as communities_io:
        partitions = {
            vertex: int(comm)
            for vertex, comm in (row.strip().split(",") for row in communities_io)
        }

    # Give the isolated vertex a community id no other vertex uses.
    isolated_community = max(partitions.values()) + 1
    graph.add_node("disconnected_node")
    partitions["disconnected_node"] = isolated_community

    components = modularity_components(graph, partitions)

    # Reference value from python-louvain.
    louvain_modularity = community.modularity(partitions, graph)
    summed_modularity = sum(components.values())

    self.assertSetEqual(set(components.keys()), set(partitions.values()))
    self.assertEqual(0, components[isolated_community])

    # Only 3 decimal places are compared. The original author's explanation
    # (a hypothesis, not verified here) is that the two implementations
    # accumulate floating point error differently: networkx's degree() yields
    # twice the edge weight, which python-louvain halves and sums, whereas
    # this library skips that step; python-louvain also uses `**` where this
    # library uses math.pow.
    np.testing.assert_almost_equal(louvain_modularity, summed_modularity, decimal=3)

0 comments on commit da04407

Please sign in to comment.