Merge pull request #619 from opencobra/fix-duplicates

fix: change how duplicated reactions are reported
opencobra · Mar 6, 2019 · 44a7c8b · 44a7c8b
2 parents 0c5a595 + 5938662
commit 44a7c8b
Show file tree

Hide file tree

Showing 6 changed files with 41 additions and 31 deletions.
diff --git a/.gitignore b/.gitignore
@@ -22,6 +22,7 @@ parts/
 sdist/
 var/
 wheels/
+pip-wheel-metadata/
 *.egg-info/
 .installed.cfg
 *.egg

diff --git a/HISTORY.rst b/HISTORY.rst
@@ -3,6 +3,8 @@ History
 
 Next Release
 ------------
+* Duplicated reactions are now reported as the number of unique reactions
+  involved rather than the number of duplicate pairs.
 
 0.9.5 (2019-02-21)
 ------------------

diff --git a/memote/suite/tests/test_basic.py b/memote/suite/tests/test_basic.py
@@ -652,13 +652,14 @@ def test_find_duplicate_reactions(model):
 
     """
     ann = test_find_duplicate_reactions.annotation
-    ann["data"] = basic.find_duplicate_reactions(model)
-    ann["metric"] = len(ann["data"]) / len(model.reactions)
+    duplicates, num = basic.find_duplicate_reactions(model)
+    ann["data"] = duplicates
+    ann["metric"] = num / len(model.reactions)
     ann["message"] = wrapper.fill(
         """Based on metabolites, directionality and compartment there are a
-        total of {} reactions in the model which have duplicates: {}""".format(
-            len(ann["data"]), truncate(ann["data"])))
-    assert len(ann["data"]) == 0, ann["message"]
+        total of {} reactions in the model which have duplicates:
+        {}""".format(num, truncate(duplicates)))
+    assert num == 0, ann["message"]
 
 
 @annotate(title="Reactions With Identical Genes", format_type="percent")

diff --git a/memote/support/basic.py b/memote/support/basic.py
@@ -17,7 +17,7 @@
 
 """Supporting functions for basic checks performed on the model object."""
 
-from __future__ import absolute_import
+from __future__ import absolute_import, division
 
 import logging
 from itertools import combinations
@@ -377,7 +377,7 @@ def map_metabolites_to_structures(metabolites, compartments):
         structure index.
 
     """
-    # TODO: Consider SMILES?
+    # TODO (Moritz Beber): Consider SMILES?
     unique_identifiers = ["inchikey", "inchi"]
     met2mol = {}
     molecules = {c: [] for c in compartments}
@@ -409,7 +409,7 @@ def map_metabolites_to_structures(metabolites, compartments):
 
 def find_duplicate_reactions(model):
     """
-    Return a list with sets of reactions that are functionally identical.
+    Return duplicate reaction pairs and the number of reactions involved.
 
     Identify duplicate reactions globally by checking if any
     two reactions have the same metabolites, same directionality and are in
@@ -435,7 +435,9 @@ def find_duplicate_reactions(model):
     Returns
     -------
     list
-        A list of sets of duplicate reactions based on metabolites.
+        A list of pairs of duplicate reactions based on metabolites.
+    int
+        The number of unique reactions that have a duplicate.
 
     """
     met2mol = map_metabolites_to_structures(model.metabolites,
@@ -457,6 +459,7 @@ def find_duplicate_reactions(model):
         }
         structural.append((rxn, substrates, products))
     # Compare reactions using their structure-based stoichiometries.
+    num_duplicated = set()
     duplicates = []
     for (rxn_a, sub_a, prod_a), (rxn_b, sub_b, prod_b) in combinations(
             structural, 2):
@@ -469,10 +472,12 @@ def find_duplicate_reactions(model):
         # Compare whether they are both (ir-)reversible.
         if rxn_a.reversibility != rxn_b.reversibility:
             continue
-        # TODO: We could compare bounds here but it might be worth knowing
-        #  about the reactions even if their bounds differ?
+        # TODO (Moritz Beber): We could compare bounds here but it might be
+        #  worth knowing about the reactions even if their bounds differ?
         duplicates.append((rxn_a.id, rxn_b.id))
-    return duplicates
+        num_duplicated.add(rxn_a.id)
+        num_duplicated.add(rxn_b.id)
+    return duplicates, len(num_duplicated)
 
 
 def find_reactions_with_identical_genes(model):

diff --git a/setup.cfg b/setup.cfg
@@ -7,7 +7,7 @@ parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
 	(?P<release>[a]?)(?P<num>\d*)
-serialize = 
+serialize =
 	{major}.{minor}.{patch}{release}{num}
 	{major}.{minor}.{patch}
 
@@ -17,7 +17,7 @@ url = https://github.com/opencobra/memote
 download_url = https://pypi.org/pypi/memote/
 author = Moritz E. Beber
 author_email = morbeb@biosustain.dtu.dk
-classifiers = 
+classifiers =
 	Development Status :: 5 - Production/Stable
 	Intended Audience :: Developers
 	Intended Audience :: Science/Research
@@ -33,7 +33,7 @@ classifiers =
 license = Apache Software License Version 2.0
 description = the genome-scale metabolic model test suite
 long_description = file: README.rst
-keywords = 
+keywords =
 	memote
 	metabolic
 	constrained-based
@@ -45,7 +45,7 @@ keywords =
 
 [options]
 zip_safe = False
-install_requires = 
+install_requires =
 	click <7.0
 	click-configfile
 	click-log
@@ -71,32 +71,32 @@ install_requires =
 	pylru
 	goodtables ==1.0.0
 	depinfo
-	equilibrator_api;python_version>='3.5'
+	equilibrator_api <0.2;python_version>='3.5'
 python_requires = >=2.7,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*
-tests_require = 
+tests_require =
 	tox
 packages = find:
 
 [options.package_data]
-memote.experimental.schemata = 
+memote.experimental.schemata =
 	*.json
-memote.suite = 
+memote.suite =
 	tests/*.py
-memote.suite.templates = 
+memote.suite.templates =
 	*.html
 	*.yml
-memote.support.data = 
+memote.support.data =
 	*.csv
 	*.json
 
 [options.entry_points]
-console_scripts = 
+console_scripts =
 	memote = memote.suite.cli.runner:cli
 
 [bumpversion:part:release]
 optional_value = placeholder
 first_value = placeholder
-values = 
+values =
 	placeholder
 	a
 
@@ -124,7 +124,7 @@ universal = 1
 
 [flake8]
 max-line-length = 80
-exclude = 
+exclude =
 	__init__.py
 	docs
 
@@ -133,7 +133,7 @@ match_dir = memote
 
 [tool:pytest]
 testpaths = tests
-filterwarnings = 
+filterwarnings =
 	ignore::DeprecationWarning:libsbml
 
 [isort]

diff --git a/tests/test_for_support/test_for_basic.py b/tests/test_for_support/test_for_basic.py
@@ -855,17 +855,18 @@ def test_find_reactions_with_partially_identical_annotations(model, num):
     assert total == num
 
 
-@pytest.mark.parametrize("model, num", [
+@pytest.mark.parametrize("model, expected", [
     ("empty", 0),
-    ("dup_rxns", 1),
+    ("dup_rxns", 2),
     ("dup_rxns_rev", 0),
     ("dup_rxns_irrev", 0),
     ("dup_rxns_compartment", 0),
-    ("dup_rxns_irrev_exchanges", 1),
+    ("dup_rxns_irrev_exchanges", 2),
 ], indirect=["model"])
-def test_find_duplicate_reactions(model, num):
+def test_find_duplicate_reactions(model, expected):
     """Expect amount of duplicate reactions to be identified correctly."""
-    assert len(basic.find_duplicate_reactions(model)) == num
+    _, num = basic.find_duplicate_reactions(model)
+    assert num == expected
 
 
 @pytest.mark.parametrize("model, num", [