Merge pull request #614 from opencobra/refactor-metrics

refactor: add metrics to all test cases
opencobra · Feb 19, 2019 · dab595e · dab595e
2 parents f409293 + 5661aa5
commit dab595e
Show file tree

Hide file tree

Showing 10 changed files with 91 additions and 57 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -3,6 +3,8 @@ History
 
 Next Release
 ------------
+* Add metrics, i.e., model-size-independent test outcomes, to almost all test
+  cases.
 * Repair auto generation of API docs and update section on test suite.
 
 0.9.3 (2019-01-30)

diff --git a/memote/suite/tests/test_basic.py b/memote/suite/tests/test_basic.py
@@ -43,6 +43,7 @@ def test_model_id_presence(model):
     ann = test_model_id_presence.annotation
     assert hasattr(model, "id")
     ann["data"] = model.id
+    ann["metric"] = 1.0 - float(bool(ann["data"]))
     ann["message"] = "The model ID is {}".format(ann["data"])
     assert bool(model.id)
 
@@ -233,6 +234,7 @@ def test_ngam_presence(model):
     """
     ann = test_ngam_presence.annotation
     ann["data"] = get_ids(basic.find_ngam(model))
+    ann["metric"] = 1.0 - float(len(ann["data"]) == 1)
     ann["message"] = wrapper.fill(
         """A total of {} NGAM reactions could be identified:
         {}""".format(len(ann["data"]), truncate(ann["data"])))
@@ -268,7 +270,7 @@ def test_metabolic_coverage(model):
 @annotate(title="Total Compartments", format_type="count")
 def test_compartments_presence(model):
     """
-    Expect that more than two compartments are defined in the model.
+    Expect that two or more compartments are defined in the model.
 
     While simplified metabolic models may be perfectly viable, generally
     across the tree of life organisms contain at least one distinct
@@ -285,14 +287,14 @@ def test_compartments_presence(model):
     which should contain at least two sbml:compartment elements.
 
     """
-    # TODO: Fix the test in a later PR! Should expect 2 compartments instead!
     ann = test_compartments_presence.annotation
     assert hasattr(model, "compartments")
     ann["data"] = list(model.compartments)
+    ann["metric"] = 1.0 - float(len(ann["data"]) >= 2)
     ann["message"] = wrapper.fill(
         """A total of {:d} compartments are defined in the model: {}""".format(
             len(ann["data"]), truncate(ann["data"])))
-    assert len(ann["data"]) >= 3, ann["message"]
+    assert len(ann["data"]) >= 2, ann["message"]
 
 
 @annotate(title="Enzyme Complexes", format_type="count")
@@ -317,6 +319,7 @@ def test_protein_complex_presence(model):
     """
     ann = test_protein_complex_presence.annotation
     ann["data"] = get_ids(basic.find_protein_complexes(model))
+    ann["metric"] = len(ann["data"]) / len(model.reactions)
     ann["message"] = wrapper.fill(
         """A total of {:d} reactions are catalyzed by complexes defined
         through GPR rules in the model.""".format(len(ann["data"])))
@@ -574,6 +577,7 @@ def test_find_duplicate_metabolites_in_compartments(model):
     ann = test_find_duplicate_metabolites_in_compartments.annotation
     ann["data"] = basic.find_duplicate_metabolites_in_compartments(
         model)
+    ann["metric"] = len(ann["data"]) / len(model.metabolites)
     ann["message"] = wrapper.fill(
         """There are a total of {} metabolites in the model which
         have duplicates in the same compartment: {}""".format(
@@ -620,7 +624,7 @@ def test_find_reactions_with_partially_identical_annotations(model):
     assert total == 0, ann["message"]
 
 
-@annotate(title="Duplicate Reactions", format_type="count")
+@annotate(title="Duplicate Reactions", format_type="percent")
 def test_find_duplicate_reactions(model):
     """
     Expect there to be zero duplicate reactions.
@@ -649,6 +653,7 @@ def test_find_duplicate_reactions(model):
     """
     ann = test_find_duplicate_reactions.annotation
     ann["data"] = basic.find_duplicate_reactions(model)
+    ann["metric"] = len(ann["data"]) / len(model.reactions)
     ann["message"] = wrapper.fill(
         """Based on metabolites, directionality and compartment there are a
         total of {} reactions in the model which have duplicates: {}""".format(
@@ -676,14 +681,15 @@ def test_find_reactions_with_identical_genes(model):
 
     """
     ann = test_find_reactions_with_identical_genes.annotation
-    duplicates, total = basic.find_reactions_with_identical_genes(model)
-    ann["data"] = duplicates
-    ann["metric"] = total / len(model.reactions)
+    rxn_groups, num_dup = basic.find_reactions_with_identical_genes(model)
+    ann["data"] = rxn_groups
+    ann["metric"] = num_dup / len(model.reactions)
     ann["message"] = wrapper.fill(
         """Based only on equal genes there are {} different groups of
         identical reactions which corresponds to a total of {}
-        duplicated reactions in the model.""".format(len(duplicates), total))
-    assert total == 0, ann["message"]
+        duplicated reactions in the model.""".format(
+            len(rxn_groups), num_dup))
+    assert num_dup == 0, ann["message"]
 
 
 @annotate(title="Medium Components", format_type="count")
@@ -703,6 +709,10 @@ def test_find_medium_metabolites(model):
     """
     ann = test_find_medium_metabolites.annotation
     ann["data"] = basic.find_medium_metabolites(model)
+    num_ex = basic.find_external_metabolites(model)
+    ann["metric"] = len(ann["data"]) / num_ex
     ann["message"] = wrapper.fill(
         """There are a total of {} metabolites in the currently set medium
-        in the model: {}""".format(len(ann["data"]), truncate(ann["data"])))
+        (out of {} defined extra-cellular metabolites)
+        in the model: {}""".format(len(ann["data"]), num_ex,
+                                   truncate(ann["data"])))
diff --git a/memote/suite/tests/test_biomass.py b/memote/suite/tests/test_biomass.py
@@ -68,6 +68,7 @@ def test_biomass_presence(model):
     ann = test_biomass_presence.annotation
     ann["data"] = [
         rxn.id for rxn in helpers.find_biomass_reaction(model)]
+    ann["metric"] = 1.0 - float(len(ann["data"]) > 0)
     ann["message"] = wrapper.fill(
         """In this model {} the following biomass reactions were
         identified: {}""".format(
@@ -112,11 +113,11 @@ def test_biomass_consistency(model, reaction_id):
             which is outside of the 1e-03 margin from 1 mmol / g[CDW] / h.
             """.format(reaction_id, ann["data"][reaction_id])
         )
-    ann["metric"][reaction_id] = 1.0  # Placeholder value.
+    test_outcome = (1 - 1e-03) < ann["data"][reaction_id] < (1 + 1e-06)
+    ann["metric"][reaction_id] = 1.0 - float(test_outcome)
     # To account for numerical inaccuracies, a range from 1 - 1e-03 to
     # 1 + 1e-06 is implemented in the assertion check.
-    assert (1 - 1e-03) < ann["data"][reaction_id] < (1 + 1e-06), \
-        ann["message"][reaction_id]
+    assert test_outcome, ann["message"][reaction_id]
 
 
 @pytest.mark.biomass
@@ -137,13 +138,14 @@ def test_biomass_default_production(model, reaction_id):
     """
     ann = test_biomass_default_production.annotation
     ann["data"][reaction_id] = helpers.run_fba(model, reaction_id)
-    ann["metric"][reaction_id] = 1.0  # Placeholder value.
+    test_outcome = ann["data"][reaction_id] > 1E-07
+    ann["metric"][reaction_id] = 1.0 - float(test_outcome)
     ann["message"][reaction_id] = wrapper.fill(
         """Using the biomass reaction {} this is the growth rate (1/h) that
         can be achieved when the model is simulated on the provided
         default medium: {}
         """.format(reaction_id, ann["data"][reaction_id]))
-    assert ann["data"][reaction_id] > 0.0, ann["message"][reaction_id]
+    assert test_outcome, ann["message"][reaction_id]
 
 
 @pytest.mark.biomass
@@ -165,13 +167,14 @@ def test_biomass_open_production(model, reaction_id):
     ann = test_biomass_open_production.annotation
     helpers.open_boundaries(model)
     ann["data"][reaction_id] = helpers.run_fba(model, reaction_id)
-    ann["metric"][reaction_id] = 1.0  # Placeholder value.
+    test_outcome = ann["data"][reaction_id] > 1E-07
+    ann["metric"] = 1.0 - float(test_outcome)
     ann["message"][reaction_id] = wrapper.fill(
         """Using the biomass reaction {} this is the growth rate that can be
         achieved when the model is simulated on a complete medium i.e.
         with all the boundary reactions unconstrained: {}
         """.format(reaction_id, ann["data"][reaction_id]))
-    assert ann["data"][reaction_id] > 0.0, ann["message"][reaction_id]
+    assert test_outcome, ann["message"][reaction_id]
 
 
 @pytest.mark.biomass
@@ -309,17 +312,19 @@ def test_gam_in_biomass(model, reaction_id):
     """
     ann = test_gam_in_biomass.annotation
     reaction = model.reactions.get_by_id(reaction_id)
-    ann["data"][reaction_id] = biomass.gam_in_biomass(model, reaction)
-    ann["metric"][reaction_id] = 1.0  # Placeholder value.
-    if ann["data"][reaction_id]:
+    test_outcome = biomass.gam_in_biomass(model, reaction)
+    ann["data"][reaction_id] = test_outcome
+    test_outcome = ann["data"][reaction_id]
+    ann["metric"][reaction_id] = 1.0 - float(test_outcome)
+    if test_outcome:
         ann["message"][reaction_id] = wrapper.fill(
             """Yes, {} contains a term for growth-associated maintenance.
             """.format(reaction_id))
     else:
         ann["message"][reaction_id] = wrapper.fill(
             """No, {} does not contain a term for growth-associated
             maintenance.""".format(reaction_id))
-    assert ann["data"][reaction_id], ann["message"][reaction_id]
+    assert test_outcome, ann["message"][reaction_id]
 
 
 @pytest.mark.biomass
@@ -346,8 +351,9 @@ def test_fast_growth_default(model, reaction_id):
 
     """
     ann = test_fast_growth_default.annotation
-    ann["data"][reaction_id] = helpers.run_fba(model, reaction_id) > 2.81
-    ann["metric"][reaction_id] = 1.0  # Placeholder value.
+    test_outcome = helpers.run_fba(model, reaction_id) > 2.81
+    ann["data"][reaction_id] = test_outcome
+    ann["metric"][reaction_id] = 1.0 - float(test_outcome)
 
     if ann["data"][reaction_id]:
         ann["message"][reaction_id] = wrapper.fill(
@@ -362,7 +368,7 @@ def test_fast_growth_default(model, reaction_id):
             the provided default medium the growth rate is *lower* than that
             of the fastest bacteria. This is to be expected for
             a majority of organisms.""".format(reaction_id))
-    assert ann["data"][reaction_id] > 2.81, ann["message"][reaction_id]
+    assert test_outcome, ann["message"][reaction_id]
 
 
 @pytest.mark.biomass

diff --git a/memote/suite/tests/test_consistency.py b/memote/suite/tests/test_consistency.py
@@ -98,6 +98,8 @@ def test_detect_energy_generating_cycles(model, met):
         pytest.skip("This test has been skipped since metabolite {} could "
                     "not be found in the model.".format(met))
     ann["data"][met] = consistency.detect_energy_generating_cycles(model, met)
+    # Report the number of cycles scaled by the number of reactions.
+    ann["metric"][met] = len(ann["data"][met]) / len(model.reactions)
     ann["message"][met] = wrapper.fill(
         """The model can produce '{}' without requiring resources. This is
         caused by improperly constrained reactions leading to erroneous

diff --git a/memote/suite/tests/test_matrix.py b/memote/suite/tests/test_matrix.py
@@ -39,8 +39,9 @@ def test_absolute_extreme_coefficient_ratio(model, threshold=1e9):
     data on solver performance becomes available.
 
     Implementation:
-    Compose the S-Matrix, then calculate absolute coefficients and lastly use
-    the maximal value and minimal non-zero value to calculate the ratio
+    Compose the stoichiometric matrix, then calculate absolute coefficients and
+    lastly use the maximal value and minimal non-zero value to calculate the
+    ratio.
 
     """
     ann = test_absolute_extreme_coefficient_ratio.annotation
@@ -55,8 +56,7 @@ def test_absolute_extreme_coefficient_ratio(model, threshold=1e9):
     assert ann["data"] < threshold, ann["message"]
 
 
-@annotate(title="Independent Conservation Relations",
-          format_type="raw")
+@annotate(title="Independent Conservation Relations", format_type="raw")
 def test_number_independent_conservation_relations(model):
     """
     Show the number of independent conservation relations in the model.
@@ -67,14 +67,17 @@ def test_number_independent_conservation_relations(model):
     is system-specific.
 
     Implementation:
-    Compose and transpose the S-Matrix, then calculate the left nullspace
-    using an algorithm based on the singular value decomposition adapted from
+    Calculate the left null space, i.e., the null space of the transposed
+    stoichiometric matrix, using an algorithm based on the singular value
+    decomposition adapted from
     https://scipy.github.io/old-wiki/pages/Cookbook/RankNullspace.html
-    Then, return the estimated dimension of the left nullspace.
+    Then, return the estimated dimension of that null space.
 
     """
     ann = test_number_independent_conservation_relations.annotation
     ann["data"] = matrix.number_independent_conservation_relations(model)
+    # Report the number of ICR scaled by the number of metabolites.
+    ann["metric"] = ann["data"] / len(model.metabolites)
     ann["message"] = wrapper.fill(
         """The number of independent conservation relations is {}.""".format(
             ann["data"]))
@@ -89,13 +92,15 @@ def test_matrix_rank(model):
     calculated using singular value decomposition (SVD).
 
     Implementation:
-    Compose the S-Matrix, then estimate the rank, i.e. the dimension of the
-    column space, of a matrix. The algorithm used by this function is based on
-    the singular value decomposition of the S-Matrix.
+    Compose the stoichiometric matrix, then estimate the rank, i.e. the
+    dimension of the column space, of a matrix. The algorithm used by this
+    function is based on the singular value decomposition of the matrix.
 
     """
     ann = test_matrix_rank.annotation
     ann["data"] = matrix.matrix_rank(model)
+    # Report the rank scaled by the number of reactions.
+    ann["metric"] = ann["data"] / len(model.reactions)
     ann["message"] = wrapper.fill(
         """The rank of the S-Matrix is {}.""".format(ann["data"]))
 
@@ -107,16 +112,18 @@ def test_degrees_of_freedom(model):
 
     The degrees of freedom of the stoichiometric matrix, i.e., the number
     of 'free variables' is system specific and corresponds to the dimension
-    of the right nullspace of the matrix.
+    of the (right) null space of the matrix.
 
     Implementation:
-    Compose the S-Matrix, then calculate the dimensionality of the right
-    nullspace using the rank-nullity theorem outlined by
+    Compose the stoichiometric matrix, then calculate the dimensionality of the
+    null space using the rank-nullity theorem outlined by
     Alama, J. The Rank+Nullity Theorem. Formalized Mathematics 15, (2007).
 
     """
     ann = test_degrees_of_freedom.annotation
     ann["data"] = matrix.degrees_of_freedom(model)
+    # Report the degrees of freedom scaled by the number of reactions.
+    ann["metric"] = ann["data"] / len(model.reactions)
     ann["message"] = wrapper.fill(
         """The degrees of freedom of the S-Matrix are {}.""".format(
             ann["data"]))
diff --git a/memote/suite/tests/test_sbml.py b/memote/suite/tests/test_sbml.py
@@ -39,6 +39,8 @@ def test_sbml_level(sbml_version):
         sbml_version[0], sbml_version[1])
     ann = test_sbml_level.annotation
     ann["data"] = version_tag
+    outcome = sbml_version[:2] >= (3, 1)
+    ann["metric"] = 1.0 - float(outcome)
     ann["message"] = wrapper.fill(
         """The SBML file uses: {}""".format(ann["data"]))
     assert sbml_version[:2] >= (3, 1), ann["message"]
@@ -64,6 +66,7 @@ def test_fbc_presence(sbml_version):
     fbc_present = sbml_version[2] is not None
     ann = test_fbc_presence.annotation
     ann["data"] = fbc_present
+    ann["metric"] = 1.0 - float(fbc_present)
     if fbc_present:
         ann["message"] = wrapper.fill("The FBC package *is* used.")
     else:

diff --git a/memote/support/basic.py b/memote/support/basic.py
@@ -23,6 +23,8 @@
 from itertools import combinations
 from pylru import lrudecorator
 
+from cobra.medium import find_external_compartment
+
 import memote.support.helpers as helpers
 from memote.support.gpr_helpers import find_top_level_complex
 from memote.utils import filter_none
@@ -529,3 +531,9 @@ def find_medium_metabolites(model):
     """Return the list of metabolites ingested/excreted by the model."""
     return [met.id for rxn in model.medium
             for met in model.reactions.get_by_id(rxn).metabolites]
+
+
+def find_external_metabolites(model):
+    """Return all metabolites in the external compartment."""
+    ex_comp = find_external_compartment(model)
+    return [met for met in model.metabolites if met.compartment == ex_comp]
diff --git a/memote/support/consistency_helpers.py b/memote/support/consistency_helpers.py
@@ -144,9 +144,9 @@ def rank(matrix, atol=1e-13, rtol=0):
     return int((sigma >= tol).sum())
 
 
-def nullspace(matrix, atol=1e-13, rtol=0.0):
+def nullspace(matrix, atol=1e-13, rtol=0.0):  # noqa: D402
     """
-    Compute an approximate basis for the nullspace of a matrix.
+    Compute an approximate basis for the null space (kernel) of a matrix.
 
     The algorithm used by this function is based on the singular value
     decomposition of the given matrix.