Skip to content

Commit

Permalink
Merge pull request #693 from opencobra/fix-find-biomass
Browse files: browse the repository at this point in the history
Fix find biomass
  • Loading branch information
Midnighter committed Aug 10, 2020
2 parents 4c6739e + c7a9b47 commit cdeb804
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 38 deletions.
11 changes: 4 additions & 7 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,17 @@ root = true
[*]
charset = utf-8
indent_style = space
indent_size = 2
indent_size = 4
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
max_line_length = 80

[*.{py,pyi}]
indent_size = 4

[*.{ini,cfg}]
indent_size = 4
[*.{json,yml}]
indent_size = 2

[*.{md,rst}]
trim_trailing_whitespace = false

[Makefile]
indent_style = tab
indent_size = 4
1 change: 1 addition & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ History

Next Release
------------
* Change the logic for identifying biomass reactions to be less eager.

0.11.0 (2020-06-26)
-------------------
Expand Down
2 changes: 1 addition & 1 deletion src/memote/experimental/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def read_tabular(filename, dtype_conversion=None):
filename, sep="\t", dtype=dtype_conversion, encoding="utf-8"
)
elif "xls" in ext or "xlsx" in ext:
df = pd.read_excel(filename, dtype=dtype_conversion, encoding="utf-8")
df = pd.read_excel(filename, dtype=dtype_conversion)
# TODO: Add a function to parse ODS data into a pandas data frame.
else:
raise ValueError("Unknown file format '{}'.".format(ext))
Expand Down
127 changes: 98 additions & 29 deletions src/memote/support/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,42 +277,74 @@ def find_converting_reactions(model, pair):
return frozenset(hits)


def filter_biomass(
component,
sbo_term,
buzzwords,
):
def filter_sbo_term(component, sbo_term):
"""
Return True if the component matches a biomass description.
Return true if the component is annotated with the given SBO term.
Parameters
----------
component : cobra.Reaction or cobra.Metabolite
Either a reaction or a metabolite instance.
sbo_term : str
The term for either biomass production or biomass.
buzzwords : collection of patterns
One or more regular expression patterns to match against the name or
identifier of the component.
"""
return component.annotation.get("sbo", "") == sbo_term


def filter_match_name(component, buzzwords):
"""
Return whether the component's name matches a biomass description.
Notes
-----
Regex patterns are necessary here to prevent, for example, 'non-growth' from
matching.
Parameters
----------
component : cobra.Reaction or cobra.Metabolite
Either a reaction or a metabolite instance.
buzzwords : collection of regex patterns
One or more regular expression patterns to match against the name of the
component.
Returns
-------
bool
True if there was any match at all.
"""
if component.annotation is not None and 'sbo' in component.annotation and \
component.annotation['sbo'] == sbo_term:
return True
if component.name is not None:
name = component.name.lower()
if any(b.match(name) for b in buzzwords):
return True
if component.id is not None:
identifier = component.id.lower()
if any(b.match(identifier) for b in buzzwords):
return True
return False
if component.name is None:
return False
name = component.name.lower()
return any(b.match(name) for b in buzzwords)


def filter_identifier(component, buzzwords):
    """
    Return whether the component's identifier contains a biomass description.

    Notes
    -----
    We check substring presence here because identifiers are often prefixed
    with ``M_`` or ``R_``. Only the identifier is lowercased before the
    comparison, so the buzzwords are expected to be given in lower case.

    Parameters
    ----------
    component : cobra.Reaction or cobra.Metabolite
        Either a reaction or a metabolite instance.
    buzzwords : iterable of str
        One or more buzzwords that the identifier should contain.

    Returns
    -------
    bool
        True if any buzzword occurs in the lowercased identifier. False if
        nothing matches or if the component has no identifier.

    """
    # Mirror the ``None`` guard that ``filter_match_name`` applies to names so
    # that a component without an identifier is a non-match, not a crash.
    if component.id is None:
        return False
    identifier = component.id.lower()
    return any(b in identifier for b in buzzwords)


@lrudecorator(size=2)
Expand Down Expand Up @@ -342,22 +374,59 @@ def find_biomass_reaction(model):
Identified biomass reactions (if any).
"""
# Patterns are necessary here to prevent, for example, 'non-growth' from
# matching.
buzzwords = (
boundary = frozenset(model.boundary)

# 1.
candidates = {
r for r in model.reactions
if filter_sbo_term(r, 'SBO:0000629')
}
candidates.difference_update(boundary)
if candidates:
return sorted(candidates, key=attrgetter('id'))

# 2.
name_buzzwords = (
re.compile(r'\bbiomass'), re.compile(r'\bgrowth'), re.compile(r'bof')
)
id_buzzwords = ('biomass',)
candidates = {
r for r in model.reactions
if filter_biomass(r, 'SBO:0000629', buzzwords)
if filter_match_name(r, name_buzzwords) or
filter_identifier(r, id_buzzwords)
}
candidates.difference_update(boundary)
if candidates:
return sorted(candidates, key=attrgetter('id'))

# 3.
name_buzzwords = (re.compile(r'\bbiomass'),)
id_buzzwords = ('biomass',)
sbo_metabolites = {
m for m in model.metabolites
if filter_sbo_term(m, 'SBO:0000649')
}
buzzwords = (re.compile(r'\bbiomass'),)
metabolites = {
m for m in model.metabolites
if filter_biomass(m, 'SBO:0000649', buzzwords)
if filter_match_name(m, name_buzzwords) or
filter_identifier(m, id_buzzwords)
}
candidates.update({r for m in metabolites for r in m.reactions})
return sorted(candidates.difference(model.boundary), key=attrgetter('id'))
# Many metabolites may match 'SBO:0000649', we filter those further by name
# and ID.
sbo_metabolites.intersection_update(metabolites)
if sbo_metabolites:
candidates = {r for m in sbo_metabolites for r in m.reactions}
candidates.difference_update(boundary)
if candidates:
return sorted(candidates, key=attrgetter('id'))

# 4.
candidates = {r for m in metabolites for r in m.reactions}
candidates.difference_update(boundary)
if candidates:
return sorted(candidates, key=attrgetter('id'))

return []


@lrudecorator(size=2)
Expand Down
24 changes: 23 additions & 1 deletion tests/test_for_support/test_for_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,12 +456,33 @@ def biomass_sbo(base):

@register_with(MODEL_REGISTRY)
def biomass_metabolite(base):
"""Provide a model with a reaction that will be identified as biomass."""
"""Provide a model with a metabolite that will be identified as biomass."""
a = cobra.Metabolite("Protein_c", compartment="c")
b = cobra.Metabolite("DNA_c", compartment="c")
c = cobra.Metabolite("RNA_c", compartment="c")
d = cobra.Metabolite("GAM_c", compartment="c")
e = cobra.Metabolite("Biomass_c", compartment="c")
rxn1 = cobra.Reaction("R0001")
rxn1.add_metabolites({a: -1, b: -1, c: -1, d: -1, e: 1})
rxn2 = cobra.Reaction("EX_Biomass")
rxn2.add_metabolites({e: -1})
base.add_reactions([rxn1, rxn2])
return base


@register_with(MODEL_REGISTRY)
def biomass_sbo_metabolite(base):
"""Provide a model with a metabolite that will be identified as biomass."""
a = cobra.Metabolite("Protein_c", compartment="c")
a.annotation["sbo"] = "SBO:0000649"
b = cobra.Metabolite("DNA_c", compartment="c")
b.annotation["sbo"] = "SBO:0000649"
c = cobra.Metabolite("RNA_c", compartment="c")
c.annotation["sbo"] = "SBO:0000649"
d = cobra.Metabolite("GAM_c", compartment="c")
d.annotation["sbo"] = "SBO:0000649"
e = cobra.Metabolite("Biomass_c", compartment="c")
e.annotation["sbo"] = "SBO:0000649"
rxn1 = cobra.Reaction("R0001")
rxn1.add_metabolites({a: -1, b: -1, c: -1, d: -1, e: 1})
rxn2 = cobra.Reaction("EX_Biomass")
Expand Down Expand Up @@ -653,6 +674,7 @@ def test_largest_compartment_id_met(model, expected):
("biomass_buzzwords", 1),
("biomass_sbo", 1),
("biomass_metabolite", 1),
("biomass_sbo_metabolite", 1),
], indirect=["model"])
def test_find_biomass_reaction(model, expected):
"""
Expand Down

0 comments on commit cdeb804

Please sign in to comment.