Skip to content

Commit

Permalink
Merge pull request #546 from opencobra/fix-complex
Browse files Browse the repository at this point in the history
Fix GPR complex discovery
  • Loading branch information
Midnighter committed Dec 21, 2018
2 parents 15e650c + 11abdc4 commit 1fa2b04
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 52 deletions.
1 change: 1 addition & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ History

Next Release
------------
* Refactor the test for enzyme complexes to only return an estimated size.

0.8.9 (2018-12-11)
------------------
Expand Down
6 changes: 3 additions & 3 deletions memote/suite/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,10 @@ def test_protein_complex_presence(read_only_model):
This might also be a relevant metric for other organisms.
"""
ann = test_protein_complex_presence.annotation
ann["data"] = list(basic.find_protein_complexes(read_only_model))
ann["data"] = get_ids(basic.find_protein_complexes(read_only_model))
ann["message"] = wrapper.fill(
"""A total of {:d} protein complexes are defined through GPR rules in
the model.""".format(len(ann["data"])))
"""A total of {:d} reactions are catalyzed by complexes defined
through GPR rules in the model.""".format(len(ann["data"])))
assert len(ann["data"]) >= 1, ann["message"]


Expand Down
24 changes: 12 additions & 12 deletions memote/support/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from pylru import lrudecorator

import memote.support.helpers as helpers
from memote.support.gpr_helpers import find_top_level_complex
from memote.utils import filter_none

__all__ = ("check_metabolites_formula_presence",)
Expand Down Expand Up @@ -191,7 +192,7 @@ def calculate_metabolic_coverage(model):

def find_protein_complexes(model):
"""
Find tuples of gene identifiers that comprise functional enzyme complexes.
Find reactions that are catalyzed by at least a heterodimer.
Parameters
----------
Expand All @@ -200,20 +201,19 @@ def find_protein_complexes(model):
Returns
-------
set
A set of tuples of genes that are connected via "AND" in
gene-protein-reaction rules (GPR) and thus constitute protein
complexes.
list
Reactions whose gene-protein-reaction association contains at least one
logical AND combining different gene products (heterodimer).
"""
protein_complexes = set()
complexes = []
for rxn in model.reactions:
if rxn.gene_reaction_rule:
for candidate in helpers.find_functional_units(
rxn.gene_reaction_rule):
if len(candidate) >= 2:
protein_complexes.add(tuple(candidate))
return protein_complexes
if not rxn.gene_reaction_rule:
continue
size = find_top_level_complex(rxn.gene_reaction_rule)
if size >= 2:
complexes.append(rxn)
return complexes


@lrudecorator(size=2)
Expand Down
130 changes: 130 additions & 0 deletions memote/support/gpr_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# -*- coding: utf-8 -*-

# Copyright 2016 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Helper classes and functions for analyzing GPR associations."""


from __future__ import absolute_import

import ast
import logging
import re

__all__ = ("find_top_level_complex",)


logger = logging.getLogger(__name__)
logical_and = re.compile(r"(and)|([&]{1,2})", flags=re.IGNORECASE)
logical_or = re.compile(r"(or)|([|]{1,2})", flags=re.IGNORECASE)
escape_chars = re.compile(r"[.-]")


class DummySet(object):
"""Define a faux set implementation."""

def add(self, value):
"""Do nothing on adding a value."""
pass


class GPRVisitor(ast.NodeVisitor):
"""
Implement a visitor that walks all nodes of an expression tree.
An abstract syntax tree (AST), generated using ``ast.parse`` on a
gene-protein-reaction (GPR) association, is traversed and the unique left
and right elements (genes) of the top level logical AND are recorded.
Due to using the ``ast`` module, precedence of Boolean operators and general
syntax requirements follow the Python standard.
Attributes
----------
left : set
After walking an AST, this attribute contains unique elements of the
left branch of the top level binary operator AND. May be empty.
right : set
After walking an AST, this attribute contains unique elements of the
right branch of the top level binary operator AND. May be empty.
Examples
--------
>>> import ast
>>> expression = ast.parse("g1 or (g2 and g3)")
>>> walker = GPRVisitor()
>>> walker.visit(expression)
>>> walker.left
{'g2'}
>>> walker.right
{'g3'}
"""

def __init__(self, **kwargs):
"""Initialize a GPR visitor."""
super(GPRVisitor, self).__init__(**kwargs)
self.left = set()
self.right = set()
self._current = DummySet()
self._is_top = True

def generic_visit(self, node):
logger.debug("%s", type(node).__name__)
super(GPRVisitor, self).generic_visit(node)

def visit_BoolOp(self, node):
"""Set up recording of elements with this hook."""
if self._is_top and isinstance(node.op, ast.And):
self._is_top = False
self._current = self.left
self.visit(node.values[0])
self._current = self.right
for successor in node.values[1:]:
self.visit(successor)
else:
self.generic_visit(node)

def visit_Name(self, node):
"""Record node names on the current branch."""
self._current.add(node.id)


def find_top_level_complex(gpr):
"""
Find unique elements of both branches of the top level logical AND.
Parameters
----------
gpr : str
The gene-protein-reaction association as a string.
Returns
-------
int
The size of the symmetric difference between the set of elements to
the left of the top level logical AND and the right set.
"""
logger.debug("%r", gpr)
conform = logical_and.sub("and", gpr)
conform = logical_or.sub("or", conform)
conform = escape_chars.sub("_", conform)
expression = ast.parse(conform)
walker = GPRVisitor()
walker.visit(expression)
return len(walker.left ^ walker.right)
24 changes: 0 additions & 24 deletions memote/support/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import numpy as np
import pandas as pd
from six import iteritems, itervalues
from sympy import expand
from importlib_resources import open_text
from cobra.exceptions import Infeasible
from cobra.medium.boundary_types import find_boundary_types
Expand Down Expand Up @@ -471,29 +470,6 @@ def find_interchange_biomass_reactions(model, biomass=None):
return boundary | transporters | biomass


def find_functional_units(gpr_str):
"""
Return an iterator of gene IDs grouped by boolean rules from the gpr_str.
The gpr_str is first transformed into an algebraic expression, replacing
the boolean operators 'or' with '+' and 'and' with '*'. Treating the
gene IDs as sympy.symbols this allows a mathematical expansion of the
algebraic expression. The expanded form is then split again producing sets
of gene IDs that in the gpr_str had an 'and' relationship.
Parameters
----------
gpr_str : string
A string consisting of gene ids and the boolean expressions 'and'
and 'or'.
"""
algebraic_form = re.sub('[Oo]r', '+', re.sub('[Aa]nd', '*', gpr_str))
expanded = str(expand(algebraic_form))
for unit in expanded.replace('+', ',').split(' , '):
yield unit.split('*')


def run_fba(model, rxn_id, direction="max", single_value=True):
"""
Return the solution of an FBA to a set objective function.
Expand Down
6 changes: 3 additions & 3 deletions tests/test_for_support/test_for_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,10 +639,10 @@ def test_compartments_presence(model, boolean):
@pytest.mark.parametrize("model, num", [
("gpr_present", 0),
("gpr_missing", 0),
("gpr_present_complex", 4)
("gpr_present_complex", 3)
], indirect=["model"])
def test_enzyme_complex_presence(model, num):
"""Expect amount of enzyme complexes to be identified correctly."""
def test_find_protein_complexes(model, num):
"""Expect the number of reactions to be identified correctly."""
assert len(basic.find_protein_complexes(model)) == num


Expand Down
34 changes: 34 additions & 0 deletions tests/test_for_support/test_for_gpr_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-

# Copyright 2016 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Ensure the expected functioning of ``memote.support.gpr_helpers``."""

from __future__ import absolute_import

import pytest

from memote.support.gpr_helpers import find_top_level_complex


@pytest.mark.parametrize("gpr, expected", [
("gene1 and gene2", 2),
("gene1 or gene2", 0),
("gene1 and (gene2 or gene3)", 3)
])
def test_find_functional_units(gpr, expected):
"""Expect the size of the unique elements in a complex to be correct."""
assert find_top_level_complex(gpr) == expected
10 changes: 0 additions & 10 deletions tests/test_for_support/test_for_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,16 +481,6 @@ def test_find_transport_reactions(model, num):
assert len(helpers.find_transport_reactions(model)) == num


@pytest.mark.parametrize("gpr_str, expected", [
("gene1 and gene2", [["gene1", "gene2"]]),
("gene1 or gene2", [["gene1"], ["gene2"]]),
("gene1 and (gene2 or gene3)", [["gene1", "gene2"], ["gene1", "gene3"]])
])
def test_find_functional_units(gpr_str, expected):
"""Expect type of enzyme complexes to be identified correctly."""
assert list(helpers.find_functional_units(gpr_str)) == expected


@pytest.mark.parametrize("model, met_pair, expected", [
("converting_reactions", ("MNXM3", "MNXM7"), 2),
("converting_reactions", ("MNXM51", "MNXM51"), 1)
Expand Down

0 comments on commit 1fa2b04

Please sign in to comment.