In [1]:
# Star import stays first so the explicit imports below win on any name clash.
from jpype import *

import csv
import gzip
import shutil
import urllib.request
from pprint import pprint

In [2]:
class ParseBioPax(object):
    """Load a Rhea BioPAX (OWL) file through Paxtools/JPype together with an EC/Rhea mapping table."""

    # Remote locations used by download_ftp_files().
    BIOPAX_FTP_URL = 'ftp://ftp.ebi.ac.uk/pub/databases/rhea/biopax/rhea-biopax_lite.owl.gz'
    EC2RHEA_FTP_URL = 'ftp://ftp.ebi.ac.uk/pub/databases/rhea/tsv/ec-rhea-dir.tsv'
    # Options handed to the JVM when it has to be started by this class.
    JVM_ARGS = ("-ea", "-Xmx1g", "-Djava.class.path=paxtools-5.0.1.jar")

    def __init__(self, biopax_path=None, ec2rhea_path=None, download_files=False):
        """
        Obtain and load the Rhea BioPAX file to facilitate parsing reactions into python dictionaries

        :param biopax_path: local path to the BioPAX owl file
        :param ec2rhea_path: local path to the EC number / Rhea mapping file
        :param download_files: set to true if you want to fetch a fresh copy or don't have the files locally
        """
        self.biopax_path = biopax_path
        self.ec2rhea_path = ec2rhea_path
        self.rhea2ec_map = {}

        # Fetch before parsing: the files must exist (and be current) by the
        # time the model and the mapping are read below.
        if download_files:
            self.download_ftp_files()

        self.model = self.get_biopax_model()
        self.ec_2_rhea_directional_mapping()

    def download_ftp_files(self):
        """
        Download the BioPAX archive and the mapping table to the configured local paths.

        The BioPAX file is published gzip-compressed, so it is decompressed
        while being written to ``biopax_path``.

        :raises ValueError: if either local path has not been set
        """
        if not self.biopax_path or not self.ec2rhea_path:
            raise ValueError("biopax_path and ec2rhea_path must be set before downloading")

        # Without a target filename urlretrieve stores the archive in a temp file.
        compressed_path, _headers = urllib.request.urlretrieve(self.BIOPAX_FTP_URL)
        try:
            with gzip.open(compressed_path, 'rb') as src, open(self.biopax_path, 'wb') as dst:
                shutil.copyfileobj(src, dst)
        finally:
            # Removes the temp file left behind by the urlretrieve call above.
            urllib.request.urlcleanup()

        urllib.request.urlretrieve(self.EC2RHEA_FTP_URL, self.ec2rhea_path)

    def ec_2_rhea_directional_mapping(self):
        """
        Fill ``self.rhea2ec_map`` from the tab-separated mapping file.

        Each row contributes ``row[1] -> row[2]``; rows with fewer than three
        columns (e.g. blank lines) are skipped.

        :return: the populated ``self.rhea2ec_map`` dictionary
        """
        # NOTE(review): the notebook output shows ec_number == '1' for every
        # reaction, which does not look like an EC number - confirm that
        # column 2 (rather than another column) really holds the EC value.
        with open(self.ec2rhea_path, 'r', newline='') as ec2r:
            for row in csv.reader(ec2r, delimiter="\t"):
                if len(row) < 3:
                    continue
                self.rhea2ec_map[row[1]] = row[2]
        return self.rhea2ec_map

    def get_biopax_model(self):
        """
        Parse the OWL file at ``self.biopax_path`` with Paxtools.

        :return: the object returned by ``SimpleIOHandler.convertFromOWL``
        """
        # A JVM can be started only once per process; re-running the cell
        # must not try to start a second one.
        if not isJVMStarted():
            startJVM(getDefaultJVMPath(), *self.JVM_ARGS)
        paxPkg = JPackage("org.biopax.paxtools")
        javaIO = JPackage("java.io")
        io = paxPkg.io.SimpleIOHandler(paxPkg.model.BioPAXLevel.L2)
        fileIS = javaIO.FileInputStream(self.biopax_path)
        try:
            return io.convertFromOWL(fileIS)
        finally:
            # Release the Java file handle whether or not parsing succeeded.
            fileIS.close()

    def get_reaction_by_rhea(self, rheaID):
        """
        input: rhea Id (appended to the identifiers.org rhea URI)
        return: result of the model's getByID lookup for that URI
        """
        rhea_uri = "http://identifiers.org/rhea/{}".format(rheaID)
        return self.model.getByID(rhea_uri)

In [3]:
class ParseReaction(object):
    """Flatten one BioPAX reaction element into a plain python dictionary (``parsed_rxn``)."""

    def __init__(self, bpmodel, rxn, ec):
        """
        :param bpmodel: the loaded BioPAX model (stored, not otherwise used here)
        :param rxn: reaction element to parse
        :param ec: value stored under 'ec_number' in the parsed dictionary
        """
        self.model = bpmodel
        self.rxn = rxn
        self.ec = ec
        self.parsed_rxn = {
            'rhea_id': self.get_rxn_rheaID(),
            'ec_number': self.ec,
            'name': self.get_rxn_name(),
            'lefts': self.get_rxn_participants('LEFT'),
            'rights': self.get_rxn_participants('RIGHT'),
            'direction': self.get_rxn_direction()
        }

    def get_rxn_rheaID(self):
        """Return the last path segment of the reaction URI."""
        rhea_uri = self.rxn.getUri()
        return rhea_uri.split('/')[-1]

    def get_rxn_name(self):
        """Return the reaction's NAME value."""
        return self.rxn.getNAME()

    def get_rxn_participants(self, direction):
        """
        Collect the participants on one side of the reaction.

        :param direction: 'LEFT' or 'RIGHT'
        :return: list of single-entry dicts ``{participant name: xref id}``
        :raises ValueError: if direction is neither 'LEFT' nor 'RIGHT'
        """
        if direction == 'LEFT':
            participants = self.rxn.getLEFT()
        elif direction == 'RIGHT':
            participants = self.rxn.getRIGHT()
        else:
            raise ValueError(
                "direction must be 'LEFT' or 'RIGHT', got {!r}".format(direction))

        parts_list = []
        for part in participants:
            pe = part.getPHYSICAL_ENTITY()
            peName = pe.getNAME()
            peID = pe.xREF.toString()
            # NOTE(review): the notebook output still carries a 'ChEBI:' prefix,
            # so this upper-case replace does not match those ids - confirm
            # whether the prefix is meant to be stripped.
            peID = peID.replace('CHEBI:', '').lstrip('[').rstrip(']')
            parts_list.append({peName: peID})
        return parts_list

    def get_rxn_direction(self):
        """
        Return the text after '=' in the reaction comment containing
        'RHEA:Direction', or None when no such comment exists.
        If several comments match, the last one wins.
        """
        # Read this instance's reaction, not whatever a global named `rxn` holds.
        direction = None
        for com in self.rxn.getCOMMENT().toArray():
            if 'RHEA:Direction' in com:
                fields = com.split('=')
                # Ignore a malformed comment that has no '=' separator.
                if len(fields) > 1:
                    direction = fields[1]
        return direction


    
    
    

In [4]:
# Build the parser from the local copies of the Rhea files (no download requested).
bp_obj = ParseBioPax(
    biopax_path='data/rhea-biopax_lite.owl',
    ec2rhea_path='data/ec-rhea-dir.tsv',
)

In [5]:
# Keep a notebook-level handle on the mapping the parser filled in during construction.
rhea2ec = bp_obj.rhea2ec_map

In [6]:
# Preview: parse and pretty-print the first 99 entries of the mapping.
count = 0
for rhea, ec in rhea2ec.items():
    count += 1
    if count >= 100:
        # Nothing is printed past this point, so stop walking the mapping.
        break
    rxn = bp_obj.get_reaction_by_rhea(rheaID=rhea)
    if rxn is None:
        # No element under this id in the model; skip it instead of failing.
        continue
    prxn = ParseReaction(bpmodel=bp_obj.model, rxn=rxn, ec=ec)
    pprint(prxn.parsed_rxn)
     


{'direction': 'left to right',
 'ec_number': '1',
 'lefts': [{'(1S,2S,4R)-endo-fenchol': 'ChEBI:15405'},
           {'NADP(+)': 'ChEBI:58349'}],
 'name': '(1S,2S,4R)-endo-fenchol + NADP(+) => (1S,4R)-fenchone + H(+) + NADPH',
 'rhea_id': '32336',
 'rights': [{'NADPH': 'ChEBI:57783'},
            {'H(+)': 'ChEBI:15378'},
            {'(1S,4R)-fenchone': 'ChEBI:165'}]}
{'direction': 'left to right',
 'ec_number': '1',
 'lefts': [{'D-glyceraldehyde 3-phosphate': 'ChEBI:59776'},
           {'D-sedoheptulose 7-phosphate': 'ChEBI:57483'}],
 'name': 'D-glyceraldehyde 3-phosphate + D-sedoheptulose 7-phosphate => '
         'D-ribose 5-phosphate + D-xylulose 5-phosphate',
 'rhea_id': '10509',
 'rights': [{'D-ribose 5-phosphate': 'ChEBI:58273'},
            {'D-xylulose 5-phosphate': 'ChEBI:57737'}]}
{'direction': 'left to right',
 'ec_number': '1',
 'lefts': [{'H2O': 'ChEBI:15377'},
           {'dimethylamine': 'ChEBI:58040'},
           {'oxidized [electron-transfer flavoprotein]': 'ChEBI:5769

            {'diphosphate': 'ChEBI:33019'},
            {'4-coumaroyl-CoA': 'ChEBI:57355'}]}
{'direction': 'left to right',
 'ec_number': '1',
 'lefts': [{'1-(5-phospho-beta-D-ribosyl)-ATP': 'ChEBI:73183'},
           {'diphosphate': 'ChEBI:33019'}],
 'name': '1-(5-phospho-beta-D-ribosyl)-ATP + diphosphate => '
         '5-phospho-alpha-D-ribose 1-diphosphate + ATP',
 'rhea_id': '18474',
 'rights': [{'5-phospho-alpha-D-ribose 1-diphosphate': 'ChEBI:58017'},
            {'ATP': 'ChEBI:30616'}]}
{'direction': 'left to right',
 'ec_number': '1',
 'lefts': [{'NADP(+)': 'ChEBI:58349'},
           {'3alpha-hydroxyglycyrrhetinate': 'ChEBI:57729'}],
 'name': '3alpha-hydroxyglycyrrhetinate + NADP(+) => 3-oxoglycyrrhetinate + '
         'H(+) + NADPH',
 'rhea_id': '20817',
 'rights': [{'3-oxoglycyrrhetinate': 'ChEBI:57760'},
            {'H(+)': 'ChEBI:15378'},
            {'NADPH': 'ChEBI:57783'}]}
{'direction': 'left to right',
 'ec_number': '1',
 'lefts': [{'acetyl-CoA': 'ChEBI:57288'},
     