In [1]:
import json
import urllib.request
from pymongo import MongoClient
import csv
import requests
from pprint import pprint
import xmltodict

In [5]:
class parseRheaReactions(object):
    """Look up Rhea reactions for an EC number.

    The EC-to-Rhea mapping is read from the ``rhea2EC`` collection of the
    ``rheaDB`` database on the default ``MongoClient()`` connection; call
    ``get_rhea_ec_tsv_data`` to (re)load that collection from the Rhea FTP
    site. Reaction details are fetched as XML from the Rhea REST service and
    flattened into plain dictionaries.
    """

    # Seconds to wait on the Rhea REST service before giving up, so a stalled
    # connection cannot hang the notebook indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, ec_number=None):
        """Connect to MongoDB and resolve the Rhea master IDs for ``ec_number``.

        Args:
            ec_number: EC number string (e.g. ``'4.1.99.1'``) used as the
                ``ID`` filter on the mapping collection.
        """
        self.client = MongoClient()
        self.rheadDB = self.client.rheaDB
        self.rhea2ec_collection = self.rheadDB.rhea2EC
        self.ec_number = ec_number
        self.endpoint = 'ftp://ftp.ebi.ac.uk/pub/databases/rhea/'
        self.restEndpoint = 'http://www.rhea-db.org/rest/1.0/ws/reaction/cmlreact/'
        # Resolved last so every other attribute exists before any lookup runs.
        self.rhea_masters = self.ec2rhea_trlst()

    def get_rhea_ec_tsv_data(self):
        """Replace the Mongo collection with the current ``rhea2ec.tsv`` mapping.

        The file is downloaded and parsed completely before the collection is
        touched, so a failed download leaves the existing mapping intact. The
        first row of the file supplies the field names for every document.

        Returns:
            int: number of documents inserted (0 when the file has no data rows).
        """
        ec_path = 'tsv/rhea2ec.tsv'
        # Read straight into memory instead of urlretrieve(), which would leave
        # a temporary file behind on every call.
        with urllib.request.urlopen(self.endpoint + ec_path) as response:
            tsv_text = response.read().decode('utf-8')

        rows = csv.reader(tsv_text.splitlines(), delimiter="\t")
        fieldnames = next(rows, None)  # None when the file is completely empty
        rheaECList = []
        if fieldnames is not None:
            # Blank lines come back as empty rows; skip them rather than
            # storing empty documents.
            rheaECList = [dict(zip(fieldnames, line)) for line in rows if line]

        self.rhea2ec_collection.delete_many({})
        if not rheaECList:
            # insert_many() rejects an empty document list.
            return 0
        insert = self.rhea2ec_collection.insert_many(rheaECList)
        return len(insert.inserted_ids)

    def ec2rhea_trlst(self):
        """Return the Rhea master IDs mapped to ``self.ec_number``.

        Returns:
            list: the ``MASTER_ID`` value of every mapping document whose
            ``ID`` equals ``self.ec_number`` (empty when nothing matches).
        """
        cursor = self.rhea2ec_collection.find({'ID': self.ec_number})
        return [doc['MASTER_ID'] for doc in cursor]

    @staticmethod
    def _parse_participants(participants):
        """Flatten a reactant/product node into ``{'name', 'chebi'}`` dicts."""
        # xmltodict yields a dict for a single child element and a list when
        # the element repeats; normalise to a list so both cases share one path.
        if isinstance(participants, dict):
            participants = [participants]
        return [
            {
                'name': participant['molecule']['name'],
                'chebi': participant['molecule']['identifier']['@value'],
            }
            for participant in participants
        ]

    def _build_reaction(self, parsed):
        """Turn a parsed reaction document into the flat reaction dictionary."""
        reaction = parsed['reaction']
        return {
            'rheaId': reaction['@title'],
            'ec_number': self.ec_number,
            'reactants': self._parse_participants(reaction['reactantList']['reactant']),
            'products': self._parse_participants(reaction['productList']['product']),
        }

    def _fetch_reaction(self, master_id):
        """Download and parse the reaction document for one Rhea master ID."""
        url = self.restEndpoint + str(master_id)
        r = requests.get(url=url, timeout=self.REQUEST_TIMEOUT)
        # Surface HTTP errors directly instead of failing later on an
        # unparsable error page.
        r.raise_for_status()
        # The JSON round-trip converts xmltodict's mapping types to plain
        # dict/list objects.
        parsed = json.loads(json.dumps(xmltodict.parse(r.text)))
        return self._build_reaction(parsed)

    def rhea2reactions(self):
        """Return the parsed reaction for every ID in ``self.rhea_masters``.

        Returns:
            list: one reaction dictionary per master ID, in the same order.
        """
        return [self._fetch_reaction(master_id) for master_id in self.rhea_masters]

    def rhea2reaction(self):
        """Return the parsed reaction for the first Rhea master ID.

        Only the first entry of ``self.rhea_masters`` is fetched; use
        ``rhea2reactions`` to retrieve all of them.

        Returns:
            dict or None: a dictionary with the keys ``rheaId``,
            ``ec_number``, ``reactants`` and ``products`` (each participant
            being ``{'name': ..., 'chebi': ...}``), or ``None`` when no master
            ID is known for the EC number.

        Raises:
            requests.HTTPError: if the REST service answers with an error
                status.
        """
        if not self.rhea_masters:
            return None
        return self._fetch_reaction(self.rhea_masters[0])
        
        
            

In [6]:
# Build a parser for EC 4.1.99.1. The constructor opens a default MongoClient()
# and immediately looks up the Rhea master IDs stored for this EC number, so the
# rhea2EC collection needs to be populated (get_rhea_ec_tsv_data) for any IDs
# to be found.
rhea = parseRheaReactions(ec_number='4.1.99.1')

In [7]:
# Fetch the reaction for the first matching Rhea master ID from the Rhea REST
# service; the parsed dictionary is displayed as the cell output.
rhea.rhea2reaction()

{'ec_number': '4.1.99.1',
 'products': [{'chebi': 'CHEBI:16881', 'name': 'indole'},
  {'chebi': 'CHEBI:28938', 'name': 'NH4(+)'},
  {'chebi': 'CHEBI:15361', 'name': 'pyruvate'}],
 'reactants': [{'chebi': 'CHEBI:57912', 'name': 'L-tryptophan'},
  {'chebi': 'CHEBI:15377', 'name': 'H2O'}],
 'rheaId': 'RHEA:19553'}