In [695]:
import importlib
import os
from io import StringIO
from urllib.parse import quote

import lxml
import pandas as pd
import requests

from cache import cached_reqest
from cache import generic_cached_reqest

Add the terms to search for on the BRENDA website here:

In [696]:
# Search keywords; each one is sent to the BRENDA searches as its own query.
terms = [
    'FAD',
    'FMN',
    'flavoenzyme',
    'flavin',
    'flavoprotein',
]

In [697]:
def brenda_request(url):
    """Fetch ``url`` through ``cached_reqest`` using browser-like headers.

    The request carries a desktop Chrome ``User-Agent`` string and an
    ``X-Requested-With: XMLHttpRequest`` header.

    Args:
        url: Address to request.

    Returns:
        Whatever ``cached_reqest`` returns for this URL; the callers in this
        notebook treat it as text.
    """
    user_agent = (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/50.0.2661.75 Safari/537.36"
    )
    browser_headers = {
        "User-Agent": user_agent,
        "X-Requested-With": "XMLHttpRequest",
    }
    return cached_reqest(url, headers=browser_headers)
    

In [698]:
def search_ligands_brenda(term):
    """Query BRENDA's ``result_download.php`` ligand search (``a=13``) for ``term``.

    Args:
        term: Search text. It is percent-encoded before being placed in the
            query string, so spaces, ``&``, ``#`` and similar characters
            cannot corrupt the URL. Plain alphanumeric terms are unchanged.

    Returns:
        pandas.DataFrame with the columns ``Ligand``, ``EC Number``, ``Role``,
        ``Id``, ``Structure`` and ``Discard``, one row per line of the
        tab-separated response.
    """
    columns = ['Ligand', 'EC Number', 'Role', 'Id', 'Structure', 'Discard']
    # safe='' encodes every reserved character, including '/'.
    encoded_term = quote(str(term), safe='')
    url = f'https://www.brenda-enzymes.org/result_download.php?a=13&RN=&RNV=1&os=1&pt=&FNV=1&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&V[1]=1&V[2]=2&W[3]={encoded_term}&T[3]=2&nolimit=1'
    response = brenda_request(url)
    # NOTE(review): passing `names=` makes pandas read every line as data, so
    # if the download starts with a header line it becomes the first row.
    # Confirm against a real response.
    df = pd.read_csv(StringIO(response), sep='\t', names=columns)
    return df

In [714]:
def search_enzymes_brenda(term):
    """Query BRENDA's ``result_download.php`` enzyme search (``a=9``) for ``term``.

    Args:
        term: Search text. It is percent-encoded before being placed in the
            query string, so spaces, ``&``, ``#`` and similar characters
            cannot corrupt the URL. Plain alphanumeric terms are unchanged.

    Returns:
        pandas.DataFrame with the columns ``EC Number``, ``Recommended Name``,
        ``Synonyms``, ``Commentary`` and ``Discard``, one row per line of the
        tab-separated response.
    """
    columns = ['EC Number', 'Recommended Name', 'Synonyms', 'Commentary', 'Discard']
    # safe='' encodes every reserved character, including '/'.
    encoded_term = quote(str(term), safe='')
    url = f'https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]={encoded_term}&T[2]=2&nolimit=1'
    response = brenda_request(url)
    # NOTE(review): passing `names=` makes pandas read every line as data, so
    # if the download starts with a header line it becomes the first row.
    # Confirm against a real response.
    df = pd.read_csv(StringIO(response), sep='\t', names=columns)
    return df

In [715]:
def brenda_get_enzyme_data(id):
    """Request the BRENDA ``enzyme.php`` page for one EC number.

    Args:
        id: Value inserted as the ``ecno`` query parameter. The name shadows
            the ``id`` builtin but is kept so keyword callers keep working.

    Returns:
        The result of ``brenda_request`` for the page URL.
    """
    return brenda_request(
        f'https://www.brenda-enzymes.info/enzyme.php?ecno={id}#NATURAL%20SUBSTRATE'
    )

In [716]:
def search_all_terms(terms, search_fn):
    """Run ``search_fn`` for every term and stack the non-empty results.

    Args:
        terms: Iterable of search terms.
        search_fn: Callable taking one term and returning a DataFrame.

    Returns:
        pandas.DataFrame made by concatenating every result that has at least
        two rows. If no term produced such a result (or ``terms`` is empty) an
        empty DataFrame is returned instead of letting ``pd.concat`` raise.
    """
    # Log the function's name rather than its repr, which contains a memory address.
    fn_name = getattr(search_fn, '__name__', repr(search_fn))

    hits = []
    for term in terms:
        df = search_fn(term)
        # Results with fewer than two rows count as "nothing found";
        # presumably the first row is the header line of the download — TODO confirm.
        if len(df) < 2:
            print(f'[!] skipping search for {term} since nothing was found')
            continue

        hits.append(df)
        print(f'len of {term} in {fn_name} is:{len(df)}')

    if not hits:
        # pd.concat([]) raises ValueError("No objects to concatenate").
        return pd.DataFrame()
    return pd.concat(hits)

In [735]:
def get_all_ecs(terms):
    """Collect the distinct ``EC Number`` values found by both BRENDA searches.

    Runs the enzyme search and then the ligand search for every term, and
    merges the two ``EC Number`` columns into one set.

    Args:
        terms: Iterable of search terms handed to ``search_all_terms``.

    Returns:
        set of the values found in either result's ``EC Number`` column.
    """
    enzyme_hits = search_all_terms(terms, search_enzymes_brenda)
    ecs_from_enzymes = set(enzyme_hits['EC Number'])

    ligand_hits = search_all_terms(terms, search_ligands_brenda)
    ecs_from_ligands = set(ligand_hits['EC Number'])

    ec_set = ecs_from_enzymes.union(ecs_from_ligands)
    print(f'total ecs found: {len(ec_set)}')
    return ec_set

# SOAP helper 

In [691]:
from zeep import Client
import hashlib

# BRENDA SOAP credentials. Set BRENDA_EMAIL / BRENDA_PASSWORD in the
# environment to use your own account; the values below are only fallbacks so
# the notebook behaves as before when the variables are unset.
email = os.environ.get('BRENDA_EMAIL', 'si485@dispostable.com')

wsdl = "https://www.brenda-enzymes.org/soap/brenda_zeep.wsdl"
# The SOAP helpers send the SHA-256 hex digest of the password, so the plain
# text is hashed immediately and never kept in a notebook variable.
password = hashlib.sha256(
    os.environ.get('BRENDA_PASSWORD', 'si485@dispostable').encode("utf-8")
).hexdigest()
client = Client(wsdl)

In [683]:
# EC number used as the example input when trying out the SOAP helpers below.
poster_child = '1.14.13.2'

In [682]:
def brendaSOAP(parameters, fn):
    """Call the BRENDA SOAP operation ``fn`` through the request cache.

    Args:
        parameters: Sequence of positional SOAP arguments (email, password
            hash, then the field filters).
        fn: Name of the operation, looked up as ``client.service[fn]``.

    Returns:
        Whatever ``generic_cached_reqest`` returns for this request.
    """
    # `generic_cached_reqest` is imported by name at the top of the notebook.
    # The module object `cache` is never imported in the visible cells, so
    # `cache.generic_cached_reqest` raises NameError on a fresh kernel.
    return generic_cached_reqest(
        request_name=f'brenda_{fn}',
        params=parameters,
        request_fn=client.service[fn],
    )

def getSynonyms(ecNumber):
    """Run the ``getSynonyms`` SOAP operation for one EC number.

    Args:
        ecNumber: EC number, e.g. ``'1.14.13.2'``.

    Returns:
        The result of ``brendaSOAP`` for this query.
    """
    ec_filter = f"ecNumber*{ecNumber}"
    field_specs = ('organism*', 'synonyms*', 'commentary*', 'literature*')
    return brendaSOAP((email, password, ec_filter) + field_specs, 'getSynonyms')

def getReactions(ecNumber):
    """Run the ``getReaction`` SOAP operation for one EC number.

    Note the operation name is singular although this helper's name is plural.

    Args:
        ecNumber: EC number, e.g. ``'1.14.13.2'``.

    Returns:
        The result of ``brendaSOAP`` for this query.
    """
    ec_filter = f"ecNumber*{ecNumber}"
    field_specs = ("reaction*", "commentary*", "literature*", "organism*")
    return brendaSOAP((email, password, ec_filter) + field_specs, 'getReaction')

def getSystematicName(ecNumber):
    """Run the ``getSystematicName`` SOAP operation for one EC number.

    Args:
        ecNumber: EC number, e.g. ``'1.14.13.2'``.

    Returns:
        The result of ``brendaSOAP`` for this query.
    """
    ec_filter = f"ecNumber*{ecNumber}"
    field_specs = ("organism*", "systematicName*")
    return brendaSOAP((email, password, ec_filter) + field_specs, 'getSystematicName')

def getSubstrate(ecNumber):
    """Run the ``getSubstrate`` SOAP operation for one EC number.

    Args:
        ecNumber: EC number, e.g. ``'1.14.13.2'``.

    Returns:
        The result of ``brendaSOAP`` for this query.
    """
    ec_filter = f"ecNumber*{ecNumber}"
    field_specs = ("organism*", "substrate*", "reactionPartners*", "ligandStructureId*")
    return brendaSOAP((email, password, ec_filter) + field_specs, 'getSubstrate')

def getProduct(ecNumber):
    """Run the ``getProduct`` SOAP operation for one EC number.

    Args:
        ecNumber: EC number, e.g. ``'1.14.13.2'``.

    Returns:
        The result of ``brendaSOAP`` for this query.
    """
    ec_filter = f"ecNumber*{ecNumber}"
    field_specs = ("organism*", "product*", "reactionPartners*", "ligandStructureId*")
    return brendaSOAP((email, password, ec_filter) + field_specs, 'getProduct')

def getPdb(ecNumber):
    """Run the ``getPdb`` SOAP operation for one EC number.

    Args:
        ecNumber: EC number, e.g. ``'1.14.13.2'``.

    Returns:
        The result of ``brendaSOAP`` for this query.
    """
    ec_filter = f"ecNumber*{ecNumber}"
    field_specs = ("organism*", "pdb*")
    return brendaSOAP((email, password, ec_filter) + field_specs, 'getPdb')

In [738]:
# Union of the EC numbers returned by the enzyme and ligand searches for every term.
all_ecs = get_all_ecs(terms)

Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]=FAD&T[2]=2&nolimit=1
len of FAD in <function search_enzymes_brenda at 0x1253c36a8> is:104
Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]=FMN&T[2]=2&nolimit=1
len of FMN in <function search_enzymes_brenda at 0x1253c36a8> is:41
Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]=flavoenzyme&T[2]=2&nolimit=1
[!] skipping search for flavoenzyme since nothing was found
Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]=flavin&T[2]=2&nolimit=1
len of flavin in <function search_enzymes_brenda at 0x1253c36a8> is:69
Getting cached data...
https://www.brenda-enzymes.org/result_download

In [740]:
# Try each SOAP helper on the example EC number.
# NOTE(review): getReactions is called twice below; the second call looks
# redundant — confirm before removing.
getReactions( poster_child)

getSynonyms( poster_child)

getReactions( poster_child)

getSystematicName( poster_child)

getSubstrate( poster_child)

getProduct( poster_child)

getPdb( poster_child)

Making a request for new data...
brenda_getReaction, params: ["si485@dispostable.com", "68a8b3ecc5cecaa6b4bf94e0c89ff02d5d77366ca90f1774aae28e37f3b202a7", "ecNumber*1.14.13.2", "reaction*", "commentary*", "literature*", "organism*"]


[{
     'reaction': '4-hydroxybenzoate + NADPH + H+ + O2 = 3,4-dihydroxybenzoate + NADP+ + H2O',
     'commentary': 'bi uni uni uni ping-pong mechanism',
     'literature': [
         390023
     ],
     'organism': 'Pseudomonas fluorescens',
     'ecNumber': '1.14.13.2'
 },
 {
     'reaction': '4-hydroxybenzoate + NADPH + H+ + O2 = 3,4-dihydroxybenzoate + NADP+ + H2O',
     'commentary': 'rate of formation of the flavin hydroperoxide is not influenced by pH-change. Rate of hydroxylation reaction increases with pH. The H-bond network abstracts the phenolic proton from p-hydroxybenzoate in the transition state of oxygen transfer. Product deprotonation enhances the rate of a specific conformational change required for both product relase and the elimination of water',
     'literature': [
         658045
     ],
     'organism': 'Pseudomonas aeruginosa',
     'ecNumber': '1.14.13.2'
 },
 {
     'reaction': '4-hydroxybenzoate + NADPH + H+ + O2 = 3,4-dihydroxybenzoate + NADP+ + H2O',
     

In [None]:
# Sketch of the intended per-enzyme record layout (this cell has no execution
# count). The keys match the dict built by create_brenda_ec_entry.
# NOTE(review): `reaction`, `substrate`, `product` and `pdb` are not defined in
# the visible cells, so running this as-is would presumably raise NameError;
# it appears to be a design note rather than runnable code — confirm.
brenda_enzymes = [
    {
        'SYSNAME': "...", # should this be an array?
        'REACTIONS': [{reaction},],
        'NAME': ['...',],
        'SUBSTRATE': [{substrate},],
        'PRODUCT': [{product},],
        'PDB': [{pdb}]
    },
]

In [747]:
# Peek at the first EC number in sorted order.
sorted(all_ecs)[0]

'1.1.1.1'

In [772]:
def create_brenda_ec_entry(ec):
    """Gather every SOAP lookup for one EC number into a single dict.

    Args:
        ec: EC number passed unchanged to each SOAP helper.

    Returns:
        dict with the keys ``SYSNAME``, ``REACTIONS``, ``NAME``, ``SUBSTRATE``,
        ``PRODUCT`` and ``PDB``, each holding the corresponding helper's result.
    """
    # (key, helper) pairs listed in the order the lookups are performed.
    fetchers = (
        ('SYSNAME', getSystematicName),
        ('REACTIONS', getReactions),
        ('NAME', getSynonyms),
        ('SUBSTRATE', getSubstrate),
        ('PRODUCT', getProduct),
        ('PDB', getPdb),
    )
    return {field: fetch(ec) for field, fetch in fetchers}
     

In [782]:
def create_brenda_db(list_of_ecs, entry_fn=None):
    """Build a dict of BRENDA entries keyed by systematic name.

    Args:
        list_of_ecs: Iterable of EC numbers to look up.
        entry_fn: Optional callable ``ec -> entry`` where ``entry['SYSNAME']``
            is a sequence of records carrying a ``'systematicName'`` item.
            Defaults to ``create_brenda_ec_entry``.

    Returns:
        dict mapping each enzyme's systematic name to its entry. When an EC
        has several systematic-name records, the alphabetically first name is
        the key. ECs whose lookup fails are reported and left out. Two ECs
        that share a systematic name overwrite each other (last one wins).
    """
    if entry_fn is None:
        entry_fn = create_brenda_ec_entry

    # Materialise once so the total is known even for sets or generators.
    ecs = list(list_of_ecs)
    total = len(ecs)

    db = {}
    for done, ec in enumerate(ecs, start=1):
        try:
            entry = entry_fn(ec)
            # Sort the name strings, not the records themselves: records are
            # not orderable, so sorting them fails as soon as there are two.
            names = sorted(record['systematicName'] for record in entry['SYSNAME'])
            db[names[0]] = entry
        except Exception as err:
            # Best effort: report the failure and carry on with the next EC,
            # while still letting KeyboardInterrupt stop the loop.
            print(f"couldn't fetch info for {ec}: {err!r}")

        # Progress is measured against the input size, not the number of
        # stored entries (which can be zero).
        print(f'----> Progress: {round(done / total * 100)}% done')
    return db

In [783]:
# First ten EC numbers in sorted order, used for a quick trial run.
short_list = sorted(all_ecs)[:10]

In [784]:
# Build the entry dict for the shortened EC list only.
short_db = create_brenda_db(short_list)

Getting cached data...
brenda_getSystematicName?68a8b3ecc5cecaa6b4bf94e0c89ff02d5d77366ca90f1774aae28e37f3b202a7=68a8b3ecc5cecaa6b4bf94e0c89ff02d5d77366ca90f1774aae28e37f3b202a7&ecNumber*1.1.1.1=ecNumber*1.1.1.1&organism*=organism*&si485@dispostable.com=si485@dispostable.com&systematicName*=systematicName*
Getting cached data...
brenda_getReaction?68a8b3ecc5cecaa6b4bf94e0c89ff02d5d77366ca90f1774aae28e37f3b202a7=68a8b3ecc5cecaa6b4bf94e0c89ff02d5d77366ca90f1774aae28e37f3b202a7&commentary*=commentary*&ecNumber*1.1.1.1=ecNumber*1.1.1.1&literature*=literature*&organism*=organism*&reaction*=reaction*&si485@dispostable.com=si485@dispostable.com
Getting cached data...
brenda_getSynonyms?68a8b3ecc5cecaa6b4bf94e0c89ff02d5d77366ca90f1774aae28e37f3b202a7=68a8b3ecc5cecaa6b4bf94e0c89ff02d5d77366ca90f1774aae28e37f3b202a7&commentary*=commentary*&ecNumber*1.1.1.1=ecNumber*1.1.1.1&literature*=literature*&organism*=organism*&si485@dispostable.com=si485@dispostable.com&synonyms*=synonyms*
Getting cached d

In [792]:
# Sanity check: list the EC number of every entry that came back with more
# than one systematic-name record.
[
    entry['SYSNAME'][0]['ecNumber']
    for entry in short_db.values()
    if len(entry['SYSNAME']) > 1
]

[]