In [664]:
import pandas as pd
from io import StringIO
import requests
import lxml
import importlib
import json

# importlib.reload(cache)
from cache import cached_reqest
from cache import generic_cached_reqest

In [108]:
terms = ['FAD','FMN','flavoenzyme','flavin', 'flavoprotein']

In [109]:
def brenda_request(url):
    headers = {
      "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
      "X-Requested-With": "XMLHttpRequest"
    }
    response = cached_reqest(url, headers=headers)
    return response
    

In [299]:
def search_ligands_brenda(term):
    columns = ['Ligand','EC Number', 'Role', 'Id', 'Structure', 'Discard']
    url = f'https://www.brenda-enzymes.org/result_download.php?a=13&RN=&RNV=1&os=1&pt=&FNV=1&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&V[1]=1&V[2]=2&W[3]={term}&T[3]=2&nolimit=1'
    response = brenda_request(url)
    df = pd.read_csv(StringIO(response), sep='\t', names=columns)
    return df

In [300]:
def search_enzymes_brenda(term):
    columns = ['EC Number', 'Recommended Name', 'Synonyms', 'Commentary', 'Discard']
    url = f'https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]={term}&T[2]=2&nolimit=1'    
    response = brenda_request(url)
    df = pd.read_csv(StringIO(response), sep='\t', names=columns)
    return df['EC Number']

In [301]:
def brenda_get_enzyme_data(id):
    link = f'https://www.brenda-enzymes.info/enzyme.php?ecno={id}#NATURAL%20SUBSTRATE'
    response = brenda_request(link)
    return response

In [349]:
def search_all_terms(terms, search_fn):
    all_dfs = []

    for term in terms:
        df = search_fn(term)
        if (len(df) < 2):
            print(f'[!] skipping search for {term} since nothing was found')
            continue
        else:
            all_dfs.append(df)
        
        print(f'len of {term} in {search_fn} is:{len(df)}')
    return pd.concat(all_dfs)

In [351]:
(search_all_terms(terms,search_ligands_brenda))

Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=13&RN=&RNV=1&os=1&pt=&FNV=1&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&V[1]=1&V[2]=2&W[3]=FAD&T[3]=2&nolimit=1
len of FAD in <function search_ligands_brenda at 0x1243e79d8> is:720
Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=13&RN=&RNV=1&os=1&pt=&FNV=1&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&V[1]=1&V[2]=2&W[3]=FMN&T[3]=2&nolimit=1
len of FMN in <function search_ligands_brenda at 0x1243e79d8> is:351
Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=13&RN=&RNV=1&os=1&pt=&FNV=1&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&V[1]=1&V[2]=2&W[3]=flavoenzyme&T[3]=2&nolimit=1
[!] skipping search for flavoenzyme since nothing was found
Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=13&RN=&RNV=1&os=1&pt=&FNV=1&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&V[1]=1&V[2]=2&W[3]=flavin&T[3]=2&nolimit=1
len of flavin in <function search_ligands_brenda at 0x1243e79d8> is:452
Get

Unnamed: 0,Ligand,EC Number,Role,Id,Structure,Discard
0,FAD,1.1.1.184,Inhibitor,20,CHEBI:16238,
1,FAD,1.1.1.189,Inhibitor,20,CHEBI:16238,
2,FAD,1.1.1.215,Cofactor,20,CHEBI:16238,
3,FAD,1.1.1.216,Activating Compound,20,CHEBI:16238,
4,FAD,1.1.1.28,Cofactor,20,CHEBI:16238,
...,...,...,...,...,...,...
87,semiquinone electron transferring flavoprotein,1.5.5.1,Substrate,109584,-,
88,hydroquinone electron transferring flavoprotein,1.5.5.1,Product,115821,-,
89,flavoprotein,1.14.14.1,Cofactor,135195,-,
90,reduced alphaR237K mutant electron transferrin...,1.5.8.2,Product,146007,-,


In [352]:
def get_all_ecs(terms):    
    enzymes_list = list(search_all_terms(terms,search_enzymes_brenda)['EC Number'])
    ligands_list = list(search_all_terms(terms,search_ligands_brenda)['EC Number'])

    ec_set = set(enzymes_list+ligands_list)
    print(len(ec_set))

    return ec_set

In [362]:
all_ecs = get_all_ecs(terms)

Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]=FAD&T[2]=2&nolimit=1
len of FAD in <function search_enzymes_brenda at 0x1243e76a8> is:104
Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]=FMN&T[2]=2&nolimit=1
len of FMN in <function search_enzymes_brenda at 0x1243e76a8> is:41
Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]=flavoenzyme&T[2]=2&nolimit=1
[!] skipping search for flavoenzyme since nothing was found
Getting cached data...
https://www.brenda-enzymes.org/result_download.php?a=9&RN=&RNV=1&os=1&pt=&FNV=&tt=&SYN=&Textmining=&T[0]=2&T[1]=2&W[2]=flavin&T[2]=2&nolimit=1
len of flavin in <function search_enzymes_brenda at 0x1243e76a8> is:69
Getting cached data...
https://www.brenda-enzymes.org/result_download

In [13]:
rand_enzyme = results.sample(1)
id = rand_enzyme.iloc[0]['EC Number']

headers = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}




DATA_URL = f'https://www.brenda-enzymes.info/enzyme.php?ecno={id}#NATURAL%20SUBSTRATE'
html = lxml.html.parse(DATA_URL)
res = html.xpath('//table[@id=\"tab17_head\"]/tr')
if six.PY2:
    sarr = [etree.tostring(node) for node in res]
else:
    sarr = [etree.tostring(node).decode('utf-8') for node in res]
sarr = ''.join(sarr)
sarr = sarr.replace('<font color="red">*</font>', '')
sarr = f'<table>{sarr}</table>'
df = pd.read_html(StringIO(sarr))
df
# pd.read_html(io = f'https://www.brenda-enzymes.info/enzyme.php?ecno={id}#NATURAL%20SUBSTRATE', header=headers)
# pd.read_html(StringIO(brenda_get_enzyme_data(id)))

AttributeError: module 'lxml' has no attribute 'html'

# Trying to use SOAP

In [691]:
from zeep import Client
import hashlib

email = 'si485@dispostable.com'
password = 'si485@dispostable'

wsdl = "https://www.brenda-enzymes.org/soap/brenda_zeep.wsdl"
password = hashlib.sha256(password.encode("utf-8")).hexdigest()
client = Client(wsdl)

In [683]:
poster_child = '1.14.13.2'

In [682]:
def brendaSOAP(parameters, fn):
    # resp = client.service[fn](*parameters)
    resp = cache.generic_cached_reqest(request_name=f'brenda_{fn}', params=parameters, request_fn=client.service[fn])
    return resp

def getSynonyms(ecNumber):
    parameters = (email, password, f"ecNumber*{ecNumber}", 'organism*', 'synonyms*', 'commentary*', 'literature*')
    return brendaSOAP(parameters, 'getSynonyms')

def getReactions(ecNumber):
    parameters = (email, password,f"ecNumber*{ecNumber}", "reaction*", "commentary*", "literature*", "organism*")
    return brendaSOAP(parameters, 'getReaction')

def getSystematicName(ecNumber):
    parameters = (email, password,f"ecNumber*{ecNumber}", "organism*", "systematicName*")
    return brendaSOAP(parameters, 'getSystematicName')

def getSubstrate(ecNumber):
    parameters = (email, password,f"ecNumber*{ecNumber}", "organism*", "substrate*", "reactionPartners*", "ligandStructureId*")
    return brendaSOAP(parameters,'getSubstrate')

def getProduct(ecNumber):
    parameters = (email, password,f"ecNumber*{ecNumber}", "organism*", "product*", "reactionPartners*", "ligandStructureId*")
    return brendaSOAP(parameters,'getProduct')

def getPdb(ecNumber):
    parameters = (email, password,f"ecNumber*{ecNumber}", "organism*", "pdb*")
    return brendaSOAP(parameters,'getPdb')