## Use biothings_client to get drug-metabolizing enzyme info (DrugBank data in MyChem.info)

In [1]:
import biothings_client

In [2]:
# use the mychem client ('drug' selects the BioThings chemical/drug API, MyChem.info)
mc = biothings_client.get_client('drug')

In [3]:
# get all drugs with enzyme UniProt annotations from mychem.info:
# the query keeps only documents in which drugbank.enzymes.uniprot exists,
# `fields` limits the returned data to that same field, and fetch_all=True
# requests every hit rather than a single page of results
data = mc.query('_exists_:drugbank.enzymes.uniprot', fields='drugbank.enzymes.uniprot', fetch_all=True)

In [4]:
# materialize the query result as a list so it can be looped over and counted
results = list(data)

Fetching 1075 drug(s) . . .
No results to return


**Results**: Total number of drugs in MyChem.info containing enzyme information (`drugbank.enzymes.uniprot`) is 1075

In [6]:
# Flatten the enzyme UniProt IDs of every drug record into one list.
# `drugbank.enzymes` is a list when a drug has several enzymes and a single
# dict when it has exactly one, so both shapes are normalised to a list first.
uniprot_list = []
for _doc in results:
    _enzymes = _doc['drugbank']['enzymes']
    # handle cases where one drug has only one enzyme (a bare dict, not a list)
    if isinstance(_enzymes, dict):
        _enzymes = [_enzymes]
    for _enzyme in _enzymes:
        # NOTE(review): the `_exists_` filter is applied per drug, so an individual
        # enzyme entry presumably can lack 'uniprot'; skip it instead of raising.
        if 'uniprot' in _enzyme:
            uniprot_list.append(_enzyme['uniprot'])
# total number of enzyme annotations collected (duplicates still included)
print(len(uniprot_list))
            

3511


In [7]:
# remove duplicate uniprot ids
# A bare list(set(...)) yields an order that changes between kernel restarts
# (string hashes are randomised per process); sorting the unique IDs makes the
# list, and everything derived from it, reproducible.
uniprot_list = sorted(set(uniprot_list))
print(len(uniprot_list))

219


## Use biothings_client to get all NCBI Gene IDs from UniProt IDs

In [8]:
# client for the BioThings 'gene' API (MyGene.info), used to map UniProt IDs to genes
mg = biothings_client.get_client('gene')

In [9]:
# Batch-query every UniProt ID: scopes='uniprot' matches the terms against the
# uniprot field, fields='entrezgene' returns only the NCBI (Entrez) Gene ID, and
# species='human' restricts hits to human genes.
# As the captured output shows, a term with no hit comes back as
# {'notfound': True, 'query': <term>} and a term can produce more than one hit.
gene_data = mg.querymany(uniprot_list, scopes='uniprot', fields='entrezgene', species='human')

querying 1-219...done.
Finished.
2 input query terms found dup hits:
	[('P22392', 2), ('P24462', 2)]
26 input query terms found no hit:
	['Q9TRC7', 'P00808', 'P00484', 'A7BK78', 'P04167', 'P0A5N0', 'P00551', 'Q56148', 'P0A5L8', 'P04800',
Pass "returnall=True" to return complete lists of duplicate or missing query terms.


In [10]:
# Collect the Entrez gene ID of every hit; hits that carry no 'entrezgene'
# field (e.g. "notfound" entries) are printed for inspection and left out.
entrezgene_list = []
for hit in gene_data:
    if 'entrezgene' not in hit:
        print(hit)
        continue
    entrezgene_list.append(hit['entrezgene'])

{'notfound': True, 'query': 'Q9TRC7'}
{'notfound': True, 'query': 'P00808'}
{'notfound': True, 'query': 'P00484'}
{'notfound': True, 'query': 'A7BK78'}
{'notfound': True, 'query': 'P04167'}
{'notfound': True, 'query': 'P0A5N0'}
{'notfound': True, 'query': 'P00551'}
{'notfound': True, 'query': 'Q56148'}
{'notfound': True, 'query': 'P0A5L8'}
{'notfound': True, 'query': 'P04800'}
{'notfound': True, 'query': 'Q9FAW5'}
{'notfound': True, 'query': 'P00953'}
{'notfound': True, 'query': 'O08355'}
{'notfound': True, 'query': 'P11711'}
{'notfound': True, 'query': 'P50224'}
{'notfound': True, 'query': 'P26841'}
{'notfound': True, 'query': 'P15149'}
{'notfound': True, 'query': 'P05183'}
{'notfound': True, 'query': 'P05057'}
{'notfound': True, 'query': 'B6A7R5'}
{'notfound': True, 'query': 'P00176'}
{'notfound': True, 'query': 'P05184'}
{'notfound': True, 'query': 'Q14097'}
{'notfound': True, 'query': 'P12790'}
{'notfound': True, 'query': 'Q6LAP9'}
{'notfound': True, 'query': 'P14489'}


In [11]:
# number of collected Entrez gene IDs: one entry per hit that had an
# 'entrezgene' field; the list is not de-duplicated
print(len(entrezgene_list))

195


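**Results**: Total number of NCBI Gene IDs returned from search is 195 (219 unique UniProt IDs were queried; 26 had no human hit and 2 returned two hits each, so 219 − 26 + 2 = 195)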