## Use biothings_client to get target protein info from DrugBank (via MyChem.info)


In [1]:
import biothings_client

In [2]:
# use the mychem client ('drug' entity type); it is the handle for the
# MyChem.info queries in the cells below
mc = biothings_client.get_client('drug')

In [22]:
# Ask mychem.info for every document in which the field
# drugbank.targets.uniprot exists, and return only that field.
# fetch_all=True requests all hits; the documents appear to be pulled
# lazily, when `data` is iterated in the next cell.
data = mc.query(
    '_exists_:drugbank.targets.uniprot',
    fields='drugbank.targets.uniprot',
    fetch_all=True,
)

In [23]:
# Materialise the query iterator into a list so it can be traversed more than once.
results = [_record for _record in data]

Fetching 6527 drug(s) . . .
No results to return


**Results**: Total number of drugs in MyChem.info containing target information is 6527

In [24]:
# Collect the UniProt ID of every DrugBank target across all fetched drugs.
uniprot_list = []
for _doc in results:
    _targets = _doc['drugbank']['targets']
    # one drug with a single target comes back as a dict instead of a list;
    # wrap it so both shapes go through the same loop
    if not isinstance(_targets, list):
        _targets = [_targets]
    for _target in _targets:
        # the query only guarantees the field exists somewhere in the document,
        # so skip any individual target without a UniProt ID rather than
        # failing with KeyError
        if 'uniprot' in _target:
            uniprot_list.append(_target['uniprot'])
print(len(uniprot_list))
            

14694


In [25]:
# Collapse repeated UniProt IDs: the list built above holds one entry per
# (drug, target) pair, so a target shared by several drugs occurs many times.
uniprot_list = [*set(uniprot_list)]
print(len(uniprot_list))

3970


**Results**: Total number of unique drug targets (UniProt IDs) in DrugBank is 3970

## Use biothings_client to get the human NCBI Gene IDs for the UniProt IDs

In [26]:
# gene client, used below to map UniProt IDs to NCBI Gene (entrezgene) IDs
mg = biothings_client.get_client('gene')

In [33]:
# Batch-query the gene client with the UniProt IDs, asking only for the
# 'entrezgene' field and restricting the search to human genes.
gene_data = mg.querymany(
    uniprot_list,
    scopes='uniprot',
    fields='entrezgene',
    species='human',
)

querying 1-1000...done.
querying 1001-2000...done.
querying 2001-3000...done.
querying 3001-3970...done.
Finished.
24 input query terms found dup hits:
	[('P54278', 2), ('P50391', 3), ('P01764', 2), ('P06310', 2), ('P35520', 2), ('P69905', 2), ('P04745'
1780 input query terms found no hit:
	['P55038', 'P62661', 'Q5KUI3', 'O68874', 'P83798', 'P27302', 'P12564', 'Q05486', 'P0C1V1', 'P17169',
Pass "returnall=True" to return complete lists of duplicate or missing query terms.


In [34]:
# peek at a single hit to see which keys a returned record carries
gene_data[1]


{'_id': '5105', '_score': 21.649397, 'entrezgene': 5105, 'query': 'P35558'}

In [35]:
# Keep the Entrez gene ID of every hit that has one; echo every other record
# (not found, or matched without an 'entrezgene' field) for inspection.
# NOTE(review): IDs are appended as returned, so entrezgene_list is not
# deduplicated — confirm whether a unique count is wanted downstream.
entrezgene_list = []
for _hit in gene_data:
    if 'entrezgene' not in _hit:
        print(_hit)
        continue
    entrezgene_list.append(_hit['entrezgene'])

{'query': 'P55038', 'notfound': True}
{'query': 'P62661', 'notfound': True}
{'query': 'Q5KUI3', 'notfound': True}
{'query': 'O68874', 'notfound': True}
{'query': 'P83798', 'notfound': True}
{'query': 'P27302', 'notfound': True}
{'query': 'P12564', 'notfound': True}
{'query': 'Q05486', 'notfound': True}
{'query': 'P0C1V1', 'notfound': True}
{'query': 'P17169', 'notfound': True}
{'query': 'P26396', 'notfound': True}
{'query': 'Q5G940', 'notfound': True}
{'query': 'P01744', 'notfound': True}
{'query': 'P10340', 'notfound': True}
{'query': 'P41020', 'notfound': True}
{'query': 'Q9AGP8', 'notfound': True}
{'query': 'P28593', 'notfound': True}
{'_id': 'ENSG00000243910', '_score': 21.642801, 'query': 'Q9H853'}
{'query': 'P36946', 'notfound': True}
{'query': 'Q9N587', 'notfound': True}
{'query': 'P22364', 'notfound': True}
{'query': 'O52806', 'notfound': True}
{'query': 'P42216', 'notfound': True}
{'query': 'Q8V397', 'notfound': True}
{'query': 'P00861', 'notfound': True}
{'query': 'Q9WZ57', '

In [36]:
# number of gene IDs collected from the hits (entries are not deduplicated)
print(len(entrezgene_list))

2201


**Results**: Total number of human NCBI Gene IDs returned from the search is 2201 (1780 of the 3970 UniProt IDs had no hit in the human-restricted query)