In [1]:
#### Import some needed modules
import requests
import itertools
import sys
import pandas
import io

In [2]:
def query_biolink_phenotype_to_pathway(phenotype_id, rows=1000, timeout=120):
    """Find all pathways associated with the input phenotype.

    Sends a GET request to the Monarch Initiative BioLink endpoint
    `/api/bioentity/phenotype/{phenotype_id}/pathways` and returns the decoded
    JSON body.

    The API URL parameters "fetch_objects=false" and
    "use_compact_associations=true" must be used together to get the compact
    response; otherwise the API answers with a 500 error code.

    Every failure (timeout, any other error raised while sending the request,
    a non-200 status code, or a body that is not valid JSON) is reported on
    stderr and signalled to the caller by returning None.

    @param phenotype_id: a string ID starting with "HP:"
    @param rows: value of the "rows" URL parameter, i.e. how many associations
                 are requested (default 1000; no optimal value is known)
    @param timeout: timeout in seconds handed to `requests.get` (default 120)
    @return: a dict representation of the json response, or None on failure
    """
    biolink_url = (
        'https://api.monarchinitiative.org/api/bioentity/phenotype/{phenotype_id}/pathways'
        '?fetch_objects=false&use_compact_associations=true&rows={rows}'
    ).format(phenotype_id=phenotype_id, rows=rows)

    try:
        res = requests.get(biolink_url, timeout=timeout)
    except requests.exceptions.Timeout:
        print('Timeout in QueryBioLink for URL: ' + biolink_url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except Exception as e:
        # Deliberately `Exception`, not `BaseException`: SystemExit and
        # GeneratorExit must keep propagating instead of being logged away.
        print('%s received in QueryBioLink for URL: %s' % (e, biolink_url), file=sys.stderr)
        return None

    status_code = res.status_code
    if status_code != 200:
        print('Status code ' + str(status_code) + ' for url: ' + biolink_url, file=sys.stderr)
        return None

    try:
        return res.json()
    except ValueError as e:
        # A 200 response may still carry a body that is not JSON; report it the
        # same way as the other failures rather than raising.
        print('Invalid JSON (%s) received in QueryBioLink for URL: %s' % (e, biolink_url), file=sys.stderr)
        return None

In [3]:
def parse_phenotype_to_pathway_json(json):
    """Extract the KEGG pathway ids from a phenotype-to-pathway response.

    All `json["compact_associations"][*]["objects"]` lists are flattened into
    one sequence. Only ids carrying the prefix "KEGG-path:map" (as in
    "KEGG-path:maphsa00000") are kept, and that prefix is removed. Ids whose
    remainder starts with "hsa_M" are dropped. Order and duplicates of the
    remaining ids are preserved.

    A missing response (None, which `query_biolink_phenotype_to_pathway`
    returns on failure), an empty response, or a missing/null
    "compact_associations" or "objects" entry yields no ids instead of an
    exception.

    @param json: the dict object from `query_biolink_phenotype_to_pathway`, which wraps a json object inside; may be None
    @return: a list of KEGG ids with the prefix removed, e.g. "hsa00000"
    """
    kegg_prefix = "KEGG-path:map"

    if not json:
        return []

    associations = json.get("compact_associations") or []

    # each entry holds a list of object ids; flatten them into one stream
    object_ids = itertools.chain.from_iterable(
        entry.get("objects") or [] for entry in associations
    )

    # Slice off the preceding prefix rather than splitting on it, so an id that
    # happened to contain the prefix a second time would not be truncated.
    return [
        _id[len(kegg_prefix):]
        for _id in object_ids
        if _id.startswith(kegg_prefix) and not _id.startswith(kegg_prefix + "hsa_M")
    ]

In [4]:
#### Retrieve tsv from drug-path to get pathway->drug associations
url = "http://www.cuilab.cn/files/images/drugpath/data.txt"
# Bound the wait and fail loudly on an HTTP error, so that an error page is
# never silently parsed as if it were the data file.
drugpath_response = requests.get(url, timeout=120)
drugpath_response.raise_for_status()
res = drugpath_response.content
# Column names are supplied here instead of being read from the file. Of these,
# the visible code below only uses 'drug_name' and 'pathway_id'.
path_drug_df = pandas.read_csv(
    io.StringIO(res.decode('utf-8')),
    sep="\t",
    names=['num', 'drug_name', 'pathway_id', 'pathway_name', 'C5', 'C6', 'C7', 'C8', 'C9'],
)

In [5]:
def phenotype_to_drug(phenotype_id):
    """
    This takes a phenotype id, queries biolink, parses the result, and then returns the drugs associated
    with the resulting list of pathways as a pandas series of drug name strings.

    If the biolink query fails (it then returns None), no pathway matches and an
    empty series is returned instead of raising.

    @param phenotype_id: a string ID starting with "HP:"
    @return: a pandas series containing the de-duplicated associated drug names, re-indexed from 0
    """
    json_res = query_biolink_phenotype_to_pathway(phenotype_id)

    # The query signals every failure with None; treat that as "no pathways" so
    # the lookup below still produces a (then empty) series.
    if json_res is None:
        pathway_ids = []
    else:
        pathway_ids = parse_phenotype_to_pathway_json(json_res)

    matching_rows = path_drug_df[path_drug_df.pathway_id.isin(pathway_ids)]
    return matching_rows['drug_name'].drop_duplicates().reset_index(drop=True)

In [6]:
#### Look up the drugs for a phenotype, identified by its HP CURIE
drugs = phenotype_to_drug(phenotype_id='HP:0012378')

In [7]:
#### Print every drug name; the pandas row limit is lifted only inside this block
with pandas.option_context('display.max_rows', None):
    print(str(drugs))

0                                      orlistat
1                                fluorocurarine
2                                    moxonidine
3                                     terazosin
4                                alsterpaullone
5                                     ketorolac
6                                  0316684-0000
7                                 Prestwick-972
8                                    lincomycin
9                                  griseofulvin
10                                      dilazep
11                                     riluzole
12                             sulfamethoxazole
13                                  cloxacillin
14                               pentoxifylline
15                            meclofenamic acid
16                                     tiapride
17                                   tinidazole
18                                  guaifenesin
19                                   thiamazole
20                             lithochol