In [1]:
#### Import some needed modules
import requests
import itertools
import sys
import pandas
import io
import json
import numpy

In [2]:
def query_biolink_phenotype_to_pathway(phenotype_id):
    """ Find all associated pathways with the input phenotype.
    
    API URL parameters "fetch_objects=false" and "use_compact_associations=true" must work together to get compact response otherwise 500 error code.
    
    API URL parameter "rows=1000" means at most 1000 associations are requested. No idea what an optimal number is here.
    
    Every failure (timeout, any other request error, non-200 status, body that is not valid JSON) is reported on stderr and turned into a `None` return value, so callers only have to check for `None`.
    
    @param phenotype_id: a string ID starting with "HP:"
    @return: a dict representation of the json response, or None if the query failed
    """
    biolink_url = 'https://api.monarchinitiative.org/api/bioentity/phenotype/{phenotype_id}/pathways?fetch_objects=false&use_compact_associations=true&rows=1000'.format(phenotype_id=phenotype_id)
    try:
        res = requests.get(biolink_url, timeout=120)
    except requests.exceptions.Timeout:
        print('Timeout in QueryBioLink for URL: ' + biolink_url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except Exception as e:
        # `Exception` rather than `BaseException`: SystemExit / GeneratorExit must keep propagating.
        print('%s received in QueryBioLink for URL: %s' % (e, biolink_url), file=sys.stderr)
        return None
    status_code = res.status_code
    if status_code != 200:
        print('Status code ' + str(status_code) + ' for url: ' + biolink_url, file=sys.stderr)
        return None

    try:
        return res.json()
    except ValueError as e:
        # A 200 response with a non-JSON body is treated like any other failed query.
        print('%s received in QueryBioLink for URL: %s' % (e, biolink_url), file=sys.stderr)
        return None

In [3]:
def parse_phenotype_to_pathway_json(json):
    """Extract the Reactome pathway ids from a phenotype-to-pathway response.

    All `json["compact_associations"][*]["objects"]` lists are collected and flattened into one long list; only the ids starting with "REACT:" are kept. Order is preserved and duplicates are not removed.
    
    @param json: the dict object from `query_biolink_phenotype_to_pathway`, which wraps a json object inside; may be None when that query failed
    @return: a list of Reactome ids, in the form of "REACT:R-HSA-6798695"; empty when `json` is None/empty or holds no associations
    """
    # NOTE: the parameter name shadows the `json` module inside this function;
    # it is kept unchanged so that keyword callers keep working.
    if not json:
        # A failed query yields None; report "no pathways" instead of raising TypeError.
        return []

    associations = json.get("compact_associations") or []

    # flatten the per-association "objects" lists lazily into one stream of ids
    object_ids = itertools.chain.from_iterable(
        entry.get("objects") or [] for entry in associations
    )

    # keep reactome ids only
    return [_id for _id in object_ids if _id.startswith("REACT:")]

In [4]:
def phenotype_to_pathway(phenotype_id):
    """
    This takes a phenotype id, queries biolink, parses the result, and then returns the ids of the
    Reactome pathways associated with that phenotype.
    
    @param phenotype_id: a string ID starting with "HP:"
    @return: a list of pathway id strings starting with "REACT:"; empty if the biolink query failed
    """
    json_res = query_biolink_phenotype_to_pathway(phenotype_id)
    if json_res is None:
        # The query reports its own failure on stderr and returns None; there is nothing to parse.
        return []
    return parse_phenotype_to_pathway_json(json_res)

In [5]:
#### Choose the phenotype by its HP curie id, then look up its associated pathway ids
phenotype_curie = 'HP:0012378'
pathway_ids = phenotype_to_pathway(phenotype_curie)

In [6]:
#### Base URL of the RTX reasoner API, and the query endpoint derived from it
API_BASE_URL = 'https://rtx.ncats.io/api/rtx/v1'
url_str = "{base}/query".format(base=API_BASE_URL)

In [7]:
response_dicts = []
for pathway_id in pathway_ids:
  #### Create a dict of the request, specifying the query type Q26
  #### Set the pathway term to the curie id of the desired pathway and run.
  request = { "query_type_id": "Q26", "terms": { "pathway": pathway_id } }
  #### Send the request to RTX; the timeout keeps one stalled request from hanging the whole cell
  try:
    response_content = requests.post(url_str, json=request, headers={'accept': 'application/json'}, timeout=120)
  except requests.exceptions.RequestException as e:
    #### A failed request only skips this pathway; the remaining pathways are still queried
    print('%s received in RTX query for pathway: %s' % (e, pathway_id), file=sys.stderr)
    continue
  #### Check the status; only successful responses are parsed
  status_code = response_content.status_code
  if status_code != 200:
    print('Status code ' + str(status_code) + ' in RTX query for pathway: ' + str(pathway_id), file=sys.stderr)
    continue
  try:
    response_dict = response_content.json()
  except ValueError as e:
    print('%s received in RTX query for pathway: %s' % (e, pathway_id), file=sys.stderr)
    continue
  #### Keep only the responses that actually carry results
  if "result_list" in response_dict:
    response_dicts.append(response_dict)

In [8]:
#### Display the summary table of the results
if response_dicts:
  #### The column names are taken from the first response and printed once as the header
  print("\t".join(response_dicts[0]["table_column_names"]))
  for response_dict in response_dicts:
    for result in response_dict["result_list"]:
      print("\t".join(result["row_data"]))
else:
  #### Without this guard an empty result set would raise IndexError on response_dicts[0]
  print("No results were returned for any pathway.")

#### NOTE: Some rows below appear as duplicates in this summary table but they correspond to different paths in the KG.

source name	source ID	target name	target ID	Jaccard index
Neutrophil degranulation	REACT:R-HSA-6798695	dalfampridine	CHEMBL.COMPOUND:CHEMBL284348	0.011641
Neutrophil degranulation	REACT:R-HSA-6798695	bortezomib	CHEMBL.COMPOUND:CHEMBL325041	0.011625
Neutrophil degranulation	REACT:R-HSA-6798695	ixazomib citrate	CHEMBL.COMPOUND:CHEMBL3545432	0.011618
Neutrophil degranulation	REACT:R-HSA-6798695	carfilzomib	CHEMBL.COMPOUND:CHEMBL451887	0.010765
Neutrophil degranulation	REACT:R-HSA-6798695	vandetanib	CHEMBL.COMPOUND:CHEMBL24828	0.007826
Neutrophil degranulation	REACT:R-HSA-6798695	lacosamide	CHEMBL.COMPOUND:CHEMBL58323	0.007558
Neutrophil degranulation	REACT:R-HSA-6798695	colchicine	CHEMBL.COMPOUND:CHEMBL107	0.007541
Neutrophil degranulation	REACT:R-HSA-6798695	primidone	CHEMBL.COMPOUND:CHEMBL856	0.007232
Neutrophil degranulation	REACT:R-HSA-6798695	pimozide	CHEMBL.COMPOUND:CHEMBL1423	0.006975
Neutrophil degranulation	REACT:R-HSA-6798695	imipramine	CHEMBL.COMPOUND:CHEMBL11	0.006973
Neutroph

Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	clozapine	CHEMBL.COMPOUND:CHEMBL42	0.011507
Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	fluphenazine	CHEMBL.COMPOUND:CHEMBL726	0.011507
Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	pimozide	CHEMBL.COMPOUND:CHEMBL1423	0.011488
Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	perlapine	CHEMBL.COMPOUND:CHEMBL340801	0.010989
Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	indoramin	CHEMBL.COMPOUND:CHEMBL279516	0.010971
Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	prochlorperazine	CHEMBL.COMPOUND:CHEMBL728	0.010971
Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	amoxapine	CHEMBL.COMPOUND:CHEMBL1113	0.010959
Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	pirenzepine	CHEMBL.COMPOUND:CHEMBL9967	0.010959
Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	olanzapine	CHEMBL.COMPOUND:CHEMBL715	0.010947
Toll Like Receptor 3 (TLR3) Cascade	REACT:R-HSA-168164	imipramine	CHEMBL.COMPOUN

In [9]:
#### Display the ranked results:
#### Gather the row data of every result across all responses
ranked_rows = [
  result["row_data"]
  for response_dict in response_dicts
  for result in response_dict["result_list"]
]
#### Positions 2, 3 and 4 of each row are read as drug name, drug id and Jaccard index
drug_dict = {
  "drug name": [row[2] for row in ranked_rows],
  "id": [row[3] for row in ranked_rows],
  "jaccard": [float(row[4]) for row in ranked_rows],
}
drug_df = pandas.DataFrame(drug_dict)
#### Average the Jaccard index over all rows sharing the same drug name and id
grouped_by_drug = drug_df.groupby(['drug name', 'id'])
drug_df = grouped_by_drug.agg([numpy.average])['jaccard']['average']
#### Highest average first; lift the row limit so the full ranking is printed
ranked_drugs = drug_df.sort_values(ascending=False)
with pandas.option_context('display.max_rows', None):
  print(ranked_drugs)

drug name                                 id                           
pimobendan                                CHEMBL.COMPOUND:CHEMBL24646      0.086957
levamisole                                CHEMBL.COMPOUND:CHEMBL1454       0.083333
rucaparib                                 CHEMBL.COMPOUND:CHEMBL1173055    0.076923
tacrine                                   CHEMBL.COMPOUND:CHEMBL95         0.064516
anakinra                                  CHEMBL.COMPOUND:CHEMBL1201570    0.053571
dolasetron                                CHEMBL.COMPOUND:CHEMBL2368925    0.049180
sulfaguanidine                            CHEMBL.COMPOUND:CHEMBL338802     0.049107
sulfanilamide                             CHEMBL.COMPOUND:CHEMBL21         0.048889
valdecoxib                                CHEMBL.COMPOUND:CHEMBL865        0.048673
chlorothiazide                            CHEMBL.COMPOUND:CHEMBL842        0.047009
interferon gamma-1b                       CHEMBL.COMPOUND:CHEMBL1201564    0.045455
asco