In [1]:
import logging
import os
import sys
from pprint import pprint

import httpx


# Make the local bte-py checkout importable so its modules can be imported
# below. The location can be overridden with the BTE_PY_PATH environment
# variable; the original developer path is kept as the fallback.
# TODO: set correct path to bte-py project, so we can import its modules.
bte_py_path = os.environ.get("BTE_PY_PATH", "/Users/cwu/prj2/bte-py")
if bte_py_path not in sys.path:  # keep the cell idempotent when re-run
    sys.path.append(bte_py_path)

from biothings_explorer.call_apis_v2.query import SmartAPI
from biothings_explorer.call_apis_v2.query_validator import InvalidQuery

# Raise the threshold of the HTTP client loggers to ERROR.
for noisy_name in ("urllib3", "httpx"):
    logging.getLogger(noisy_name).setLevel(logging.ERROR)
# logging.basicConfig(filename="myapp.log", level=logging.INFO, filemode="w")
logger = logging.getLogger(__name__)

In [2]:
# SmartAPI specification to load. Uncomment one of the alternatives below
# (and comment out the active line) to try a different specification.
spec_url = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/mygene.info/openapi_full.yml"

# spec_url = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/semmeddb/smartapi.yaml"
# spec_url = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/dgidb/openapi.yml"
# spec_url = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/biothings_foodb/smartapi.yaml"
# spec_url = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/ddinter/ddinter.yaml"
# spec_url = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/pfocr/smartapi.yaml"

# spec_url = "http://www.smart-api.info/api/metadata/e8818d98f59fb98ad5ba6e4637b5dec4"

smart_api = SmartAPI(spec_url)

# List the MetaKG edges derived from the loaded specification.
mkg_edges = smart_api.list_metakg()

In [3]:
# Summarise the MetaKG: distinct node types (anything appearing as a subject
# or an object), distinct predicates, and the total number of edges.
subject_types = {mkg_edge["subject"] for mkg_edge in mkg_edges}
object_types = {mkg_edge["object"] for mkg_edge in mkg_edges}
predicate_types = {mkg_edge["predicate"] for mkg_edge in mkg_edges}

no_of_nodes = len(subject_types | object_types)
no_of_predicates = len(predicate_types)

print(f"Number of MetaKG nodes: {no_of_nodes}")
print(f"Number of MetaKG predicates: {no_of_predicates}")
print(f"Number of MetaKG edges: {len(mkg_edges)}")

Number of MetaKG nodes: 6
Number of MetaKG predicates: 8
Number of MetaKG edges: 18


In [4]:
# Take the first MetaKG edge as the example edge queried in the cells below,
# and pretty-print it to show its structure.
test_mkgedge = mkg_edges[0]
pprint(test_mkgedge)

{'bte': {'query_operation': {'method': 'post',
                             'params': {'fields': 'entrezgene',
                                        'size': 1000,
                                        'species': 'human'},
                             'path': '/query',
                             'request_body': {'body': {'q': '{{ queryInputs }}',
                                                       'scopes': 'pathway.reactome.id'}},
                             'server': 'https://mygene.info/v3',
                             'support_batch': True},
         'response_mapping': {'has_participant': {'NCBIGene': 'entrezgene'}}},
 'object': 'Gene',
 'predicate': 'has_participant',
 'subject': 'Pathway'}


In [5]:
# Query the example MetaKG edge for a single input ID, printing every
# returned edge and collecting the NCBIGene ID of each edge's object.
test_ids = ["R-HSA-69306", "R-HSA-6798695"]
res = smart_api.get_edge(test_mkgedge, test_ids[0])
ids_test_edge = set()
cnt = 0  # stays 0 when nothing is returned
for cnt, edge in enumerate(res, start=1):
    pprint(edge)
    ids_test_edge.add(edge["object"]["NCBIGene"])
print(f"Number of edges retrieved: {cnt}")

{'object': {'NCBIGene': '23633', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-69306', 'type': 'Pathway'}}
{'object': {'NCBIGene': '51659', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-69306', 'type': 'Pathway'}}
{'object': {'NCBIGene': '64785', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-69306', 'type': 'Pathway'}}
{'object': {'NCBIGene': '9837', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-69306', 'type': 'Pathway'}}
{'object': {'NCBIGene': '84296', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-69306', 'type': 'Pathway'}}
{'object': {'NCBIGene': '81620', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-69306', 'type': 'Pathway'}}
{'object': {'NCBIGene': '8318', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': '

In [6]:
# Verify the results by calling the MyGene.info API directly with the same
# query for the first test ID.
# NOTE(review): size=500 caps the number of hits returned, while the MetaKG
# operation shown above uses size 1000 - confirm 500 is enough if the test ID
# is changed.
url = f"https://mygene.info/v3/query?q=pathway.reactome.id:{test_ids[0]}&species=human&fields=entrezgene&size=500"
http_resp = httpx.get(url)
# Fail with a clear HTTP error instead of a confusing KeyError on "hits".
http_resp.raise_for_status()
resp = http_resp.json()
ids_mygene = {hit["entrezgene"] for hit in resp["hits"]}
# Displayed result: (gene IDs from the API, gene IDs from the edge, sets match?)
len(ids_mygene), len(ids_test_edge), ids_mygene == ids_test_edge

(186, 186, True)

In [7]:
# Query the example MetaKG edge for all test IDs at once, printing every
# returned edge and collecting the NCBIGene ID of each edge's object.
res = smart_api.get_edges(test_mkgedge, test_ids)
ids_test_edge = set()
cnt = 0  # stays 0 when nothing is returned
for cnt, edge in enumerate(res, start=1):
    pprint(edge)
    ids_test_edge.add(edge["object"]["NCBIGene"])
print(f"Number of edges retrieved: {cnt}")

{'object': {'NCBIGene': '10043', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-6798695', 'type': 'Pathway'}}
{'object': {'NCBIGene': '11322', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-6798695', 'type': 'Pathway'}}
{'object': {'NCBIGene': '10321', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-6798695', 'type': 'Pathway'}}
{'object': {'NCBIGene': '150372', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-6798695', 'type': 'Pathway'}}
{'object': {'NCBIGene': '10394', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-6798695', 'type': 'Pathway'}}
{'object': {'NCBIGene': '10562', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subject': {'_id': 'R-HSA-6798695', 'type': 'Pathway'}}
{'object': {'NCBIGene': '5273', 'type': 'Gene'},
 'predicate': {'type': 'has_participant'},
 'subje

In [8]:
# Verify the batch results by calling the MyGene.info API directly. The POST
# body uses the same scopes, species, fields and size as the query operation
# of the example MetaKG edge, with all test IDs as the query terms.
url = "https://mygene.info/v3/query"
params = {
    "q": test_ids,
    "scopes": "pathway.reactome.id",
    "species": "human",
    "fields": "entrezgene",
    "size": 1000,
}
http_resp = httpx.post(url, json=params)
# Fail with a clear HTTP error instead of a confusing error while parsing.
http_resp.raise_for_status()
resp = http_resp.json()
ids_mygene = {hit["entrezgene"] for hit in resp}
# Displayed result: (gene IDs from the API, gene IDs from the edges, sets match?)
len(ids_mygene), len(ids_test_edge), ids_mygene == ids_test_edge

(650, 650, True)