In [None]:
import requests
import json

response_query_graph = {}
response_kg_edges = {}
response_kg_nodes = {}
response_results = []
response_aux_graphs = {}
import orjson

In [None]:
AC_URL = "https://answercoalesce.renci.org/query"

In [234]:
def generate_trapi_query(input_node_type,output_node_type,input_curie,predicate,input_is_subject=True):
    envelope = {"message":{"query_graph": {"nodes":{"input":{},"output":{}}, "edges":{"edge_0":{}}}}}
    input_node = envelope["message"]["query_graph"]["nodes"]["input"]
    input_node["categories"] = [input_node_type]
    input_node["ids"] = [input_curie]
    output_node = envelope["message"]["query_graph"]["nodes"]["output"]
    output_node["categories"] = [output_node_type]
    query_edge = envelope["message"]["query_graph"]["edges"]["edge_0"]
    if input_is_subject:
        query_edge["subject"] = "output"
        query_edge["object"] = "input"
    else:
        query_edge["subject"] = "input"
        query_edge["object"] = "output"
    query_edge["predicates"]  = [ predicate ]
    query_edge["knowledge_type"] = "inferred"
    return envelope

In [216]:
def disease_to_drug(disease, params=None, print_query=False ):
    input_type = "biolink:Disease"
    output_type = "biolink:Drug"
    predicate = "biolink:treats"
    input_is_subject = "False"
    query = generate_trapi_query( input_type, output_type, disease, predicate, input_is_subject)
    if params:
        query.update(params)
    if print_query:
        print(json.dumps(query,indent=2))
    response = requests.post( AC_URL, json = query )
    print(response.status_code)
    return response.json()


In [217]:
def disease_to_gene(disease, params=None, print_query=False ):
    input_type = "biolink:Disease"
    output_type = "biolink:Gene"
    predicate = "biolink:genetically_associated_with"
    input_is_subject = "False"
    query = generate_trapi_query( input_type, output_type, disease, predicate, input_is_subject)
    if params:
        query.update(params)
    if print_query:
        print(json.dumps(query,indent=2))
    response = requests.post( AC_URL, json = query )
    print(response.status_code)
    return response.json()


In [218]:
def phenotype_to_gene(phenotype, params=None, print_query=False ):
    input_type = "biolink:PhenotypicFeature"
    output_type = "biolink:Gene"
    predicate = "biolink:affects"
    input_is_subject = "False"
    query = generate_trapi_query( input_type, output_type, phenotype, predicate, input_is_subject)
    if params:
        query.update(params)
    if print_query:
        print(json.dumps(query,indent=2))
    response = requests.post( AC_URL, json = query )
    print(response.status_code)
    return response.json()


In [219]:
def gene_to_phenotype(gene, params=None, print_query=False ):
    input_type = "biolink:Gene"
    output_type = "biolink:PhenotypicFeature"
    predicate = "biolink:has_phenotype"
    input_is_subject = "False"
    query = generate_trapi_query( input_type, output_type, gene, predicate, input_is_subject)
    if params:
        query.update(params)
    if print_query:
        print(json.dumps(query,indent=2))
    response = requests.post( AC_URL, json = query )
    print(response.status_code)
    return response.json()


In [237]:
def disease_to_phenotype(disease, params=None, print_query=False ):
    input_type = "biolink:Disease"
    output_type = "biolink:PhenotypicFeature"
    predicate = "biolink:has_phenotype"
    input_is_subject = "True"
    query = generate_trapi_query( input_type, output_type, disease, predicate, input_is_subject)
    if params:
        query.update(params)
    if print_query:
        print(json.dumps(query,indent=2))
    response = requests.post( AC_URL, json = query )
    print(response.status_code)
    return response.json()


In [221]:
def print_results(resp):
    global response_query_graph, response_aux_graphs, response_results, response_kg_edges, response_kg_nodes 
    response_query_graph = resp["message"]["query_graph"]
    response_aux_graphs = resp["message"]["auxiliary_graphs"]
    response_results = resp["message"]["results"]
    response_kg_edges = resp["message"]["knowledge_graph"]["edges"]
    response_kg_nodes = resp["message"]["knowledge_graph"]["nodes"]
    
    for i, result in enumerate(resp["message"]["results"]):
        nb = result["node_bindings"]["output"][0]["id"]
        name = resp["message"]["knowledge_graph"]["nodes"][nb]["name"]
        print(f"{i} | {nb} | {name}") 

In [222]:
# Now let's explore the path for any of the results
def print_result_support_graphs(i, answerset):
    #Let's see the inferred_edge in the KG then extract it support graphs
    for eid, edge in response_results[i]["analyses"][0]["edge_bindings"].items():
        inferred_edge = edge[0]["id"]
        support_graphs = [attributes["value"] for attributes in response_kg_edges[inferred_edge]["attributes"] if attributes["attribute_type_id"] == "biolink:support_graphs"]
        print(f'{inferred_edge} has {len(support_graphs)} evidential Paths')
        print("=============")
        for i, sg in enumerate(support_graphs):
            print(f"{i} | {sg}")
        print()
        print("**e_Inferred.....: graph/edge enriched paths")
        print("**n_Inferred.....: node/property enriched paths")
        return support_graphs

In [223]:
def explore_one_support_graph(i, support_graphs):
    # Tracing the support graph to the auxiliary components
    aux_graph_edges = response_aux_graphs.get(support_graphs[i]).get("edges")
    print(f'{support_graphs[i]} has ---> {len(aux_graph_edges)} edges')
    print("=============")
    for i, aux_edge in enumerate(aux_graph_edges):
        print(i, aux_edge)
    
    print()
    print("**Each Inferred edge support graph's aux_graph has 3 edges**")
    print("- Inferred_node to the enrichment")
    print("- Enrichment to the Groupset (lookup_list)")
    print("- The Groupset to the qg Curie")
    
    inf2enrichment_aux_graph_edge = ''
    enrich2group_aux_graph_edge = ''
    group2curie_aux_graph_edge = ''
    for aedge in aux_graph_edges:
        support = [attributes["value"] for attributes in response_kg_edges[aedge]["attributes"]]
        if support:
            if isinstance(support[0], list):
                enrich2group_aux_graph_edge = aedge
            else:
                group2curie_aux_graph_edge = aedge
        else:
            inf2enrichment_aux_graph_edge = aedge
    return inf2enrichment_aux_graph_edge, enrich2group_aux_graph_edge, group2curie_aux_graph_edge

In [224]:
def explore_one_support_graphs_first_edge(inf2enrichment_aux_graph_edge):
    # The first edge: 
    inf2enrichmentedge = response_kg_edges[inf2enrichment_aux_graph_edge]
    
    print(f"{response_kg_nodes[inf2enrichmentedge['subject']]['name']} -({inf2enrichmentedge['predicate']})-> {response_kg_nodes[inf2enrichmentedge['object']]['name']}")
    print()
    print(f"The first one: {inf2enrichmentedge}")
    
    print()
    print("***inferred_node -(rel)- enrichment usually has no support graph so we stop digging***")

In [225]:
def explore_one_support_graphs_second_edge(enrich2group_aux_graph_edge):
    print(" This is the enrichment - (rel) - lookup_list. The Major AnswerCoalesce Module. It has many support graph, so we dig further ")
    print("=========")
    enrichment2group_edge = response_kg_edges[enrich2group_aux_graph_edge]
    print(enrichment2group_edge)
    print()
    print(f"{response_kg_nodes[enrichment2group_edge['subject']]['name']} -({enrichment2group_edge['predicate']})-> {response_kg_nodes[enrichment2group_edge['object']]['name']}")
    print()
    for attributes in enrichment2group_edge["attributes"]:
        enrichment2group_support_graphs = attributes["value"]
        # Each of these exists in the auxiliary graph
        for i, e2group_sp in enumerate(enrichment2group_support_graphs):
            e2group_edges = response_aux_graphs[e2group_sp]["edges"]
            theedges0 = response_kg_edges[e2group_edges[0]]
            theedges1 = response_kg_edges[e2group_edges[1]]
            if "qualifiers" in theedges0:
                qc = '_'.join([q["qualifier_value"] for q in theedges0["qualifiers"]])
                predicate0 = f"{theedges0["predicate"]}__{qc}"
            else:
                predicate0 = theedges0["predicate"]
                
            if "qualifiers" in theedges1:
                qc = '_'.join([q["qualifier_value"] for q in theedges1["qualifiers"]])
                predicate1 = f"{theedges1["predicate"]}__{qc}"
            else:
                predicate1 = theedges1["predicate"]
            edge0pval = [attr["value"] for attr in theedges0["attributes"] if attr["attribute_type_id"] == "biolink:p_value"]
            edge1pval = [attr["value"] for attr in theedges1["attributes"] if attr["attribute_type_id"] == "biolink:p_value"]
            if edge0pval:
                pvalue = edge0pval[0]
            else:
                pvalue = edge1pval[0]
            print(f'Path{i}: {response_kg_nodes[theedges0["subject"]]["name"]}--({predicate0})-->{response_kg_nodes[theedges0["object"]]["name"]} ^ {response_kg_nodes[theedges1["subject"]]["name"]}--({predicate1})-->{response_kg_nodes[theedges1["object"]]["name"]} | {pvalue}')
            print()
# 4 members of set uuid affects `NCBIGene:83817`         

In [226]:
def explore_one_support_graphs_third_edge(group2curie_aux_graph_edge):
    print("The Groupset- qg Curie edge has support graph of members of the group, so we stop digging")
    print("=========")
    
    print(response_kg_edges[group2curie_aux_graph_edge])
    print()
    print(f"{group2curie_aux_graph_edge}:===>")
    for attributes in response_kg_edges[group2curie_aux_graph_edge]["attributes"]:
        sgs = attributes['value']
        sgedges = sorted(response_aux_graphs[sgs]["edges"])
        j = 0; path = 1
        while j < len(sgedges)-1:
            p1 = response_kg_edges[sgedges[j]]
            p2 = response_kg_edges[sgedges[j+1]]
            print(f'Path{path}: {response_kg_nodes[p2["subject"]]["name"]}-({p2["predicate"]})->{response_kg_nodes[p2["object"]]["name"]} ^ {response_kg_nodes[p1["subject"]]["name"]}-({p1["predicate"]})->{response_kg_nodes[p1["object"]]["name"]}')
            j+=2
            path+=1


In [227]:
disease = "MONDO:0004975" #Alzheimers
p_value = 1e-5; result_length = 100; predicates_to_exclude = [ "biolink:causes", "biolink:biomarker_for", "biolink:contraindicated_for", "biolink:contraindicated_in", "biolink:contributes_to", "biolink:has_adverse_event", "biolink:causes_adverse_event", "biolink:treats_or_applied_or_studied_to_treat" ]
params = {"parameters": {"pvalue_threshold": p_value, "result_length": result_length, "predicates_to_exclude": predicates_to_exclude}}

disease_to_drug_response = disease_to_drug(disease, params, print_query=False)
print_results(disease_to_drug_response)

{'message': {'query_graph': {'nodes': {'input': {'categories': ['biolink:Disease'], 'ids': ['MONDO:0004975']}, 'output': {'categories': ['biolink:Drug']}}, 'edges': {'edge_0': {'subject': 'output', 'object': 'input', 'predicates': ['biolink:treats'], 'knowledge_type': 'inferred'}}}}}
200
0 | CHEBI:27953 | physostigmine
1 | CHEBI:553827 | bambuterol
2 | CHEBI:93248 | (6R,7R)-7-[[2-(2-amino-4-thiazolyl)-2-(carboxymethoxyimino)-1-oxoethyl]amino]-3-ethenyl-8-oxo-5-thia-1-azabicyclo[4.2.0]oct-2-ene-2-carboxylic acid
3 | CHEBI:4754 | econazole
4 | CHEBI:16523 | D-serine
5 | CHEBI:134709 | pitolisant
6 | CHEBI:15854 | quinine
7 | CHEBI:474053 | cefazolin
8 | CHEBI:9150 | simvastatin
9 | CHEBI:3510 | ceftibuten
10 | CHEBI:3480 | cefamandole
11 | CHEBI:49575 | diazepam
12 | CHEBI:31981 | periciazine
13 | CHEBI:45713 | trans-resveratrol
14 | CHEBI:40303 | lovastatin
15 | CHEBI:3611 | chlordiazepoxide
16 | CHEBI:5775 | hydralazine
17 | CHEBI:7565 | nifedipine
18 | CHEBI:6923 | miconazole
19 | CHE

# Let's trace one result

In [228]:
index = 1
support_graphs = print_result_support_graphs(index, disease_to_drug_response)

CHEBI:553827_Inferred_to_biolink:treats_MONDO:0004975 has 7 evidential Paths
0 | n_Inferred_SG:_CHEBI:553827_Inferred_to_biolink:treats_MONDO:0004975_via_CHEBI:553827_biolink:has_chemical_role_CHEBI_ROLE_neurotransmitter_agent
1 | e_Inferred_SG:_CHEBI:553827_Inferred_to_biolink:treats_MONDO:0004975_via_CHEBI:553827_biolink:affects_NCBIGene:43
2 | e_Inferred_SG:_CHEBI:553827_Inferred_to_biolink:treats_MONDO:0004975_via_CHEBI:553827_biolink:affects_NCBIGene:590
3 | e_Inferred_SG:_CHEBI:553827_Inferred_to_biolink:treats_MONDO:0004975_via_CHEBI:553827_biolink:affects_NCBIGene:65036
4 | e_Inferred_SG:_CHEBI:553827_Inferred_to_biolink:treats_MONDO:0004975_via_CHEBI:553827_biolink:affects_UniProtKB:P81908
5 | n_Inferred_SG:_CHEBI:553827_Inferred_to_biolink:treats_MONDO:0004975_via_CHEBI:553827_biolink:has_chemical_role_CHEBI_ROLE_EC_3.1.1_carboxylic_ester_hydrolase_inhibitor
6 | e_Inferred_SG:_CHEBI:553827_Inferred_to_biolink:treats_MONDO:0004975_via_CHEBI:553827_biolink:affects_NCBIGene:8381

In [229]:
index = 1
inf2enrichment_aux_graph_edge, enrich2group_aux_graph_edge, group2curie_aux_graph_edge = explore_one_support_graph(index, support_graphs)

e_Inferred_SG:_CHEBI:553827_Inferred_to_biolink:treats_MONDO:0004975_via_CHEBI:553827_biolink:affects_NCBIGene:43 has ---> 3 edges
0 CHEBI:553827_biolink:affects_NCBIGene:43
1 e_NCBIGene:43_biolink:binds_uuid:1
2 uuid:1_biolink:treats_MONDO:0004975

**Each Inferred edge support graph's aux_graph has 3 edges**
- Inferred_node to the enrichment
- Enrichment to the Groupset (lookup_list)
- The Groupset to the qg Curie


In [230]:
explore_one_support_graphs_first_edge(inf2enrichment_aux_graph_edge)

bambuterol -(biolink:affects)-> ACHE

The first one: {'subject': 'CHEBI:553827', 'object': 'NCBIGene:43', 'predicate': 'biolink:affects', 'sources': [{'resource_id': 'infores:drugcentral', 'resource_role': 'primary_knowledge_source'}], 'qualifiers': [{'qualifier_type_id': 'biolink:object_direction_qualifier', 'qualifier_value': 'decreased'}, {'qualifier_type_id': 'biolink:object_aspect_qualifier', 'qualifier_value': 'activity'}], 'attributes': []}

***inferred_node -(rel)- enrichment usually has no support graph so we stop digging***


In [231]:
explore_one_support_graphs_second_edge(enrich2group_aux_graph_edge)

 This is the enrichment - (rel) - lookup_list. The Major AnswerCoalesce Module. It has many support graph, so we dig further 
{'subject': 'NCBIGene:43', 'object': 'uuid:1', 'predicate': 'biolink:binds', 'sources': [{'resource_id': 'infores:answercoalesce', 'resource_role': 'primary_knowledge_source'}], 'qualifiers': [], 'attributes': [{'attribute_type_id': 'biolink:support_graphs', 'value': ['SG:_e_NCBIGene:43_biolink:binds_CHEBI:45980', 'SG:_e_NCBIGene:43_biolink:binds_CHEBI:42944', 'SG:_e_NCBIGene:43_biolink:binds_CHEBI:53289', 'SG:_e_NCBIGene:43_biolink:binds_CHEBI:8874'], 'attribute_source': 'infores:answercoalesce'}]}

ACHE -(biolink:binds)-> uuid:1

Path0: tacrine--(biolink:member_of)-->uuid:1 ^ ACHE--(biolink:binds)-->tacrine | 2.0767894563400368e-06

Path1: galanthamine--(biolink:member_of)-->uuid:1 ^ ACHE--(biolink:binds)-->galanthamine | 2.0767894563400368e-06

Path2: ACHE--(biolink:binds)-->donepezil ^ donepezil--(biolink:member_of)-->uuid:1 | 2.0767894563400368e-06

Path3: 

In [232]:
explore_one_support_graphs_third_edge(group2curie_aux_graph_edge)

The Groupset- qg Curie edge has support graph of members of the group, so we stop digging
{'subject': 'uuid:1', 'object': 'MONDO:0004975', 'predicate': 'biolink:treats', 'sources': [{'resource_id': 'infores:answercoalesce', 'resource_role': 'primary_knowledge_source'}], 'attributes': [{'attribute_type_id': 'biolink:support_graphs', 'value': 'SG:_uuid:1_biolink:treats_MONDO:0004975', 'attribute_source': 'infores:answercoalesce'}]}

uuid:1_biolink:treats_MONDO:0004975:===>
Path1: acetylcholine-(biolink:member_of)->uuid:1 ^ acetylcholine-(biolink:treats)->Alzheimer disease
Path2: benzatropine-(biolink:member_of)->uuid:1 ^ benzatropine-(biolink:treats)->Alzheimer disease
Path3: galanthamine-(biolink:member_of)->uuid:1 ^ galanthamine-(biolink:treats)->Alzheimer disease
Path4: tacrine-(biolink:member_of)->uuid:1 ^ tacrine-(biolink:treats)->Alzheimer disease
Path5: donepezil-(biolink:member_of)->uuid:1 ^ donepezil-(biolink:treats)->Alzheimer disease
Path6: haloperidol-(biolink:member_of)->uui

### disease_to_gene

In [None]:
disease = "DOID:0050430" # multiple endocrine neoplasia type 2A 
params = {"parameters": {"pvalue_threshold": 1e-5, "result_length": 100, "predicates_to_exclude": []}}
disease_to_gene_response = disease_to_gene(disease, params, print_query=False)
print_results(disease_to_gene_response)

In [None]:
# Let's trace one result
index = 1
support_graphs = print_result_support_graphs(index, disease_to_drug_response)

In [None]:
index = 1
inf2enrichment_aux_graph_edge, enrich2group_aux_graph_edge, group2curie_aux_graph_edge = explore_one_support_graph(
    index, support_graphs)

In [None]:
explore_one_support_graphs_first_edge(inf2enrichment_aux_graph_edge)
explore_one_support_graphs_second_edge(enrich2group_aux_graph_edge)
explore_one_support_graphs_third_edge(group2curie_aux_graph_edge)


# phenotype_to_gene

In [None]:
phenotype = "HP:0003637" # Reduced circulating 4-Hydroxyphenylpyruvate dioxygenase activity
params = {"parameters": {"pvalue_threshold": 1e-5, "result_length": 100, "predicates_to_exclude": []}}
phenotype_to_gene_response = phenotype_to_gene(phenotype, params, print_query=False)
print_results(phenotype_to_gene_response)

# gene_to_phenotype

In [None]:
gene = "NCBIGene:122481" #"AK7"
params = {"parameters": {"pvalue_threshold": 1e-5, "result_length": 100, "predicates_to_exclude": []}}
gene_to_phenotype_response = gene_to_phenotype(gene, params=params, print_query=False )
print_results(gene_to_phenotype_response)

# disease_to_phenotype

In [None]:
disease = "MONDO:0005147" # colonic neoplasm
params = {"parameters": {"pvalue_threshold": 1e-5, "result_length": 100, "predicates_to_exclude": []}}
disease_to_phenotype_response = disease_to_phenotype(disease, params=params, print_query=False )
print_results(disease_to_phenotype_response)