# Setup

In [1]:
import json
import requests
from datetime import datetime as dt
from collections import defaultdict
import pandas as pd

In [2]:
def printjson(j):
    """Pretty-print a JSON-serializable object to stdout using 4-space indentation."""
    rendered = json.dumps(j, indent=4)
    print(rendered)

def print_json(j):
    """Snake-case spelling of ``printjson``; simply delegates to it."""
    printjson(j)

In [3]:
def post(name,url,message,params=None):
    """POST ``message`` as JSON to ``url`` and return the decoded JSON response.

    Args:
        name: Label used only in the error printout to identify the service.
        url: Endpoint to post to.
        message: JSON-serializable request body.
        params: Optional query-string parameters forwarded to ``requests.post``.

    Returns:
        The decoded JSON body on HTTP 200; otherwise an empty dict, after
        printing the status code and the error body.
    """
    # ``params=None`` is what requests uses by default, so one call covers both cases.
    response = requests.post(url, json=message, params=params)
    if response.status_code != 200:
        print(name, 'error:', response.status_code)
        try:
            print(response.json())
        except ValueError:
            # An error body is not guaranteed to be JSON; show the raw text
            # instead of letting the error report itself raise.
            print(response.text)
        return {}
    return response.json()

def automat(db,message):
    """Send ``message`` to the ``/query`` endpoint of the Automat instance ``db``.

    Prints the HTTP status code and returns the decoded JSON body; the status
    code is not checked before decoding.
    """
    reply = requests.post(f'https://automat.renci.org/{db}/query', json=message)
    print(reply.status_code)
    return reply.json()

def strider(message):
    """Send ``message`` to Strider's ``/query`` endpoint with DEBUG logging enabled.

    Returns:
        Whatever ``post`` returns: the decoded JSON answer, or ``{}`` when the
        service does not respond with HTTP 200.
    """
    url = 'https://strider.renci.org/query?log_level=DEBUG'
    # The first argument is a display label for error messages, so it must be
    # the string 'strider', not this function object.
    return post('strider', url, message)

def aragorn(message, coalesce_type='xnone'):
    """Send ``message`` to ARAGORN's ``/query`` endpoint.

    The sentinel ``'xnone'`` sends no query parameters; any other value is
    forwarded as the ``answer_coalesce_type`` query parameter.
    """
    params = None if coalesce_type == 'xnone' else {'answer_coalesce_type': coalesce_type}
    return post('aragorn', 'https://aragorn.renci.org/query', message, params=params)

## BTE and Strider post-processing

def bte(message):
    """Send ``message`` to the BTE ``/v1/query`` endpoint.

    Returns:
        Whatever ``post`` returns: the decoded JSON answer, or ``{}`` when the
        service does not respond with HTTP 200.
    """
    url = 'https://api.bte.ncats.io/v1/query'
    # Label errors as coming from BTE; previously the ``strider`` function
    # object was passed here, which mislabelled failures.
    return post('bte', url, message)
 
def striderandfriends(message):
    """Run ``message`` through Strider and then the coalesce, omnicorp overlay,
    weighting and scoring services, returning the output of the last stage.

    Each stage receives the previous stage's output wrapped as ``{'message': ...}``.
    """
    stages = (
        ('coalesce', 'https://answercoalesce.renci.org/coalesce/graph'),
        ('omnicorp', 'https://aragorn-ranker.renci.org/omnicorp_overlay'),
        ('weight', 'https://aragorn-ranker.renci.org/weight_correctness'),
        ('score', 'https://aragorn-ranker.renci.org/score'),
    )
    answer = strider(message)
    for stage_name, stage_url in stages:
        answer = post(stage_name, stage_url, {'message': answer})
    return answer

In [4]:
def retrieve_ars_results(mid):
    """Fetch the ARAGORN child messages of an ARS message and keep those with results.

    Prints one line per ARAGORN child: its status, agent name and result count.

    Args:
        mid: ARS message identifier, interpolated into the ARS messages URL.

    Returns:
        Dict mapping agent name ('ara-aragorn' or 'ara-aragorn-exp') to
        ``{'message': ...}`` for every such child with at least one result.
    """
    message_url = f'https://ars.transltr.io/ars/api/messages/{mid}?trace=y'
    trace = requests.get(message_url).json()
    results = {}
    for child in trace['children']:
        agent = child['actor']['agent']
        if agent not in ('ara-aragorn', 'ara-aragorn-exp'):
            continue
        child_url = f"https://ars.transltr.io/ars/api/messages/{child['message']}"
        child_response = requests.get(child_url).json()
        try:
            child_message = child_response['fields']['data']['message']
            nresults = len(child_message['results'])
        except (KeyError, TypeError):
            # A child without a complete message (presumably errored or still
            # running) has missing or null links in this chain; count it as
            # zero results. Only lookup failures are caught so that
            # KeyboardInterrupt and unrelated bugs still surface.
            nresults = 0
        else:
            if nresults > 0:
                results[agent] = {'message': child_message}
        print(child['status'], agent, nresults)
    return results

In [5]:
def get_provenance(message):
    """Given a message with results, count the provenance sources of the bound edges.

    For every knowledge-graph edge bound in a result, each attribute named
    'provenance' contributes one count to (query-graph edge, attribute value).
    An edge bound in several results is counted once per binding.

    Args:
        message: Dict with ``message['message']['results']`` (each result
            carrying ``edge_bindings``: qedge id -> list of ``{'id': kg_edge_id}``)
            and ``message['message']['knowledge_graph']['edges']`` keyed by
            knowledge-graph edge id.

    Returns:
        pandas DataFrame with columns "QG Edge", "Source" and "Count".
    """
    prov = defaultdict(lambda: defaultdict(int)) # {qedge->{source->count}}
    kg_edges = message['message']['knowledge_graph']['edges']
    for result in message['message']['results']:
        for qg_e, kg_l in result['edge_bindings'].items():
            for kg_e in kg_l:
                # ``attributes`` may be absent or null on an edge; treat both as empty.
                for att in kg_edges[kg_e['id']].get('attributes') or []:
                    # Attributes without a 'name' key are skipped, not an error.
                    if att.get('name') == 'provenance':
                        prov[qg_e][att['value']] += 1
    rows = [
        (qg_e, source, count)
        for qg_e, by_source in prov.items()
        for source, count in by_source.items()
    ]
    return pd.DataFrame(rows, columns=["QG Edge", "Source", "Count"])

## Query Specific

In [8]:
# One-hop TRAPI query graph: HP:0100543 (n0) --related_to--> NCBIGene:351 (n1).
query = {'message':{'query_graph':{
  "nodes": {
    "n0": {
      "categories": [
        "biolink:PhenotypicFeature"
      ],
      "ids": ["HP:0100543"]
    },
    "n1": {
      "ids": ["NCBIGene:351"],
      "categories":[ "biolink:Gene"]
    }
  },
  "edges": {
    "e0": {
      "subject": "n0",
      "object": "n1",
      # The QEdge key is the plural "predicates", matching the plural
      # "ids"/"categories" used on the nodes; a singular "predicate" key is
      # not part of the schema.
      "predicates": [
        "biolink:related_to"
      ]
    }
  }
}}}

## Strider Direct

In [9]:
# Time one round trip to Strider for the current query and report the result count.
start = dt.now()
strider_result = strider(query)
end = dt.now()
n_results = len(strider_result['message']['results'])
print(f"Strider produced {n_results} results in {end-start}.")

Strider produced 1 results in 0:00:12.924755.


In [13]:
# Pull the knowledge graph out of the Strider response; the cell shows the edge count.
knowledge_graph = strider_result["message"]["knowledge_graph"]
edges = knowledge_graph["edges"]
nodes = knowledge_graph["nodes"]
len(edges)

29

In [14]:
# List every knowledge-graph node as "<id> <name>".
for nid, node in nodes.items():
    print(nid, node['name'])

NCBIGene:351 APP
MONDO:0010857 Semantic dementia
HP:0100543 Cognitive impairment
MONDO:0001627 dementia
MONDO:0001152 amnestic disorder
MONDO:0008119 spinocerebellar ataxia type 1
HP:0001289 Confusion
MONDO:0004975 Alzheimer disease
MONDO:0004648 vascular dementia
MONDO:0007088 Alzheimer disease type 1


In [26]:
# Tabulate the knowledge-graph edges that directly connect the two query nodes
# (in either direction), with each edge's primary knowledge source (PKS) and
# its aggregator knowledge sources (AKS).
endpoints = ("NCBIGene:351", "HP:0100543")
subjects=[]
objects = []
predicates=[]
pks_s = []
aggs_s = []
for eid, edge in edges.items():
    # Skip edges that touch any node other than the two query endpoints.
    if edge["subject"] not in endpoints or edge["object"] not in endpoints:
        continue
    # Reset per edge: an edge without a primary source records None instead of
    # inheriting the previous edge's value (or raising NameError on the first).
    pks = None
    aggs = []
    for source in edge["sources"]:
        if source["resource_role"] == "primary_knowledge_source":
            pks = source["resource_id"]
        elif source["resource_id"] != 'infores:aragorn':
            # infores:aragorn is left out of the aggregator list; filtering here
            # (rather than list.remove afterwards) cannot raise when it is absent.
            aggs.append(source["resource_id"])
    subjects.append(edge["subject"])
    objects.append(edge["object"])
    predicates.append(edge["predicate"])
    pks_s.append(pks)
    aggs_s.append(aggs)
df = pd.DataFrame( {"subject": subjects, "predicate": predicates, "object": objects, "PKS": pks_s, "AKS": aggs_s} )

In [31]:
# Display the direct-edge table ordered by primary knowledge source.
df.sort_values(by="PKS")

Unnamed: 0,subject,predicate,object,PKS,AKS
3,NCBIGene:351,biolink:genetically_associated_with,HP:0100543,infores:ctd,"[infores:pharos, infores:automat-robokop]"
5,NCBIGene:351,biolink:gene_associated_with_condition,HP:0100543,infores:diseases,[infores:rtx-kg2]
2,NCBIGene:351,biolink:genetically_associated_with,HP:0100543,infores:disgenet,"[infores:pharos, infores:automat-robokop]"
6,NCBIGene:351,biolink:gene_associated_with_condition,HP:0100543,infores:disgenet,[infores:rtx-kg2]
4,HP:0100543,biolink:has_participant,NCBIGene:351,infores:hpo,[infores:molepro]
0,NCBIGene:351,biolink:has_phenotype,HP:0100543,infores:hpo-annotations,"[infores:automat-robokop, infores:monarchiniti..."
1,NCBIGene:351,biolink:genetically_associated_with,HP:0100543,infores:monarchinitiative,"[infores:pharos, infores:automat-robokop]"
7,NCBIGene:351,biolink:affects,HP:0100543,infores:semmeddb,[infores:rtx-kg2]
8,NCBIGene:351,biolink:affects,HP:0100543,infores:semmeddb,[infores:rtx-kg2]
9,NCBIGene:351,biolink:associated_with,HP:0100543,infores:semmeddb,[infores:rtx-kg2]


In [32]:
# One-hop TRAPI query graph: GO:0016310 (n0) --related_to--> NCBIGene:351 (n1).
query = {'message':{'query_graph':{
  "nodes": {
    "n0": {
      "ids": ["GO:0016310"]
    },
    "n1": {
      "ids": ["NCBIGene:351"],
      "categories":[ "biolink:Gene"]
    }
  },
  "edges": {
    "e0": {
      "subject": "n0",
      "object": "n1",
      # The QEdge key is the plural "predicates", matching the plural
      # "ids"/"categories" used on the nodes; a singular "predicate" key is
      # not part of the schema.
      "predicates": [
        "biolink:related_to"
      ]
    }
  }
}}}

In [33]:
# Time one round trip to Strider for the current query and report the result count.
start = dt.now()
strider_result = strider(query)
end = dt.now()
n_results = len(strider_result['message']['results'])
print(f"Strider produced {n_results} results in {end-start}.")

Strider produced 1 results in 0:00:16.748422.


In [34]:
# Pull the knowledge graph out of the Strider response; the cell shows the edge count.
knowledge_graph = strider_result["message"]["knowledge_graph"]
edges = knowledge_graph["edges"]
nodes = knowledge_graph["nodes"]
len(edges)

18

In [35]:
# List every knowledge-graph node as "<id> <name>".
for nid, node in nodes.items():
    print(nid, node['name'])

NCBIGene:351 A4_HUMAN Amyloid-beta precursor protein (sprot)
GO:0006468 protein phosphorylation
GO:0016310 phosphorylation


In [38]:
# Tabulate the knowledge-graph edges that directly connect the two query nodes
# (in either direction), with each edge's primary knowledge source (PKS) and
# its aggregator knowledge sources (AKS).
endpoints = ("NCBIGene:351", "GO:0016310")
subjects=[]
objects = []
predicates=[]
pks_s = []
aggs_s = []
for eid, edge in edges.items():
    # Skip edges that touch any node other than the two query endpoints.
    if edge["subject"] not in endpoints or edge["object"] not in endpoints:
        continue
    # Reset per edge: an edge without a primary source records None instead of
    # inheriting the previous edge's value (or raising NameError on the first).
    pks = None
    aggs = []
    for source in edge["sources"]:
        if source["resource_role"] == "primary_knowledge_source":
            pks = source["resource_id"]
        elif source["resource_id"] != 'infores:aragorn':
            # infores:aragorn is left out of the aggregator list; filtering here
            # (rather than list.remove afterwards) cannot raise when it is absent.
            aggs.append(source["resource_id"])
    subjects.append(edge["subject"])
    objects.append(edge["object"])
    predicates.append(edge["predicate"])
    pks_s.append(pks)
    aggs_s.append(aggs)
df = pd.DataFrame( {"subject": subjects, "predicate": predicates, "object": objects, "PKS": pks_s, "AKS": aggs_s} )

In [39]:
# Display the direct-edge table `df`.
df

Unnamed: 0,subject,predicate,object,PKS,AKS
0,NCBIGene:351,biolink:actively_involved_in,GO:0016310,infores:goa,"[infores:automat-robokop, infores:automat-huma..."
1,NCBIGene:351,biolink:related_to,GO:0016310,infores:ensembl-gene,[infores:rtx-kg2]
2,NCBIGene:351,biolink:actively_involved_in,GO:0016310,infores:hetionet,[infores:automat-hetionet]
3,NCBIGene:351,biolink:acts_upstream_of_positive_effect,GO:0016310,infores:ctd,"[infores:automat-cam-kp, infores:cam-kp]"
4,NCBIGene:351,biolink:acts_upstream_of_or_within_negative_ef...,GO:0016310,infores:ctd,"[infores:automat-cam-kp, infores:cam-kp]"
5,NCBIGene:351,biolink:causes,GO:0016310,infores:ctd,"[infores:automat-cam-kp, infores:cam-kp]"
6,GO:0016310,biolink:has_participant,NCBIGene:351,infores:ctd,"[infores:automat-cam-kp, infores:cam-kp]"
7,NCBIGene:351,biolink:regulates,GO:0016310,infores:ctd,"[infores:automat-cam-kp, infores:cam-kp]"
8,NCBIGene:351,biolink:acts_upstream_of_or_within_positive_ef...,GO:0016310,infores:ctd,"[infores:automat-cam-kp, infores:cam-kp]"
9,NCBIGene:351,biolink:acts_upstream_of_negative_effect,GO:0016310,infores:ctd,"[infores:automat-cam-kp, infores:cam-kp]"


In [40]:
# One-hop TRAPI query graph: HP:0100639 (n0) --related_to--> MONDO:0019065 (n1).
query = {'message':{'query_graph':{
  "nodes": {
    "n0": {
      "ids": ["HP:0100639"]
    },
    "n1": {
      "ids": ["MONDO:0019065"]
    }
  },
  "edges": {
    "e0": {
      "subject": "n0",
      "object": "n1",
      # The QEdge key is the plural "predicates", matching the plural "ids"
      # used on the nodes; a singular "predicate" key is not part of the schema.
      "predicates": [
        "biolink:related_to"
      ]
    }
  }
}}}

In [41]:
# Time one round trip to Strider for the current query and report the result count.
start = dt.now()
strider_result = strider(query)
end = dt.now()
n_results = len(strider_result['message']['results'])
print(f"Strider produced {n_results} results in {end-start}.")

Strider produced 2 results in 0:00:11.962492.


In [43]:
# Dump every knowledge-graph edge of the latest Strider response as indented JSON.
for eid, edge in strider_result["message"]["knowledge_graph"]["edges"].items():
    printjson(edge)

{
    "subject": "MONDO:0019065",
    "object": "HP:0100639",
    "predicate": "biolink:has_phenotype",
    "sources": [
        {
            "resource_id": "infores:automat-robokop",
            "resource_role": "aggregator_knowledge_source",
            "upstream_resource_ids": [
                "infores:monarchinitiative"
            ]
        },
        {
            "resource_id": "infores:monarchinitiative",
            "resource_role": "aggregator_knowledge_source",
            "upstream_resource_ids": [
                "infores:hpo-annotations"
            ]
        },
        {
            "resource_id": "infores:aragorn",
            "resource_role": "aggregator_knowledge_source",
            "upstream_resource_ids": [
                "infores:automat-robokop"
            ]
        },
        {
            "resource_id": "infores:hpo-annotations",
            "resource_role": "primary_knowledge_source"
        }
    ],
    "qualifiers": [
        {
            "qualifier_typ