# Setup

Testing https://github.com/ranking-agent/strider/issues/173

In [1]:
import json
import requests
from datetime import datetime as dt
from collections import defaultdict
import pandas as pd

In [2]:
#https://pypi.org/project/gamma-viewer/
from gamma_viewer import GammaViewer
from IPython.display import display, Markdown

In [3]:
def printjson(j):
    """Pretty-print ``j`` to stdout as JSON with a four-space indent."""
    rendered = json.dumps(j, indent=4)
    print(rendered)
def print_json(j):
    """Alternate spelling of ``printjson``: pretty-print ``j`` as indented JSON."""
    return printjson(j)

In [44]:
def post(name,url,message,params=None):
    """POST ``message`` as JSON to ``url`` and return the decoded JSON reply.

    Args:
        name: Label for the service; only used in the error printout.
        url: Endpoint to POST to.
        message: JSON-serialisable request body.
        params: Optional query-string parameters forwarded to ``requests.post``.

    Returns:
        The decoded JSON body when the status code is 200. For any other
        status, the status and response body are printed and ``{}`` is
        returned.
    """
    # params=None is what requests uses when the argument is omitted, so a
    # single call covers both the "with params" and "without params" cases.
    response = requests.post(url, json=message, params=params)
    if response.status_code != 200:
        print(name, 'error:', response.status_code)
        try:
            print(response.json())
        except ValueError:
            # Error bodies are not always JSON (e.g. an HTML gateway page);
            # show the raw text rather than raising from the error handler.
            print(response.text)
        return {}
    return response.json()

def automat(db,message):
    """POST ``message`` to the ``/query`` endpoint of the Automat instance ``db``.

    Prints the HTTP status code and returns the decoded JSON body, whatever
    the status was.
    """
    reply = requests.post(f'https://automat.renci.org/{db}/query', json=message)
    print(reply.status_code)
    return reply.json()

def strider(message):
    """Send ``message`` to Strider's ``/query`` endpoint with DEBUG-level logs.

    Returns whatever ``post`` returns: the decoded JSON reply, or ``{}`` if
    the request did not come back with status 200.
    """
    url = 'https://strider.renci.org/query?log_level=DEBUG'
    # The label must be the string 'strider'. Passing the function object
    # made the error printout read "<function strider at 0x...> error: ...".
    strider_answer = post('strider', url, message)
    return strider_answer

def aragorn(message, coalesce_type='xnone'):
    """Send ``message`` to the ARAGORN ``/query`` endpoint via ``post``.

    ``coalesce_type`` is forwarded as the ``answer_coalesce_type`` query
    parameter, except for the sentinel ``'xnone'`` (the default), which means
    "send no such parameter at all".
    """
    if coalesce_type == 'xnone':
        query_params = None
    else:
        query_params = {'answer_coalesce_type': coalesce_type}
    return post('aragorn', 'https://aragorn.renci.org/query', message, params=query_params)

def camkp(message):
    """Send ``message`` straight to the CAM-KP query endpoint and return ``post``'s result."""
    return post('cam', 'https://stars-app.renci.org/cam-kp/query', message)

def refkg(message):
    """Send ``message`` straight to the REF-KG (Monarch TRAPI) query endpoint and return ``post``'s result."""
    return post('REF-KG', 'https://trapi.monarchinitiative.org/query', message)

def bte(message):
    """Send ``message`` to the BTE ``/v1/query`` endpoint and return ``post``'s result."""
    url = 'https://api.bte.ncats.io/v1/query'
    # Label the request 'bte'. The original passed the ``strider`` function
    # object, so failures were reported under the wrong (and unreadable) name.
    return post('bte', url, message)

def coalesce(message,method='all'):
    """POST ``message`` to the Answer Coalesce ``/coalesce/graph`` endpoint.

    ``method`` only contributes to the label (``'AC' + method``) that ``post``
    prints on failure; the URL is fixed to ``/coalesce/graph`` whatever its
    value.
    NOTE(review): the ``'all'`` default suggests ``method`` was meant to pick
    the endpoint -- confirm before relying on it.
    """
    label = 'AC' + method
    return post(label, 'https://answercoalesce.renci.org/coalesce/graph', message)

def striderandfriends(message):
    """Run Strider, then feed its answer through the post-processing services.

    The stages run in order -- coalesce, omnicorp overlay, weight correctness,
    score -- each receiving the previous stage's output.

    Returns:
        A 5-tuple ``(strider, coalesced, omnicorp, weighted, scored)``.
    """
    stages = (
        ('coalesce', 'https://answercoalesce.renci.org/coalesce/all'),
        ('omnicorp', 'https://aragorn-ranker.renci.org/omnicorp_overlay'),
        ('weight', 'https://aragorn-ranker.renci.org/weight_correctness'),
        ('score', 'https://aragorn-ranker.renci.org/score'),
    )
    answers = [strider(message)]
    for stage_name, stage_url in stages:
        # Each service consumes the answer produced by the stage before it.
        answers.append(post(stage_name, stage_url, answers[-1]))
    return tuple(answers)

In [36]:
def print_errors(strider_result):
    """Print a tally of the ERROR-level log entries in a Strider response.

    Each ERROR entry's ``message`` is parsed as JSON and its ``error`` text is
    grouped after dropping the last five whitespace-separated words
    (presumably the request-specific tail -- TODO confirm). Messages that are
    not JSON with a string ``error`` field are tallied by their raw text.

    Prints one ``"<error> (<count> times)"`` line per distinct error and
    nothing at all when there are no logs (e.g. the ``{}`` that ``post``
    returns for a failed request).
    """
    errorcounts = defaultdict(int)
    # A failed request yields {} (no 'logs' key); treat that as "no logs".
    for logmessage in strider_result.get('logs') or []:
        if logmessage.get('level') != 'ERROR':
            continue
        raw = logmessage.get('message')
        try:
            words = json.loads(raw)['error'].split()
        except (ValueError, TypeError, KeyError, AttributeError):
            # Not the JSON-with-'error' shape: count the message verbatim
            # instead of aborting the whole summary.
            errorcounts[str(raw)] += 1
            continue
        errorcounts[" ".join(words[:-5])] += 1
    for error,count in errorcounts.items():
        print(f'{error} ({count} times)')
        
def print_queried_sources(strider_result):
    """Print how many times each URL appears in the ``step`` lists of the logs.

    Only log entries whose ``step`` value is a list are considered; every
    element of such a list contributes one count for its ``url``. Prints one
    ``"<url> (<count> times)"`` line per URL, in first-seen order, and nothing
    when the response carries no logs (e.g. ``{}`` from a failed request).
    """
    querycounts = defaultdict(int)
    # Tolerate a missing or null 'logs' entry rather than raising KeyError.
    for logmessage in strider_result.get('logs') or []:
        if 'step' in logmessage and isinstance(logmessage['step'],list):
            for s in logmessage['step']:
                querycounts[s['url']] += 1
    for url,count in querycounts.items():
        print(f'{url} ({count} times)')
        
def print_query_for_source(strider_result,url):
    """Print every logged step whose ``url`` equals ``url``.

    Only log entries whose ``step`` value is a list are inspected. Prints
    nothing when the response carries no logs (e.g. ``{}`` from a failed
    request).
    """
    # Tolerate a missing or null 'logs' entry rather than raising KeyError.
    for logmessage in strider_result.get('logs') or []:
        if 'step' in logmessage and isinstance(logmessage['step'],list):
            for s in logmessage['step']:
                if s['url']==url:
                    print(s)

In [6]:
def retrieve_ars_results(mid):
    """Collect the ARAGORN answers attached to ARS message ``mid``.

    Fetches the traced ARS message, then fetches the child message of every
    child whose actor agent is ``'ara-aragorn'`` or ``'ara-aragorn-exp'``.
    For each such child a line ``<status> <agent> <number of results>`` is
    printed.

    Returns:
        ``{agent: {'message': <child TRAPI message>}}`` for the children that
        returned at least one result.
    """
    message_url = f'https://ars.transltr.io/ars/api/messages/{mid}?trace=y'
    response = requests.get(message_url)
    j = response.json()
    results = {}
    for child in j['children']:
        agent = child['actor']['agent']
        if agent not in ('ara-aragorn', 'ara-aragorn-exp'):
            continue
        childmessage_id = child['message']
        child_url = f'https://ars.transltr.io/ars/api/messages/{childmessage_id}'
        child_response = requests.get(child_url).json()
        try:
            child_message = child_response['fields']['data']['message']
            nresults = len(child_message['results'])
        except (KeyError, TypeError):
            # A child with no data/message/results (missing key or null
            # value) counts as zero results. Only these lookup failures are
            # caught, so Ctrl-C and genuine bugs still surface.
            nresults = 0
        else:
            if nresults > 0:
                results[agent] = {'message': child_message}
        print( child['status'], agent, nresults )
    return results

In [7]:
def get_provenance(message):
    """Tabulate which sources supplied the edges bound in a message's results.

    For every knowledge-graph edge bound to a query-graph edge in any result,
    each attribute named ``'provenance'`` adds one to the count for that
    (query edge, attribute value) pair.

    Returns:
        A DataFrame with columns ``"QG Edge"``, ``"Source"`` and ``"Count"``,
        one row per (query edge, source) pair in first-seen order.
    """
    body = message['message']
    results = body['results']
    kg_edges = body['knowledge_graph']['edges']

    tally = defaultdict(lambda: defaultdict(int))  # {qedge -> {source -> count}}
    for bindings in [result['edge_bindings'] for result in results]:
        for qedge, bound_edges in bindings.items():
            for bound in bound_edges:
                for attribute in kg_edges[bound['id']]['attributes']:
                    if attribute['name'] != 'provenance':
                        continue
                    tally[qedge][attribute['value']] += 1

    # Flatten the nested tally into one (qedge, source, count) row per pair.
    rows = [
        (qedge, source, count)
        for qedge, by_source in tally.items()
        for source, count in by_source.items()
    ]
    return pd.DataFrame({
        "QG Edge": [row[0] for row in rows],
        "Source": [row[1] for row in rows],
        "Count": [row[2] for row in rows],
    })

## Query Specific

In [10]:
# One-hop TRAPI query: MolecularActivity nodes (n2) linked to glycerol (n0)
# by a biolink:related_to edge, with n2 as subject and n0 as object.
query = {
    "message": {
        "query_graph": {
            "nodes": {
                "n0": {
                    "name": "Glycerol",
                    "id": "CHEBI:17754"
                },
                "n2": {"category": "biolink:MolecularActivity"}
                },
            "edges": {
                "e0": {
                    "subject": "n2",
                    "object": "n0",
                    "predicate":"biolink:related_to"
                }
            }
        }
    }
}
# Echo the query so the exact message sent is recorded in the notebook output.
printjson(query)

{
    "message": {
        "query_graph": {
            "nodes": {
                "n0": {
                    "name": "Glycerol",
                    "id": "CHEBI:17754"
                },
                "n2": {
                    "category": "biolink:MolecularActivity"
                }
            },
            "edges": {
                "e0": {
                    "subject": "n2",
                    "object": "n0",
                    "predicate": "biolink:related_to"
                }
            }
        }
    }
}


## Strider Direct

In [11]:
# Time one direct Strider query and report how many results it returned.
start = dt.now()
strider_result = strider(query)
end = dt.now()
print(f"Strider produced {len(strider_result['message']['results'])} results in {end-start}.")

Strider produced 303 results in 0:00:34.636153.


### Provenance

In [12]:
# Tabulate which sources supplied the edges bound in Strider's results.
prov = get_provenance(strider_result)
display(prov)

Unnamed: 0,QG Edge,Source,Count
0,e0,https://automat.renci.org/uberongraph/reasonerapi,4
1,e0,https://automat.renci.org/textminingkp/reasone...,286
2,e0,https://automat.renci.org/cord19-scigraph/reas...,17
3,e0,https://automat.renci.org/cord19-scibite/reaso...,1


We're now getting results (yay!), but still none from CAM-KP or REF-KG (boo!).

### Queried sources

In [34]:
# List every URL that appears in the logged steps, with how often it occurs.
print_queried_sources(strider_result)

https://stars-app.renci.org/cam-kp/query (17 times)
https://trapi.monarchinitiative.org/query (2 times)
https://automat.renci.org/covidkopkg/reasonerapi (6 times)
https://automat.renci.org/robokopkg/reasonerapi (6 times)
https://automat.renci.org/cord19-scibite/reasonerapi (2 times)
https://automat.renci.org/cord19-scigraph/reasonerapi (4 times)
https://automat.renci.org/textminingkp/reasonerapi (2 times)
https://automat.renci.org/uberongraph/reasonerapi (6 times)


### Errors

In [35]:
# Summarise the ERROR-level log messages; no output means none were logged.
print_errors(strider_result)

Both CAM-KP and REF-KG (the Monarch graph) are being queried, but neither contributes anything to the results.  Why?  Does our query return results from them when sent directly rather than via Strider?  The new Strider logs no longer show the exact TRAPI query sent to each source, so the direct checks below are as far as we can go.

In [43]:
# Send the same query straight to CAM-KP, bypassing Strider, and time it.
start = dt.now()
camkp_result = camkp(query)
end = dt.now()
print(f"CAM-KP produced {len(camkp_result['message']['results'])} results in {end-start}.")

CAM-KP produced 8 results in 0:00:08.972212.


In [45]:
# Send the same query straight to REF-KG, bypassing Strider, and time it.
start = dt.now()
refkg_result = refkg(query)
end = dt.now()
print(f"REF-KG produced {len(refkg_result['message']['results'])} results in {end-start}.")

REF-KG produced 2 results in 0:00:01.172926.


In [48]:
# Show what CAM-KP bound to n2 (the MolecularActivity node) in each result.
[r['node_bindings']['n2'] for r in camkp_result['message']['results']]

[[{'id': 'GO:0015254'}],
 [{'id': 'GO:0004370'}],
 [{'id': 'ENVO:02500000'}],
 [{'id': 'GO:0050479'}],
 [{'id': 'GO:0043136'}],
 [{'id': 'GO:0047372'}],
 [{'id': 'GO:0016411'}],
 [{'id': 'GO:0004630'}]]

In [50]:
# Show what REF-KG bound to n2 (the MolecularActivity node) in each result.
[r['node_bindings']['n2'] for r in refkg_result['message']['results']]

[[{'id': 'GO:0015168'}], [{'id': 'GO:0015254'}]]