# Setup

In [1]:
import json
import requests
from datetime import datetime as dt
from collections import defaultdict
import pandas as pd

In [2]:
#https://pypi.org/project/gamma-viewer/
from gamma_viewer import GammaViewer
from IPython.display import display, Markdown

In [3]:
def printjson(j):
    print(json.dumps(j,indent=4))
def print_json(j):
    printjson(j)

In [4]:
def post(name,url,message,params=None):
    if params is None:
        response = requests.post(url,json=message)
    else:
        response = requests.post(url,json=message,params=params)
    if not response.status_code == 200:
        print(name, 'error:',response.status_code)
        print(response.json())
        return {}
    return response.json()

def automat(db,message):
    automat_url = f'https://automat.renci.org/{db}/query'
    response = requests.post(automat_url,json=message)
    print(response.status_code)
    return response.json()

def strider(message):
    url = 'https://strider.renci.org/query?log_level=DEBUG'
    strider_answer = post(strider,url,message)
    return strider_answer

def aragorn(message, coalesce_type='xnone'):
    if coalesce_type == 'xnone':
        answer = post('aragorn','https://aragorn.renci.org/query',message)
    else:
        answer = post('aragorn','https://aragorn.renci.org/query',message, params={'answer_coalesce_type':coalesce_type})
    return answer

##

def ontology(message):
    url = 'https://stars-app.renci.org/sparql-kp/query'
    return post('ontology',url,message)

def bte(message):
    url = 'https://api.bte.ncats.io/v1/query'
    return post(strider,url,message)

def refkg(message):
    url = 'https://trapi.monarchinitiative.org/query'
    #url='https://monarch-sandbox.cgrb.oregonstate.edu/query'
    return post('ref kg',url,message)

def camkp(message):
    url = 'https://stars-app.renci.org/cam-kp/query'
    return post('cam kp',url,message)
 
def striderandfriends(message):
    strider_answer = strider(message)    
    coalesced_answer = post('coalesce','https://answercoalesce.renci.org/coalesce/graph',{'message':strider_answer})
    omni_answer = post('omnicorp','https://aragorn-ranker.renci.org/omnicorp_overlay',{'message': coalesced_answer})
    weighted_answer = post('weight','https://aragorn-ranker.renci.org/weight_correctness',{'message': omni_answer})
    scored_answer = post('score','https://aragorn-ranker.renci.org/score',{'message': weighted_answer})
    return scored_answer

In [5]:
def retrieve_ars_results(mid):
    message_url = f'https://ars.transltr.io/ars/api/messages/{mid}?trace=y'
    response = requests.get(message_url)
    j = response.json()
    results = {}
    for child in j['children']:
        if child['actor']['agent'] in ['ara-aragorn', 'ara-aragorn-exp']:
            childmessage_id = child['message']
            child_url = f'https://ars.transltr.io/ars/api/messages/{childmessage_id}'
            child_response = requests.get(child_url).json()
            try:
                nresults = len(child_response['fields']['data']['message']['results'])
                if nresults > 0:
                    results[child['actor']['agent']] = {'message':child_response['fields']['data']['message']}
            except:
                nresults=0
            print( child['status'], child['actor']['agent'],nresults )
    return results

In [6]:
def get_provenance(message):
    """Given a message with results, find the source of the edges"""
    prov = defaultdict(lambda: defaultdict(int)) # {qedge->{source->count}}
    results = message['message']['results']
    kg = message['message']['knowledge_graph']['edges']
    edge_bindings = [ r['edge_bindings'] for r in results ]
    for bindings in edge_bindings:
        for qg_e, kg_l in bindings.items():
            for kg_e in kg_l:
                for att in kg[kg_e['id']]['attributes']:
                    if att['name'] == 'provenance':
                        source = att['value']
                        prov[qg_e][source]+=1
    qg_edges = []
    sources = []
    counts = []
    for qg_e in prov:
        for source in prov[qg_e]:
            qg_edges.append(qg_e)
            sources.append(source)
            counts.append(prov[qg_e][source])
    prov_table = pd.DataFrame({"QG Edge":qg_edges, "Source":sources, "Count":counts})
    return prov_table

## Query Specific

In [16]:
qg = {
  "nodes": {
    "n0": {
      "name": "Glycerol",
      "id": "CHEBI:17754"
    },
    "n1": {
      "category": "biolink:GeneOrGeneProduct"
    }
  },
  "edges": {
    "e0": {
      "subject": "n0",
      "object": "n1"
    }
  }
}

query = {'message':{'query_graph':qg}}

In [20]:
printjson(query)

{
    "message": {
        "query_graph": {
            "nodes": {
                "n0": {
                    "name": "Glycerol",
                    "id": "CHEBI:17754"
                },
                "n1": {
                    "category": "biolink:GeneOrGeneProduct"
                }
            },
            "edges": {
                "e0": {
                    "subject": "n0",
                    "object": "n1"
                }
            }
        }
    }
}


In [17]:
camres = camkp(query)

In [18]:
camres

{'message': {'query_graph': {'nodes': {'n0': {'id': 'CHEBI:17754'},
    'n1': {'category': 'biolink:GeneOrGeneProduct'}},
   'edges': {'e0': {'subject': 'n0',
     'object': 'n1',
     'predicate': 'biolink:related_to'}}},
  'knowledge_graph': {'nodes': {}, 'edges': {}},
  'results': []},
 'status': 'Success'}

## Strider Direct

In [8]:
start = dt.now()
strider_result = strider(query)
end = dt.now()
print(f"Strider produced {len(strider_result['message']['results'])} results in {end-start}.")

<function strider at 0x7fece03079d0> error: 504


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [21]:
qg = {
  "nodes": {
    "n0": {
      "id": "CHEBI:17754"
    },
    "n1": {
      "category":
        "biolink:BiologicalProcessOrActivity"
    }
  },
  "edges": {
    "e0": {
      "subject": "n0",
      "object": "n1",
      "predicate":"biolink:related_to"
    }
  }
}

query = {'message':{'query_graph':qg},"knowledge_graph": {
            "nodes": {},
            "edges": {}
        },
        "results": []}

In [22]:
printjson(query)

{
    "message": {
        "query_graph": {
            "nodes": {
                "n0": {
                    "id": "CHEBI:17754"
                },
                "n1": {
                    "category": "biolink:BiologicalProcessOrActivity"
                }
            },
            "edges": {
                "e0": {
                    "subject": "n0",
                    "object": "n1",
                    "predicate": "biolink:related_to"
                }
            }
        }
    },
    "knowledge_graph": {
        "nodes": {},
        "edges": {}
    },
    "results": []
}


In [23]:
ores = ontology(query)

In [24]:
ores

{'message': {'query_graph': {'nodes': {'n0': {'id': 'CHEBI:17754'},
    'n1': {'category': 'biolink:BiologicalProcessOrActivity'}},
   'edges': {'e0': {'predicate': 'biolink:related_to',
     'subject': 'n0',
     'object': 'n1'}}},
  'knowledge_graph': {'nodes': {}, 'edges': {}},
  'results': []}}

In [32]:
refkg(query)

{'message': {'query_graph': {'nodes': {'n0': {'id': 'CHEBI:17754',
     'category': None,
     'is_set': False},
    'n1': {'id': None,
     'category': 'biolink:BiologicalProcessOrActivity',
     'is_set': False}},
   'edges': {'e0': {'subject': 'n0',
     'object': 'n1',
     'predicate': 'biolink:related_to',
     'relation': None}}},
  'knowledge_graph': {'nodes': {}, 'edges': {}},
  'results': []},
 'results': [],
 'knowledge_graph': {'nodes': {}, 'edges': {}}}

In [33]:
query['message']['query_graph']['nodes']['n0']['id'] = "PUBCHEM.COMPOUND:753"

In [34]:
refkg(query)

{'message': {'query_graph': {'nodes': {'n0': {'id': 'PUBCHEM.COMPOUND:753',
     'category': None,
     'is_set': False},
    'n1': {'id': None,
     'category': 'biolink:BiologicalProcessOrActivity',
     'is_set': False}},
   'edges': {'e0': {'subject': 'n0',
     'object': 'n1',
     'predicate': 'biolink:related_to',
     'relation': None}}},
  'knowledge_graph': {'nodes': {}, 'edges': {}},
  'results': []},
 'results': [],
 'knowledge_graph': {'nodes': {}, 'edges': {}}}

In [12]:
prov = get_provenance(strider_result)
display(prov)

Unnamed: 0,QG Edge,Source,Count
0,e0,https://api.bte.ncats.io/v1/query,98
1,e0,https://automat.renci.org/ctd/reasonerapi,4
2,e0,https://automat.renci.org/mychem/reasonerapi,6
3,e0,https://translator.broadinstitute.org/molepro/...,3


In [15]:
view = GammaViewer(props={"data":strider_result})
display(view)

### Strider Assessment

We return lots of edges, but they are all from BTE.  I can't easily tell but I'd like to know the underlying source.  If it's a curated source, then that's fine, but I'd like to have a curated source.   Why are we not getting results from SRI KG?

## ARAGORN 

In [13]:
start = dt.now()
aragorn_result = aragorn(query)
end = dt.now()
print(f"ARAGORN produced {len(aragorn_result['message']['results'])} results in {end-start}.")

KeyError: 'results'

In [14]:
print_json(aragorn_result)

{
    "message": {
        "query_graph": {
            "nodes": {
                "n0": {
                    "category": [
                        "biolink:ChemicalSubstance"
                    ],
                    "is_set": false,
                    "name": "Chemical Substance"
                },
                "n1": {
                    "id": "MONDO:0018150",
                    "is_set": false,
                    "name": "Gaucher disease"
                }
            },
            "edges": {
                "e0": {
                    "subject": "n0",
                    "object": "n1",
                    "predicate": [
                        "biolink:treats"
                    ]
                }
            }
        }
    },
    "error": "Error detected: Got an empty answer from Answer coalesce, aborting."
}


In [19]:
view = GammaViewer(props={"data":aragorn_result})
display(view)

### ARAGORN Assessment

How did we do?

We for some reason are returning no results.  We are however returning a knowledge graph.  I suspect that this means nothing is enriched.  But is that real, or a bug?