# Setup

In [1]:
import json
import requests
from datetime import datetime as dt
from collections import defaultdict
import pandas as pd

In [2]:
#https://pypi.org/project/gamma-viewer/
from gamma_viewer import GammaViewer
from IPython.display import display, Markdown

In [3]:
def printjson(j):
    """Pretty-print ``j`` to stdout as JSON indented by four spaces."""
    rendered = json.dumps(j, indent=4)
    print(rendered)
def print_json(j):
    """Snake-case spelling of ``printjson``: dump ``j`` to stdout as 4-space-indented JSON."""
    print(json.dumps(j, indent=4))

In [4]:
def post(name,url,message,params=None):
    """POST ``message`` as JSON to ``url`` and return the decoded JSON reply.

    Args:
        name: Label printed in front of the error line when the call fails.
        url: Endpoint to POST to.
        message: JSON-serialisable payload sent as the request body.
        params: Optional query-string parameters, passed through to requests.

    Returns:
        The decoded JSON body when the status code is 200. For any other
        status the code and the response body are printed and ``{}`` is
        returned.
    """
    # requests treats params=None as "no query string", so one call covers both cases.
    response = requests.post(url, json=message, params=params)
    if response.status_code != 200:
        print(name, 'error:', response.status_code)
        try:
            print(response.json())
        except ValueError:
            # Error responses may not carry a JSON body; show the raw text
            # rather than raising from inside the error handler.
            print(response.text)
        return {}
    return response.json()

def automat(db,message):
    """Send ``message`` to the query endpoint of the Automat instance named ``db``.

    The HTTP status code is printed; the decoded JSON body is returned
    regardless of that status.
    """
    reply = requests.post(f'https://automat.renci.org/{db}/query', json=message)
    print(reply.status_code)
    return reply.json()

def strider(message):
    """Run ``message`` through Strider with DEBUG-level logging requested.

    Returns whatever ``post`` returns: the decoded JSON response, or ``{}``
    if the request did not come back with status 200.
    """
    url = 'https://strider.renci.org/query?log_level=DEBUG'
    # The label must be the string 'strider'; passing the bare name would hand
    # post() this function object and print its repr in error messages.
    return post('strider', url, message)

def aragorn(message, coalesce_type='xnone'):
    """Send ``message`` to the ARAGORN query endpoint.

    When ``coalesce_type`` is anything other than the sentinel ``'xnone'`` it
    is forwarded as the ``answer_coalesce_type`` query parameter; otherwise no
    query parameters are sent.
    """
    url = 'https://aragorn.renci.org/query'
    if coalesce_type != 'xnone':
        return post('aragorn', url, message, params={'answer_coalesce_type': coalesce_type})
    return post('aragorn', url, message)

##

def bte(message):
    """Send ``message`` to the BTE query endpoint and return the reply via ``post``."""
    url = 'https://api.bte.ncats.io/v1/query'
    # Label errors as coming from BTE (a string), not with the strider function object.
    return post('bte', url, message)

def coalesce(message,method='all'):
    """Send ``message`` to AnswerCoalesce using the given coalescing ``method``.

    Args:
        message: The message to coalesce.
        method: Path segment selecting the coalescer, e.g. ``'all'`` or
            ``'graph'`` (both endpoints are used elsewhere in this notebook).

    Returns:
        Whatever ``post`` returns for the request.
    """
    # Build the endpoint from `method` so the URL agrees with the 'AC'+method
    # label; previously the URL was pinned to /coalesce/graph and `method`
    # only changed the label.
    url = f'https://answercoalesce.renci.org/coalesce/{method}'
    return post('AC'+method,url,message)

def striderandfriends(message):
    """Run Strider, then pipe its answer through coalesce, omnicorp, weight and score.

    Each stage receives the previous stage's output. Returns a 5-tuple:
    (strider, coalesced, omnicorp-overlaid, weighted, scored) answers.
    """
    stages = (
        ('coalesce', 'https://answercoalesce.renci.org/coalesce/all'),
        ('omnicorp', 'https://aragorn-ranker.renci.org/omnicorp_overlay'),
        ('weight', 'https://aragorn-ranker.renci.org/weight_correctness'),
        ('score', 'https://aragorn-ranker.renci.org/score'),
    )
    answers = [strider(message)]
    for label, endpoint in stages:
        # Feed the most recent answer into the next service.
        answers.append(post(label, endpoint, answers[-1]))
    return tuple(answers)

In [5]:
def print_errors(strider_result):
    """Print each distinct ERROR-level log message with the number of times it occurred.

    ``strider_result`` is expected to have a ``'logs'`` list of dicts with
    ``'level'`` and ``'message'`` keys. A result without logs (for example the
    ``{}`` returned after a failed request) prints nothing.

    For messages that are JSON objects with an ``'error'`` string, the last five
    words are dropped before counting so that otherwise-identical errors group
    together (presumably those words are request-specific detail; TODO confirm).
    Any other ERROR message is counted under its raw text rather than crashing.
    """
    errorcounts = defaultdict(int)
    for logmessage in strider_result.get('logs') or []:
        if logmessage.get('level') != 'ERROR':
            continue
        raw = logmessage.get('message')
        try:
            words = json.loads(raw)['error'].split()
            e = " ".join(words[:-5])
        except (KeyError, TypeError, ValueError, AttributeError):
            # Not a JSON object carrying an 'error' string: keep the message as-is.
            e = str(raw)
        errorcounts[e] += 1
    for error,count in errorcounts.items():
        print(f'{error} ({count} times)')
        
def print_queried_sources(strider_result):
    """Print every URL mentioned in the logs with the number of log entries naming it.

    Each log message is parsed as JSON; entries that decode to an object with a
    ``'url'`` key are tallied. Messages that are not JSON, and results without
    a ``'logs'`` list (for example the ``{}`` returned after a failed request),
    are skipped instead of raising.
    """
    querycounts = defaultdict(int)
    for logmessage in strider_result.get('logs') or []:
        try:
            j = json.loads(logmessage['message'])
        except (KeyError, TypeError, ValueError):
            # No message, or one that is not JSON: it cannot carry a 'url' field.
            continue
        if isinstance(j, dict) and 'url' in j:
            querycounts[j['url']] += 1
    for url,count in querycounts.items():
        print(f'{url} ({count} times)')

In [6]:
def retrieve_ars_results(mid, agents=('ara-aragorn', 'ara-aragorn-exp')):
    """Fetch the child messages of ARS message ``mid`` for the selected agents.

    For every child whose actor agent is in ``agents``, the child message is
    downloaded and one line is printed: status, agent name, result count.

    Args:
        mid: ARS message id of the parent (traced) message.
        agents: Agent names to report on; defaults to the two ARAGORN agents.

    Returns:
        ``{agent: {'message': <child message>}}`` for each selected agent that
        returned at least one result.
    """
    message_url = f'https://ars.transltr.io/ars/api/messages/{mid}?trace=y'
    j = requests.get(message_url).json()
    results = {}
    for child in j['children']:
        agent = child['actor']['agent']
        if agent not in agents:
            continue
        child_url = f"https://ars.transltr.io/ars/api/messages/{child['message']}"
        child_response = requests.get(child_url).json()
        try:
            child_message = child_response['fields']['data']['message']
            nresults = len(child_message['results'])
        except (KeyError, TypeError):
            # Best effort: a child with a missing or null message/results section
            # counts as zero results. Only lookup failures are caught, so
            # interrupts and unrelated errors still propagate.
            nresults = 0
        else:
            if nresults > 0:
                results[agent] = {'message': child_message}
        print(child['status'], agent, nresults)
    return results

In [7]:
def get_provenance(message):
    """Tabulate, per query-graph edge, which sources supplied the bound knowledge-graph edges.

    Walks every edge binding of every result, looks the bound edge up in the
    knowledge graph, and counts the value of each attribute named
    ``'provenance'``.

    Returns a DataFrame with columns "QG Edge", "Source" and "Count", one row
    per (query edge, source) pair in first-seen order.
    """
    all_results = message['message']['results']
    kg_edges = message['message']['knowledge_graph']['edges']
    per_result_bindings = [result['edge_bindings'] for result in all_results]

    tally = {}  # {qedge -> {source -> count}}
    for binding_map in per_result_bindings:
        for qedge_id, bound_edges in binding_map.items():
            for bound in bound_edges:
                for attribute in kg_edges[bound['id']]['attributes']:
                    if attribute['name'] != 'provenance':
                        continue
                    origin = attribute['value']
                    by_source = tally.setdefault(qedge_id, {})
                    by_source[origin] = by_source.get(origin, 0) + 1

    # Flatten the nested tally into (qedge, source, count) rows.
    rows = [
        (qedge_id, origin, hits)
        for qedge_id, by_source in tally.items()
        for origin, hits in by_source.items()
    ]
    return pd.DataFrame({
        "QG Edge": [row[0] for row in rows],
        "Source": [row[1] for row in rows],
        "Count": [row[2] for row in rows],
    })

## Query Specific

In [8]:
# Relative path to the JSON file describing this standup query.
standup_json='StandupDefinitions/standup_2.json'

In [9]:
# Parse the standup definition file; later cells index the result by string keys.
with open(standup_json, mode='r') as jsonfile:
    standup_info = json.loads(jsonfile.read())

In [10]:
# Render the query title (as a level-1 heading) and its description as Markdown,
# then print the link to the GitHub issue tracking this query.
display(Markdown(f"# {standup_info['Query Title']}"))
display(Markdown(f"{standup_info['Query Description']}"))
print(f'Github Issue: {standup_info["github_issue"]}')

# 2. Chemicals to Gene

What chemicals are associated with SMS (UniProtKB:P52788)

Github Issue: https://github.com/NCATSTranslator/testing/issues/10


The query as run through the ARS:

In [11]:
# Download the query JSON from the location named in the standup definition.
query_response = requests.get(standup_info['query_location'])
# Fail on HTTP errors here rather than with a confusing JSON decode error below.
query_response.raise_for_status()
query = query_response.json()
printjson(query)

{
    "message": {
        "query_graph": {
            "nodes": {
                "n0": {
                    "id": "UniProtKB:P52788",
                    "category": "biolink:Gene"
                },
                "n1": {
                    "category": "biolink:ChemicalSubstance"
                }
            },
            "edges": {
                "e01": {
                    "subject": "n0",
                    "object": "n1"
                }
            }
        }
    }
}


## ARS Assessment

In [12]:
# (run date, ARS message id) pairs for every recorded ARS run, in ascending tuple order
# (i.e. by date, ties broken by id).
ARS_Responses = sorted(
    (dt.strptime(entry['ARS_result_date'], '%Y-%m-%d'), entry['ARS_result_id'])
    for entry in standup_info['ARS_Results']
)

In [13]:
# For each recorded ARS run (in sorted date order): show the date as a heading,
# print the status and result count of the ARAGORN children, and print an
# arax.ncats.io link for that ARS message id.
for ars_date, ars_id in ARS_Responses:
    display(Markdown(f'### {ars_date}'))
    # Only the lines printed by retrieve_ars_results matter here; its return value is discarded.
    _ = retrieve_ars_results(ars_id)
    print(f'https://arax.ncats.io/?source=ARS&id={ars_id}')

### 2021-03-02 00:00:00

Done ara-aragorn 63
Done ara-aragorn-exp 0
https://arax.ncats.io/?source=ARS&id=d187edcd-f4a1-42e8-9248-eee0cb161fdb


### 2021-03-16 00:00:00

Done ara-aragorn 119
Done ara-aragorn-exp 0
https://arax.ncats.io/?source=ARS&id=5ae7b3a8-ab4d-4282-a027-80a7116f1265


### 2021-03-19 00:00:00

Done ara-aragorn 128
Done ara-aragorn-exp 86
https://arax.ncats.io/?source=ARS&id=0f16dbfa-444a-409f-a7b2-9d11506b0d1c


### 2021-03-25 00:00:00

Done ara-aragorn 128
Done ara-aragorn-exp 0
https://arax.ncats.io/?source=ARS&id=8675552b-34b1-46eb-a4f0-3e34179a1e12


## Strider Direct

In [14]:
# Time a direct Strider query.
start = dt.now()
strider_result = strider(query)
end = dt.now()
# strider() yields {} when the HTTP call fails, so look the results up
# defensively instead of indexing straight into ['message']['results'].
strider_results = (strider_result.get('message') or {}).get('results')
if strider_results is None:
    print('Error, no result field')
else:
    print(f"Strider produced {len(strider_results)} results in {end-start}.")

Strider produced 37 results in 0:01:36.635607.


### Provenance

In [15]:
# Count, per query-graph edge, how many bound edges each source contributed.
prov = get_provenance(strider_result)
display(prov)

Unnamed: 0,QG Edge,Source,Count
0,e01,https://translator.broadinstitute.org/molepro/...,37


### Queried sources

In [16]:
# List each URL found in Strider's log messages with how often it appears.
print_queried_sources(strider_result)

https://automat.renci.org/pharos/reasonerapi (8 times)
https://stars-app.renci.org/cam-kp/query (17 times)
https://translator.broadinstitute.org/molepro/trapi/v1.0/query (2 times)
https://automat.renci.org/ctd/reasonerapi (75 times)
https://automat.renci.org/mychem/reasonerapi (16 times)
https://api.bte.ncats.io/v1/query (3 times)
https://automat.renci.org/cord19-scibite/reasonerapi (2 times)
https://automat.renci.org/hmdb/reasonerapi (2 times)
https://automat.renci.org/gtopdb/reasonerapi (1 times)


### Errors

In [17]:
# Summarise the ERROR-level entries in Strider's logs, grouped by message.
print_errors(strider_result)

504 Server Error: Gateway Time-out for url: https://automat.renci.org/chembio/reasonerapi (2 times)
504 Server Error: Gateway Time-out for url: https://automat.renci.org/cord19-scigraph/reasonerapi (4 times)
504 Server Error: Gateway Time-out for url: https://automat.renci.org/kegg/reasonerapi (2 times)
400 Client Error: Bad Request for url: https://stars-app.renci.org/cam-kp/query (1 times)
429 Client Error: Too Many Requests for url: https://api.bte.ncats.io/v1/query (121 times)


### Results

In [18]:
# Render the Strider response with the gamma-viewer widget.
view = GammaViewer(props={"data":strider_result})
display(view)

### Strider Assessment

Only MolePro returns results, though there are many other valid sources.

We are overwhelming BTE and Automat with requests (see the 429 "Too Many Requests" and 504 "Gateway Time-out" errors above).
See: 
https://github.com/RENCI-AUTOMAT/Automat-server/issues/5
https://github.com/ranking-agent/strider/issues/149


Even though some Automat instances hold correct results, there is a problem retrieving the edges (e.g. from Pharos).
See: 
https://github.com/RENCI-AUTOMAT/Automat-server/issues/6

CAM-KP is returning non-interpretable results.
See:
https://github.com/NCATS-Tangerine/cam-kp-api/issues/257

## ARAGORN 

In [19]:
# Time an ARAGORN query.
start = dt.now()
aragorn_result = aragorn(query)
end = dt.now()
# aragorn() yields {} when the HTTP call fails, so a missing 'message' (or a
# missing/null 'results') is reported rather than raising.
aragorn_results = (aragorn_result.get('message') or {}).get('results')
if aragorn_results is not None:
    print(f"ARAGORN produced {len(aragorn_results)} results in {end-start}.")
else:
    print('Error, no result field')

ARAGORN produced 37 results in 0:01:07.454371.


In [20]:
# Render the ARAGORN response with the gamma-viewer widget.
view = GammaViewer(props={"data":aragorn_result})
display(view)

### ARAGORN Assessment

1. No omnicorp results

See: 

https://github.com/TranslatorSRI/NodeNormalization/issues/43

https://github.com/ranking-agent/aragorn-ranker/issues/12

2. No coalescence
This seems to be because of out-of-date databases.

See:

https://github.com/ranking-agent/AnswerCoalesce/issues/44

https://github.com/ranking-agent/strider/issues/164


In [21]:
# Send the Strider response straight to the ranker's omnicorp overlay endpoint.
omni_answer = post('omnicorp','https://aragorn-ranker.renci.org/omnicorp_overlay',strider_result)

In [22]:
# Inspect the raw omnicorp overlay response.
# NOTE(review): this echoes the entire response dict into the cell output.
omni_answer

{'message': {'query_graph': {'nodes': {'n1': {'category': ['biolink:ChemicalSubstance'],
     'is_set': False},
    'n0': {'id': ['UniProtKB:P52788'],
     'category': ['biolink:Gene'],
     'is_set': False}},
   'edges': {'e01': {'subject': 'n0', 'object': 'n1'}}},
  'knowledge_graph': {'nodes': {'PUBCHEM.COMPOUND:439533': {'category': ['biolink:ChemicalSubstance'],
     'name': '(+)-taxifolin',
     'attributes': [{'type': 'omnicorp_article_count', 'value': 0}]},
    'PUBCHEM.COMPOUND:65057': {'category': ['biolink:ChemicalSubstance'],
     'name': 'diphenylcyclopropenone',
     'attributes': [{'type': 'omnicorp_article_count', 'value': 0}]},
    'PUBCHEM.COMPOUND:122150': {'category': ['biolink:ChemicalSubstance'],
     'name': '3-amino-2-(4-chlorophenyl)-1-propanesulfonic acid',
     'attributes': [{'type': 'omnicorp_article_count', 'value': 0}]},
    'PUBCHEM.COMPOUND:5284550': {'category': ['biolink:ChemicalSubstance'],
     'name': 'trans-dothiepin',
     'attributes': [{'type':

In [30]:
# Call AnswerCoalesce directly, forwarding only the 'message' part of the Strider response.
coal = post('coalesce','https://answercoalesce.renci.org/coalesce/all',{'message':strider_result['message']})

In [31]:
# Inspect the raw coalesce response.
# NOTE(review): this echoes the entire response dict into the cell output.
coal

{'message': {'query_graph': {'nodes': {'n1': {'category': ['biolink:ChemicalSubstance'],
     'is_set': False},
    'n0': {'id': ['UniProtKB:P52788'],
     'category': ['biolink:Gene'],
     'is_set': False}},
   'edges': {'e01': {'subject': 'n0', 'object': 'n1'}}},
  'knowledge_graph': {'nodes': {'PUBCHEM.COMPOUND:439533': {'category': ['biolink:ChemicalSubstance'],
     'name': '(+)-taxifolin',
     'attributes': []},
    'PUBCHEM.COMPOUND:65057': {'category': ['biolink:ChemicalSubstance'],
     'name': 'diphenylcyclopropenone',
     'attributes': []},
    'PUBCHEM.COMPOUND:122150': {'category': ['biolink:ChemicalSubstance'],
     'name': '3-amino-2-(4-chlorophenyl)-1-propanesulfonic acid',
     'attributes': []},
    'PUBCHEM.COMPOUND:5284550': {'category': ['biolink:ChemicalSubstance'],
     'name': 'trans-dothiepin',
     'attributes': []},
    'PUBCHEM.COMPOUND:4075': {'category': ['biolink:ChemicalSubstance'],
     'name': 'mesalamine',
     'attributes': []},
    'PUBCHEM.COMPO

In [32]:
# Render the coalesce response with the gamma-viewer widget.
view = GammaViewer(props={"data":coal})
display(view)

In [33]:
# Show the knowledge-graph nodes contained in the Strider response.
strider_result['message']['knowledge_graph']['nodes']

{'PUBCHEM.COMPOUND:439533': {'category': ['biolink:ChemicalSubstance'],
  'name': '(+)-taxifolin',
  'attributes': []},
 'PUBCHEM.COMPOUND:65057': {'category': ['biolink:ChemicalSubstance'],
  'name': 'diphenylcyclopropenone',
  'attributes': []},
 'PUBCHEM.COMPOUND:122150': {'category': ['biolink:ChemicalSubstance'],
  'name': '3-amino-2-(4-chlorophenyl)-1-propanesulfonic acid',
  'attributes': []},
 'PUBCHEM.COMPOUND:5284550': {'category': ['biolink:ChemicalSubstance'],
  'name': 'trans-dothiepin',
  'attributes': []},
 'PUBCHEM.COMPOUND:4075': {'category': ['biolink:ChemicalSubstance'],
  'name': 'mesalamine',
  'attributes': []},
 'PUBCHEM.COMPOUND:10219702': {'category': ['biolink:ChemicalSubstance'],
  'name': 'CID 10219702',
  'attributes': []},
 'PUBCHEM.COMPOUND:65833': {'category': ['biolink:ChemicalSubstance'],
  'name': '(1R,2S)-2-(aminomethyl)-N,N-diethyl-1-phenyl-1-cyclopropanecarboxamide',
  'attributes': []},
 'PUBCHEM.COMPOUND:5311281': {'category': ['biolink:ChemicalS