# Setup

In [1]:
import json
import requests
from datetime import datetime as dt
from collections import defaultdict
import pandas as pd
import copy

In [2]:
#https://pypi.org/project/gamma-viewer/
from gamma_viewer import GammaViewer
from IPython.display import display, Markdown

In [3]:
def printjson(j):
    """Pretty-print ``j`` to stdout as JSON, indented by four spaces."""
    rendered = json.dumps(j, indent=4)
    print(rendered)
def print_json(j):
    """Alternate spelling of ``printjson``; forwards ``j`` unchanged."""
    return printjson(j)

In [4]:
def post(name,url,message,params=None):
    """POST ``message`` as JSON to ``url`` and return the decoded JSON reply.

    Args:
        name: Label printed in front of the status code when the call fails.
        url: Endpoint to POST to.
        message: JSON-serialisable payload, sent via ``json=``.
        params: Optional query-string parameters; left off the request when None.

    Returns:
        The decoded JSON body when the status code is 200, otherwise ``{}``.
    """
    if params is None:
        response = requests.post(url,json=message)
    else:
        response = requests.post(url,json=message,params=params)
    if response.status_code != 200:
        print(name, 'error:',response.status_code)
        # An error body is not guaranteed to be JSON; show the raw text in that
        # case so reporting the failure cannot itself raise.
        try:
            print(response.json())
        except ValueError:
            print(response.text)
        return {}
    return response.json()

def automat(db,message):
    """POST ``message`` to the ``db`` query endpoint on automat.renci.org.

    Prints the HTTP status code and returns the decoded JSON body; the status
    is not checked before decoding.
    """
    response = requests.post(f'https://automat.renci.org/{db}/query', json=message)
    print(response.status_code)
    return response.json()

def strider(message):
    """Send ``message`` to the Strider 1.1 query endpoint via ``post``.

    Returns whatever ``post`` returns: the decoded reply, or ``{}`` on a
    non-200 status.
    """
    url = 'https://strider.renci.org/1.1/query'
    # The label must be a string; passing the function object itself made the
    # error line print a function repr instead of a readable name.
    strider_answer = post('strider',url,message)
    return strider_answer

def aragorn(message, coalesce_type='xnone'):
    """Send ``message`` to the ARAGORN 1.1 query endpoint via ``post``.

    With the default ``coalesce_type`` of ``'xnone'`` no query parameters are
    sent; any other value is passed as ``answer_coalesce_type``.
    """
    url = 'https://aragorn.renci.org/1.1/query'
    if coalesce_type != 'xnone':
        return post('aragorn', url, message, params={'answer_coalesce_type':coalesce_type})
    return post('aragorn', url, message)

##

def bte(message):
    """Send ``message`` to the BTE v1 query endpoint via ``post``.

    Returns whatever ``post`` returns: the decoded reply, or ``{}`` on a
    non-200 status.
    """
    url = 'https://api.bte.ncats.io/v1/query'
    # Label failures as 'bte'; the original passed the ``strider`` function
    # object here, which mislabelled errors from this endpoint.
    return post('bte',url,message)

def coalesce(message,method='all'):
    """POST ``message`` to the Answer Coalesce ``coalesce/graph`` endpoint.

    ``method`` only shapes the label handed to ``post`` (``'AC' + method``).
    NOTE(review): the URL always ends in ``/graph`` regardless of ``method`` --
    confirm whether ``method`` was meant to select the endpoint.
    """
    label = 'AC' + method
    return post(label, 'https://answercoalesce.renci.org/coalesce/graph', message)

def striderandfriends(message):
    """Run ``message`` through Strider, then chain four follow-up services.

    Each stage is POSTed the previous stage's answer. Returns a 5-tuple:
    (strider, coalesced, omnicorp, weighted, scored) answers.
    """
    stages = (
        ('coalesce', 'https://answercoalesce.renci.org/coalesce/all'),
        ('omnicorp', 'https://aragorn-ranker.renci.org/omnicorp_overlay'),
        ('weight', 'https://aragorn-ranker.renci.org/weight_correctness'),
        ('score', 'https://aragorn-ranker.renci.org/score'),
    )
    answers = [strider(message)]
    for label, url in stages:
        # Feed the most recent answer into the next service.
        answers.append(post(label, url, answers[-1]))
    return tuple(answers)

In [5]:
def print_errors(strider_result):
    """Print every distinct ERROR-level log text with how often it occurred.

    Reads ``strider_result['logs']``; for entries whose ``'level'`` is
    ``'ERROR'`` the ``'error'`` field is tallied. Output keeps first-seen order.
    """
    tally = defaultdict(int)
    error_entries = (entry for entry in strider_result['logs'] if entry['level'] == 'ERROR')
    for entry in error_entries:
        tally[entry['error']] += 1
    for text in tally:
        print(f'{text} ({tally[text]} times)')
        
def print_queried_sources(strider_result):
    """Print each URL found in the log ``'step'`` lists with its hit count.

    Only log entries that have a ``'step'`` key holding a list are considered;
    every element of that list contributes its ``'url'``.
    """
    hits = defaultdict(int)
    for entry in strider_result['logs']:
        if 'step' not in entry or not isinstance(entry['step'], list):
            continue
        for step in entry['step']:
            hits[step['url']] += 1
    for url in hits:
        print(f'{url} ({hits[url]} times)')
        
def print_query_for_source(strider_result,url):
    """Print every logged step whose ``'url'`` equals ``url``.

    Log entries without a ``'step'`` key, or whose ``'step'`` is not a list,
    are skipped.
    """
    for entry in strider_result['logs']:
        if 'step' not in entry:
            continue
        steps = entry['step']
        if not isinstance(steps, list):
            continue
        for step in steps:
            if step['url'] == url:
                print(step)

In [6]:
def retrieve_ars_results(mid):
    """Fetch the ARAGORN child responses of ARS message ``mid``.

    Requests the message trace from the ARS dev server, then fetches each child
    whose agent is ``'ara-aragorn'`` or ``'ara-aragorn-exp'``. One status line
    (status, agent, result count) is printed per such child.

    Returns:
        dict mapping agent name to ``{'message': <child message>}`` for every
        matching child that carried at least one result.
    """
    ars='https://ars-dev.transltr.io'
    message_url = f'{ars}/ars/api/messages/{mid}?trace=y'
    response = requests.get(message_url)
    j = response.json()
    results = {}
    for child in j['children']:
        agent = child['actor']['agent']
        if agent not in ('ara-aragorn', 'ara-aragorn-exp'):
            continue
        childmessage_id = child['message']
        child_url = f'{ars}/ars/api/messages/{childmessage_id}'
        child_response = requests.get(child_url).json()
        try:
            child_message = child_response['fields']['data']['message']
            nresults = len(child_message['results'])
        except (KeyError, TypeError):
            # A missing or null field anywhere on the path counts as "no
            # results". Catching only these keeps KeyboardInterrupt and real
            # bugs visible, unlike the previous bare ``except``.
            nresults = 0
        if nresults > 0:
            results[agent] = {'message': child_message}
        print( child['status'], agent, nresults )
    return results

In [7]:
def get_provenance(message):
    """Count, per query-graph edge, which sources supplied the bound KG edges.

    For every edge binding in every result, the bound knowledge-graph edge's
    attributes are scanned; each attribute whose ``attribute_type_id`` is
    ``'MetaInformation:Provenance'`` adds one to the (qedge, value) tally.

    Returns:
        DataFrame with columns ``QG Edge``, ``Source`` and ``Count``.
    """
    body = message['message']
    results = body['results']
    kg_edges = body['knowledge_graph']['edges']
    all_bindings = [result['edge_bindings'] for result in results]

    tally = defaultdict(lambda: defaultdict(int)) # {qedge->{source->count}}
    for bindings in all_bindings:
        for qedge in bindings:
            for bound in bindings[qedge]:
                for attribute in kg_edges[bound['id']]['attributes']:
                    if attribute['attribute_type_id'] != 'MetaInformation:Provenance':
                        continue
                    tally[qedge][attribute['value']] += 1

    # Flatten the nested tally into one (qedge, source, count) row per pair.
    rows = [
        (qedge, source, count)
        for qedge, by_source in tally.items()
        for source, count in by_source.items()
    ]
    return pd.DataFrame({
        "QG Edge": [row[0] for row in rows],
        "Source": [row[1] for row in rows],
        "Count": [row[2] for row in rows],
    })

In [8]:
def ac_to_table(aragorn_result,mnode):
    """Summarise each result's bindings for query node ``mnode`` as a table.

    Args:
        aragorn_result: Response dict; ``['message']['results']`` is read.
        mnode: Query-node key whose bindings are inspected.

    Returns:
        DataFrame with one row per result and columns ``N_Answer_Nodes``
        (number of bound query nodes), ``N_Merged_Nodes`` (number of bindings
        for ``mnode``) and ``Method`` (the first binding's
        ``coalescence_method``, or ``'Original'`` when it has none).
    """
    answer_node_count = []
    merged_count = []
    method = []
    for result in aragorn_result['message']['results']:
        bindings = result['node_bindings']
        answer_node_count.append(len(bindings))
        merged_count.append(len(bindings[mnode]))
        try:
            method.append(bindings[mnode][0]['coalescence_method'])
        except (KeyError, IndexError, TypeError):
            # No first binding, or it carries no coalescence marker: treat the
            # result as an un-coalesced original answer.
            method.append('Original')
    df = pd.DataFrame({'N_Answer_Nodes':answer_node_count, 'N_Merged_Nodes':merged_count, 'Method':method})
    return df

def filter_to_simple(aragorn_result,mnode):
    """Return a copy of ``aragorn_result`` holding only un-coalesced results.

    A result is kept when the first ``mnode`` binding has no
    ``'coalescence_method'`` key. The number kept is printed. The kept result
    objects are the originals, not copies; the rest of the response is
    deep-copied.
    """
    simple_result = copy.deepcopy(aragorn_result)
    kept = [
        result
        for result in aragorn_result['message']['results']
        if 'coalescence_method' not in result['node_bindings'][mnode][0]
    ]
    simple_result['message']['results'] = kept
    print(len(kept))
    return simple_result

def print_nodenames(simple_result,qnode):
    """Print, for each result, the name of the first node bound to ``qnode``.

    The binding's ``'id'`` is looked up in the message's knowledge-graph nodes
    and that node's ``'name'`` is printed.
    """
    body = simple_result['message']
    for result in body['results']:
        # Only the first binding of the query node is reported.
        first_id = result['node_bindings'][qnode][0]['id']
        kg_node = body['knowledge_graph']['nodes'][first_id]
        print(kg_node['name'])

def filter_to_coal(aragorn_result,mnode,method):
    """Return a copy of ``aragorn_result`` keeping results coalesced by ``method``.

    A result is kept when the first ``mnode`` binding has a
    ``'coalescence_method'`` equal to ``method``. The number kept is printed.
    The kept result objects are the originals; the rest of the response is
    deep-copied.
    """
    def _matches(result):
        first_binding = result['node_bindings'][mnode][0]
        return 'coalescence_method' in first_binding and first_binding['coalescence_method'] == method

    selected = [result for result in aragorn_result['message']['results'] if _matches(result)]
    print(len(selected))
    simple_result = copy.deepcopy(aragorn_result)
    simple_result['message']['results'] = selected
    return simple_result

def filter_to_gc(aragorn_result,mnode):
    """Keep only the results coalesced with method ``'graph_enrichment'``."""
    return filter_to_coal(aragorn_result, mnode, method='graph_enrichment')

def filter_to_pc(aragorn_result,mnode):
    """Keep only the results coalesced with method ``'property_enrichment'``."""
    return filter_to_coal(aragorn_result, mnode, method='property_enrichment')

def print_gc_result(graph,gc_result,node):
    """Print a summary of one graph-enrichment result.

    Shows the ``p_value`` of the first ``node`` binding, then for every edge
    binding whose key starts with ``'extra_'`` the number of bound edges and up
    to five of them as ``subject -[predicate]-> object`` using node names from
    ``graph``. If such a key has no bindings, the whole result is dumped with
    ``printjson`` and the function returns without the closing separator.
    """
    maxprint = 5
    print('p_value:', gc_result['node_bindings'][node][0]['p_value'])
    for edge_key, bound in gc_result['edge_bindings'].items():
        if not edge_key.startswith('extra_'):
            continue
        numnodes = len(bound)
        if numnodes == 0:
            printjson(gc_result)
            return
        print ('Merged',numnodes)
        for binding in bound[:maxprint]:
            kg_edge = graph['edges'][binding['id']]
            subject_id = kg_edge['subject']
            object_id = kg_edge['object']
            predicate = kg_edge['predicate']
            print( f"  {graph['nodes'][subject_id]['name']} -[{predicate}]-> {graph['nodes'][object_id]['name']}")
    print('----')
    
def print_pc_result(knowledge_graph,pc_result,node):
    """Print a summary of one property-enrichment result.

    Shows the ``p_values`` and ``properties`` of the first ``node`` binding,
    the number of bindings, and the names of up to five bound nodes looked up
    in ``knowledge_graph['nodes']``; ``'  ...'`` marks a truncated list.

    Args:
        knowledge_graph: Knowledge graph dict with a ``'nodes'`` mapping.
        pc_result: A single result with ``'node_bindings'``.
        node: Query-node key whose bindings are summarised.
    """
    bindings = pc_result['node_bindings'][node]
    print('p_value:', bindings[0]['p_values'])
    print('properties:', bindings[0]['properties'])
    numnodes = len(bindings)
    print('node count',numnodes)
    maxprint=5
    nprint = min([numnodes,maxprint])
    # Use a distinct loop name: the original reused ``node`` here, clobbering
    # the parameter for anything that might follow the loop.
    for binding in bindings[:nprint]:
        kgn = knowledge_graph['nodes'][binding['id']]
        print( f"  {kgn['name']}")
    if numnodes > maxprint:
        print('  ...')
    print('----')

## Query Specific

In [9]:
# Relative path of the standup definition file that drives the rest of the notebook.
standup_json='StandupDefinitions/standup_26.json'

In [14]:
# Load the standup definition. Later cells read its 'Query Title',
# 'Query Description', 'github_issue', 'query_location' and 'ARS_Results' entries.
with open(standup_json,'r') as jsonfile:
    standup_info = json.load(jsonfile)

In [15]:
# Render the query title as a heading and its description as markdown,
# then print the link to the tracking GitHub issue.
display(Markdown(f"# {standup_info['Query Title']}"))
display(Markdown(f"{standup_info['Query Description']}"))
print(f'Github Issue: {standup_info["github_issue"]}')

# 25. 2 hop

Disease Sleep Apnea (MONDO:0005296) - Gene - ChemicalSubstance Valproic acid (PUBCHEM.COMPOUND:3121)

Github Issue: https://github.com/NCATSTranslator/testing/issues/81


The query as run through the ARS:

In [16]:
# Download the query JSON from the location named in the standup definition
# and show it.
query = json.loads(requests.get(standup_info['query_location']).content)
printjson(query)

{
    "message": {
        "query_graph": {
            "nodes": {
                "n0": {
                    "ids": [
                        "MONDO:0005296"
                    ],
                    "categories": [
                        "biolink:Disease"
                    ]
                },
                "n1": {
                    "categories": [
                        "biolink:Gene"
                    ]
                },
                "n2": {
                    "ids": [
                        "PUBCHEM.COMPOUND:3121"
                    ],
                    "categories": [
                        "biolink:ChemicalSubstance"
                    ]
                }
            },
            "edges": {
                "e0": {
                    "subject": "n0",
                    "object": "n1"
                },
                "e1": {
                    "subject": "n1",
                    "object": "n2"
                }
            }
        }
    }
}


## ARS Assessment

In [17]:
# Pair each recorded ARS run's parsed date with its result id, then sort;
# tuples compare on the date first, so the list ends up in chronological order.
ARS_Responses = [(dt.strptime(x['ARS_result_date'],'%Y-%m-%d'),x['ARS_result_id']) for x in standup_info['ARS_Results']]
ARS_Responses.sort()

In [18]:
# For every recorded ARS run: show its date as a heading, print the per-agent
# status lines (the returned dict is discarded), and print an ARAX link for the run.
for ars_date, ars_id in ARS_Responses:
    display(Markdown(f'### {ars_date}'))
    _ = retrieve_ars_results(ars_id)
    print(f'https://arax.ncats.io/?source=ARS&id={ars_id}')

### 2021-06-24 00:00:00

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

## Strider Direct

In [21]:
# Set log_level to DEBUG on the request (the log-inspection cells below read
# strider_result['logs']), then time the full two-hop query against Strider.
query['log_level']='DEBUG'
start = dt.now()
strider_result = strider(query)
end = dt.now()
print(f"Strider produced {len(strider_result['message']['results'])} results in {end-start}.")

Strider produced 0 results in 0:04:22.810492.


### Provenance

In [22]:
# Tabulate, per query-graph edge, the provenance sources of the bound edges.
prov = get_provenance(strider_result)
display(prov)

Unnamed: 0,QG Edge,Source,Count


### Queried sources

In [23]:
# List every URL recorded in the Strider log steps with how often it appears.
print_queried_sources(strider_result)

https://automat.renci.org/covidkopkg/1.1/query (83 times)
https://automat.renci.org/cord19-scibite/1.1/query (2 times)
https://automat.renci.org/cord19-scigraph/1.1/query (4 times)
https://automat.renci.org/robokopkg/1.1/query (82 times)
https://automat.renci.org/mychem/1.1/query (14 times)
https://automat.renci.org/chembio/1.1/query (1 times)
https://automat.renci.org/pharos/1.1/query (7 times)
https://automat.renci.org/hmdb/1.1/query (2 times)
https://automat.renci.org/ctd/1.1/query (75 times)
https://translator.broadinstitute.org/molepro/trapi/v1.1/query (8 times)
https://cam-kp-api-dev.renci.org/query (27 times)
https://spokekp.healthdatascience.cloud/api/v1.1//query (2 times)
https://arax.ncats.io/api/rtxkg2/v1.1/query (87 times)
https://automat.renci.org/kegg/1.1/query (2 times)
https://automat.renci.org/gtopdb/1.1/query (1 times)


In [24]:
# Collect the 'level' of every Strider log entry.
levs=[ l['level'] for l in strider_result['logs']]

In [25]:
# Tally the log entries per level and print one "<level> <count>" line each.
n = defaultdict(int)
for l in levs:
    n[l] += 1
for l,c in n.items():
    print(l,c)

ERROR 62
DEBUG 11
INFO 5


### Errors

In [21]:
# Summarise the distinct ERROR-level log messages and their counts.
print_errors(strider_result)

504 Server Error: Gateway Time-out for url: https://automat.renci.org/robokopkg/1.1/query
For more information check: https://httpstatuses.com/504 (830 times)
400 Client Error: Bad Request for url: https://cam-kp-api-dev.renci.org/query
For more information check: https://httpstatuses.com/400 (162 times)


### Results

In [31]:
# Show the Strider response in the GammaViewer widget.
view = GammaViewer(props={"data":strider_result})
display(view)

### Strider Assessment

Enter Assessment Here

In [26]:
# First hop of the two-hop query on its own: the disease node (MONDO:0005296)
# linked by a single edge to any gene.
hop1 = {
    "message": {
        "query_graph": {
            "nodes": {
                "n0": {
                    "ids": [
                        "MONDO:0005296"
                    ],
                    "categories": [
                        "biolink:Disease"
                    ]
                },
                "n1": {
                    "categories": [
                        "biolink:Gene"
                    ]
                }
            },
            "edges": {
                "e0": {
                    "subject": "n0",
                    "object": "n1"
                }
            }
        }
    }
}

In [27]:
# Time the disease -> gene hop on its own.
# The log level has to be set on the message that is actually sent (hop1);
# the original set it on `query`, which this cell never submits.
hop1['log_level']='DEBUG'
start = dt.now()
strider_result = strider(hop1)
end = dt.now()
print(f"Strider produced {len(strider_result['message']['results'])} results in {end-start}.")

Strider produced 88 results in 0:00:07.139656.


In [28]:
# Show the hop-1 Strider response in the GammaViewer widget.
view = GammaViewer(props={"data":strider_result})
display(view)

In [29]:
# Second hop of the two-hop query on its own: the chemical node
# (PUBCHEM.COMPOUND:3121) linked by a single edge to any gene.
hop2 = {
    "message": {
        "query_graph": {
            "nodes": {
                "n0": {
                    "ids": [
                        "PUBCHEM.COMPOUND:3121"
                    ],
                    "categories": [
                        "biolink:ChemicalSubstance"
                    ]
                },
                "n1": {
                    "categories": [
                        "biolink:Gene"
                    ]
                }
            },
            "edges": {
                "e0": {
                    "subject": "n0",
                    "object": "n1"
                }
            }
        }
    }
}
# The log level has to be set on the message that is actually sent (hop2);
# the original set it on `query`, which this cell never submits.
hop2['log_level']='DEBUG'
start = dt.now()
strider_result2 = strider(hop2)
end = dt.now()
print(f"Strider produced {len(strider_result2['message']['results'])} results in {end-start}.")

Strider produced 5625 results in 0:01:54.517449.


In [31]:
# Names of all knowledge-graph nodes returned for the hop-2 query.
names = [ v['name'] for v in strider_result2['message']['knowledge_graph']['nodes'].values()]

In [33]:
# Is FGFR3 among the hop-2 node names?
'FGFR3' in names

True

In [34]:
# Is BCHE among the hop-2 node names?
'BCHE' in names

True

In [35]:
# Is GNE among the hop-2 node names?
'GNE' in names

True

In [39]:
# Node ids of each single-hop knowledge graph. Note that `strider_result` was
# reassigned to the hop-1 response by the cell that queries hop1, so it no
# longer holds the two-hop response here.
hop1keys = set( strider_result['message']['knowledge_graph']['nodes'].keys() )
hop2keys = set( strider_result2['message']['knowledge_graph']['nodes'].keys() )

In [40]:
# Node ids present in both single-hop knowledge graphs.
hop1keys.intersection(hop2keys)

{'NCBIGene:10020',
 'NCBIGene:1026',
 'NCBIGene:1028',
 'NCBIGene:10628',
 'NCBIGene:169026',
 'NCBIGene:2026',
 'NCBIGene:2261',
 'NCBIGene:336',
 'NCBIGene:4280',
 'NCBIGene:429',
 'NCBIGene:5054',
 'NCBIGene:5660',
 'NCBIGene:590',
 'NCBIGene:5979',
 'NCBIGene:6281',
 'NCBIGene:64324',
 'NCBIGene:6647',
 'NCBIGene:6651',
 'NCBIGene:6714',
 'NCBIGene:7099',
 'NCBIGene:7422',
 'NCBIGene:761',
 'NCBIGene:8863'}

## ARAGORN 

In [32]:
# Time the full two-hop query against ARAGORN.
start = dt.now()
aragorn_result = aragorn(query)
end = dt.now()
# post() returns {} on a non-200 reply, so look up 'message' defensively;
# indexing it directly would raise KeyError before the error line could print.
if 'results' in (aragorn_result.get('message') or {}):
    print(f"ARAGORN produced {len(aragorn_result['message']['results'])} results in {end-start}.")
else:
    print('Error, no result field')

ARAGORN produced 4 results in 0:01:31.725570.


In [33]:
# Show the ARAGORN response in the GammaViewer widget.
view = GammaViewer(props={"data":aragorn_result})
display(view)

In [44]:
# Select the property-enrichment results for `node` and print a summary of each.
node='n0' #this might need to change based on the query
property_answers = filter_to_pc(aragorn_result,node)
for pc_result in property_answers['message']['results']:
    print_pc_result(aragorn_result['message']['knowledge_graph'],pc_result,node)

72
p_value: [5.515487725071365e-06]
properties: ['molecule_type:Small molecule']
node count 41
  Progesterone
  2-[(4-Benzyltriazol-1-yl)methyl]phenol
  2-Hydroxy-N-(4-methyl-2-nitrophenyl)benzamide
  (2-Hydroxyphenyl)(5-methyl-1H-benzotriazole-1-yl) ketone
  (2-Hydroxyphenyl)(5-chloro-1H-benzotriazole-1-yl) ketone
  ...
----
p_value: [7.280805894138608e-21]
properties: ['drugbank.approved']
node count 10
  Enflurane
  Cromolyn
  Potassium ion
  Miconazole
  Hydroflumethiazide
  ...
----
p_value: [6.562508889245017e-13]
properties: ['application']
node count 9
  Enflurane
  Cromolyn
  Hydroflumethiazide
  Hydron;2,2,2-trifluoroacetate
  Bendroflumethiazide
  ...
----
p_value: [6.897912892096567e-07]
properties: ['biological_role']
node count 8
  ferroheme b
  Carbon monoxide
  Hydrochlorothiazide
  Cromolyn
  Potassium ion
  ...
----
p_value: [9.721386753620717e-15]
properties: ['therapeutic_flag']
node count 8
  Enflurane
  Bendroflumethiazide
  Hydrochlorothiazide
  Cromolyn
  Chlorz

In [49]:
# Inspect the first property-enrichment result in full.
property_answers['message']['results'][0]

{'node_bindings': {'n1': [{'id': 'NCBIGene:3778'}],
  'n0': [{'id': 'PUBCHEM.COMPOUND:5994',
    'coalescence_method': 'property_enrichment',
    'p_values': [5.515487725071365e-06],
    'properties': ['molecule_type:Small molecule']},
   {'id': 'PUBCHEM.COMPOUND:11437030',
    'coalescence_method': 'property_enrichment',
    'p_values': [5.515487725071365e-06],
    'properties': ['molecule_type:Small molecule']},
   {'id': 'PUBCHEM.COMPOUND:3605484',
    'coalescence_method': 'property_enrichment',
    'p_values': [5.515487725071365e-06],
    'properties': ['molecule_type:Small molecule']},
   {'id': 'PUBCHEM.COMPOUND:10243969',
    'coalescence_method': 'property_enrichment',
    'p_values': [5.515487725071365e-06],
    'properties': ['molecule_type:Small molecule']},
   {'id': 'PUBCHEM.COMPOUND:10967695',
    'coalescence_method': 'property_enrichment',
    'p_values': [5.515487725071365e-06],
    'properties': ['molecule_type:Small molecule']},
   {'id': 'PUBCHEM.COMPOUND:2798243',

In [67]:
# Select the graph-enrichment results for `node` (set in the property-enrichment
# cell) and print a summary of each.
graph_answers=filter_to_gc(aragorn_result,node)
for gc_result in graph_answers['message']['results']:
    print_gc_result(aragorn_result['message']['knowledge_graph'],gc_result,node)

85
p_value: 1.488362376405769e-24
Merged 8
  Isochamaejasmin -[biolink:directly_interacts_with]-> VEGFA
  Holo-Lys2 -[biolink:directly_interacts_with]-> VEGFA
  Pipecolic acid betaine -[biolink:directly_interacts_with]-> VEGFA
  glycerophosphoethanolamine -[biolink:directly_interacts_with]-> VEGFA
  6-Bromohexanoic acid -[biolink:directly_interacts_with]-> VEGFA
----
p_value: 1.098744871207281e-18
Merged 7
  Holo-Lys2 -[biolink:directly_interacts_with]-> TNF
  1-[(3S,9S,10S)-9-[[Cyclopropylmethyl(methyl)amino]methyl]-12-[(2R)-1-hydroxypropan-2-yl]-3,10-dimethyl-13-oxo-2,8-dioxa-12-azabicyclo[12.4.0]octadeca-1(14),15,17-trien-16-yl]-3-(4-fluorophenyl)urea -[biolink:directly_interacts_with]-> TNF
  Deoxylimonoic acid D-ring-lactone -[biolink:directly_interacts_with]-> TNF
  Diazoxide -[biolink:directly_interacts_with]-> TNF
  Isochamaejasmin -[biolink:directly_interacts_with]-> TNF
----
p_value: 2.5251858514751036e-18
Merged 6
  Diazoxide -[biolink:directly_interacts_with]-> CXCL8
  glyc

### ARAGORN Assessment

How did we do?