In [1]:
from use_translator import *

In [2]:
def create_query(disease_id, inferred = True):
    """Build a one-hop query message: which chemical entities treat a disease.

    Args:
        disease_id: Identifier placed in the ``ids`` list of the ``disease``
            query node.
        inferred: When truthy, the edge is tagged with
            ``knowledge_type = 'inferred'``; otherwise that key is omitted.

    Returns:
        dict: ``{'message': {'query_graph': {...}}}`` with two nodes
        (``disease``, ``chemical``) joined by a single ``t_edge`` whose
        subject is the chemical and whose object is the disease.
    """
    disease_key, chemical_key = 'disease', 'chemical'

    # chemical --biolink:treats--> disease
    treats_edge = {
        'object': disease_key,
        'subject': chemical_key,
        'predicates': ['biolink:treats'],
    }
    if inferred:
        treats_edge['knowledge_type'] = 'inferred'

    nodes = {
        disease_key: {'ids': [disease_id]},
        chemical_key: {'categories': ['biolink:ChemicalEntity']},
    }
    return {'message': {'query_graph': {'nodes': nodes,
                                        'edges': {'t_edge': treats_edge}}}}

In [30]:
# Endpoints used below: the ARAGORN query URL and the ARS API base URL
# (both point at ci.transltr.io hosts).
aragorn_url="https://aragorn.ci.transltr.io/aragorn/query"
ars_url='https://ars.ci.transltr.io/ars/api'

In [4]:
# "What treats this disease?" query for MONDO:0011399; `inferred` is left at
# its default of True, so the edge is marked knowledge_type='inferred'.
creative_at = create_query("MONDO:0011399")

In [5]:
# Submit the query via the ARS endpoint. `submit_to_ars` is not defined in
# this notebook; presumably it comes from the `use_translator` star import.
x = submit_to_ars(creative_at, ars_url=ars_url)

https://ars.ci.transltr.io/ars/api/submit
201
https://arax.ncats.io/?source=ARS&id=39f06992-1462-47d2-bd0d-5b2808c9d946


'39f06992-1462-47d2-bd0d-5b2808c9d946'

In [7]:
# Fetch the per-responder results for the submission.
# NOTE(review): the submission id is hard-coded here, so re-running the
# notebook retrieves this specific earlier submission rather than the one made
# above. TODO confirm whether `x` holds the new id and pass it instead.
ars_results = retrieve_ars_results('39f06992-1462-47d2-bd0d-5b2808c9d946',ars_url=ars_url)

In [20]:
def get_results(ares):
    """Collect knowledge-graph nodes for every chemical bound in ARA results.

    Only responders whose key starts with ``'ara'`` are considered. For each
    of their results, every binding under ``node_bindings['chemical']`` is
    looked up in that responder's ``knowledge_graph['nodes']``.

    Args:
        ares: Mapping of responder name -> response dict containing a
            ``'message'`` entry.

    Returns:
        dict: chemical id -> knowledge-graph node record. When several
        responders bind the same id, the last one iterated wins.

    Raises:
        KeyError: If a result lacks a ``'chemical'`` binding, or a bound id is
            absent from the responder's knowledge graph.
    """
    chem_ids = {}
    for responder, response in ares.items():
        if not responder.startswith('ara'):
            continue
        message = response['message']
        # `results` may be absent or explicitly null; treat both as "no results".
        results = message.get('results') or []
        if not results:
            continue
        # Resolve the node table once per responder, and only when there is
        # something to look up (the knowledge graph may be missing otherwise).
        kg_nodes = message['knowledge_graph']['nodes']
        for res in results:
            for chem in res['node_bindings']['chemical']:
                cid = chem['id']
                chem_ids[cid] = kg_nodes[cid]
    return chem_ids
                

In [24]:
# Peek at the node bindings of the first 'ara-aragorn' result to see the
# binding layout.
ars_results['ara-aragorn']['message']['results'][0]['node_bindings']

{'disease': [{'id': 'MONDO:0000984',
   'query_id': 'MONDO:0011399',
   'attributes': None}],
 'chemical': [{'id': 'PUBCHEM.COMPOUND:2973',
   'query_id': None,
   'attributes': None}]}

In [21]:
# Map every chemical id bound by an 'ara*' responder to its knowledge-graph node.
chem_ids = get_results(ars_results)

In [22]:
# Number of distinct chemical ids collected.
len(chem_ids)

1416

In [56]:
def create_enrichment_query(identifiers):
    """Wrap a set of chemicals in a message for the ``enrich_results`` workflow.

    Args:
        identifiers: Mapping of node id -> knowledge-graph node record. Each
            record is placed in the message's knowledge graph as-is (not
            copied).

    Returns:
        dict: A message with one ``input_chemical`` query node, the given
        nodes as the knowledge graph (no edges), one result per identifier
        binding it to ``input_chemical``, and a single-step workflow
        ``[{"id": "enrich_results"}]``.
    """
    kg_nodes = {ident: identifiers[ident] for ident in identifiers}
    # One result per input chemical, each with its own (empty) edge bindings.
    results = [
        {'node_bindings': {'input_chemical': [{'id': ident}]}, 'edge_bindings': {}}
        for ident in identifiers
    ]
    query_graph = {
        "nodes": {"input_chemical": {"categories": ["biolink:ChemicalEntity"]}},
        "edges": {},
    }
    return {
        "message": {
            "query_graph": query_graph,
            "knowledge_graph": {"nodes": kg_nodes, "edges": {}},
            "results": results,
        },
        "workflow": [{"id": "enrich_results"}],
    }
        

In [57]:
# Wrap the collected chemicals in an `enrich_results` workflow request.
trapi = create_enrichment_query(chem_ids)

In [58]:
# POST the enrichment request to ARAGORN.
# NOTE(review): `requests` is not imported explicitly in the cells shown;
# presumably it arrives through the `use_translator` star import. Confirm.
enriched = requests.post(aragorn_url, json=trapi)

In [59]:
# HTTP status of the enrichment call.
enriched.status_code

200

In [60]:
# Number of results in the enrichment response.
len(enriched.json()['message']['results'])

2868

In [118]:
def extract(res,kg):
    """Print the enrichment rule(s) carried by a single result.

    Dispatches on how many query nodes the result binds:

    * one bound node  -> property rules, printed as ``prop pvalue num``;
    * two bound nodes -> one edge rule, printed as ``rule pvalue num`` unless
      it is filtered out (p-value of exactly 0, or an excluded predicate);
    * anything else   -> prints ``wtf``.

    Results that bind exactly one ``input_chemical`` are skipped
    (presumably inputs passed through without grouping; verify).

    Args:
        res: A result dict with ``'node_bindings'`` (and, for edge rules,
            ``'edge_bindings'``).
        kg: The knowledge graph used to resolve node names and edges.

    Returns:
        None. Output is printed.
    """
    # Use the `res` parameter throughout; the earlier version read a global
    # named `result`, which only worked when the caller's loop variable
    # happened to have that name.
    node_bindings = res['node_bindings']
    if len(node_bindings['input_chemical']) == 1:
        return

    n_bound_nodes = len(node_bindings)
    if n_bound_nodes == 1:
        for prop, pvalue, num in extract_property(node_bindings):
            print(prop,pvalue,num)
    elif n_bound_nodes == 2:
        rule, pvalue, num = extract_edge(res,kg)
        # Ad hoc filters (the original author noted not liking these):
        # drop rules reported with a p-value of exactly 0 ...
        if pvalue == 0:
            return
        # ... and rules whose predicate is in this exclusion list.
        excluded_predicates = (
            'biolink:causes',
            'biolink:contributes_to',
            'biolink:part_of',
            'biolink:biomarker_for',
            'biolink:contraindicated_for',
        )
        if rule[1] in excluded_predicates:
            return
        print (rule, pvalue,num)
    else:
        # Unexpected shape: more than two bound query nodes.
        print('wtf')

In [119]:
def extract_property(nb):
    """Extract property-enrichment rules from a result's node bindings.

    Reads the attributes of the first ``input_chemical`` binding and pairs
    the values of the ``properties`` and ``p_value`` attributes positionally.

    Args:
        nb: The ``node_bindings`` dict of a result; must contain
            ``'input_chemical'``.

    Returns:
        list[tuple]: ``(property, p_value, n)`` tuples, where ``n`` is the
        number of ``input_chemical`` bindings. Returns an empty list when
        there are no bindings, the attributes are missing/null, or either
        attribute is absent (previously these cases raised).
    """
    chem = nb['input_chemical']
    n = len(chem)
    if n == 0:
        return []

    # Attributes may be absent or explicitly null on a binding.
    atts = chem[0].get('attributes') or []
    pvalues = None
    props = None
    for att in atts:
        # No early exit: if an attribute name repeats, the last one wins.
        name = att.get('original_attribute_name')
        if name == 'p_value':
            pvalues = att['value']
        elif name == 'properties':
            props = att['value']

    if pvalues is None or props is None:
        return []
    return [(pr, pv, n) for pv, pr in zip(pvalues, props)]

In [123]:
def extract_edge(res,kg):
    """Extract the edge-enrichment rule from a result binding two query nodes.

    Args:
        res: Result dict with ``'node_bindings'`` (containing
            ``'input_chemical'`` plus one other query node) and
            ``'edge_bindings'``.
        kg: Knowledge graph dict with ``'nodes'`` and ``'edges'`` tables.

    Returns:
        tuple: ``(rule, p_value, num_chem)`` where ``rule`` is
        ``('?', predicate, (node_id, node_name))`` when the edge's subject is
        one of the bound chemicals, otherwise
        ``((node_id, node_name), predicate, '?')``; ``p_value`` is the value
        of the ``p_value`` attribute on the first chemical binding; and
        ``num_chem`` is the number of bound chemicals. ``node_name`` is
        ``None`` when the knowledge-graph node has no ``name``.

    Raises:
        ValueError: If there is no second query node, it binds more than one
            knowledge-graph node, no ``p_value`` attribute is present, or no
            edge binding is available to build the rule from.
    """
    nb = res['node_bindings']
    chem = nb['input_chemical']
    chem_ids = {c['id'] for c in chem}
    num_chem = len(chem)

    # Find the p-value on the first chemical binding (last occurrence wins).
    missing = object()
    p = missing
    for att in (chem[0].get('attributes') or []):
        if att.get('original_attribute_name') == 'p_value':
            p = att['value']
    if p is missing:
        raise ValueError("no 'p_value' attribute on the first input_chemical binding")

    # The other query node: first key (in insertion order) besides input_chemical.
    other_keys = [key for key in nb if key != 'input_chemical']
    if not other_keys:
        raise ValueError("result binds no query node other than 'input_chemical'")
    other_bindings = nb[other_keys[0]]
    if len(other_bindings) > 1:
        # Previously `raise 'y'`, which itself fails with a TypeError.
        raise ValueError(
            "expected exactly one node bound to %r, found %d"
            % (other_keys[0], len(other_bindings))
        )
    othernode_id = other_bindings[0]['id']
    othernode_name = kg['nodes'][othernode_id].get('name')
    other = (othernode_id, othernode_name)

    # Only the first binding of each query edge is inspected; if several
    # query edges are bound, the last one iterated determines the rule.
    rule = None
    for bindings in res['edge_bindings'].values():
        if not bindings:
            continue
        edge = kg['edges'][bindings[0]['id']]
        predicate = edge['predicate']
        if edge['subject'] in chem_ids:
            rule = ('?', predicate, other)
        else:
            rule = (other, predicate, '?')
    if rule is None:
        raise ValueError("result has no edge bindings to derive a rule from")

    return rule, p, num_chem

In [124]:
# Print a rule line for every enrichment result, resolving names and edges
# against the knowledge graph returned in the same response.
# NOTE(review): keep the loop variable named `result` unless extract() has
# been confirmed to read only its own parameters.
# NOTE(review): enriched.json() is called twice here; it could be stored once.
knowledge_graph = enriched.json()['message']['knowledge_graph']
for result in enriched.json()['message']['results']:
    extract(result,knowledge_graph)

CHEBI_ROLE:drug 5.754933058996342e-39 503
CHEBI_ROLE:pharmaceutical 4.5686170998782305e-37 503
CHEBI_ROLE:immunomodulator 1.831972150882083e-22 42
CHEBI_ROLE:cardiovascular_drug 6.085125607211122e-19 79
CHEBI_ROLE:immunosuppressive_agent 1.9829640637427237e-18 29
CHEBI_ROLE:antihypertensive_agent 6.2594322686768574e-18 50
CHEBI_ROLE:drug_allergen 8.937639169023616e-15 30
CHEBI_ROLE:micronutrient 1.4875496653146656e-14 22
CHEBI_ROLE:nutrient 1.9247602497041452e-14 25
CHEBI_ROLE:prodrug 1.008080563614563e-12 40
CHEBI_ROLE:pro-agent 8.108650948311721e-10 40
CHEBI_ROLE:nutraceutical 1.6781866196274058e-12 23
CHEBI_ROLE:xenobiotic 1.6151276408977996e-11 58
CHEBI_ROLE:anti-inflammatory_drug 2.9796418567800253e-11 42
CHEBI_ROLE:antimetabolite 1.5796757493803551e-10 23
CHEBI_ROLE:antagonist 2.4871427836008957e-10 64
CHEBI_ROLE:diuretic 5.350713674687563e-10 15
CHEBI_ROLE:environmental_contaminant 9.91827688942707e-10 53
CHEBI_ROLE:central_nervous_system_drug 1.5579141009865073e-09 59
CHEBI_ROL