In [136]:
# what genes interact with MEIS1 to contribute to 'restless leg syndrome'?
# does iron deficiency contribute to 'restless leg syndrome'?
# does iron treat 'restless leg syndrome'? 
# what drugs target iron deficiency?

# see how many ARAs can return MEIS1 with various identifiers - hopefully equivalent results
# HGNC:7000, OMIM:601739, NCBIGene:4211

# PRO ids: PR:000010316 (gene)
# PR:000037928 (isoform1)
# PR:O00470-1 (human isoform1)
# PR:Q60954-1 (mouse isoform1)

# what if someone searches for protein interactions of MEIS1?
# n0 - MEIS1, n1 - category:['biolink:Protein']
# UniProtKB:O00470

# start with iron deficiency


import json
import requests
from gamma_viewer import GammaViewer
from IPython.display import display
from pprint import pprint

In [137]:
def trapi_query(json_str, url):
    """POST a TRAPI query to an endpoint and return the raw HTTP response.

    Parameters
    ----------
    json_str : str
        The query serialized as JSON text; it is parsed here and sent as the
        JSON body of the request.
    url : str
        Endpoint to POST to.

    Returns
    -------
    The response object produced by ``requests.post``.
    """
    payload = json.loads(json_str)
    return requests.post(url, json=payload)

In [138]:
def query_ara_s(ara, jstr):
    """Send a TRAPI query to one ARA and summarize the returned edges.

    Parameters
    ----------
    ara : str
        URL of the ARA query endpoint. It is printed before the request.
    jstr : str
        The TRAPI query as JSON text, forwarded to ``trapi_query``.

    Returns
    -------
    tuple
        ``(results, count)``: ``results`` is a list with one
        ``"<subject> <predicate> <object>"`` string per knowledge-graph edge,
        and ``count`` is the number of such strings.

    Raises
    ------
    KeyError
        If the response JSON has no ``message.knowledge_graph.edges`` path.
    TypeError
        If an edge lacks ``subject``, ``predicate`` or ``object`` (the string
        concatenation then meets ``None``).
    Other exceptions raised by ``trapi_query`` or by decoding the response
    body propagate unchanged.
    """
    print(ara)
    response = trapi_query(jstr, ara)
    edges = response.json()['message']['knowledge_graph']['edges']

    # Only subject/predicate/object are reported.  The previous version also
    # built an unused dict from edge['attributes'][0], which made the whole
    # call fail for edges without attributes even though nothing used it.
    results = [
        edge.get('subject') + " " + edge.get('predicate') + " " + edge.get('object')
        for edge in edges.values()
    ]
    return results, len(results)

In [152]:
def query_ara_debug(ara, jstr):
    """Verbose variant of the ARA query: also prints the response and its JSON.

    Parameters
    ----------
    ara : str
        URL of the ARA query endpoint. It is printed before the request.
    jstr : str
        The TRAPI query as JSON text, forwarded to ``trapi_query``.

    Returns
    -------
    tuple
        ``(results, count)``: ``results`` is a list with one
        ``"<subject> <predicate> <object>"`` string per knowledge-graph edge,
        and ``count`` is the number of such strings.

    Raises
    ------
    KeyError
        If the response JSON has no ``message.knowledge_graph.edges`` path.
    TypeError
        If an edge lacks ``subject``, ``predicate`` or ``object``.
    Other exceptions raised by ``trapi_query`` or by decoding the response
    body propagate unchanged.
    """
    print(ara)
    response = trapi_query(jstr, ara)
    print(response)
    body = response.json()
    print(body)
    edges = body['message']['knowledge_graph']['edges']

    # The unused lookup of edge['attributes'][0] was dropped: it raised for
    # edges without attributes and hid the edges this helper is meant to show.
    results = [
        edge.get('subject') + " " + edge.get('predicate') + " " + edge.get('object')
        for edge in edges.values()
    ]
    return results, len(results)

In [182]:
def summarize(aras_to_summarize, specific_query):
    """Run one query against every ARA and print a per-endpoint summary.

    For each endpoint, the edge count and the edge list returned by
    ``query_ara_s`` are printed. KeyError, ValueError and TypeError raised
    while querying are reported with a one-line message and the loop moves on
    to the next endpoint; any other exception propagates to the caller.

    Parameters
    ----------
    aras_to_summarize : iterable of str
        ARA endpoint URLs to query in order.
    specific_query : str
        The TRAPI query as JSON text.

    Returns
    -------
    None
    """
    for member in aras_to_summarize:
        try:
            results_fn, counter_fn = query_ara_s(member, specific_query)
        # The handlers only report the failure.  They used to re-send the
        # query and discard the response, which doubled the traffic and could
        # raise again from inside the handler, aborting the remaining ARAs.
        except KeyError:
            print('fail, key error - might mean 0 results')
        except ValueError:
            print('fail, value error')
        except TypeError:
            print('fail, type error')
        else:
            print("count of results:", counter_fn)
            pprint(results_fn)

In [165]:
def run_query(aras, ids, category):
    """Build the query (or queries) for ``category`` and summarize them per ARA.

    Parameters
    ----------
    aras : iterable of str
        ARA endpoint URLs, handed to ``summarize``.
    ids :
        Meaning depends on ``category``:
        'gene'     - iterable of identifiers, one query each;
        'protein'  - iterable of items passed one at a time to
                     ``get_query_by_category``;
        'function' - a single value passed whole to
                     ``get_protein_to_molecularactivity_query``;
        'p2g'      - a single value passed whole to
                     ``get_protein_to_gene_query``;
        otherwise  - iterable of ``[identifier, name]`` pairs queried as
                     "biolink:ChemicalSubstance".
    category : str
        Selects which query builder is used.

    Returns
    -------
    None
    """
    # Single-query categories: ``ids`` is printed and forwarded as-is.
    if category == 'function':
        print(ids)
        summarize(aras, get_protein_to_molecularactivity_query(ids))
        return
    if category == 'p2g':
        print(ids)
        summarize(aras, get_protein_to_gene_query(ids))
        return

    # One query per entry in ``ids``.
    if category == 'gene':
        for gene_id in ids:
            print(gene_id)
            # NOTE(review): get_query_by_gene_id is not defined in the visible
            # part of this notebook (only get_query_by_id is) - confirm it exists.
            summarize(aras, get_query_by_gene_id(gene_id))
        return
    if category == 'protein':
        for protein_spec in ids:
            print(protein_spec)
            summarize(aras, get_query_by_category(protein_spec))
        return

    # Any other category is handled as a list of chemical [id, name] pairs.
    for entry in ids:
        chemical_id, label = entry[0], entry[1]
        print(chemical_id + ": " + label)
        summarize(aras, get_query_by_chemical_id(chemical_id, "biolink:ChemicalSubstance"))
    

In [187]:
# ARA endpoints that every run below is sent to; commented-out entries are
# currently excluded from the run.
aras_to_summarize = [
    # "https://arax.ncats.io/api/arax/v1.0/query",
    "https://evidara.healthdatascience.cloud/api/v1/query",
    "https://explanatory-agent.azurewebsites.net/v1.0/query/",
    "https://api.bte.ncats.io/v1/query",
    "https://smart-api.info/query",
    # "https://aragorn.renci.org/query",
    # "https://strider.renci.org/query",
]

# Gene identifiers for the 'gene' category of run_query.
gene_queries_to_run = ["NCBIGene:4211", "HGNC:7000", "OMIM:601739"]

# [identifier, n0 category, n1 category] triples for the 'protein' category.
protein_queries_to_run = [
    # ["UniProtKB:O00470", "biolink:Protein", "biolink:Gene"],
    ["UniProtKB:O00470", "biolink:Protein", "biolink:Protein"],
    ["PR:000010316", "biolink:Protein", "biolink:Protein"],
]

# CHEBI:30769 - citric acid
# KEGG.COMPOUND:C00158 - citrate, citric acid - conflates
# PUBCHEM.COMPOUND:31348 - citric acid
# NOTE(review): the notes above call CHEBI:30769 and PUBCHEM.COMPOUND:31348
# "citric acid" while the table below labels them citrate(3-) / citrate -
# confirm which labels are right.

# [identifier, display name] pairs for the chemical (fallback) category.
chem_map = [
    ["CHEBI:30769", "citrate(3-)"],
    ["CHEBI:16947", "citric acid"],
    ["CHEBI:35808", "citrate(2-)"],
    ["CHEBI:35804", "citrate(1-)"],
    ["CHEBI:64733", "potassium citrate - kidney stone treatment form"],
    ["CHEBI:132362", "citrate(4-)"],
    ["CHEMBL:CHEMBL1261", "CITRIC ACID"],
    ["CHEMBL:CHEMBL2107737", "CITRIC ACID MONOHYDRATE"],
    ["PUBCHEM.COMPOUND:31348", "citrate"],
    ["PUBCHEM.COMPOUND:311", "citric acid"],
    ["KEGG.COMPOUND:C00158", "KEGG pre conflated"],
    ["MESH:D019343", "MESH sort of pre conflated"],
]

# Runs that are currently switched off:
#   Protein: run_query(aras_to_summarize, protein_queries_to_run, 'protein')
#   Gene:    run_query(aras_to_summarize, gene_queries_to_run, 'gene')
#   citrate: run_query(aras_to_summarize, chem_map, 'chemical')
#   debug:   query_ara_debug("https://arax.ncats.io/api/arax/v1.0/query",protein_query)

print("Gene to MolecularActivity")
run_query(aras_to_summarize, ['NCBIGene:4211', 'biolink:Gene'], 'function')
print("\n")  # two blank lines between sections

print("Gene to Protein")
run_query(aras_to_summarize, 'UniProtKB:O00470', 'p2g')
print("\n")  # two blank lines between sections

print("Protein to MolecularActivity")
run_query(aras_to_summarize, ['UniProtKB:O00470', 'biolink:Protein'], 'function')

# run_query(aras_to_summarize, 'UniProtKB:O00470', 'p2g')


Gene to MolecularActivity
['NCBIGene:4211', 'biolink:Gene']
https://evidara.healthdatascience.cloud/api/v1/query
count of results: % 12
['NCBIGene:4211 biolink:participates_in GO:0035326',
 'NCBIGene:4211 biolink:participates_in GO:1990837',
 'NCBIGene:4211 biolink:participates_in GO:0000987',
 'NCBIGene:4211 biolink:participates_in GO:0001216',
 'NCBIGene:4211 biolink:participates_in GO:0044212',
 'NCBIGene:4211 biolink:participates_in GO:0000976',
 'NCBIGene:4211 biolink:participates_in GO:0001067',
 'NCBIGene:4211 biolink:participates_in GO:0001228',
 'NCBIGene:4211 biolink:participates_in GO:0001012',
 'NCBIGene:4211 biolink:participates_in GO:0000977',
 'NCBIGene:4211 biolink:participates_in GO:0000978',
 'NCBIGene:4211 biolink:participates_in GO:0003682']
https://explanatory-agent.azurewebsites.net/v1.0/query/
count of results: % 0
[]
https://api.bte.ncats.io/v1/query
fail, value error
https://smart-api.info/query
fail, value error


Gene to Protein
UniProtKB:O00470
https://evida

In [106]:
def get_query_by_id(id_to_use):
    """Return a TRAPI query (JSON text) from one gene id to any related gene.

    Node ``n0`` is pinned to ``id_to_use`` with category biolink:Gene, node
    ``n1`` is any biolink:Gene, and edge ``e0`` runs from n0 to n1 with no
    predicate given.

    Parameters
    ----------
    id_to_use : str
        Identifier substituted into the ``id`` field of node n0.

    Returns
    -------
    str
        The query as JSON text.
    """
    # The identifier is substituted into the single %s placeholder below.
    return """

{
  "message": {
    "query_graph": 

{
  "nodes": {
    "n0": {
      "id": "%s",
      "category": ["biolink:Gene"]
    },
    "n1": {
      "category": [
        "biolink:Gene"
      ]
    }
  },
  "edges": {
    "e0": {
      "subject": "n0",
      "object": "n1"
    }
  }
}
}
}
""" % id_to_use

In [107]:

# check for the kinds of relations between genes -> proteins 
# get me the specific predicates first -> "protein-protein-interactions", can I query by gene id. 
# 
# see what kind of conflation each one can handle. 

# PRO vs. UniProtKB 
# drug formulation vs. active ingredient
# conjugate bases pair: id for citrate and id for citric acid, CHEBI ids  -> return a query for one, check the other.  
# if it has it for one, but not the other, then it's a case for conflation.
# CHEMBL, PUBCHEM

# mutants vs. WT  --- text mining conflates this foo+/- (just annotation to foo) -- this probably happens upstream potentially
# gene or gene product -- so if using inference, this is handled in the model.
# conflation in the IDs -- this should be handled in the node normalizer
# no check between entity and category right now - separate issue.


In [108]:
def get_query_by_chemical_id(id_to_use, category):
    """Return a TRAPI query (JSON text) from one identified node to anything.

    Node ``n0`` is pinned to ``id_to_use`` with the given category, node
    ``n1`` is any biolink:NamedThing, and edge ``e0`` runs from n0 to n1 with
    predicate biolink:related_to.

    The previous hand-written template omitted the ``"n0": {`` key, which left
    the braces unbalanced, so parsing the returned text always failed. The
    query is now built as a dict and serialized, which guarantees valid JSON
    and escapes any special characters in the arguments.

    Parameters
    ----------
    id_to_use : str
        Identifier placed in the ``id`` field of node n0.
    category : str
        Category placed (as a one-element list) on node n0.

    Returns
    -------
    str
        The query as JSON text.
    """
    # Local import keeps this cell runnable on its own.
    import json

    query_graph = {
        "nodes": {
            # str() mirrors the old "%s" substitution for non-string arguments.
            "n0": {"id": str(id_to_use), "category": [str(category)]},
            "n1": {"category": ["biolink:NamedThing"]},
        },
        "edges": {
            "e0": {
                "subject": "n0",
                "object": "n1",
                "predicate": "biolink:related_to",
            }
        },
    }
    return json.dumps({"message": {"query_graph": query_graph}}, indent=2)

In [109]:
def get_query_by_category(replacement_list):
    """Return a TRAPI query (JSON text) between two categorized nodes.

    Parameters
    ----------
    replacement_list : sequence
        ``[identifier, n0_category, n1_category]``: element 0 becomes the id
        of node n0, element 1 its category, and element 2 the category of the
        unpinned node n1. Edge ``e0`` runs from n0 to n1 with no predicate.

    Returns
    -------
    str
        The query as JSON text.
    """
    template = """
{
  "message": {
    "query_graph": 
{
  "nodes": {
    "n0": {
       "id": "%s",
       "category": ["%s"]
    },
    "n1": {
      "category": [
        "%s"
      ]
    }
  },
  "edges": {
    "e0": {
      "subject": "n0",
      "object": "n1"
    }
  }
}
}
}

"""
    # Placeholders are filled in order: n0 id, n0 category, n1 category.
    return template % (replacement_list[0], replacement_list[1], replacement_list[2])

In [180]:
def get_protein_to_molecularactivity_query(ids):
    """Return a TRAPI query (JSON text) from one node to MolecularActivity.

    Parameters
    ----------
    ids : sequence
        ``[identifier, category]``: element 0 becomes the id of node n0 and
        element 1 its category. Node n1 is any biolink:MolecularActivity and
        edge ``e0`` runs from n0 to n1 with no predicate.

    Returns
    -------
    str
        The query as JSON text.
    """
    template = """
{
  "message": {
    "query_graph": 
{
  "nodes": {
    "n0": {
      "id":"%s",
      "category": ["%s"]
    },
    "n1": {
      "category": ["biolink:MolecularActivity"]
    }
  },
  "edges": {
    "e0": {
      "subject": "n0",
      "object": "n1"
    }
  }
}
}
}

"""
    # Placeholders are filled in order: n0 id, then n0 category.
    return template % (ids[0], ids[1])

In [167]:
def get_protein_to_gene_query(ids):
    """Return a TRAPI query (JSON text) linking any gene to one protein.

    Node ``n0`` is any biolink:Gene, node ``n1`` is a biolink:Protein pinned
    to the given identifier, and edge ``e0`` runs from n0 (subject) to n1
    (object) with no predicate.

    Parameters
    ----------
    ids : str
        Identifier substituted into the ``id`` field of node n1.

    Returns
    -------
    str
        The query as JSON text.
    """
    # The identifier is substituted into the single %s placeholder below.
    template = """
{
  "message": {
    "query_graph": 
{
  "nodes": {
    "n0": {
      "category": ["biolink:Gene"]
    },
    "n1": {
      "id":"%s",
      "category": ["biolink:Protein"]
    }
  },
  "edges": {
    "e0": {
      "subject": "n0",
      "object": "n1"
    }
  }
}
}
}

"""
    return template % ids