# Question Augmentation (Edge Expanding)

Users who do not know the details of the underlying data need help constructing meaningful queries. This service takes queries and attempts to expand each of their edges using rules that maximize precision and recall.

In [1]:
import requests
import json
from notebook_functions import reasonerGraphToCytoscape, knowledgeGraphToCytoscape
from cyjupyter import Cytoscape
import time

In [2]:
class Strider:
    """Thin HTTP client for the query service reachable at ``url``.

    Parameters
    ----------
    url : str, optional
        Base URL of the service (no trailing slash). Defaults to the
        robokop.renci.org endpoint this notebook was written against, so
        ``Strider()`` behaves exactly as before.
    """

    def __init__(self, url='http://robokop.renci.org:5781'):
        self.url = url

    def call(self, question):
        """Wrap ``question`` as the message's ``query_graph`` and submit it.

        Returns whatever :meth:`send_message` returns.
        """
        message = {'message': {'query_graph': question}}
        return self.send_message(message)

    def send_message(self, message):
        """POST ``message`` as JSON to ``<url>/query``.

        Returns the decoded JSON body when the service answers with HTTP 200
        (presumably a query id, given how ``query_result`` uses it - confirm
        against the service). For any other status the code is printed and
        ``None`` is returned.
        """
        response = requests.post(f'{self.url}/query', json=message)
        if response.status_code == 200:
            pid = response.json()
            return pid
        else:
            print(response.status_code)
            return None

    def query_result(self, pid):
        """GET ``<url>/results`` for ``query_id=pid`` and pretty-print the JSON body.

        Nothing is returned; the result is only printed.
        """
        r = requests.get(f'{self.url}/results', params={'query_id': pid})
        print(json.dumps(r.json(), indent=2))


# Shared client instance, pointing at the default endpoint.
strider = Strider()

For each edge type, we have precomputed translations that optimally balance precision and recall. As an example, take the question "What drugs treat type-2 diabetes?":

In [3]:
# Query graph for "What drugs treat type-2 diabetes?":
# an unbound chemical_substance node (n0) linked by a 'treats' edge to the
# disease node (n1) pinned to MONDO:0005148.
question = {
    'nodes': [
        {'id': 'n0', 'type': 'chemical_substance'},
        {'id': 'n1', 'type': 'disease', 'curie': 'MONDO:0005148'},
    ],
    'edges': [
        {'id': 'e0', 'source_id': 'n0', 'target_id': 'n1', 'type': 'treats'},
    ],
}

In [4]:
# Convert the query graph into Cytoscape form for display.
c = reasonerGraphToCytoscape(question)
# Set the width in the first style entry to 15em before rendering.
c['style'][0]['style']['width'] = '15em'
# Last expression of the cell: the widget is rendered as the cell output.
Cytoscape(
    data=c,
    visual_style=c["style"],
    layout_name='grid',
    layout={"name": "grid", "height": "200px"},
)

Cytoscape(data={'elements': {'nodes': [{'data': {'id': 'n0', 'label': 'chemical_substance\n[]', 'curie': '', '…

Running this query will produce answers:

Running this query would produce answers, but only those chemicals for which an explicit 'treats' edge occurs in the (federated) graph. It tells us known treatments, but doesn't help us find new ones.

We would like to run a query that may infer new chemicals to treat diabetes.  But what query should we run?  How should we modify our treats query to create a query that may infer new edges?  We can call our Question Augmentation service:

In [5]:
# Send the query graph to the Question Augmentation edge_expand endpoint.
message = {'message': {'query_graph': question}}
qa_url = 'https://questionaugmentation.renci.org/edge_expand'
rq = requests.post(qa_url,json=message, params={'depth':1})
# Fail fast on an HTTP error rather than decoding an error body as if it were
# the list of expanded questions (which would only surface later as a
# confusing indexing error).
rq.raise_for_status()
new_questions = rq.json()

In [6]:
# Show the first expanded question returned by the service.
q = new_questions[0]
c = reasonerGraphToCytoscape(q['query_graph'])
# Last expression of the cell: the widget is rendered as the cell output.
Cytoscape(
    data=c,
    visual_style=c["style"],
    layout_name='grid',
    layout={"name": "grid", "height": "200px"},
)

Cytoscape(data={'elements': {'nodes': [{'data': {'id': 'n0', 'label': 'chemical_substance\n[]', 'curie': '', '…

In [11]:
# Print the new questions (in a non-general way), one list per question:
#   [type of nodes[0], edge touching n0, type of nodes[2], other edge, curie of nodes[1]]
# Assumes every question has exactly two edges and at least three nodes,
# indexed positionally as used below.
for q in new_questions:
    nodes = q['query_graph']['nodes']
    edges = q['query_graph']['edges']

    # e0 is whichever edge has n0 as an endpoint; e1 is the remaining edge.
    first_touches_n0 = 'n0' in (edges[0]['source_id'], edges[0]['target_id'])
    e0, e1 = (edges[0], edges[1]) if first_touches_n0 else (edges[1], edges[0])

    # The arrow trails the predicate when the edge leaves n0, and leads it otherwise.
    e0_rep = f'{e0["type"]}->' if e0['source_id'] == 'n0' else f'<-{e0["type"]}'
    # The arrow trails the predicate when the edge enters n1, and leads it otherwise.
    e1_rep = f'{e1["type"]}->' if e1['target_id'] == 'n1' else f'<-{e1["type"]}'

    line = [nodes[0]['type'], e0_rep, nodes[2]['type'], e1_rep, nodes[1]['curie']]
    print(line)

['chemical_substance', 'derives_from->', 'chemical_substance', 'treats->', 'MONDO:0005148']
['chemical_substance', '<-affects_response_to', 'gene', '<-disease_to_gene_association', 'MONDO:0005148']
['chemical_substance', '<-increases_response_to', 'gene', '<-disease_to_gene_association', 'MONDO:0005148']
['chemical_substance', '<-decreases_response_to', 'gene', '<-disease_to_gene_association', 'MONDO:0005148']
['chemical_substance', 'increases_activity_of->', 'gene', '<-disease_to_gene_association', 'MONDO:0005148']
['chemical_substance', 'interacts_with->', 'gene', '<-disease_to_gene_association', 'MONDO:0005148']
['chemical_substance', 'decreases_activity_of->', 'gene', '<-disease_to_gene_association', 'MONDO:0005148']
['chemical_substance', 'decreases_expression_of->', 'gene', '<-disease_to_gene_association', 'MONDO:0005148']
['chemical_substance', 'affects_activity_of->', 'gene', '<-disease_to_gene_association', 'MONDO:0005148']


In the above, we expanded a single edge because the query contained only one, but the service will expand every edge of a query. In this prototype, each edge is expanded independently, which can potentially produce a large number of questions.

In [14]:
# Take the last expanded question as the input for a second round of expansion.
# NOTE(review): a later cell reassigns `new_questions`, so re-running this cell
# after that one selects a different question - confirm execution order first.
two_step_question = new_questions[-1]

In [15]:
# Expand the already-expanded (two-step) question once more.
message2 = {'message': two_step_question}
qa_url = 'https://questionaugmentation.renci.org/edge_expand'
# NOTE(review): unlike the first request, no `depth` parameter is sent here,
# so whatever the service uses by default applies - confirm this is intended.
rq = requests.post(qa_url,json=message2)
# Fail fast on an HTTP error rather than decoding an error body as results.
rq.raise_for_status()
new_questions = rq.json()
print(len(new_questions))

7


In [23]:
# Show the expanded question at index 6 of the second-round results.
q = new_questions[6]
c = reasonerGraphToCytoscape(q['query_graph'])
# Last expression of the cell: the widget is rendered as the cell output.
Cytoscape(
    data=c,
    visual_style=c["style"],
    layout_name='grid',
    layout={"name": "grid", "height": "400px"},
)

Cytoscape(data={'elements': {'nodes': [{'data': {'id': 'n0', 'label': 'chemical_substance\n[]', 'curie': '', '…

In [24]:
# Show the expanded question at index 3 of the second-round results.
q = new_questions[3]
c = reasonerGraphToCytoscape(q['query_graph'])
# Last expression of the cell: the widget is rendered as the cell output.
Cytoscape(
    data=c,
    visual_style=c["style"],
    layout_name='grid',
    layout={"name": "grid", "height": "400px"},
)

Cytoscape(data={'elements': {'nodes': [{'data': {'id': 'n0', 'label': 'chemical_substance\n[]', 'curie': '', '…