# Question Augmentation (Edge Expanding)

Users who do not know the details of the underlying data need help constructing meaningful queries. This service takes a query and attempts to expand each of its edges using rules that maximize precision and recall.

In [1]:
import requests
import json

class Strider:
    """Small HTTP client for the Strider query service.

    ``call`` submits a query graph to the ``/query`` endpoint and
    ``query_result`` prints what ``/results`` returns for a query id.
    """

    DEFAULT_URL = 'http://robokop.renci.org:5781'

    def __init__(self, url=DEFAULT_URL, timeout=None):
        """Remember where the service lives.

        Args:
            url: Base URL of the service; a trailing slash is dropped so the
                endpoint paths can be appended directly.
            timeout: Passed to ``requests`` as the per-request timeout in
                seconds. ``None`` (the default) waits indefinitely, which is
                what a plain ``requests`` call does.
        """
        self.url = url.rstrip('/')
        self.timeout = timeout

    def call(self, question):
        """Wrap ``question`` (a query graph dict) in a message and submit it.

        Returns:
            Whatever ``send_message`` returns: the parsed JSON reply, or
            ``None`` if the service did not answer with HTTP 200.
        """
        message = {'message': {'query_graph': question}}
        return self.send_message(message)

    def send_message(self, message):
        """POST ``message`` as JSON to ``/query``.

        Returns:
            The parsed JSON body on HTTP 200. On any other status the status
            code is printed and ``None`` is returned.
        """
        response = requests.post(f'{self.url}/query', json=message,
                                 timeout=self.timeout)
        if response.status_code == 200:
            pid = response.json()
            return pid
        else:
            print(response.status_code)
            return None

    def query_result(self, pid):
        """GET ``/results`` for query id ``pid`` and pretty-print the JSON.

        Nothing is returned. If ``pid`` is ``None`` (which is what
        ``send_message`` returns on failure) no request is made, and a
        non-200 reply is reported by printing its status code rather than
        attempting to parse the body as JSON.
        """
        if pid is None:
            print('No query id given.')
            return None
        r = requests.get(f'{self.url}/results', params={'query_id': pid},
                         timeout=self.timeout)
        if r.status_code != 200:
            # Same reporting convention as send_message.
            print(r.status_code)
            return None
        print(json.dumps(r.json(), indent=2))


strider = Strider()

For each edge type, we have precomputed translations that optimally balance precision and recall.  Here, we start with the question "What drugs treat type-2 diabetes?":

In [2]:
# Query graph for "What drugs treat type-2 diabetes?": an unbound
# chemical_substance node (n0) connected by a 'treats' edge to the disease
# node n1, which is pinned to MONDO:0005148.
question = {
    'nodes': [
        {'id': 'n0', 'type': 'chemical_substance'},
        {'id': 'n1', 'type': 'disease', 'curie': 'MONDO:0005148'},
    ],
    'edges': [
        {'id': 'e0', 'source_id': 'n0', 'target_id': 'n1', 'type': 'treats'},
    ],
}

Running this query will produce answers:

In [3]:
# Submit the question to Strider. `p` is the parsed JSON reply, used below as
# the query id when fetching results; it is None if the request did not
# return HTTP 200.
p = strider.call(question)

In [4]:
# Fetch the results for query id `p` and pretty-print them as JSON.
strider.query_result(p)

{
  "query_graph": {
    "nodes": [
      {
        "id": "n1",
        "curie": "MONDO:0005148",
        "type": "disease"
      },
      {
        "id": "n0",
        "curie": null,
        "type": "chemical_substance"
      }
    ],
    "edges": [
      {
        "id": "e0",
        "type": "treats",
        "source_id": "n0",
        "target_id": "n1"
      }
    ]
  },
  "knowledge_graph": null,
  "results": [
    {
      "node_bindings": [
        {
          "qg_id": "n0",
          "kg_id": "PUBCHEM:23927"
        },
        {
          "qg_id": "n1",
          "kg_id": "MONDO:0005148"
        }
      ],
      "edge_bindings": [
        {
          "qg_id": "e0",
          "kg_id": "bb3532a10ccdd7a46e5d7ab4420640a9"
        }
      ]
    },
    {
      "node_bindings": [
        {
          "qg_id": "n0",
          "kg_id": "CHEBI:27999"
        },
        {
          "qg_id": "n1",
          "kg_id": "MONDO:0005148"
        }
      ],
      "edge_bindings": [
        {
       

These answers, however, are only those chemicals for which an explicit 'treats' edge occurs in the (federated) graph.  They tell us about known treatments, but don't help us find new ones.

We would like to run a query that may infer new chemicals to treat diabetes.  But what query should we run?  How should we modify our treats query to create a query that may infer new edges?  We can call our Question Augmentation service:

In [7]:
# Wrap the question in a message envelope and POST it to the Question
# Augmentation service's edge_expand endpoint.
message = {'message': {'query_graph': question}}
qa_url = 'https://questionaugmentation.renci.org/edge_expand'
# NOTE(review): depth=1 presumably limits each expansion to one extra hop --
# confirm against the service documentation.
rq = requests.post(qa_url, json=message, params={'depth': 1})
# Fail loudly on an HTTP error instead of trying to parse an error body as a
# list of questions.
rq.raise_for_status()
new_questions = rq.json()
print(len(new_questions))

print(json.dumps(new_questions, indent=2))

9
[
  {
    "query_graph": {
      "nodes": [
        {
          "id": "n0",
          "type": "chemical_substance"
        },
        {
          "id": "n1",
          "type": "disease",
          "curie": "MONDO:0005148"
        },
        {
          "id": "expansion_node_0",
          "type": "chemical_substance"
        }
      ],
      "edges": [
        {
          "id": "expansion_edge_0",
          "source_id": "n0",
          "target_id": "expansion_node_0",
          "type": "derives_from"
        },
        {
          "id": "expansion_edge_1",
          "source_id": "expansion_node_0",
          "target_id": "n1",
          "type": "treats"
        }
      ]
    }
  },
  {
    "query_graph": {
      "nodes": [
        {
          "id": "n0",
          "type": "chemical_substance"
        },
        {
          "id": "n1",
          "type": "disease",
          "curie": "MONDO:0005148"
        },
        {
          "id": "expansion_node_0",
          "type": "gene"
      

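In the above, we expanded a single edge, because there was only a single edge in the query, but the service will expand every edge.  In this prototype, each edge is expanded independently, potentially leading to a large number of possible questions.  To see this, we take the last of the expanded questions, which now has two edges, and send it back to the service to be expanded again.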

In [9]:
# Pick the last of the expanded questions returned by the service and
# pretty-print it so its nodes and edges can be inspected.
two_step_question = new_questions[-1]
print(json.dumps(two_step_question,indent=2))

{
  "query_graph": {
    "nodes": [
      {
        "id": "n0",
        "type": "chemical_substance"
      },
      {
        "id": "n1",
        "type": "disease",
        "curie": "MONDO:0005148"
      },
      {
        "id": "expansion_node_0",
        "type": "gene"
      }
    ],
    "edges": [
      {
        "id": "expansion_edge_0",
        "source_id": "n0",
        "target_id": "expansion_node_0",
        "type": "affects_activity_of"
      },
      {
        "id": "expansion_edge_1",
        "source_id": "n1",
        "target_id": "expansion_node_0",
        "type": "disease_to_gene_association"
      }
    ]
  }
}


In [11]:
# two_step_question already carries its own 'query_graph' key, so it is placed
# directly under 'message' without further wrapping.
message2 = {'message': two_step_question}
qa_url = 'https://questionaugmentation.renci.org/edge_expand'
# No 'depth' parameter is sent here, so the service's default applies.
rq = requests.post(qa_url, json=message2)
# Fail loudly on an HTTP error instead of trying to parse an error body as a
# list of questions.
rq.raise_for_status()
# NOTE(review): this rebinds new_questions. Re-running the cell that selects
# two_step_question after this one would pick from this second result set.
new_questions = rq.json()
print(len(new_questions))

print(json.dumps(new_questions, indent=2))

7
[
  {
    "query_graph": {
      "nodes": [
        {
          "id": "n0",
          "type": "chemical_substance"
        },
        {
          "id": "n1",
          "type": "disease",
          "curie": "MONDO:0005148"
        },
        {
          "id": "expansion_node_0",
          "type": "gene"
        }
      ],
      "edges": [
        {
          "id": "expansion_edge_1",
          "source_id": "n1",
          "target_id": "expansion_node_0",
          "type": "disease_to_gene_association"
        },
        {
          "id": "expansion_edge_0",
          "source_id": "n0",
          "target_id": "expansion_node_0",
          "type": "increases_activity_of"
        }
      ]
    }
  },
  {
    "query_graph": {
      "nodes": [
        {
          "id": "n0",
          "type": "chemical_substance"
        },
        {
          "id": "n1",
          "type": "disease",
          "curie": "MONDO:0005148"
        },
        {
          "id": "expansion_node_0",
          "type