In [1]:
import pandas as pd
import numpy as np
import json
from SPARQLWrapper import SPARQLWrapper, JSON
import requests 
import itertools

In [299]:
# SPARQL templates; later cells index this mapping by template id.
with open('templates.json') as templates_file:
    templates = json.load(templates_file)

# Test split of LC-QuAD, read as three line-aligned lists:
# the questions themselves ...
with open('data/lc-quad/test/input.txt') as questions_file:
    question = questions_file.read().strip().split('\n')

# ... the contents of the matching .pos file ...
with open('data/lc-quad/test/input.pos') as pos_file:
    pos = pos_file.read().strip().split('\n')

# ... and the per-question output label (used below as the template id).
with open('data/lc-quad/test/output.txt') as labels_file:
    output = labels_file.read().strip().split('\n')

In [205]:
def sort_dict_by_values(dictionary):
    """Return the keys of `dictionary` ordered from highest to lowest value.

    Keys that share a value are ordered by the key itself, also descending.
    """
    def rank(key):
        # Primary sort on the value, with the key as a tie-breaker.
        return (dictionary[key], key)

    return sorted(dictionary, key=rank, reverse=True)

def get_earl_entities(query):
    """Ask the EARL service for candidate URIs for `query` and bucket them.

    Each candidate in the response's 'rerankedlists' is read as a
    (score, uri) pair. Candidates scoring below THRESHOLD are dropped and
    the rest are sorted into three buckets:

    * 'r' - URIs under http://dbpedia.org/resource/
    * 'c' - other URIs whose last path segment starts with an upper-case letter
    * 'p' - everything else

    Returns:
        dict with keys 'r', 'p' and 'c', each a list of URIs ordered by
        descending score. If a URI occurs in more than one reranked list,
        the score seen last is the one used for ordering.
    """
    THRESHOLD = 0.001
    # NOTE(review): no timeout is passed, so a stalled service blocks the
    # notebook indefinitely; consider adding one once a sensible limit is known.
    response = requests.post('http://sda.tech/earl/api/processQuery',
                             json={"nlquery": query, "pagerankflag": False})

    json_response = json.loads(response.text)
    r_dict = {}
    c_dict = {}
    p_dict = {}

    reranked_lists = json_response['rerankedlists']
    for candidates in reranked_lists.values():
        for result in candidates:
            score, uri = result[0], result[1]
            if score < THRESHOLD:
                continue

            if uri.startswith('http://dbpedia.org/resource/'):
                r_dict[uri] = score
                continue

            # Slice instead of indexing: a URI with an empty last segment
            # (trailing '/' or empty string) must not raise IndexError.
            # ''.isupper() is False, so such URIs land in the predicate bucket.
            local_name = uri.split('/')[-1]
            if local_name[:1].isupper():
                c_dict[uri] = score
            else:
                p_dict[uri] = score

    return {'r': sort_dict_by_values(r_dict), 'p': sort_dict_by_values(p_dict), 'c': sort_dict_by_values(c_dict)}

In [170]:
def make_sparql_query(query, return_var):
    """Run `query` against the DBpedia SPARQL endpoint and extract `return_var`.

    When `return_var` is 'boolean' the response's 'boolean' field is returned,
    or None if the response has no such field. Otherwise the result is the
    list of `return_var` values taken from every binding that contains it.
    """
    endpoint = SPARQLWrapper("http://akswnc9.aksw.uni-leipzig.de/dbpedia/sparql")
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    if return_var == 'boolean':
        return response['boolean'] if 'boolean' in response else None

    return [
        binding[return_var]['value']
        for binding in response["results"]["bindings"]
        if return_var in binding
    ]

In [218]:
def get_answer(question, template):
    """Fill a SPARQL template with linked URIs for `question` and execute it.

    Candidate URIs for every slot come from get_earl_entities(); a slot named
    e.g. 'p2' draws from the bucket named by its first character ('p').
    Candidate combinations are tried in order until one query yields a result.

    Args:
        question: natural-language question to answer.
        template: mapping with 'slots' (iterable of slot names), 'sparql'
            (query text containing '<slot>' placeholders) and 'return'
            (name of the variable to extract, or 'boolean').

    Returns:
        (output, sparql_query): the result of the last query executed and the
        text of that query. If no predicate or resource candidates were found,
        or no combination was tried, this is ([], '').
    """
    output = []
    sparql_query = ''

    # Use the `question` argument; the previous code read the notebook-global
    # `q`, which ignored the parameter and failed on a fresh kernel.
    entities = get_earl_entities(question)
    slots = {slot: entities[slot[0]] for slot in template['slots']}

    # This means something probably went wrong and no predicates or resources were detected for the query
    if len(slots['p']) == 0 or len(slots['r']) == 0:
        return output, sparql_query

    slot_keys = list(slots.keys())
    candidate_lists = [slots[key] for key in slot_keys]

    # Loop-invariant: does the template start with two predicate slots and/or
    # end with two resource slots? Those pairs must not share one URI.
    paired_predicates = slot_keys[0] == 'p' and slot_keys[1] == 'p2'
    paired_resources = slot_keys[-2] == 'r' and slot_keys[-1] == 'r2'

    for combo in itertools.product(*candidate_lists):
        if paired_predicates and combo[0] == combo[1]:
            continue
        if paired_resources and combo[-2] == combo[-1]:
            continue

        sparql_query = template['sparql']
        for key, uri in zip(slot_keys, combo):
            sparql_query = sparql_query.replace('<' + key + '>', '<' + uri + '>')

        print(sparql_query, "\n\n")
        output = make_sparql_query(sparql_query, template['return'])
        if template['return'] == 'boolean':
            # Any definite boolean (True or False) counts as an answer.
            if output is not None:
                break
        elif len(output) > 0:
            break

    return output, sparql_query

In [4]:
# One row per test question, with columns in the order question, pos, output.
df = pd.DataFrame(dict(question=question, pos=pos, output=output))

In [287]:
# List the distinct values of the output column (used below as template ids).
df['output'].unique()

array(['5', '16', '2', '6', '1', '151', '3', '15', '8', '105', '111',
       '101', '106', '7', '102', '11', '108', '103'], dtype=object)

In [300]:
# Pick the first test question labelled with the chosen template id.
template = '108'
q = df.loc[df['output'] == template, 'question'].iloc[0]

In [301]:
# Display the currently selected question.
q

'Count the number of movies whose distributor is RKO Pictures and director of photography is Gregg Toland?'

In [302]:
# Answer the selected question using the template registered under its id.
get_answer(q, templates[template])

SELECT (COUNT(DISTINCT ?uri) as ?count) WHERE { ?uri <http://dbpedia.org/ontology/distributor> <http://dbpedia.org/resource/RKO_Pictures> . ?uri <http://dbpedia.org/ontology/number> <http://dbpedia.org/resource/Gregg_Toland> . } 




(['0'],
 'SELECT (COUNT(DISTINCT ?uri) as ?count) WHERE { ?uri <http://dbpedia.org/ontology/distributor> <http://dbpedia.org/resource/RKO_Pictures> . ?uri <http://dbpedia.org/ontology/number> <http://dbpedia.org/resource/Gregg_Toland> . }')

In [217]:
# Inspect the candidate URIs returned for q, grouped into 'r', 'p' and 'c'.
# NOTE(review): the recorded output appears to come from an earlier run with a
# different q (execution count 217 vs. 300 above) - re-run to refresh.
get_earl_entities(q)

{'c': ['http://dbpedia.org/ontology/River'],
 'p': ['http://dbpedia.org/ontology/sourcePlace'],
 'r': ['http://dbpedia.org/resource/North_Sea',
  'http://dbpedia.org/resource/Gloucestershire',
  'http://dbpedia.org/resource/River_mouth',
  'http://dbpedia.org/resource/Mouthe',
  'http://dbpedia.org/resource/A_Mouthful',
  'http://dbpedia.org/resource/Sea*a',
  'http://dbpedia.org/resource/Mouth',
  'http://dbpedia.org/resource/Is_It_the_Sea%3F',
  'http://dbpedia.org/resource/To_the_Sea',
  'http://dbpedia.org/resource/Mouthing',
  'http://dbpedia.org/resource/Sea']}

In [115]:
# Display q. NOTE(review): execution count 115 predates the cells above, so the
# recorded value presumably reflects an older selection - re-run to refresh.
q

'What sports are played at Vishwajyot High School?'