# Install and Import

In [None]:
!pip install sparqlwrapper

Collecting sparqlwrapper
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Collecting rdflib>=6.1.1
  Downloading rdflib-6.1.1-py3-none-any.whl (482 kB)
[K     |████████████████████████████████| 482 kB 8.2 MB/s 
[?25hCollecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 318 kB/s 
Installing collected packages: isodate, rdflib, sparqlwrapper
Successfully installed isodate-0.6.1 rdflib-6.1.1 sparqlwrapper-2.0.0


In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

import requests
import json
import pprint

In [None]:
# QALD-9-plus training split (Wikidata variant) from the KGQA/QALD_10 repository.
# Not referenced by the cells visible in this notebook.
train_url = "https://raw.githubusercontent.com/KGQA/QALD_10/main/data/qald_9_plus/qald_9_plus_train_wikidata.json"
# QALD-10 question file; downloaded below and used as the template for the prediction files.
test_url = "https://raw.githubusercontent.com/KGQA/QALD_10/main/data/qald_10/qald_10.json"

# Utils

In [None]:
def read_prediction(filename):
    """Read a tab-separated prediction file into three parallel lists.

    Each line is expected to hold ``head<TAB>pred<TAB>tail``. The tail column
    may contain several space-separated identifiers; only the first one is
    kept. Lines with fewer than three columns (including blank lines) are
    padded with empty strings so that row ``i`` of the result always
    corresponds to line ``i`` of the file.

    Args:
        filename: Path of the prediction file (read as UTF-8).

    Returns:
        A tuple ``(head_lst, pred_lst, tail_lst)`` of equally long lists of
        strings, one entry per line of the file.

    Raises:
        ValueError: If a line contains more than three tab-separated columns.
    """
    head_lst = []
    pred_lst = []
    tail_lst = []
    # Explicit encoding: the files may contain non-ASCII text and the platform
    # default is not UTF-8 everywhere.
    with open(filename, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            columns = line.rstrip('\r\n').split('\t')
            if len(columns) > 3:
                raise ValueError(
                    f"{filename}:{line_no}: expected 3 tab-separated columns, "
                    f"got {len(columns)}"
                )
            # Pad short lines instead of crashing, keeping positional alignment.
            columns += [''] * (3 - len(columns))
            head, pred, tail = columns
            head_lst.append(head)
            pred_lst.append(pred)
            # Keep only the first identifier of the tail column.
            tail_lst.append(tail.strip().split(' ')[0])
    return head_lst, pred_lst, tail_lst

In [None]:
def fb_to_wikidata(data):
    """Map Freebase MIDs to Wikidata entity URIs through the Wikidata SPARQL endpoint.

    Args:
        data: Iterable of strings. Each string holds one or more
            space-separated MIDs in dotted form (e.g. ``'m.0ddqw'``); an empty
            string is allowed.

    Returns:
        A list with one inner list per input string. Each inner list has one
        entry per MID: the URI of the first Wikidata item whose Freebase ID
        (property P646) equals the MID, or ``None`` when the MID is empty or
        malformed, no item matches, or the query fails.
    """
    import re  # local import so this cell does not depend on a later import cell

    # Dotted MID: one-letter namespace, a dot, then lowercase alphanumerics/underscores.
    # Anything else is never interpolated into the query text.
    mid_pattern = re.compile(r"[mg]\.[0-9a-z_]+")

    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setReturnFormat(JSON)  # loop-invariant, so set once

    resolved = {}  # MID -> URI (or None); avoids re-querying repeated MIDs

    def lookup(mid):
        if mid in resolved:
            return resolved[mid]
        if not mid_pattern.fullmatch(mid):
            # Empty or malformed MID: nothing sensible to ask the endpoint.
            return None
        # 'm.0ddqw' -> '/m/0ddqw' (the form stored under P646).
        freebase_id = "/" + mid.replace(".", "/", 1)
        sparql.setQuery(f"""
            SELECT DISTINCT ?item WHERE {{
              ?item p:P646 ?statement0.
              ?statement0 (ps:P646) "{freebase_id}".
            }}
            LIMIT 1
            """)
        try:
            bindings = sparql.query().convert()['results']['bindings']
            uri = bindings[0]['item']['value'] if bindings else None
        except Exception:
            # Best effort: a failed request or unexpected payload yields None.
            # The failure is not cached, so a later occurrence may still succeed.
            return None
        resolved[mid] = uri
        return uri

    return [[lookup(mid) for mid in mids.split(' ')] for mids in data]

# Process Predictions (Freebase)

Choose the prediction file that corresponds to the target language:

en_prediction.txt

de_prediction.txt

ru_prediction.txt

zh_prediction.txt

In [None]:
# Parse the zh prediction file into parallel lists of head, pred and (first) tail values.
heads, preds, tails = read_prediction('zh_prediction.txt')

In [None]:
# Show the parsed tail MIDs; '' marks a line whose tail column was empty.
tails

['',
 'm.0ddqw',
 'm.01www',
 'm.02lx2r',
 'm.02nsjl9',
 'm.02zsn',
 'm.02h40lc',
 'm.02kl65p',
 'm.0290zjn',
 'm.012z_m',
 'm.09c7w0',
 'm.01w88m0',
 'm.06ppq',
 'm.05vz3zq',
 'm.0lj5',
 'm.01ls2',
 'm.01_d4',
 'm.03_3d',
 'm.09c7w0',
 'm.0rh6k',
 'm.09c7w0',
 'm.0hk18',
 'm.02lx2r',
 'm.03czz87',
 '',
 'm.042_f1',
 'm.07ssc',
 'm.02lx2r',
 'm.0t4ytl',
 'm.01xrdt7',
 'm.03fpg',
 'm.05zppz',
 'm.01p4vl',
 'm.02jx1',
 'm.07ssc',
 'm.02b8t9',
 'm.06yxd',
 'm.0327fc',
 'm.02h40lc',
 '',
 'm.087m22',
 'm.02plv02',
 'm.01xr_d2',
 'm.0ltl7s',
 'm.014k0b',
 'm.053k78',
 'm.09j_g',
 'm.0m7gqh',
 'm.05g12v',
 'm.0l6nwf8',
 'm.021y6sl',
 'm.0cqbc',
 'm.02_7z_',
 'm.0d0vqn',
 'm.048vfz',
 'm.01y857',
 'm.06mkj',
 'm.0c_m3',
 'm.0f94t',
 'm.01rjgp',
 'm.0bp145p',
 'm.0377l',
 'm.03rj0',
 'm.043g7l',
 'm.0kprtg',
 'm.0nbgf',
 'm.09c7w0',
 'm.0l09whv',
 'm.013kcv',
 'm.02h40lc',
 'm.03zbp0y',
 'm.02d6yt',
 'm.0dwc3wh',
 'm.01_7r6',
 'm.0bvc88s',
 'm.01clyr',
 'm.02zsn',
 'm.0jsg3d',
 'm.01www',
 'm.

In [None]:
# Report the size of each parsed column; all three are expected to match.
for label, column in (('Len heads:', heads), ('Len preds:', preds), ('Len tails:', tails)):
    print(label, len(column))

Len heads: 394
Len preds: 394
Len tails: 394


In [None]:
# Resolve every tail MID to a Wikidata entity URI via the SPARQL endpoint (network-bound).
answers = fb_to_wikidata(tails)

In [None]:
# Show the resolved URIs; a None entry means no item was found or the query failed.
answers

[[None],
 ['http://www.wikidata.org/entity/Q51797'],
 ['http://www.wikidata.org/entity/Q34467'],
 ['http://www.wikidata.org/entity/Q482994'],
 [None],
 ['http://www.wikidata.org/entity/Q43445'],
 ['http://www.wikidata.org/entity/Q1860'],
 [None],
 [None],
 ['http://www.wikidata.org/entity/Q212431'],
 ['http://www.wikidata.org/entity/Q30'],
 ['http://www.wikidata.org/entity/Q712624'],
 ['http://www.wikidata.org/entity/Q226730'],
 ['http://www.wikidata.org/entity/Q15180'],
 ['http://www.wikidata.org/entity/Q214132'],
 ['http://www.wikidata.org/entity/Q739'],
 ['http://www.wikidata.org/entity/Q1297'],
 ['http://www.wikidata.org/entity/Q17'],
 ['http://www.wikidata.org/entity/Q30'],
 ['http://www.wikidata.org/entity/Q61'],
 ['http://www.wikidata.org/entity/Q30'],
 [None],
 ['http://www.wikidata.org/entity/Q482994'],
 ['http://www.wikidata.org/entity/Q28856068'],
 [None],
 ['http://www.wikidata.org/entity/Q7039258'],
 ['http://www.wikidata.org/entity/Q145'],
 ['http://www.wikidata.org/entit

In [None]:
# Download the question file; fail loudly on a stalled connection or an HTTP
# error instead of trying to parse an error page as JSON.
response = requests.get(test_url, timeout=30)
response.raise_for_status()
data_test = response.json()

In [None]:
# Peek at the first downloaded question, including its answers and SPARQL query.
data_test['questions'][0]

{'aggregation': False,
 'answers': [{'head': {'vars': ['result']},
   'results': {'bindings': [{'result': {'type': 'uri',
       'value': 'http://www.wikidata.org/entity/Q42299'}}]}}],
 'id': 0,
 'query': {'sparql': 'PREFIX bd: <http://www.bigdata.com/rdf#> PREFIX dct: <http://purl.org/dc/terms/> PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX p: <http://www.wikidata.org/prop/> PREFIX pq: <http://www.wikidata.org/prop/qualifier/> PREFIX ps: <http://www.wikidata.org/prop/statement/> PREFIX psn: <http://www.wikidata.org/prop/statement/value-normalized/> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX wd: <http://www.wikidata.org/entity/> PREFIX wds: <http://www.wikidata.org/entity/statement/> PREFIX wdt: <http://www.wikidata.org/prop/direct/> PREFIX wdv: <http://www.wikidata.org/value/> PREFIX wikibase: <http://wikiba.se/ontology#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> SELECT DISTINCT ?result WHERE { wd:Q761383 wdt:P138 ?result. }'},
 'question': [{'langu

In [None]:
# Remove the reference fields so that only 'id' and 'question' remain.
# pop(..., None) keeps this cell idempotent: re-running it after the keys are
# already gone no longer raises KeyError.
for item in data_test['questions']:
    for key in ('answers', 'aggregation', 'query'):
        item.pop(key, None)

In [None]:
# Confirm that the first question now holds only its id and question texts.
data_test['questions'][0]

{'id': 0,
 'question': [{'language': 'en',
   'string': 'After whom is the Riemannian geometry named?'},
  {'language': 'zh', 'string': '黎曼几何是以谁命名的？'},
  {'language': 'de',
   'string': 'Nach wem ist die Riemannsche Geometrie benannt?'},
  {'language': 'ru', 'string': 'В честь кого названа риманова геометрия ?'}]}

In [None]:
# Attach the predicted answer to each question in the QALD answer layout.
# answers[i] is the list of URIs (or None) resolved for question i; only its
# first entry is used. When that entry is None the question gets an empty
# bindings list rather than a 'uri' binding whose value is None.
for i, item in enumerate(data_test['questions']):
    first_uri = answers[i][0]
    if first_uri is None:
        bindings = []
    else:
        bindings = [{"result": {"type": "uri", "value": first_uri}}]
    item['aggregation'] = False
    item['answers'] = [{"head": {"vars": ["result"]},
                        "results": {"bindings": bindings}}]

In [None]:
# Check the answer structure that was just attached to the first question.
data_test['questions'][0]

{'aggregation': False,
 'answers': [{'head': {'vars': ['result']},
   'results': {'bindings': [{'result': {'type': 'uri', 'value': None}}]}}],
 'id': 0,
 'question': [{'language': 'en',
   'string': 'After whom is the Riemannian geometry named?'},
  {'language': 'zh', 'string': '黎曼几何是以谁命名的？'},
  {'language': 'de',
   'string': 'Nach wem ist die Riemannsche Geometrie benannt?'},
  {'language': 'ru', 'string': 'В честь кого названа риманова геометрия ?'}]}

In [None]:
# Persist the questions together with their predicted answers as JSON.
with open("prediction.json", "w") as prediction_file:
    prediction_file.write(json.dumps(data_test))

# Process Predictions (Wikidata)

In [None]:
# Load one answer per line; a blank line is stored as a single space so the
# list stays aligned with the questions by position.
answers = []
with open('zh_id_answers.txt', 'r') as answer_file:
    answers.extend(raw_line.strip() or ' ' for raw_line in answer_file)

In [None]:
# Show the loaded answers: each entry is the stripped line text, or ' ' for a blank line.
answers

['http://www.wikidata.org/entity/Q42299',
 'http://www.wikidata.org/entity/Q667465',
 'http://www.wikidata.org/entity/Q1676929',
 ' ',
 ' ',
 'http://www.wikidata.org/entity/Q227965',
 ' ',
 ' ',
 '1',
 ' ',
 ' ',
 'http://www.wikidata.org/entity/Q65837091',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 'http://www.wikidata.org/entity/Q121180',
 ' ',
 'http://www.wikidata.org/entity/Q4661471',
 ' ',
 ' ',
 ' ',
 '1',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 '64858754',
 ' ',
 'http://www.wikidata.org/entity/Q190076',
 ' ',
 '1',
 ' ',
 ' ',
 ' ',
 '1',
 ' ',
 ' ',
 ' ',
 'http://www.wikidata.org/entity/Q5237440',
 ' ',
 '1',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 '1',
 '1',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 '1',
 '1',
 '1',
 '1',
 '3',
 '1',
 '1',
 '1',
 ' ',
 ' ',
 ' ',
 '1',
 '1',
 'http://www.wikidata.org/entity/Q1280917',
 '1',
 ' ',
 ' ',
 ' ',
 '1',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 '2',
 ' ',
 ' ',
 '1',
 '1',
 ' ',
 ' ',
 ' ',
 '2',
 ' ',
 ' ',
 ' ',
 '3',
 ' ',
 ' ',
 '1',
 ' ',
 '1',


In [None]:
# Re-inspect the first question before the Wikidata answers are merged in.
# NOTE(review): the saved output of this cell shows a URI value, while the
# saved output for the same question in the Freebase section shows None. The
# outputs may stem from different runs or out-of-order execution; confirm
# with Restart & Run All.
data_test['questions'][0]

{'aggregation': False,
 'answers': [{'head': {'vars': ['result']},
   'results': {'bindings': [{'result': {'type': 'uri',
       'value': 'http://www.wikidata.org/entity/Q15416'}}]}}],
 'id': 0,
 'question': [{'language': 'en',
   'string': 'After whom is the Riemannian geometry named?'},
  {'language': 'zh', 'string': '黎曼几何是以谁命名的？'},
  {'language': 'de',
   'string': 'Nach wem ist die Riemannsche Geometrie benannt?'},
  {'language': 'ru', 'string': 'В честь кого названа риманова геометрия ?'}]}

In [None]:
# Overlay the answers read from the answer file onto the questions.
# answers[i] is a string: blank (no answer), a purely numeric string, or
# anything else, which is treated as an entity URI.
#   * blank   -> the question keeps whatever answers it already carries
#   * numeric -> emitted as an xsd:integer literal
#   * other   -> emitted as a URI
# The whole string is used as the value. Indexing it with [0] would yield only
# its first character, and the numeric case must be tested before the URI case
# or it is never reached.
for i, item in enumerate(data_test['questions']):
    answer = answers[i].strip()
    if not answer:
        continue
    if answer.isnumeric():
        result = {"datatype": "http://www.w3.org/2001/XMLSchema#integer", "type": "literal", "value": answer}
    else:
        result = {"type": "uri", "value": answer}
    item['aggregation'] = False
    item['answers'] = [{"head": {"vars": ["result"]},
                        "results": {"bindings": [result]}}]

In [None]:
# Spot-check question 14 after the merge; answers[14] is blank in the list shown above.
data_test['questions'][14]

{'aggregation': False,
 'answers': [{'head': {'vars': ['result']},
   'results': {'bindings': [{'result': {'type': 'uri', 'value': None}}]}}],
 'id': 14,
 'question': [{'language': 'en',
   'string': 'How many different presidents of Russia have there been that took the position after 1990?'},
  {'language': 'zh', 'string': '在1990年后俄罗斯有多少任总统？'},
  {'language': 'de',
   'string': 'Wieviele unterschiedliche Präsident:innen Russlands haben seit 1990 das Amt angetreten?'},
  {'language': 'ru',
   'string': 'Сколько было разных президентов России, занимавших этот пост после 1990 года?'}]}

In [None]:
# Persist the merged predictions as JSON.
with open("zh_ensemble_prediction_3.json", "w") as ensemble_file:
    ensemble_file.write(json.dumps(data_test))