This notebook queries the Wikidata SPARQL query service for relational knowledge and saves it as JSON that can be read back by `dataclasses_json` as `Relation` objects. The focus is on symmetric relations such as `married` or `is-sibling`; low-context relations are another area of interest and can be explored with the same cells below.

In [14]:
import json
import os
import sys

import pandas as pd
import requests

sys.path.append('..')  # make the repository root importable so `lre` resolves
from lre.data import Relation, RelationSample
!ls

A2Rel.ipynb        Wikidata2Rel.ipynb json               wikidata
RelBenchmark.ipynb example.json       text


In [11]:
# Wikidata Query Service SPARQL endpoint and the query sent to it.
# The query selects humans (P31 = Q5) together with their siblings (P3373) and
# asks the label service for human-readable labels; at most 100 rows are returned.
url = "https://query.wikidata.org/sparql"
query = """
SELECT ?person ?personLabel ?sibling ?siblingLabel WHERE {
  ?person wdt:P31 wd:Q5;    # Instance of human
          wdt:P3373 ?sibling.  # Sibling relationship
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 100
"""

# Wikimedia's User-Agent policy asks clients to identify themselves; generic
# library agents may be throttled or blocked. TODO: add contact details.
headers = {"User-Agent": "Wikidata2Rel-notebook/0.1 (research notebook; python-requests)"}

# Send the request to the Wikidata SPARQL endpoint.
# `requests` applies no timeout by default, so set one to keep a stalled
# connection from hanging the cell indefinitely.
response = requests.get(
    url,
    params={'query': query, 'format': 'json'},
    headers=headers,
    timeout=60,
)
# Fail loudly on HTTP errors (e.g. 429 rate limiting, 5xx) instead of letting
# `.json()` raise an unrelated decode error on an error page.
response.raise_for_status()
data = response.json()

In [18]:
# Name of the exported relation, the file it is written to, and its prompt templates.
relation_name = "person - sibling"
relation_path_json = "json/sym/person-sibling-r" + ".json"
prompts = ["The sibling of {} is"]

# Turn every SPARQL result row into a RelationSample.
# NOTE(review): the first argument comes from ?siblingLabel and the second from
# ?personLabel, i.e. the pair is flipped relative to the query's variable names.
# Presumably deliberate given the "-r" suffix of the output file -- confirm.
RelationSamples = [
    RelationSample(row['siblingLabel']['value'], row['personLabel']['value'])
    for row in data['results']['bindings']
]

# The same template list is used for both the regular and the "zs" prompts.
rel = Relation(
    name=relation_name,
    prompt_templates=prompts,
    prompt_templates_zs=prompts,
    samples=RelationSamples,
)

# Dict form of the relation; this is what gets dumped to JSON.
relation_json = rel.to_dict()

In [19]:
# Write the relation dict to `relation_path_json` as indented JSON.
# open(..., "w") does not create missing parent directories, so make sure the
# target directory exists before writing.
output_dir = os.path.dirname(relation_path_json)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Explicit encoding so the written file does not depend on the platform default.
with open(relation_path_json, "w", encoding="utf-8") as file:
    json.dump(relation_json, file, indent=4)