In [7]:
import requests
import json
from tqdm import tqdm

In [1]:

def get_predicate_description(property_id):
    # URL to the Wikidata API
    url = f"https://www.wikidata.org/w/api.php"
    
    # Parameters for the API request to get entity information
    params = {
        "action": "wbgetentities",
        "ids": property_id,
        "format": "json",
        "props": "descriptions",
        "languages": "en"  # Fetch descriptions in English
    }
    
    # Sending the request to the Wikidata API
    response = requests.get(url, params=params)
    data = response.json()
    
    # Extracting the description
    if 'entities' in data and property_id in data['entities']:
        entity = data['entities'][property_id]
        if 'descriptions' in entity and 'en' in entity['descriptions']:
            description = entity['descriptions']['en']['value']
            return description
        else:
            return "No description available in English."
    else:
        return "Property ID not found."

# Example usage
property_id = "P149"  # Example property ID for 'architectural style'
description = get_predicate_description(property_id)
print(f"Description for {property_id}: {description}")


Description for P149: architectural style of a structure


In [10]:
rubq_train = json.load(open("/Users/somov-od/Documents/phd/projects/open_kgqa/data/RuBQ/RuBQ_2.0/train_with_aliases.json", 'r'))
rubq_test = json.load(open("/Users/somov-od/Documents/phd/projects/open_kgqa/data/RuBQ/RuBQ_2.0/test_with_aliases.json", 'r'))
all_data = rubq_train + rubq_test

preds_set = set()
for sample in all_data:
    for pred in sample['question_props']:
        preds_set.add(pred)
        
len(preds_set)

preds_list = list(preds_set)

In [13]:
preds_list

['wdt:P403',
 'wdt:P559',
 'wdt:P88',
 'wdt:P1366',
 'wdt:P706',
 'pq:P585',
 'wdt:P942',
 'p:P69',
 'p:P26',
 'p:P2046',
 'wdt:P272',
 'p:P175',
 'p:P180',
 'wdt:P1344',
 'wdt:P840',
 'wdt:P156',
 'ps:P1448',
 'wdt:P607',
 'wdt:P1557',
 'wdt:P25',
 'wdt:P495',
 'p:P457',
 'p:P54',
 'wdt:P2048',
 'wdt:P570',
 'wdt:P2556',
 'wdt:P178',
 'wdt:P87',
 'wdt:P1191',
 'wdt:P6',
 'wdt:P2053',
 'p:P734',
 'wdt:P2575',
 'wdt:P575',
 'wdt:P735',
 'ps:P36',
 'wdt:P1449',
 'wdt:P69',
 'wdt:P3091',
 'wdt:P4511',
 'wdt:P1582',
 'pq:P2079',
 'wdt:P2101',
 'wdt:P97',
 'wdt:P937',
 'wdt:P1346',
 'wdt:P1303',
 'wdt:P1066',
 'wdt:P4969',
 'wdt:P176',
 'wdt:P26',
 'wdt:P4770',
 'wdt:P1876',
 'wdt:P569',
 'wdt:P2579',
 'wdt:P927',
 'wdt:P4934',
 'wdt:P582',
 'wdt:P2044',
 'wdt:P941',
 'wdt:P501',
 'wdt:P4647',
 'ps:P457',
 'p:P36',
 'wdt:P1249',
 'wdt:P112',
 'wdt:P463',
 'wdt:P460',
 'ps:P734',
 'wdt:P2583',
 'pq:P1545',
 'wdt:P1619',
 'wdt:P155',
 'wdt:P150',
 'wdt:P306',
 'wdt:P27',
 'wdt:P1029',
 'wdt:P

In [16]:
pred2descrs = dict()
for pred in tqdm(preds_list):
    pred_clean = pred.split(':')[-1]
    description = get_predicate_description(pred_clean)
    pred2descrs[pred] = description

100%|██████████████████████████████████████████████████████████████████████████████████████| 277/277 [01:30<00:00,  3.05it/s]


In [19]:
json.dump(pred2descrs, open("/Users/somov-od/Documents/phd/projects/open_kgqa/data/RuBQ/RuBQ_2.0/rubq_english_preds_description.json", 'w'), ensure_ascii=False, indent=4)

In [15]:
pred2descrs

{'wdt:P403': 'the body of water to which the watercourse drains',
 'wdt:P559': 'the feature (intersecting road, train station, etc.) at the end of a linear feature',
 'wdt:P88': 'person or organization that commissioned this work',
 'wdt:P1366': 'other person or item which continues the item by replacing it in its role. Use P156 ("followed by") if the item is not replaced nor identical, but adds to the series (e.g. books in a series).',
 'wdt:P706': 'located on the specified (geo)physical feature. Should not be used when the value is only political/administrative (P131) or a mountain range (P4552).'}