# Adding Subclass Extraction to WDumper

This notebook provides code for enriching a WDumper specification file with subclasses. The main limitation of WDumper is that every subclass of a type has to be added to the spec file separately, which is very tedious to do by hand. Here we first fetch the QIDs of all subclasses of each class that is already present in the input spec file using a SPARQL query, and then add the corresponding JSON nodes to the existing WDumper spec file.

INPUT  : wdumper spec file with P31 hasValue QXX filters in it
OUTPUT : wdumper spec file with added P31 hasValue (subclasses of QXX) filters

In [1]:
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys, json, requests, copy
from SPARQLWrapper import SPARQLWrapper, JSON


# SPARQL endpoint that the subclass queries are sent to.
wikidataEndpoint = 'https://query.wikidata.org/sparql'
# Query template for str.format(): `{0}` is replaced with a prefixed class
# identifier (the caller passes 'wd:' + QID). The doubled braces `{{` / `}}`
# are str.format escapes that produce the literal `{` / `}` of the SPARQL
# WHERE block. `wdt:P279+` is a property path matching one or more P279
# ("subclass of") links, so both direct and indirect subclasses are returned
# in the ?subtypeQIDs variable.
queryTemplate = """
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?subtypeQIDs WHERE {{
    ?subtypeQIDs wdt:P279+ {0} .
}}

"""
def get_results(endpoint_url, query):
    """Run a SPARQL query against an endpoint and return the decoded JSON.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: SPARQL query text to execute.

    Returns:
        The value of ``sparql.query().convert()`` with the return format set
        to JSON (for a SELECT query, a dict with the bindings under
        ``['results']['bindings']``).
    """
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    # The User-Agent must be passed by keyword: the second positional
    # parameter of SPARQLWrapper is ``updateEndpoint``, so passing it
    # positionally leaves the default agent string in place and sets a bogus
    # update endpoint instead.
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

# Load the input spec. JSON text is UTF-8, so do not depend on the platform's
# default encoding.
with open("UKuniversitiesWithRQFS.json", encoding="utf-8") as datafile:
    data = json.load(datafile)

# All additions go into a deep copy, so the entity and property lists being
# iterated below are never modified while looping over them.
outData = copy.deepcopy(data)

for index, entity in enumerate(data['entities']):
    if entity['type'] != 'item':
        continue
    # Decided from the unmodified input entity: an entity whose only filter is
    # the P31 one gets a new sibling entity per subclass; otherwise the
    # subclass filters are appended to the same entity's property list.
    has_single_filter = len(entity['properties']) == 1
    # NOTE: the loop variable is deliberately not called `property`, which
    # would shadow the builtin for the rest of the module.
    for prop in entity['properties']:
        if prop['property'] != 'P31' or prop['type'] != 'entityid':
            continue
        query = queryTemplate.format('wd:' + prop['value'])
        results = get_results(wikidataEndpoint, query)
        for qid in results['results']['bindings']:
            # Bindings hold full entity URIs; keep only the bare QID.
            value = qid['subtypeQIDs']['value'].replace('http://www.wikidata.org/entity/', '')
            subclass_filter = {'type': 'entityid', 'rank': 'all', 'value': value, 'property': 'P31'}
            if has_single_filter:
                outData['entities'].append({'type': 'item', 'properties': [subclass_filter]})
            else:
                outData['entities'][index]['properties'].append(subclass_filter)

# Write the enriched spec next to the input file.
with open('UKuniversitiesWithRQFSandSubclasses.json', 'w', encoding='utf-8') as outfile:
    json.dump(outData, outfile)
