In [4]:
import csv, os, re
import pandas as pd
from collections import defaultdict as ddict
from SPARQLWrapper import SPARQLWrapper2, SPARQLWrapper, JSON


def query_wd_from_category(category: str, lang: str, qid: str = "unk") -> None:
    """
    Save the labels and aliases of all Wikidata items of a class to a text file.

    category = the name of the semantic class as labeled in Wikidata
    lang = the language ISO code
    qid = the ID of the class in Wikidata (if known)

    When qid is left at "unk", the class is first looked up by its label
    (`category`) in language `lang`, and the first Q-item found is used.
    The items linked to that class through wdt:P31 (Wikidata "instance of")
    are then queried for their labels and aliases in `lang`.

    Output: `category_<LANG>/<LANG>_<category>.txt` (spaces and slashes in the
    category are replaced by underscores), one name per line: first the labels,
    ordered by descending sitelink count, then the aliases. The directory is
    created if it does not exist. Nothing is written when no label is found.

    The function returns None. Any error (network, malformed response, file
    system) is printed rather than raised, so a failing category does not
    interrupt a batch of calls.
    """

    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setReturnFormat(JSON)

    try:
        _dir = f"category_{lang.upper()}"
        if not os.path.isdir(_dir):
            os.makedirs(_dir)
            print(f"Directory '{_dir}' created")

        if qid == "unk":
            # Escape backslashes and double quotes so that a label such as
            # 'the "big" one' cannot terminate the SPARQL string literal early.
            safe_label = category.replace("\\", "\\\\").replace('"', '\\"')
            cat_query = f'SELECT DISTINCT ?item WHERE {{ ?item rdfs:label "{safe_label}"@{lang} . }}'
            sparql.setQuery(cat_query)
            results = sparql.queryAndConvert()["results"]["bindings"]
            qid = pick_entity(results)
            if not qid:
                # Without this guard the next query would look for instances
                # of the non-existent entity "wd:None".
                print(f"No Wikidata entity found for label '{category}' in language '{lang}'")
                return

        ent_query = f'SELECT DISTINCT ?label WHERE{{ ?item wdt:P31 wd:{qid} . \
        ?item rdfs:label ?label FILTER (langMatches( lang(?label), "{lang.upper()}" ) ) \
        ?item wikibase:sitelinks ?sitelinks . \
        }} ORDER BY DESC(?sitelinks)'
        sparql.setQuery(ent_query)
        result = sparql.queryAndConvert()["results"]["bindings"]

        if not result:
            # No labels means no file is written, so the alias query is skipped.
            print(f"No results found for {qid}")
            return

        # NOTE(review): ?sitelinks is neither grouped nor aggregated here, so the
        # ORDER BY probably has no effect on the alias rows - confirm before
        # relying on their order.
        alias_query = f'SELECT  (GROUP_CONCAT(?alias;separator=" | ") AS ?aliases) WHERE {{?item wdt:P31 wd:{qid} .\
                             ?item rdfs:label ?label FILTER (LANG(?label) = "{lang}")  .   \
                             ?item skos:altLabel ?alias FILTER (LANG(?alias) = "{lang}" ) . \
                              ?item wikibase:sitelinks ?sitelinks . \
                              }} GROUP BY ?label ORDER BY ASC(?sitelinks)'

        sparql.setQuery(alias_query)
        a_result = sparql.queryAndConvert()["results"]["bindings"]

        # A "/" in the label would otherwise be read as a sub-directory.
        category_label = category.replace(" ", "_").replace("/", "_")
        newfile = f"{lang.upper()}_{category_label}.txt"
        with open(os.path.join(_dir, newfile), "w", encoding="utf-8") as f:
            for item in result:
                f.write(item["label"]["value"] + "\n")
            for item in a_result:
                # Each row holds all aliases of one label joined by " | ";
                # a row with a single alias splits into a one-element list.
                for alias in item["aliases"]["value"].split(" | "):
                    f.write(alias + "\n")

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and carry on.
        print(e)
         

            
def pick_entity(results):
    """
    Return the ID of the first Q-item found in a list of SPARQL bindings.

    results = an iterable of rows, each exposing its value as row["item"]["value"]

    The ID of a row is the text after the last "/" of its value. The first
    ID that starts with "Q" is returned; if there is none (or `results` is
    empty) the function returns None.
    """
    # Lazy on purpose: rows after the first match are never inspected.
    candidate_ids = (row["item"]["value"].rsplit("/", 1)[-1] for row in results)
    return next((cid for cid in candidate_ids if cid.startswith("Q")), None)
        


In [None]:
# Export the Italian ("it") labels and aliases of the items in class Q7748,
# stored under the category name "diritto".
query_wd_from_category(category="diritto", lang="it", qid="Q7748")