In [1]:
import sys
import json
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint of the Wikidata Query Service; passed to get_results() for every query below.
endpoint_url = "https://query.wikidata.org/sparql"

In [2]:
def get_results(endpoint_url, query):
    """Send a SPARQL query to an endpoint and return the converted JSON response.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: Text of the SPARQL query to run.

    Returns:
        Whatever ``SPARQLWrapper(...).query().convert()`` yields when the JSON
        return format is selected (the cells below index it as
        ``results["results"]["bindings"]``).
    """
    # The User-Agent names the project, a contact address and the running
    # Python major.minor version.
    user_agent = "WikiLiteratureQuery/0.1 (IBL PAN; literary research using Wikidata; contact: nikodem.wolczuk@ibl.waw.pl) Python/%s.%s" % sys.version_info[:2]
    client = SPARQLWrapper(endpoint_url, agent=user_agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()

In [3]:
# Writers: entities matching the constraints below that have an official
# website (?officialWebsite) and a label in Polish ("pl").
# NOTE(review): the IDs presumably mean instance-of human (P31/Q5), occupation
# writer (P106/Q36180) and citizenship Poland (P27/Q36) — confirm on Wikidata.
# Rows are sorted on ?name, the variable the rdfs:label pattern actually binds.
# ?personLabel is not in the SELECT list and is bound by no pattern, so sorting
# on it would leave the rows in arbitrary order.
query = """SELECT ?person ?name ?officialWebsite
WHERE {
  ?person wdt:P31 wd:Q5;
          wdt:P106 wd:Q36180;
          wdt:P27 wd:Q36;
          wdt:P856 ?officialWebsite.
 
  ?person rdfs:label ?name.
  FILTER (LANG(?name) = "pl")
  
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "pl".
  }
}
ORDER BY ?name"""


In [4]:
# Run the SPARQL text currently held in `query` and keep the full response.
results = get_results(endpoint_url, query)

# Debug aid: uncomment to print every raw binding of the response.
# for result in results["results"]["bindings"]:
#     print(result)

In [7]:
# One row per binding: the 'value' of ?person, ?name and ?officialWebsite, in that order.
to_df = [
    tuple(e[field]['value'] for field in ('person', 'name', 'officialWebsite'))
    for e in results["results"]["bindings"]
]
df = pd.DataFrame(to_df, columns=['wikidata', 'label', 'website'])
# index=False keeps the DataFrame index out of the spreadsheet.
df.to_excel('ipbl_writers.xlsx', index=False)

In [16]:
# Dump the whole response held in `results` as indented JSON;
# ensure_ascii=False writes non-ASCII characters unescaped.
with open('ipbl_writers.json', mode='w', encoding='utf-8') as jfile:
    json.dump(
        results,
        jfile,
        ensure_ascii=False,
        indent=4,
    )

In [11]:
# Literary awards: items matching the constraints below that have an official
# website (?officialWebsite) and a label in Polish ("pl").
# NOTE(review): the IDs presumably mean instance-of literary award (P31/Q378427)
# and country Poland (P17/Q36) — confirm on Wikidata.
# ?awardLabel is bound explicitly via rdfs:label and limited to "pl" by the
# FILTER; the result is sorted on that label.
query = """SELECT ?award ?awardLabel ?officialWebsite
WHERE {
  ?award wdt:P31 wd:Q378427;  
         wdt:P17 wd:Q36; 
         wdt:P856 ?officialWebsite.         
         
  ?award rdfs:label ?awardLabel.
  FILTER(LANG(?awardLabel) = "pl")
  
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "pl".
  }
}
ORDER BY ?awardLabel"""

In [12]:
# Run the SPARQL text currently held in `query` and keep the full response.
results = get_results(endpoint_url, query)

# Debug aid: uncomment to print every raw binding of the response.
# for result in results["results"]["bindings"]:
#     print(result)


In [13]:
# One row per binding: the 'value' of ?award, ?awardLabel and ?officialWebsite, in that order.
to_df = [
    tuple(e[field]['value'] for field in ('award', 'awardLabel', 'officialWebsite'))
    for e in results["results"]["bindings"]
]
df = pd.DataFrame(to_df, columns=['wikidata', 'label', 'website'])
# index=False keeps the DataFrame index out of the spreadsheet.
df.to_excel('ipbl_awards.xlsx', index=False)

In [19]:
# Dump the whole response held in `results` as indented JSON;
# ensure_ascii=False writes non-ASCII characters unescaped.
with open('ipbl_awards.json', mode='w', encoding='utf-8') as jfile:
    json.dump(
        results,
        jfile,
        ensure_ascii=False,
        indent=4,
    )

In [20]:
# Literary competitions: items matching the constraints below that have an
# official website (?officialWebsite) and a label in Polish ("pl").
# NOTE(review): wd:Q16543246 is presumably the "literary competition" class and
# P17/Q36 presumably means country Poland — confirm the IDs on Wikidata.
# ?competitionLabel is bound explicitly via rdfs:label and limited to "pl" by
# the FILTER; the result is sorted on that label.
query = """SELECT ?competition ?competitionLabel ?officialWebsite
WHERE {
  ?competition wdt:P31 wd:Q16543246; 
         wdt:P17 wd:Q36; 
         wdt:P856 ?officialWebsite.         
         
  ?competition rdfs:label ?competitionLabel.
  FILTER(LANG(?competitionLabel) = "pl")
  
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "pl".
  }
}
ORDER BY ?competitionLabel"""

In [21]:
# Run the SPARQL text currently held in `query` and keep the full response.
results = get_results(endpoint_url, query)

# Debug aid: uncomment to print every raw binding of the response.
# for result in results["results"]["bindings"]:
#     print(result)


In [None]:
# One row per binding: the 'value' of ?competition, ?competitionLabel and ?officialWebsite, in that order.
to_df = [
    tuple(e[field]['value'] for field in ('competition', 'competitionLabel', 'officialWebsite'))
    for e in results["results"]["bindings"]
]
df = pd.DataFrame(to_df, columns=['wikidata', 'label', 'website'])
# index=False keeps the DataFrame index out of the spreadsheet.
df.to_excel('ipbl_competitions.xlsx', index=False)

In [22]:
# Dump the whole response held in `results` as indented JSON;
# ensure_ascii=False writes non-ASCII characters unescaped.
with open('ipbl_competitions.json', mode='w', encoding='utf-8') as jfile:
    json.dump(
        results,
        jfile,
        ensure_ascii=False,
        indent=4,
    )