<a href="https://colab.research.google.com/github/otoperalias/miscellanea/blob/main/Wikidata_queries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Link
https://itnext.io/extracting-data-from-wikidata-using-sparql-and-python-59e0037996f


In [2]:
!pip install SPARQLWrapper

Collecting SPARQLWrapper
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Collecting rdflib>=6.1.1 (from SPARQLWrapper)
  Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m531.9/531.9 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting isodate<0.7.0,>=0.6.0 (from rdflib>=6.1.1->SPARQLWrapper)
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: isodate, rdflib, SPARQLWrapper
Successfully installed SPARQLWrapper-2.0.0 isodate-0.6.1 rdflib-7.0.0


In [4]:
import sys
import pandas as pd
from typing import List, Dict
from SPARQLWrapper import SPARQLWrapper, JSON

class WikiDataQueryResults:
    """
    A class that can be used to query data from Wikidata using SPARQL and return the results as a Pandas DataFrame or a list
    of values for a specific key.
    """
    def __init__(self, query: str):
        """
        Initializes the WikiDataQueryResults object with a SPARQL query string.
        :param query: A SPARQL query string.
        """
        self.user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
        self.endpoint_url = "https://query.wikidata.org/sparql"
        self.sparql = SPARQLWrapper(self.endpoint_url, agent=self.user_agent)
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)

    def __transform2dicts(self, results: List[Dict]) -> List[Dict]:
        """
        Helper function to transform SPARQL query results into a list of dictionaries.
        :param results: A list of query results returned by SPARQLWrapper.
        :return: A list of dictionaries, where each dictionary represents a result row and has keys corresponding to the
        variables in the SPARQL SELECT clause.
        """
        new_results = []
        for result in results:
            new_result = {}
            for key in result:
                new_result[key] = result[key]['value']
            new_results.append(new_result)
        return new_results

    def _load(self) -> List[Dict]:
        """
        Helper function that loads the data from Wikidata using the SPARQLWrapper library, and transforms the results into
        a list of dictionaries.
        :return: A list of dictionaries, where each dictionary represents a result row and has keys corresponding to the
        variables in the SPARQL SELECT clause.
        """
        results = self.sparql.queryAndConvert()['results']['bindings']
        results = self.__transform2dicts(results)
        return results

    def load_as_dataframe(self) -> pd.DataFrame:
        """
        Executes the SPARQL query and returns the results as a Pandas DataFrame.
        :return: A Pandas DataFrame representing the query results.
        """
        results = self._load()
        return pd.DataFrame.from_dict(results)

In [5]:
query = """
SELECT ?city ?cityLabel ?location ?locationLabel ?founding_date
WHERE {
  ?city wdt:P31/wdt:P279* wd:Q515.
  ?city wdt:P17 wd:Q30.
  ?city wdt:P625 ?location.
  ?city wdt:P571 ?founding_date.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""

In [6]:
data_extracter = WikiDataQueryResults(query)
df = data_extracter.load_as_dataframe()
df

                                    city         founding_date  \
0     http://www.wikidata.org/entity/Q62  1776-06-29T00:00:00Z   
1  http://www.wikidata.org/entity/Q16554  1858-11-22T00:00:00Z   
2  http://www.wikidata.org/entity/Q16568  1791-01-01T00:00:00Z   
3  http://www.wikidata.org/entity/Q23197  1779-01-01T00:00:00Z   
4  http://www.wikidata.org/entity/Q25330  1719-01-01T00:00:00Z   

                             location      cityLabel  \
0       Point(-122.416388888 37.7775)  San Francisco   
1  Point(-104.984722222 39.739166666)         Denver   
2          Point(-81.65 30.316666666)   Jacksonville   
3   Point(-86.774444444 36.162222222)      Nashville   
4   Point(-74.765833333 40.220277777)        Trenton   

                        locationLabel  
0       Point(-122.416388888 37.7775)  
1  Point(-104.984722222 39.739166666)  
2          Point(-81.65 30.316666666)  
3   Point(-86.774444444 36.162222222)  
4   Point(-74.765833333 40.220277777)  


In [7]:
df

Unnamed: 0,city,founding_date,location,cityLabel,locationLabel
0,http://www.wikidata.org/entity/Q62,1776-06-29T00:00:00Z,Point(-122.416388888 37.7775),San Francisco,Point(-122.416388888 37.7775)
1,http://www.wikidata.org/entity/Q16554,1858-11-22T00:00:00Z,Point(-104.984722222 39.739166666),Denver,Point(-104.984722222 39.739166666)
2,http://www.wikidata.org/entity/Q16568,1791-01-01T00:00:00Z,Point(-81.65 30.316666666),Jacksonville,Point(-81.65 30.316666666)
3,http://www.wikidata.org/entity/Q23197,1779-01-01T00:00:00Z,Point(-86.774444444 36.162222222),Nashville,Point(-86.774444444 36.162222222)
4,http://www.wikidata.org/entity/Q25330,1719-01-01T00:00:00Z,Point(-74.765833333 40.220277777),Trenton,Point(-74.765833333 40.220277777)
...,...,...,...,...,...
5568,http://www.wikidata.org/entity/Q244146,1891-01-01T00:00:00Z,Point(-82.764166666 27.973611111),Clearwater,Point(-82.764166666 27.973611111)
5569,http://www.wikidata.org/entity/Q323414,1969-10-10T00:00:00Z,Point(-118.739167 34.270833),Simi Valley,Point(-118.739167 34.270833)
5570,http://www.wikidata.org/entity/Q323432,1859-01-01T00:00:00Z,Point(-122.054166666 38.257777777),Fairfield,Point(-122.054166666 38.257777777)
5571,http://www.wikidata.org/entity/Q328941,1785-01-01T00:00:00Z,Point(-87.3594444 36.5297222),Clarksville,Point(-87.3594444 36.5297222)


In [54]:
query="""
SELECT ?item ?itemLabel ?occupationLabel ?place ?coord ?birthyear
WHERE {
    ?item wdt:P31 wd:Q5.
    ?item wdt:P19/wdt:P131* wd:Q5783.
    ?item wdt:P106 ?occupation.
    ?item wdt:P19 ?place.
    ?place wdt:P625 ?coord.
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],es". }
}
"""

This query selects individuals who:

Are instances of humans (Q5).
Were born in Andalusia (Q207).
Have an occupation (P106).
Have a date of birth (P569).
The results will include their name, occupation, date of birth, and birth location. Feel free to execute this query on the Wikidata SPARQL endpoint to explore the fascinating biographies of people from Andalusia! 🌟

1: Wikidata:SPARQL query service/queries/examples

In [55]:
data_extracter = WikiDataQueryResults(query)
df = data_extracter.load_as_dataframe()
df

Unnamed: 0,item,place,coord,itemLabel,occupationLabel
0,http://www.wikidata.org/entity/Q591166,http://www.wikidata.org/entity/Q95025,Point(-3.5 38.0),Curro Savoy,músico
1,http://www.wikidata.org/entity/Q591166,http://www.wikidata.org/entity/Q528530,Point(-4.0505603 38.0392362),Curro Savoy,músico
2,http://www.wikidata.org/entity/Q591166,http://www.wikidata.org/entity/Q95025,Point(-3.5 38.0),Curro Savoy,guitarrista
3,http://www.wikidata.org/entity/Q591166,http://www.wikidata.org/entity/Q528530,Point(-4.0505603 38.0392362),Curro Savoy,guitarrista
4,http://www.wikidata.org/entity/Q591166,http://www.wikidata.org/entity/Q95025,Point(-3.5 38.0),Curro Savoy,silbador
...,...,...,...,...,...
19928,http://www.wikidata.org/entity/Q466111,http://www.wikidata.org/entity/Q24011933,Point(-5.4782 36.97602),Melody,cantante
19929,http://www.wikidata.org/entity/Q466111,http://www.wikidata.org/entity/Q24011933,Point(-5.4782 36.97602),Melody,modelo
19930,http://www.wikidata.org/entity/Q20015654,http://www.wikidata.org/entity/Q5724461,Point(-3.409722222 37.825),Socorro Mármol Brís,poeta
19931,http://www.wikidata.org/entity/Q21054935,http://www.wikidata.org/entity/Q5724461,Point(-3.409722222 37.825),Francisco Reyes Martínez,político


In [None]:
SELECT DISTINCT ?persona ?nombrePersona ?ocupacion ?fechaNacimiento ?lugarNacimiento
WHERE {
  ?persona wdt:P31 wd:Q5 ;  # Selecciona instancias de seres humanos
          wdt:P19 wd:Q207 ;  # Nacidos en Andalucía (España)
          wdt:P106 ?ocupacion ;  # Ocupación
          wdt:P569 ?fechaNacimiento ;  # Fecha de nacimiento
          rdfs:label ?nombrePersona .  # Nombre de la persona
  ?ocupacion rdfs:label ?ocupacionLabel .  # Etiqueta de la ocupación
  ?persona wdt:P19 ?lugarNacimiento .  # Lugar de nacimiento
  ?lugarNacimiento rdfs:label ?lugarNacimientoLabel .  # Etiqueta del lugar de nacimiento
  FILTER(LANG(?nombrePersona) = "es")  # Solo etiquetas en español
  FILTER(LANG(?ocupacionLabel) = "es")  # Solo etiquetas de ocupación en español
  FILTER(LANG(?lugarNacimientoLabel) = "es")  # Solo etiquetas del lugar de nacimiento en español
}
ORDER BY ?nombrePersona