In [1]:
import mkwikidata, pprint
import pandas as pd

In [9]:
# Query 1: children whose father (P22) is J. S. Bach (wd:Q1339), whose mother
# (P25) is Maria Barbara Bach (wd:Q57487), and whose occupations (P106) include
# both composer (wd:Q36834) and pianist (wd:Q486748).
query = """
SELECT ?child ?childLabel
WHERE
{
    # child has parent 'Bach'
    ?child wdt:P22 wd:Q1339;
           wdt:P25 wd:Q57487;
           wdt:P106 wd:Q36834,
                   wd:Q486748.
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""
# Run the query through mkwikidata, then flatten the JSON bindings so that every
# attribute of a variable becomes a dotted column such as `child.value`.
qr = mkwikidata.run_query(query)
qr_df = pd.json_normalize(data=qr['results']['bindings'])
qr_df.head(5)

Unnamed: 0,child.type,child.value,childLabel.xml:lang,childLabel.type,childLabel.value
0,uri,http://www.wikidata.org/entity/Q107277,en,literal,Wilhelm Friedemann Bach


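Common pattern: inside a `WHERE` block, `;` starts another predicate–object pair for the same subject, `,` adds another object for the same subject and predicate, and `.` ends the statement: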

```
SELECT ?s1 ?s2 ?s3
WHERE
{
  ?s1 p1 o1;
      p2 o2;
      p3 o31, o32, o33.
  ?s2 p4 o41, o42.
  ?s3 p5 o5;
      p6 o6.
}
```

In [3]:
# Query 2: books / literary works that have a narrative location which is a
# city, together with the English labels of the work and the city and the
# city's coordinates. The result is kept in `query_result` for later cells.
query = """
SELECT ?label ?placelabel ?coord  # three variables
WHERE {
    # here we set ?type as a group of items
    VALUES ?type {wd:Q571 wd:Q7725634}  # book or literary work
    ?item wdt:P31 ?type.  # item is of type ?type - instance of ?type
    ?item rdfs:label ?label FILTER(lang(?label) = "en").  # item has label ?label
    ?item wdt:P840 ?place.  # item has narrative location ?place
    ?place wdt:P31/wdt:P279* wd:Q515. # ?place is an instance of city (wd:Q515) or of any subclass of city
    ?place wdt:P625 ?coord .  # has coordinate location ?coord
    ?place rdfs:label ?placelabel FILTER(lang(?placelabel) = "en").
}
"""

# No LIMIT is set, so this returns every matching row (thousands of bindings).
query_result = mkwikidata.run_query(query)

In [4]:
# Flatten the second query's bindings: one row per solution, one dotted column
# per variable attribute (e.g. `coord.value`, `label.xml:lang`).
qr_df2 = pd.json_normalize(data=query_result['results']['bindings'])
qr_df2.head(5)

Unnamed: 0,coord.datatype,coord.type,coord.value,label.xml:lang,label.type,label.value,placelabel.xml:lang,placelabel.type,placelabel.value
0,http://www.opengis.net/ont/geosparql#wktLiteral,literal,Point(-122.416388888 37.7775),en,literal,Zodiac,en,literal,San Francisco
1,http://www.opengis.net/ont/geosparql#wktLiteral,literal,Point(-122.416388888 37.7775),en,literal,American Gods,en,literal,San Francisco
2,http://www.opengis.net/ont/geosparql#wktLiteral,literal,Point(11.254166666 43.771388888),en,literal,The Decameron,en,literal,Florence
3,http://www.opengis.net/ont/geosparql#wktLiteral,literal,Point(11.254166666 43.771388888),en,literal,A Room with a View,en,literal,Florence
4,http://www.opengis.net/ont/geosparql#wktLiteral,literal,Point(11.254166666 43.771388888),en,literal,Day 8 Tale 5 of the Decameron,en,literal,Florence


In [5]:
# Show only the value columns; the `.type` / `.xml:lang` / `.datatype` columns
# are metadata about each binding.
qr_df2.loc[:, ['label.value', 'placelabel.value', 'coord.value']].head(5)

Unnamed: 0,label.value,placelabel.value,coord.value
0,Zodiac,San Francisco,Point(-122.416388888 37.7775)
1,American Gods,San Francisco,Point(-122.416388888 37.7775)
2,The Decameron,Florence,Point(11.254166666 43.771388888)
3,A Room with a View,Florence,Point(11.254166666 43.771388888)
4,Day 8 Tale 5 of the Decameron,Florence,Point(11.254166666 43.771388888)


This shows that SPARQL is a powerful language for retrieving data from Wikidata, such as
the coordinates of a city, the population of a country, or the date of birth of a person. Other sources sometimes charge for data such as city coordinates, but with SPARQL you can get it from Wikidata for free.

In [6]:
# (rows, columns) of the flattened result held in `qr_df2`.
qr_df2.shape

(13052, 9)

In [7]:
# Query 3: What is the most common narration country in Wikidata books and literature works?
#
# NOTE(review): COUNT(?country) counts solution rows, i.e. one per matching
# (item, type, place, country) combination. A work with several narrative
# locations in the same country is therefore counted more than once;
# COUNT(DISTINCT ?item) would count works instead. Left unchanged here.

query = """
SELECT ?countryLabel (COUNT(?country) AS ?count)
WHERE {
    VALUES ?type {wd:Q571 wd:Q7725634}  # book or literary work
    ?item wdt:P31 ?type.  # item is of type ?type - instance of ?type
    ?item wdt:P840 ?place.  # item has narrative location ?place
    ?place wdt:P17 ?country .  # ?place is in country ?country
    ?country rdfs:label ?countryLabel FILTER(lang(?countryLabel) = "en").
}
GROUP BY ?countryLabel
ORDER BY DESC(?count)
"""
qr3 = mkwikidata.run_query(query)
qr_df3 = pd.json_normalize(qr3['results']['bindings'])

# SPARQL JSON results carry every value as a string, so the aggregate arrives
# as text. Convert it so later sorting or arithmetic on the frame is numeric
# rather than lexicographic. The guard covers an empty result, which has no
# columns at all.
if 'count.value' in qr_df3.columns:
    qr_df3['count.value'] = pd.to_numeric(qr_df3['count.value'])

In [8]:
# Preview the first rows of the per-country counts (already sorted by the
# query's ORDER BY DESC(?count)).
qr_df3.head(5)

Unnamed: 0,countryLabel.xml:lang,countryLabel.type,countryLabel.value,count.datatype,count.type,count.value
0,en,literal,United States of America,http://www.w3.org/2001/XMLSchema#integer,literal,2484
1,en,literal,United Kingdom,http://www.w3.org/2001/XMLSchema#integer,literal,1576
2,en,literal,France,http://www.w3.org/2001/XMLSchema#integer,literal,1336
3,en,literal,Italy,http://www.w3.org/2001/XMLSchema#integer,literal,730
4,en,literal,Spain,http://www.w3.org/2001/XMLSchema#integer,literal,601


In [10]:
# Top ten countries: keep just the country name and its count.
qr_df3.loc[:, ['countryLabel.value', 'count.value']].head(10)

Unnamed: 0,countryLabel.value,count.value
0,United States of America,2484
1,United Kingdom,1576
2,France,1336
3,Italy,730
4,Spain,601
5,Denmark,540
6,Germany,365
7,Serbia,331
8,India,243
9,Sweden,211
