# Extraction of city data from Wikidata

__author__ = "Pierre Nugues"


A few imports

In [1]:
import requests
import pandas as pd

Setting presentation options

In [2]:
# Let pandas render large tables in full: many rows, many columns, wide lines.
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_columns', 80)
pd.set_option('display.width', 200)

The query

In [3]:
prefixes = '''PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>'''

query = '''
SELECT ?entity ?population 
WHERE 
{
  ?entity rdfs:label "Busan"@en .
  ?entity wdt:P1082 ?population.

}'''

We send the query to the Wikidata SPARQL endpoint and parse the JSON response

In [4]:
# SPARQL endpoint of the Wikidata query service.
url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'

# The timeout keeps the cell from hanging forever on a stalled connection.
# raise_for_status() reports an HTTP failure right here, instead of letting it
# surface later as a JSON decoding error or a KeyError on the parsed result.
response = requests.get(
    url,
    params={'query': prefixes + query, 'format': 'json'},
    timeout=60,
)
response.raise_for_status()
data = response.json()

In [5]:
# Inspect the parsed JSON response
data

{'head': {'vars': ['entity', 'population']},
 'results': {'bindings': [{'entity': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q16520'},
    'population': {'datatype': 'http://www.w3.org/2001/XMLSchema#decimal',
     'type': 'literal',
     'value': '3453198'}}]}}

We collect the results in a DataFrame and display it

In [6]:
# One record per result binding: the entity URI and its population value.
# A binding without a 'population' entry yields None instead of raising.
cities = [
    {
        'id': binding['entity']['value'],
        'value': binding.get('population', {}).get('value'),
    }
    for binding in data['results']['bindings']
]

df = pd.DataFrame(cities)

In [7]:
# Number of rows in the table (one per result binding)
len(df)

1

In [8]:
# Display the resulting table
df

Unnamed: 0,id,value
0,http://www.wikidata.org/entity/Q16520,3453198
