In [16]:

# question: what percentage of each country's population voted in the 2012 poll?

import requests, pydash, pandas, numpy, altair

# --- Stage 1: number of voters per country --------------------------------
# Query the SPARQL endpoint below for distinct (voter, voter country) pairs
# and tally how many voters each country label has.
url = 'http://138.197.181.117:8989/bigdata/sparql'

query = """
SELECT DISTINCT ?voter ?voterLabel  ?votercountry ?votercountryLabel WHERE {
?film wdt:P5 wd:Q1. ?film wdt:P6 ?voter. ?voter wdt:P9 ?votercountry .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }}"""

# A timeout keeps the cell from hanging forever on an unresponsive server.
r = requests.get(url, params={'format': 'json', 'query': query}, timeout=120)
# Fail here with the HTTP status instead of a confusing JSON/TypeError further down.
r.raise_for_status()
data = r.json()

# Reduce each result row to its country label: one list entry per distinct voter row.
data = pydash.get(data, 'results.bindings')
data = [pydash.get(x, 'votercountryLabel.value') for x in data]

# value_counts() yields one row per country, ordered by descending voter count.
index = pandas.Index(data)
thing = index.value_counts().reset_index()
thing.columns = ['country', 'voters']

# Rename the short labels used by this endpoint to the labels Wikidata uses,
# so the later merge on 'country' finds a population for them.
replacer = {'UK':'United Kingdom', 'US':'United States of America', 'China':"People's Republic of China"}
thing = thing.replace({'country': replacer})

# --- Stage 2: population per country, from Wikidata -----------------------
# Fetch every (country label, population) pair and keep the largest
# population figure reported for each country label.
url = 'https://query.wikidata.org/sparql'

query = """
SELECT DISTINCT ?country ?countryLabel ?population ?populationLabel  WHERE {
    ?country wdt:P31/wdt:P279* wd:Q6256 .
    ?country wdt:P1082 ?population .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }}"""

# A timeout keeps the cell from hanging forever on an unresponsive server.
r = requests.get(url, params={'format': 'json', 'query': query}, timeout=120)
# Fail here with the HTTP status instead of a confusing JSON/TypeError further down.
r.raise_for_status()
data = r.json()

data = pydash.get(data, 'results.bindings')
data = [(pydash.get(x, 'countryLabel.value'), pydash.get(x, 'population.value')) for x in data]
population = pandas.DataFrame(data, columns=['country', 'population'])

# The JSON results carry population as text. Sorting text is lexicographic
# ("9000" would rank above "66022273"), so convert to numbers first; otherwise
# drop_duplicates(keep='first') can keep the wrong figure for a country that
# has several population values.
population['population'] = pandas.to_numeric(population['population'], errors='coerce')
# Rows whose population could not be parsed are useless for the percentage.
population = population.dropna(subset=['population'])
population = population.sort_values(by='population', ascending=False)
population = population.drop_duplicates(subset='country', keep='first')

# --- Stage 3: voters as a percentage of population ------------------------
# Left join keeps every voter country; countries without a matching
# population label come back with a missing 'population'.
combination = pandas.merge(thing, population, on='country', how='left')

# Drop the unmatched countries. The explicit copy makes this an independent
# frame, so the column assignments below do not raise SettingWithCopyWarning.
combination = combination.dropna(subset=['population']).copy()

combination['voters'] = combination['voters'].astype('int64')
# to_numeric accepts both text and already-numeric populations, including
# text such as "1.5E7" that a direct astype('int64') would reject.
combination['population'] = pandas.to_numeric(combination['population']).astype('int64')
combination['%'] = (combination['voters'] / combination['population']) * 100

# NOTE(review): a line mark joins unrelated countries along a nominal axis;
# a bar mark may read better. Left unchanged to keep the rendered output.
line = altair.Chart(combination).mark_line(interpolate='linear').encode(x='country', y='%')
display(altair.layer(line).properties(width=1200, height=300))
combination.head(20)


Unnamed: 0,country,voters,population,%
0,United Kingdom,311,66022273,0.000471
1,United States of America,221,325145963,6.8e-05
2,Spain,65,46733038,0.000139
3,Germany,46,83149300,5.5e-05
4,France,44,66628000,6.6e-05
5,Australia,36,24511800,0.000147
6,Argentina,32,44938712,7.1e-05
7,Italy,24,60317000,4e-05
8,Canada,24,37894799,6.3e-05
9,Russia,20,146804372,1.4e-05
