# Import Packages

In [1]:
!pip install sparqlwrapper



In [2]:
import time
from urllib.error import HTTPError

import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

In [3]:
def select(query, service='https://query.wikidata.org/sparql', max_retries=3, agent=None):
    """Run a SPARQL SELECT query and return its result bindings as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text.
    service : str
        URL of the SPARQL endpoint; defaults to the Wikidata Query Service.
    max_retries : int
        Number of additional attempts made after an HTTP 429 response
        before the error is re-raised.
    agent : str or None
        Optional User-Agent string for the request. When None, the
        SPARQLWrapper default is left in place.

    Returns
    -------
    pandas.DataFrame
        One row per result binding, flattened with ``pd.json_normalize``
        (columns look like ``person.value``, ``personLabel.value``).

    Raises
    ------
    urllib.error.HTTPError
        For any HTTP error other than 429, or for a 429 that persists
        after ``max_retries`` retries.
    """
    sparql = SPARQLWrapper(service, agent=agent) if agent else SPARQLWrapper(service)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    attempt = 0
    while True:
        try:
            result = sparql.query().convert()
            break
        except HTTPError as err:
            # Only the rate-limit response is worth retrying; everything else
            # (and a 429 that outlives the retry budget) goes to the caller.
            if err.code != 429 or attempt >= max_retries:
                raise
            retry_after = err.headers.get('Retry-After') if err.headers else None
            if retry_after and retry_after.strip().isdigit():
                # The server told us how many seconds to wait.
                delay = int(retry_after)
            else:
                # No usable hint: back off exponentially (5s, 10s, 20s, ...).
                delay = 5 * 2 ** attempt
            time.sleep(delay)
            attempt += 1

    return pd.json_normalize(result['results']['bindings'])

## 1.1 Olympians

In [4]:
# Olympians: every item that has a wdt:P3171 statement, with its English label.
# Only ?person and ?personLabel are projected; no sex/gender variable is bound
# in this query, so it is not selected here (see section 1.2 for that column).
query_string = """
SELECT
  ?person ?personLabel
WHERE
{
  ?person wdt:P3171 ?value .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
LIMIT 30000
"""
result = select(query_string)
result.to_csv("Olympians.csv")
result

Unnamed: 0,person.type,person.value,personLabel.xml:lang,personLabel.type,personLabel.value
0,uri,http://www.wikidata.org/entity/Q1681,en,literal,Abel Mutai
1,uri,http://www.wikidata.org/entity/Q1789,en,literal,Ashton Eaton
2,uri,http://www.wikidata.org/entity/Q1672,en,literal,Dejen Gebremeskel
3,uri,http://www.wikidata.org/entity/Q1712,en,literal,Erick Barrondo
4,uri,http://www.wikidata.org/entity/Q1956,en,literal,Hermann Maier
...,...,...,...,...,...
29658,uri,http://www.wikidata.org/entity/Q453445,en,literal,Shelley Olds
29659,uri,http://www.wikidata.org/entity/Q4968493,en,literal,Sara Mustonen
29660,uri,http://www.wikidata.org/entity/Q4350864,en,literal,Mayuko Hagiwara
29661,uri,http://www.wikidata.org/entity/Q2480355,en,literal,Joelle Numainville


## 1.2 Olympians with Sex or Gender

In [5]:
# Olympians (items with a wdt:P3171 statement) together with their wdt:P21 label.
# The P21 pattern is OPTIONAL, so people without that statement are still returned.
query_string = """
SELECT
  ?person ?personLabel ?sexgenderLabel
WHERE
{
  ?person wdt:P3171 ?value .
  OPTIONAL { ?person wdt:P21 ?sexgender . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
LIMIT 30000
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("Olympians_sex_or_gender.csv")
result

HTTPError: HTTP Error 429: Too Many Requests

## 1.3 Olympians with Sports

In [None]:
# Olympians with their English wdt:P106 labels folded into one comma-separated column.
# DISTINCT keeps a label from being repeated when the same person is matched by
# more than one solution row (e.g. several wdt:P3171 values).
# NOTE(review): the heading says "Sports" but the query reads wdt:P106 — confirm
# this is the intended property.
query_string = """
SELECT
  ?person ?personLabel (GROUP_CONCAT(DISTINCT ?sportLabel;SEPARATOR=', ') AS ?sports)
WHERE
{
  ?person wdt:P3171 ?value .
  OPTIONAL { ?person wdt:P106 ?sport .
    ?sport rdfs:label ?sportLabel . FILTER(LANG(?sportLabel)='en') }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?person ?personLabel
LIMIT 30000
"""
result = select(query_string)
result.to_csv("Olympians_sports.csv")
result

## 1.4 Olympians with All Demographic Categories

In [None]:
# Olympians with both optional attributes: the wdt:P21 label and the
# concatenated English wdt:P106 labels.
# DISTINCT keeps a label from being repeated when the same person/sex-gender
# group is matched by more than one solution row (e.g. several wdt:P3171 values).
# NOTE(review): the "sports" column is built from wdt:P106 — confirm this is
# the intended property.
query_string = """
SELECT
  ?person ?personLabel ?sexgenderLabel (GROUP_CONCAT(DISTINCT ?sportLabel;SEPARATOR=', ') AS ?sports)
WHERE
{
  ?person wdt:P3171 ?value .
  OPTIONAL { ?person wdt:P21 ?sexgender . }
  OPTIONAL { ?person wdt:P106 ?sport .
           ?sport rdfs:label ?sportLabel . FILTER(LANG(?sportLabel)='en') }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?person ?personLabel ?sexgenderLabel
LIMIT 30000
"""
result = select(query_string)
result.to_csv("Olympians_complete.csv")
result

## 2.1 People With Medical Conditions

In [None]:
# Items that are wdt:P31 wd:Q5 and have at least one wdt:P1050 statement.
# The English condition labels are de-duplicated and joined into one column per person.
query_string = """
SELECT ?person ?personLabel (GROUP_CONCAT(DISTINCT ?conditionLabel;SEPARATOR=', ') AS ?conditions) 
WHERE
{
  ?person wdt:P31 wd:Q5 .
  ?person wdt:P1050 ?condition .
  ?condition rdfs:label ?conditionLabel . FILTER(LANG(?conditionLabel)='en')
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?person ?personLabel
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("Medical_conditions.csv")
result

## 2.2 People With Medical Conditions and Sex or Gender

In [None]:
# Same population as section 2.1 (wdt:P31 wd:Q5 with a wdt:P1050 statement),
# extended with the optional wdt:P21 label; results are grouped by that label too.
query_string = """
SELECT 
  ?person ?personLabel ?sexgenderLabel (GROUP_CONCAT(DISTINCT ?conditionLabel;SEPARATOR=', ') AS ?conditions) 
WHERE
{
  ?person wdt:P31 wd:Q5 .
  ?person wdt:P1050 ?condition .
  ?condition rdfs:label ?conditionLabel . FILTER(LANG(?conditionLabel)='en')
  OPTIONAL { ?person wdt:P21 ?sexgender . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?person ?personLabel ?sexgenderLabel
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("Medical_conditions_sex_or_gender.csv")
result

## 2.3 People With Medical Conditions and Cause of Death

In [None]:
# People with a wdt:P1050 statement, plus their optional wdt:P509 values.
# Both sets of English labels are de-duplicated and concatenated per person.
query_string = """
SELECT ?person ?personLabel (GROUP_CONCAT(DISTINCT ?conditionLabel;SEPARATOR=', ') AS ?conditions) (GROUP_CONCAT(DISTINCT ?causedeathLabel;SEPARATOR=', ') AS ?causedeath)
WHERE
{
  ?person wdt:P31 wd:Q5 .
  ?person wdt:P1050 ?condition .
  ?condition rdfs:label ?conditionLabel . FILTER(LANG(?conditionLabel)='en')
  OPTIONAL { ?person wdt:P509 ?causedeath .
           ?causedeath rdfs:label ?causedeathLabel . FILTER(LANG(?causedeathLabel)='en') }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?person ?personLabel
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("Medical_conditions_cause_of_death.csv")
result

## 2.4 People With Sexually-Transmitted Infections

In [None]:
# People whose wdt:P1050 value is a subclass of wd:Q12198.
# The property path wdt:P279+ follows one or more subclass hops, matching the
# path used in section 2.5, so indirect subclasses are no longer missed.
query_string = """
SELECT ?person ?personLabel (GROUP_CONCAT(DISTINCT ?conditionLabel;SEPARATOR=', ') AS ?conditions) 
WHERE
{
  ?person wdt:P31 wd:Q5 .
  ?person wdt:P1050 ?condition .
  ?condition wdt:P279+ wd:Q12198 .
  ?condition rdfs:label ?conditionLabel . FILTER(LANG(?conditionLabel)='en')
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?person ?personLabel
"""

result = select(query_string)
result.to_csv("Medical_conditions_STIs.csv")
result

## 2.5 People With STIs, Sex or Gender, and Cause of Death

In [None]:
# People whose wdt:P1050 value reaches wd:Q12198 through one or more wdt:P279 hops,
# with the optional wdt:P21 label and the optional, concatenated wdt:P509 labels.
# Rows are ordered by ?personLabel.
query_string = """
SELECT ?person ?personLabel ?sexgenderLabel (GROUP_CONCAT(DISTINCT ?conditionLabel;SEPARATOR=', ') AS ?conditions) 
  (GROUP_CONCAT(DISTINCT ?causedeathLabel;SEPARATOR=', ') AS ?causedeath)
WHERE
{
  ?person wdt:P31 wd:Q5 .
  OPTIONAL { ?person wdt:P21 ?sexgender . }
  ?person wdt:P1050 ?condition .
  ?condition wdt:P279+ wd:Q12198 .
  ?condition rdfs:label ?conditionLabel . FILTER(LANG(?conditionLabel)='en')
  OPTIONAL { ?person wdt:P509 ?causedeath .
           ?causedeath rdfs:label ?causedeathLabel . FILTER(LANG(?causedeathLabel)='en') }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?person ?personLabel ?sexgenderLabel
ORDER BY ?personLabel
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("Medical_conditions_STIs_complete.csv")
result

## 3.1 University of Virginia Employees

In [None]:
# Items with a wdt:P108 statement pointing at wd:Q213439, restricted to those
# that have an English rdfs:label (items without one are dropped by the FILTER).
query_string = """
SELECT
  ?person ?personLabel
WHERE
{
  ?person wdt:P108 wd:Q213439 .
  ?person rdfs:label ?personLabel . FILTER(LANG(?personLabel)='en')
}
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("UVA_employees.csv")
result

## 3.2 UVA Employees with Sex or Gender

In [None]:
# wd:Q213439 employees (wdt:P108) with the English label of their wdt:P21 value.
# The P21 block is OPTIONAL, so employees without it are still listed.
query_string = """
SELECT
  ?person ?personLabel ?sexorgender
WHERE
{
  ?person wdt:P108 wd:Q213439 .
  ?person rdfs:label ?personLabel . FILTER(LANG(?personLabel)='en')
  OPTIONAL { ?person wdt:P21 ?value .
    ?value rdfs:label ?sexorgender . FILTER(LANG(?sexorgender)='en') }
}
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("UVA_employees_sex_or_gender.csv")
result

## 3.3 UVA Employees with Ethnicity

In [None]:
# wd:Q213439 employees (wdt:P108) with the English labels of their optional
# wdt:P172 values, de-duplicated and joined into one column per person.
# NOTE(review): ?ethnicity is used both as a pattern variable and as the
# GROUP_CONCAT alias; stricter SPARQL engines may reject that — confirm.
query_string = """
SELECT
  ?person ?personLabel (GROUP_CONCAT(DISTINCT ?ethnicityLabel;SEPARATOR=', ') AS ?ethnicity)
WHERE
{
  ?person wdt:P108 wd:Q213439 .
  ?person rdfs:label ?personLabel . FILTER(LANG(?personLabel)='en')
  OPTIONAL { ?person wdt:P172 ?ethnicity .
    ?ethnicity rdfs:label ?ethnicityLabel . FILTER(LANG(?ethnicityLabel)='en') }
}
GROUP BY ?person ?personLabel
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("UVA_employees_ethnicity.csv")
result

## 3.4 UVA Employees with Birthplace

In [None]:
# wd:Q213439 employees (wdt:P108) with the English label of their wdt:P19 value.
# The P19 block is OPTIONAL, so employees without it are still listed.
query_string = """
SELECT
  ?person ?personLabel ?birthplace
WHERE
{
  ?person wdt:P108 wd:Q213439 .
  ?person rdfs:label ?personLabel . FILTER(LANG(?personLabel)='en')
  OPTIONAL { ?person wdt:P19 ?value . 
    ?value rdfs:label ?birthplace . FILTER(LANG(?birthplace)='en') }
}
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("UVA_employees_birthplace.csv")
result

## 3.5 UVA Employees with Citizenship

In [None]:
# wd:Q213439 employees (wdt:P108) with the English labels of their optional
# wdt:P27 values, de-duplicated and joined into one column per person.
# NOTE(review): ?citizenship is used both as a pattern variable and as the
# GROUP_CONCAT alias; stricter SPARQL engines may reject that — confirm.
query_string = """
SELECT
  ?person ?personLabel (GROUP_CONCAT(DISTINCT ?citizenshipLabel;SEPARATOR=', ') AS ?citizenship)
WHERE
{
  ?person wdt:P108 wd:Q213439 .
  ?person rdfs:label ?personLabel . FILTER(LANG(?personLabel)='en')
  OPTIONAL { ?person wdt:P27 ?citizenship .
    ?citizenship rdfs:label ?citizenshipLabel . FILTER(LANG(?citizenshipLabel)='en') }
}
GROUP BY ?person ?personLabel
"""
result = select(query_string)
# Write to a citizenship-specific file so the birthplace CSV is not overwritten.
result.to_csv("UVA_employees_citizenship.csv")
result

## 3.6 UVA Employees with Religion

In [None]:
# wd:Q213439 employees (wdt:P108) with the English label of their wdt:P140 value.
# The P140 block is OPTIONAL, so employees without it are still listed.
query_string = """
SELECT
  ?person ?personLabel ?religion
WHERE
{
  ?person wdt:P108 wd:Q213439 .
  ?person rdfs:label ?personLabel . FILTER(LANG(?personLabel)='en')
  OPTIONAL { ?person wdt:P140 ?value .
    ?value rdfs:label ?religion . FILTER(LANG(?religion)='en') }
}
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("UVA_employees_religion.csv")
result

## 3.7 UVA Employees with Sexuality

In [None]:
# wd:Q213439 employees (wdt:P108) with the English label of their wdt:P91 value.
# The P91 block is OPTIONAL, so employees without it are still listed.
query_string = """
SELECT
  ?person ?personLabel ?sexuality
WHERE
{
  ?person wdt:P108 wd:Q213439 .
  ?person rdfs:label ?personLabel . FILTER(LANG(?personLabel)='en')
  OPTIONAL { ?person wdt:P91 ?value4 .
    ?value4 rdfs:label ?sexuality . FILTER(LANG(?sexuality)='en') }
}
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("UVA_employees_sexuality.csv")
result

## 3.8 UVA Employees with All Demographic Categories

In [None]:
# Combines sections 3.2-3.7 into one query over wd:Q213439 employees (wdt:P108).
# Every attribute block is OPTIONAL. The wdt:P172 and wdt:P27 labels are
# concatenated per group, while the wdt:P21, wdt:P19, wdt:P140 and wdt:P91
# labels are plain columns that are also part of the GROUP BY key, so a person
# with several values for one of them appears on several rows.
# NOTE(review): ?ethnicity and ?citizenship are used both as pattern variables
# and as GROUP_CONCAT aliases; stricter SPARQL engines may reject that — confirm.
query_string = """
SELECT
  ?person ?personLabel ?sexorgender (GROUP_CONCAT(DISTINCT ?ethnicityLabel;SEPARATOR=', ') AS ?ethnicity) 
  ?birthplace (GROUP_CONCAT(DISTINCT ?citizenshipLabel;SEPARATOR=', ') AS ?citizenship) ?religion 
  ?sexuality
WHERE
{
  ?person wdt:P108 wd:Q213439 .
  ?person rdfs:label ?personLabel . FILTER(LANG(?personLabel)='en')
  OPTIONAL { ?person wdt:P21 ?value .
           ?value rdfs:label ?sexorgender . FILTER(LANG(?sexorgender)='en') }
  OPTIONAL { ?person wdt:P172 ?ethnicity .
           ?ethnicity rdfs:label ?ethnicityLabel . FILTER(LANG(?ethnicityLabel)='en') }
  OPTIONAL { ?person wdt:P19 ?value2 . 
           ?value2 rdfs:label ?birthplace . FILTER(LANG(?birthplace)='en') }
  OPTIONAL { ?person wdt:P27 ?citizenship .
           ?citizenship rdfs:label ?citizenshipLabel . FILTER(LANG(?citizenshipLabel)='en') }
  OPTIONAL { ?person wdt:P140 ?value3 .
           ?value3 rdfs:label ?religion . FILTER(LANG(?religion)='en') }
  OPTIONAL { ?person wdt:P91 ?value4 .
           ?value4 rdfs:label ?sexuality . FILTER(LANG(?sexuality)='en') }
}
GROUP BY ?person ?personLabel ?sexorgender ?birthplace ?religion ?sexuality
"""

# Fetch, persist to CSV, then show the frame as the cell output.
result = select(query_string)
result.to_csv("UVA_employees_complete.csv")
result