Task 1

In [None]:
import requests
from datetime import datetime
from urllib.parse import unquote

# --- API endpoints -----------------------------------------------------------
WIKIDATA_API_ENDPOINT = "https://www.wikidata.org/w/api.php"
WIKIPEDIA_API_ENDPOINT = "https://en.wikipedia.org/w/api.php"

# Headers sent along with every request issued by this notebook.
HEADERS = {"User-Agent": "cw_query/2.0"}

# --- Request parameter templates --------------------------------------------
# Blank "ids"/"titles" entries are placeholders that are filled in per request.

# Search query (list=search) used by get_turing_award_recipients().
PARAMS_QUERY_SEARCH = dict(
    action="query",
    format="json",
    formatversion="latest",
    list="search",
    srsearch="haswbstatement:P166=Q185667",
    srlimit="max",
)

# Plain-text page extract query sent to the Wikipedia endpoint.
PARAMS_GETCONTENT = dict(
    action="query",
    format="json",
    titles="",
    prop="extracts",
    explaintext=True,
)

# wbgetentities template requesting English labels.
PARAMS_WBGETENTITIES_LABELS = dict(
    action="wbgetentities",
    format="json",
    ids="",
    sites="",
    props="labels",
    languages="en",
    sitefilter="enwiki",
    utf8=1,
    ascii=1,
    formatversion="latest",
)

# Same request as the labels template, but asking for sitelinks with URLs.
# Built as an independent dict, so changing one template never affects the other.
PARAMS_WBGETENTITIES_SITES = {**PARAMS_WBGETENTITIES_LABELS, "props": "sitelinks/urls"}

# wbgetentities template requesting an entity's claims.
PARAMS_WBGETENTITIES_CLAIMS = dict(
    action="wbgetentities",
    format="json",
    ids="",
    props="claims",
    languages="en",
    sitefilter="",
    formatversion="latest",
)

In [None]:
def get_turing_award_recipients(timeout=30):
    """Return the titles of all hits for the award-recipient search.

    Sends the ``PARAMS_QUERY_SEARCH`` query to the Wikidata API and collects
    the ``title`` of every search hit. The rest of the notebook passes these
    titles on as Wikidata entity ids.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        list[str]: One title per search hit, in the order the API returned them.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        KeyError: If the JSON payload has no ``query``/``search`` section.
    """
    search_response = requests.get(
        WIKIDATA_API_ENDPOINT,
        headers=HEADERS,
        params=PARAMS_QUERY_SEARCH,
        timeout=timeout,
    )
    # Surface HTTP failures directly instead of trying to parse an error page as JSON.
    search_response.raise_for_status()
    return [hit["title"] for hit in search_response.json()["query"]["search"]]

# Quick check of the search: as the last expression of the cell, the returned list is displayed.
get_turing_award_recipients()

Task 2

In [None]:
def get_wikipedia_content(entity_id, timeout=30):
    """Return the plain-text English Wikipedia article for a Wikidata entity.

    The entity's ``enwiki`` sitelink is looked up through the Wikidata
    ``wbgetentities`` API, then the page extract for that title is requested
    from the English Wikipedia API.

    Args:
        entity_id: Wikidata entity id, e.g. "Q92625".
        timeout: Seconds to wait for each of the two HTTP requests.

    Returns:
        str: The ``extract`` field of the first page in the Wikipedia response.

    Raises:
        KeyError: If the entity has no ``enwiki`` sitelink or the page entry
            carries no ``extract``.
        requests.HTTPError: If either API answers with an HTTP error status.
    """
    # Build a fresh parameter dict per request so the module-level templates
    # are never modified by a call (no state leaks between calls or cells).
    sitelink_params = {**PARAMS_WBGETENTITIES_SITES, "ids": entity_id}
    sitelink_response = requests.get(
        WIKIDATA_API_ENDPOINT, headers=HEADERS, params=sitelink_params, timeout=timeout
    )
    sitelink_response.raise_for_status()
    sitelink = sitelink_response.json()["entities"][entity_id]["sitelinks"]["enwiki"]

    # The sitelink carries the page title directly; no need to cut it out of the URL.
    content_params = {**PARAMS_GETCONTENT, "titles": sitelink["title"]}
    content_response = requests.get(
        WIKIPEDIA_API_ENDPOINT, headers=HEADERS, params=content_params, timeout=timeout
    )
    content_response.raise_for_status()

    # Only one title is requested, so the first (and only) page entry is the article.
    page = next(iter(content_response.json()["query"]["pages"].values()))
    return page["extract"]

# Example run: print the article text fetched for entity Q92625.
print(get_wikipedia_content("Q92625"))

Task 3

In [None]:
# Field name -> Wikidata property id used to look the field up in an entity's claims.
wiki_data_dict = dict(
    gender="P21",
    birth_date="P569",
    birth_city="P19",
    birth_country="P17",  # read from the birth city's claims, not the person's
    employer="P108",
    educated_at="P69",
)

def get_wikidata_label(entity_id, timeout=30):
    """Return the English label of a Wikidata entity.

    Args:
        entity_id: Wikidata entity id, e.g. "Q92625".
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        str: The ``value`` of the entity's ``en`` label.

    Raises:
        KeyError: If the response carries no English label for the entity.
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    # Copy the template instead of writing the id into the shared module-level
    # dict, so one call can never influence another.
    label_params = {**PARAMS_WBGETENTITIES_LABELS, "ids": entity_id}
    label_response = requests.get(
        WIKIDATA_API_ENDPOINT, headers=HEADERS, params=label_params, timeout=timeout
    )
    label_response.raise_for_status()

    # A single id is requested, so the first entity in the response is the one asked for.
    entity = next(iter(label_response.json()["entities"].values()))
    return entity["labels"]["en"]["value"]

def get_wikidata_claims(entity_id, timeout=30):
    """Return the claims of a Wikidata entity.

    Args:
        entity_id: Wikidata entity id, e.g. "Q92625".
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``claims`` section of the first entity in the ``wbgetentities``
        response (indexed by property id elsewhere in this notebook).

    Raises:
        KeyError: If the response has no ``entities`` or the entity has no ``claims``.
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    # Copy the template instead of writing the id into the shared module-level
    # dict, so one call can never influence another.
    claim_params = {**PARAMS_WBGETENTITIES_CLAIMS, "ids": entity_id}
    claim_response = requests.get(
        WIKIDATA_API_ENDPOINT, headers=HEADERS, params=claim_params, timeout=timeout
    )
    claim_response.raise_for_status()

    # A single id is requested, so the first entity in the response is the one asked for.
    entity = next(iter(claim_response.json()["entities"].values()))
    return entity["claims"]

# (strptime pattern, strftime pattern) pairs tried in order on a Wikidata time
# string such as "+1912-06-23T00:00:00Z". An unknown month or day appears as
# "00", which a plain %m/%d directive refuses to parse. The time part is
# spelled out as %H:%M:%S because %X depends on the active locale.
_WIKIDATA_TIME_FORMATS = (
    ("+%Y-%m-%dT%H:%M:%SZ", "%d %B %Y"),  # full date
    ("+%Y-00-00T%H:%M:%SZ", "%Y"),        # only the year is known
    ("+%Y-%m-00T%H:%M:%SZ", "%B %Y"),     # year and month are known
)


def _claim_entity_id(claim):
    """Return the entity id that a claim's main snak points to."""
    return claim["mainsnak"]["datavalue"]["value"]["id"]


def _format_wikidata_time(time_string):
    """Render a Wikidata time string for display, or None if no known layout fits."""
    for parse_pattern, display_pattern in _WIKIDATA_TIME_FORMATS:
        try:
            return datetime.strptime(time_string, parse_pattern).strftime(display_pattern)
        except ValueError:
            continue
    return None


def _claim_labels(claims, property_id):
    """Return the label(s) of a property's values: a str for one value, else a list."""
    labels = [get_wikidata_label(_claim_entity_id(claim)) for claim in claims[property_id]]
    return labels[0] if len(labels) == 1 else labels


def get_dict_values(entity_id):
    """Collect the profile fields of one award recipient.

    Each field is looked up independently; a field whose data is missing
    (signalled by ``KeyError`` somewhere along its lookup) is reported as
    ``None`` rather than aborting the whole record.

    Args:
        entity_id: Wikidata entity id of the recipient.

    Returns:
        tuple: ``(name, intro, gender, birth_date, birth_place, employer,
        education)`` where

        * ``name`` is the entity's English label,
        * ``intro`` is the article text up to the first triple newline,
        * ``gender`` is the label of the first gender claim,
        * ``birth_date`` is "DD Month YYYY", "Month YYYY" when the day is
          unknown, "YYYY" when only the year is known, or ``None`` when the
          claim is missing or its time string has an unrecognised layout,
        * ``birth_place`` is "<city>, <country>", the country being read from
          the birth city's own claims,
        * ``employer`` / ``education`` are a single label when there is exactly
          one claim and a list of labels otherwise.

    Raises:
        KeyError: If the claims of ``entity_id`` itself cannot be read.
    """
    claims = get_wikidata_claims(entity_id)

    try:
        name = get_wikidata_label(entity_id)
    except KeyError:
        name = None

    try:
        intro = get_wikipedia_content(entity_id).split("\n\n\n")[0]
    except KeyError:
        intro = None

    try:
        gender = get_wikidata_label(_claim_entity_id(claims[wiki_data_dict["gender"]][0]))
    except KeyError:
        gender = None

    try:
        birth_time = claims[wiki_data_dict["birth_date"]][0]["mainsnak"]["datavalue"]["value"]["time"]
    except KeyError:
        birth_date = None
    else:
        # Formatting cannot raise: layouts that fit no pattern come back as None
        # instead of escaping as an unhandled ValueError.
        birth_date = _format_wikidata_time(birth_time)

    try:
        birth_city_id = _claim_entity_id(claims[wiki_data_dict["birth_city"]][0])
        birth_city = get_wikidata_label(birth_city_id)
        birth_city_claims = get_wikidata_claims(birth_city_id)
        birth_country = get_wikidata_label(
            _claim_entity_id(birth_city_claims[wiki_data_dict["birth_country"]][0])
        )
        birth_place = "{}, {}".format(birth_city, birth_country)
    except KeyError:
        birth_place = None

    try:
        employer = _claim_labels(claims, wiki_data_dict["employer"])
    except KeyError:
        employer = None

    try:
        education = _claim_labels(claims, wiki_data_dict["educated_at"])
    except KeyError:
        education = None

    return name, intro, gender, birth_date, birth_place, employer, education


# Column-oriented table: one list per field, all kept the same length
# (entry i of every list belongs to the i-th recipient).
award_winners = {
    column: []
    for column in (
        "name", "intro", "gender", "birth_date",
        "birth_place", "employer", "educated_at",
    )
}

acm_award_winners = get_turing_award_recipients()
for entity_id in acm_award_winners:
    # Progress marker: every recipient needs several API round-trips.
    print(entity_id)
    name, intro, gender, birth_date, birth_place, employer, education = get_dict_values(entity_id)
    row = {
        "name": name,
        "intro": intro,
        "gender": gender,
        "birth_date": birth_date,
        "birth_place": birth_place,
        "employer": employer,
        "educated_at": education,
    }
    for column, value in row.items():
        award_winners[column].append(value)

print(award_winners)

Task 4

In [76]:
# get_dict_values() records None for a recipient without an English label;
# leave those out, because sorted() cannot order None against str.
for name in sorted(winner for winner in award_winners["name"] if winner is not None):
    print(name)


Adi Shamir
Alan Kay
Alan Perlis
Alfred Aho
Allen Newell
Amir Pnueli
Andrew Yao
Barbara Liskov
Bob Kahn
Butler Lampson
Charles Bachman
Charles P. Thacker
Dana Scott
David A. Patterson
Dennis M. Ritchie
Donald Knuth
Douglas Engelbart
E. Allen Emerson
Edgar F. Codd
Edmund M. Clarke
Edsger W. Dijkstra
Edward Feigenbaum
Edwin Catmull
Fernando J. Corbató
Frances E. Allen
Fred Brooks
Geoffrey Hinton
Herbert Simon
Iosif Sifakis
Ivan Sutherland
Jack Dongarra
James H. Wilkinson
Jeffrey David Ullman
Jim Gray
John Backus
John Cocke
John Edward Hopcroft
John L. Hennessy
John McCarthy
Judea Pearl
Juris Hartmanis
Ken Thompson
Kenneth E. Iverson
Kristen Nygaard
Leonard Adleman
Leslie Lamport
Leslie Valiant
Manuel Blum
Martin Edward Hellman
Marvin Minsky
Maurice Wilkes
Michael O. Rabin
Michael Stonebraker
Niklaus Wirth
Ole-Johan Dahl
Pat Hanrahan
Peter Naur
Raj Reddy
Richard E. Stearns
Richard Hamming
Richard M. Karp
Robert Tarjan
Robert W. Floyd
Robin Milner
Ron Rivest
Shafrira Goldwasser
Silvio Mical