In [25]:
%pip install --quiet everypolitician bs4 pandas approx_dates

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
from everypolitician import EveryPolitician
import json
from bs4 import BeautifulSoup
import requests
import pandas as pd
ep = EveryPolitician()


try:
    with open("countries.json", "r") as file:
        countryJSON = json.load(file)
        # print(countryJSON)
except json.JSONDecodeError as e:
    print(f"Failed to decode JSON: {e}")
except Exception as e:
    print(f"An error occurred: {e}")




[{'key': '', 'doc_count': 632}, {'key': 'Australia', 'doc_count': 145}, {'key': 'Indonesia', 'doc_count': 95}, {'key': 'China', 'doc_count': 94}, {'key': 'Malaysia', 'doc_count': 74}, {'key': 'India', 'doc_count': 71}, {'key': 'Vietnam', 'doc_count': 42}, {'key': 'Singapore', 'doc_count': 31}, {'key': 'Japan', 'doc_count': 28}, {'key': 'Qatar', 'doc_count': 24}]


In [4]:
countryList = [country['key'] for country in countryJSON]
countryList = countryList[1:] #remove the first element because its empty
print(countryList)




['Australia', 'Indonesia', 'China', 'Malaysia', 'India', 'Vietnam', 'Singapore', 'Japan', 'Qatar']


In [29]:
import os
from approx_dates.models import ApproxDate
from datetime import datetime
# get the list of politicians for each country
politicians_by_country = {}
politicians_wikidata = {}
# Custom encoder function for JSON serialization
def custom_encoder(obj):
    if isinstance(obj, ApproxDate):
        try:
            return str(datetime.strptime(obj.source_string, '%Y-%m-%d').isoformat())
        except ValueError:
            # Handle cases where the date is not in the expected format
            # Assuming the date might be just a year or a year and month
            if len(obj.source_string) == 4:  # Only year is provided
                return str(datetime.strptime(obj.source_string, '%Y').isoformat())
            elif len(obj.source_string) == 7:  # Year and month are provided
                return str(datetime.strptime(obj.source_string, '%Y-%m').isoformat())
            else:
                raise ValueError(f"Date format of {obj.source_string} is not supported")
    raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")

if not os.path.exists('politicians_by_country.json'):
    for country in countryList:
        try:
            country_legislature = ep.country(country).lower_house()
            politicians = country_legislature.popolo().persons
            politicians_by_country[country] = {}
            for politician in politicians:
                wikidata_id = politician.wikidata
                politicians_by_country[country][wikidata_id] = {
                    'name': politician.name,
                    'politicalCountry': country,
                    'image': politician.image,
                    'alternate_names': politician.other_names,
                    'gender': politician.gender,
                    'email': politician.email,
                    'birthDate': politician.birth_date,
                    'deathDate': politician.death_date,
                    # 'biography': politician.biography, all null
                    # 'national_identity': politician.national_identity all null
                }
        except Exception as e:
            print(f"Failed to retrieve data for {country}: {e}")
        with open('politicians_by_country.json', 'w') as file:
            json.dump(politicians_by_country, file, indent=4, default=custom_encoder)

with open('politicians_by_country.json', 'r') as file:
    politicians_by_country = json.load(file)


print(json.dumps(politicians_by_country.get("Singapore", []), indent=4, default=custom_encoder))

Failed to retrieve data for Qatar: Couldn't find the country with slug 'Qatar'
{
    "Q17722375": {
        "name": "Masagos Zulkifli Bin Masagos Mohamad",
        "image": "http://www.parliament.gov.sg/sites/default/files/masagos_0.jpg",
        "alternate_names": [
            {
                "lang": "en",
                "name": "Masagos Zulkifli",
                "note": "multilingual"
            },
            {
                "lang": "ja",
                "name": "\u30de\u30b5\u30b4\u30b9\u30fb\u30ba\u30eb\u30ad\u30d5\u30ea",
                "note": "multilingual"
            },
            {
                "lang": "zh-cn",
                "name": "\u9a6c\u5584\u9ad8",
                "note": "multilingual"
            },
            {
                "lang": "zh-hans",
                "name": "\u9a6c\u5584\u9ad8",
                "note": "multilingual"
            },
            {
                "lang": "zh-hant",
                "name": "\u99ac\u5584\u9ad8",
             

In [27]:
# Load or initialize the cache
try:
    with open('label_cache.json', 'r') as cache_file:
        label_cache = json.load(cache_file)
except FileNotFoundError:
    label_cache = {}

def get_label(qid):
    # Check if the label is already in the cache
    if qid in label_cache:
        return label_cache[qid]

    # If not in cache, fetch from Wikidata
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "props": "labels",
        "languages": "en" 
    }
    response = requests.get(url, params=params)
    data = response.json()
    # print("data", qid, data)
    label = data['entities'][qid]['labels']['en']['value']

    # Update the cache with the new label
    label_cache[qid] = label
    with open('label_cache.json', 'w') as cache_file:
        json.dump(label_cache, cache_file)

    return label

In [56]:
def get_wikidata(id, language="en"):
    if id is None:
        return None
    base = "https://wikidata.org/w/api.php"
    get_base_params = {
        "action": "wbgetentities",
        "format": "json",
        "ids": id,
        "languages": language
    }

    get_res = requests.get(base, params={**get_base_params})
    # print(get_res.content)
    parsed_res = get_res.json()['entities'][id]
    
    def get_nested(data, keys, default=None):
        try:
            for key in keys:
                if isinstance(data, dict) and len(data) > 0:
                    data = data.get(key, {})
                    if isinstance(data, str) and data.startswith('Q') and data[1:].isdigit():
                        label = get_label(data)
                        # print("data is qid, convert to label", data, label)
                        return label
                    # else:
                        # print(keys, " is dict", str(data)[:100], '...')
                elif isinstance(data, list) and len(data) > 0:  # Check if data is a non-empty list
                    data = data[0]
                    # print(keys, " is list", str(data)[:100], '...')
                elif (isinstance(data, str) or isinstance(data, int)) and (len(data) > 0):
                    # print(keys, " is str or int", str(data)[:100], '...')
                    return data
                else:
                    # print(keys, 'is', type(data))
                    return default
            return data
        except Exception as e:
            print(f"Error navigating JSON: {e}")
            return default
    # print(json.dumps(parsed_res, indent=4))
        
    properties = {
        'P27': 'countryCitizenship',
        'P140': 'religionWorldview',
        'P19': 'placeBirth',
        'P131': 'nationality',
        'P1971': 'numberChildren',
        'P551': 'residence',
        'P102': 'politicalParty',
        'P106': 'occupation',
        'P69': 'educatedAt',
    }
    
    data = {
        prop: get_nested(parsed_res, ['claims', prop, 0, 'mainsnak', 'datavalue', 'value', 'id','time'])
        for prop in properties.keys()
    }
    positions_held = []
    for position in parsed_res.get('claims', {}).get('P39', []):
        position_data = {
            'position': get_nested(position, ['mainsnak', 'datavalue', 'value', 'id']),
            'startDate': get_nested(position, ['qualifiers', 'P580', 0, 'datavalue', 'value', 'time']),
            'endDate': get_nested(position, ['qualifiers', 'P582', 0, 'datavalue', 'value', 'time']),
            'replaces': get_nested(position, ['qualifiers', 'P1365', 0, 'datavalue', 'value', 'id']),
            'replacedBy': get_nested(position, ['qualifiers', 'P1366', 0, 'datavalue', 'value', 'id'])
        }
        positions_held.append(position_data)
    
    # aliases = [alias.get('value') for alias in parsed_res.get('aliases', {}).get(language, []) if isinstance(alias, dict)]
    description = parsed_res.get('descriptions', {}).get(language, {}).get('value', '')
    for k, v in data.items():
        if str(v).startswith('Q') and str(v)[1:].isdigit():
            # print('for', k, 'we got qid', v)
            # print('now we get label', get_label(v))
            data[k] = get_label(v)


    return {**{properties[prop]: data[prop] for prop in properties}, "description": description, "positionsHeld": positions_held}

singapore_politicians_tables = []

for politician_id in list(politicians_by_country['Singapore'])[:1]:
    politician_basic_info = pd.DataFrame([politicians_by_country['Singapore'][politician_id]])
    # print(politician_id)
    if politician_id != "null":
        politician_wikidata_info = pd.DataFrame([get_wikidata(politician_id, 'en')])
        combined_info = pd.concat([politician_basic_info, politician_wikidata_info], axis=1)
        singapore_politicians_tables.append(combined_info)
display(pd.concat(singapore_politicians_tables))

Unnamed: 0,name,image,alternate_names,gender,email,birthDate,deathDate,countryCitizenship,religionWorldview,placeBirth,nationality,numberChildren,residence,politicalParty,occupation,educatedAt,description,positionsHeld
0,Masagos Zulkifli Bin Masagos Mohamad,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'en', 'name': 'Masagos Zulkifli', 'n...",male,,1963-04-16T00:00:00,,Singapore,Islam,Colony of Singapore,,,,People's Action Party,politician,USC Marshall School of Business,Singaporean politician,[{'position': 'Member of the Parliament of Sin...


In [54]:
lang_pairs = {
    "Indonesia": "id",
    "Australia": "en",
    "China": "zh",
    "Singapore": "en",
    "Malaysia": "en",
    "India": "hi",
    "Vietnam": "vi",
    "Japan": "ja"
}
# Print the length of politician_wikidata per country
for country in lang_pairs.keys():
    country_wikidata = [politician for politician in politicians_wikidata.get(country, []) if politician is not None]
    country_wikidata_length = len(country_wikidata)
    # print(f"{country}: {country_wikidata_length} politicians")

# Let's start with the country with the least politicians.
# 1. Singapore 403 192
# 2. Indonesia 662 272
# 3. Vietnam 500 350
# 4. Japan 567 504
# 5. Australia 515 515
# 6. Malaysia 1121 533
# 7. India 541 541
# 8. China 2956 2269

Indonesia: 272 politicians
Australia: 515 politicians
China: 2269 politicians
Singapore: 192 politicians
Malaysia: 533 politicians
India: 541 politicians
Vietnam: 350 politicians
Japan: 504 politicians


In [60]:
# Create a list to hold the dataframes for each Singapore politician
def get_country_data(country, lang_code):
    df_list = []

    for politician_id in list(politicians_by_country[country]):
        politician_basic_info = pd.DataFrame([politicians_by_country[country][politician_id]])
    # print(politician_id)
        if politician_id != "null":
            politician_wikidata_info = pd.DataFrame([get_wikidata(politician_id, lang_code)])
            combined_info = pd.concat([politician_basic_info, politician_wikidata_info], axis=1)
            df_list.append(combined_info)
    
    with open(f"{country}.json", "w") as file:
        file.write(pd.concat(df_list).to_json(orient='records'))

    return pd.concat(df_list)

get_country_data("Singapore", "en")

Unnamed: 0,name,image,alternate_names,gender,email,birthDate,deathDate,countryCitizenship,religionWorldview,placeBirth,nationality,numberChildren,residence,politicalParty,occupation,educatedAt,description,positionsHeld
0,Masagos Zulkifli Bin Masagos Mohamad,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'en', 'name': 'Masagos Zulkifli', 'n...",male,,1963-04-16T00:00:00,,Singapore,Islam,Colony of Singapore,,,,People's Action Party,politician,USC Marshall School of Business,Singaporean politician,[{'position': 'Member of the Parliament of Sin...
0,Hri Kumar Nair,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'lzh', 'name': '哈里古瑪', 'note': 'mult...",male,,1966-06-16T00:00:00,,Singapore,Hinduism,Singapore,,,,People's Action Party,politician,National University of Singapore,Singaporean politician and lawyer,[{'position': 'Member of the Parliament of Sin...
0,"Rajaratnam, S",http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'bn', 'name': 'এস রাজারত্নম', 'note'...",male,,1915-02-25T00:00:00,2006-02-22T00:00:00,Singapore,Hinduism,Sri Lanka,,,,People's Action Party,journalist,King's College London,Singaporean politician (1915–2006),"[{'position': 'Minister for Culture, Community..."
0,Ang Hin Kee,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'hak', 'name': 'Fùng Tín-kî', 'note'...",male,,1965-10-22T00:00:00,,Singapore,,Singapore,,,,People's Action Party,politician,National University of Singapore,Singaporean politician,[{'position': 'Member of the Parliament of Sin...
0,Alvin Yeo,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'ar', 'name': 'آلفين يو', 'note': 'm...",male,,1962-03-28T00:00:00,,Singapore,,Singapore,,,,People's Action Party,politician,Anglo-Chinese School,Singaporean lawyer and former politician,[{'position': 'Member of the Parliament of Sin...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Dennis Tan Lip Fong,http://www.parliament.gov.sg/sites/default/fil...,[],male,,1970-01-01T00:00:00,,,,Singapore,,,,,politician,,Singaporean politician,[{'position': 'Member of the Parliament of Sin...
0,Davinder Singh,http://www.parliament.gov.sg/sites/default/fil...,[],male,,1957-08-01T00:00:00,,Singapore,,Singapore,,,,People's Action Party,politician,National University of Singapore,Singaporean politician and lawyer,[{'position': 'Member of the Parliament of Sin...
0,Lim Biow Chuan,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'hak', 'name': 'Lìm Mèu-chhièn', 'no...",male,,1963-05-22T00:00:00,,Singapore,,Colony of Singapore,,,,People's Action Party,politician,National University of Singapore,Singaporean politician,[{'position': 'Member of the Parliament of Sin...
0,Lily Neo,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'ar', 'name': 'ليلي تيرتاسانا نيو', ...",female,,1953-08-12T00:00:00,,Singapore,,Medan,,,,People's Action Party,physician,Royal College of Surgeons in Ireland,Singaporean politician,[{'position': 'Member of the Parliament of Sin...


In [61]:
with open("singapore.json", "r") as file:
    data = json.load(file)
singapore_politicians_df = pd.DataFrame(data)

# Display the dataframe to verify contents
display(singapore_politicians_df)

Unnamed: 0,name,image,alternate_names,gender,email,birthDate,deathDate,countryCitizenship,religionWorldview,placeBirth,nationality,numberChildren,residence,politicalParty,occupation,educatedAt,description,positionsHeld
0,Masagos Zulkifli Bin Masagos Mohamad,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'en', 'name': 'Masagos Zulkifli', 'n...",male,,1963-04-16T00:00:00,,Singapore,Islam,Colony of Singapore,,,,People's Action Party,politician,USC Marshall School of Business,Singaporean politician,[{'position': 'Member of the Parliament of Sin...
1,Hri Kumar Nair,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'lzh', 'name': '哈里古瑪', 'note': 'mult...",male,,1966-06-16T00:00:00,,Singapore,Hinduism,Singapore,,,,People's Action Party,politician,National University of Singapore,Singaporean politician and lawyer,[{'position': 'Member of the Parliament of Sin...
2,"Rajaratnam, S",http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'bn', 'name': 'এস রাজারত্নম', 'note'...",male,,1915-02-25T00:00:00,2006-02-22T00:00:00,Singapore,Hinduism,Sri Lanka,,,,People's Action Party,journalist,King's College London,Singaporean politician (1915–2006),"[{'position': 'Minister for Culture, Community..."
3,Ang Hin Kee,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'hak', 'name': 'Fùng Tín-kî', 'note'...",male,,1965-10-22T00:00:00,,Singapore,,Singapore,,,,People's Action Party,politician,National University of Singapore,Singaporean politician,[{'position': 'Member of the Parliament of Sin...
4,Alvin Yeo,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'ar', 'name': 'آلفين يو', 'note': 'm...",male,,1962-03-28T00:00:00,,Singapore,,Singapore,,,,People's Action Party,politician,Anglo-Chinese School,Singaporean lawyer and former politician,[{'position': 'Member of the Parliament of Sin...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,Dennis Tan Lip Fong,http://www.parliament.gov.sg/sites/default/fil...,[],male,,1970-01-01T00:00:00,,,,Singapore,,,,,politician,,Singaporean politician,[{'position': 'Member of the Parliament of Sin...
188,Davinder Singh,http://www.parliament.gov.sg/sites/default/fil...,[],male,,1957-08-01T00:00:00,,Singapore,,Singapore,,,,People's Action Party,politician,National University of Singapore,Singaporean politician and lawyer,[{'position': 'Member of the Parliament of Sin...
189,Lim Biow Chuan,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'hak', 'name': 'Lìm Mèu-chhièn', 'no...",male,,1963-05-22T00:00:00,,Singapore,,Colony of Singapore,,,,People's Action Party,politician,National University of Singapore,Singaporean politician,[{'position': 'Member of the Parliament of Sin...
190,Lily Neo,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'ar', 'name': 'ليلي تيرتاسانا نيو', ...",female,,1953-08-12T00:00:00,,Singapore,,Medan,,,,People's Action Party,physician,Royal College of Surgeons in Ireland,Singaporean politician,[{'position': 'Member of the Parliament of Sin...


In [12]:
%pip install elasticsearch

Collecting elasticsearch
  Downloading elasticsearch-8.13.1-py3-none-any.whl.metadata (6.5 kB)
Collecting elastic-transport<9,>=8.13 (from elasticsearch)
  Downloading elastic_transport-8.13.0-py3-none-any.whl.metadata (3.7 kB)
Downloading elasticsearch-8.13.1-py3-none-any.whl (477 kB)
   ---------------------------------------- 0.0/477.5 kB ? eta -:--:--
   ---------------------------------------- 0.0/477.5 kB ? eta -:--:--
    --------------------------------------- 10.2/477.5 kB ? eta -:--:--
   -- ------------------------------------ 30.7/477.5 kB 325.1 kB/s eta 0:00:02
   --- ----------------------------------- 41.0/477.5 kB 279.3 kB/s eta 0:00:02
   ------ -------------------------------- 81.9/477.5 kB 456.6 kB/s eta 0:00:01
   ----------- -------------------------- 143.4/477.5 kB 652.5 kB/s eta 0:00:01
   ---------------------------------------- 477.5/477.5 kB 1.9 MB/s eta 0:00:00
Downloading elastic_transport-8.13.0-py3-none-any.whl (64 kB)
   ----------------------------------


[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [67]:
from elasticsearch import Elasticsearch, helpers
client = Elasticsearch("https://localhost:9200/",
                       basic_auth=("elastic", "*Td3NjQLaQPQQ*OYc5ES"))
print(client.ping())

True


In [68]:
index_name = "profile"
index_mappings = {
  "mappings": {
    "properties": {
      "name": {"type": "text"},
      "image": {"type": "text"},
      "alternate_names": {"type": "text"},
      "gender": {"type": "text"},
      "email": {"type": "text"},
      "birthDate": {"type": "text"},
      "deathDate": {"type": "text"},
      "countryCitizenship": {"type": "text"},
      "religionWorldview": {"type": "text"},
      "placeBirth": {"type": "text"},
      "nationality": {"type": "text"},
      "numberChildren": {"type": "long"},
      "residence": {"type": "text"},
      "politicalParty": {"type": "text"},
      "image": {"type": "text"},
      "occupation": {"type": "text"},
      "positionHeld": {"type": "text"},
      "educatedAt": {"type": "text"},
      "dateBirth": {"type": "text"},
      "aliases": {"type": "text"},
      "description": {"type": "text"},
      "positionsHeld": {"type": "nested",
        "properties": {
          "position": {"type": "text"},
          "startDate": {"type": "text"},
          "endDate": {"type": "text"},
          "replaces": {"type": "text"},
          "replacedBy": {"type": "text"}
        }
      }
    }
  }
}
if not client.indices.exists(index=index_name):
    client.indices.create(index=index_name, body=index_mappings)

In [76]:
def ingest(df):
    index_name = "profile"
    docs = df.to_dict(orient='records')
    actions = [
        {
            "_index": index_name,
            "_source": doc
        } for doc in docs
    ]    
    # Attempt to perform bulk indexing
    try:
        # Perform bulk indexing
        resp = helpers.bulk(client, actions)
        # Fetch the current document count after insertion
        current_count = client.count(index=index_name)['count']
        # Print the status of the bulk indexing
        print(f"Before: {current_count} documents")
        print(f"Added: +{resp[0]} documents")
        print(f"Now: {current_count + resp[0]} documents.")
    except helpers.BulkIndexError as e:
        # Handle errors during bulk indexing
        print(f"Error: {e}")
        for error in e.errors:
            print(f"Error for document {error['index']['_id']}: {error['index']['error']}")

# Example usage:
ingest(singapore_politicians_df)


Before: 0 documents
Added: +192 documents
Now: 192 documents.


In [78]:
ingest(get_country_data("Indonesia", "id"))

Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Before: 192 documents
Added: +272 documents
Now: 464 documents.


In [79]:
# Let's start with the country with the least politicians.
# 1. Singapore 403 192
# 2. Indonesia 662 272
# 3. Vietnam 500 350
# 4. Japan 567 504
# 5. Australia 515 515
# 6. Malaysia 1121 533
# 7. India 541 541
# 8. China 2956 2269
ingest(get_country_data("Vietnam", "vi"))

Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Before: 464 documents
Added: +350 documents
Now: 814 documents.


In [80]:
ingest(get_country_data("Japan", "ja"))

Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Before: 1318 documents
Added: +504 documents
Now: 1822 documents.


In [81]:
ingest(get_country_data("Australia", "en"))

Before: 1833 documents
Added: +515 documents
Now: 2348 documents.


In [82]:
ingest(get_country_data("Malaysia", "en"))

Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Before: 1833 documen

In [83]:
ingest(get_country_data("India", "hi"))

Before: 2366 documents
Added: +541 documents
Now: 2907 documents.


In [84]:
ingest(get_country_data("China", "zh"))

Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Error navigating JSON: 'en'
Before: 5176 documents
Added: +2269 documents
Now: 7445 documents.


In [None]:
# Demo to ingest all countries
df_list = []

for country, lang_code in lang_pairs.items():
    for politician_id in list(politicians_by_country[country])[:1]:
        politician_basic_info = pd.DataFrame([politicians_by_country[country][politician_id]])
    # print(politician_id)
    if politician_id != "null":
        politician_wikidata_info = pd.DataFrame([get_wikidata(politician_id, lang_code)])
        combined_info = pd.concat([politician_basic_info, politician_wikidata_info], axis=1)
        df_list.append(combined_info)

# Combine all dataframes into a single dataframe
display(pd.concat(df_list))


b'{"entities":{"Q5243805":{"pageid":5013477,"ns":0,"title":"Q5243805","lastrevid":2098807002,"modified":"2024-03-10T19:35:34Z","type":"item","id":"Q5243805","labels":{"en":{"language":"en","value":"De-Anne Kelly"}},"descriptions":{"en":{"language":"en","value":"Australian politician"}},"aliases":{"en":[{"language":"en","value":"De-Anne Margaret Kelly"}]},"claims":{"P102":[{"mainsnak":{"snaktype":"value","property":"P102","hash":"5c32ef211b1b3c6487b7e2a57e397e819fd2a7c5","datavalue":{"value":{"entity-type":"item","numeric-id":946040,"id":"Q946040"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q5243805$75F026A4-4101-424F-A423-24C4C88E5268","rank":"normal","references":[{"hash":"fa278ebfc458360e5aed63d5058cca83c46134f1","snaks":{"P143":[{"snaktype":"value","property":"P143","hash":"e4f6d9441d0600513c4533c672b5ab472dc73694","datavalue":{"value":{"entity-type":"item","numeric-id":328,"id":"Q328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"

Unnamed: 0,name,image,alternate_names,gender,email,birthDate,deathDate,countryCitizenship,religionWorldview,placeBirth,nationality,numberChildren,residence,politicalParty,occupation,educatedAt,dateBirth,description,positionsHeld
0,De-Anne Kelly,,[],female,,1954-03-21T00:00:00,,Australia,,Rockhampton,,,,National Party of Australia,politician,University of Queensland,+1954-03-21T00:00:00Z,Australian politician,[{'position': 'Minister for Veterans' Affairs'...
0,王树芬,,[],female,,1962-01-01T00:00:00,,People's Republic of China,,,,,,Chinese Communist Party,politician,,+1962-00-00T00:00:00Z,,[{'position': 'National People's Congress depu...
0,Masagos Zulkifli Bin Masagos Mohamad,http://www.parliament.gov.sg/sites/default/fil...,"[{'lang': 'en', 'name': 'Masagos Zulkifli', 'n...",male,,1963-04-16T00:00:00,,Singapore,Islam,Colony of Singapore,,,,People's Action Party,politician,USC Marshall School of Business,+1963-04-16T00:00:00Z,Singaporean politician,[{'position': 'Member of the Parliament of Sin...
0,"Rathore, Shri Hariom Singh",http://164.100.47.132/mpimage/photo/4655.jpg,"[{'lang': 'en', 'name': 'Hariom Singh Rathore'...",male,hariomsingh.rathore@sansad.nic.in,1957-08-09T00:00:00,,India,,Rajsamand,,,,Bharatiya Janata Party,politician,Bhupal Noble's College,+1957-08-09T00:00:00Z,,"[{'position': 'Member of the 16th Lok Sabha', ..."
0,Nguyễn Hữu Thuận (Thuận Hữu),http://dbqh.na.gov.vn/data/images/XIII/1982.jpg,"[{'name': 'Nguyễn Hữu Thuận', 'note': 'alterna...",male,,1958-09-12T00:00:00,,Vietnam,,,,,,,politician,,,,[{'position': 'member of the National Assembly...
0,KAWAMURA Takeo,http://www.shugiin.go.jp/internet/itdb_giinpro...,"[{'lang': 'en', 'name': 'Takeo Kawamura', 'not...",male,,1942-11-10T00:00:00,,Japan,,Hagi,,,,Liberal Democratic Party,politician,Keio University,+1942-11-10T00:00:00Z,日本の政治家,[{'position': 'member of the House of Represen...
