In [1]:
%pip install wbdata

import wbdata

Note: you may need to restart the kernel to use updated packages.


Key '4554893263572836092' not in persistent cache.


In [5]:
import wbdata

# Return list of all country/region codes:
wbdata.get_countries()


id    name
----  --------------------------------------------------------------------------------
ABW   Aruba
AFE   Africa Eastern and Southern
AFG   Afghanistan
AFR   Africa
AFW   Africa Western and Central
AGO   Angola
ALB   Albania
AND   Andorra
ARB   Arab World
ARE   United Arab Emirates
ARG   Argentina
ARM   Armenia
ASM   American Samoa
ATG   Antigua and Barbuda
AUS   Australia
AUT   Austria
AZE   Azerbaijan
BDI   Burundi
BEA   East Asia & Pacific (IBRD-only countries)
BEC   Europe & Central Asia (IBRD-only countries)
BEL   Belgium
BEN   Benin
BFA   Burkina Faso
BGD   Bangladesh
BGR   Bulgaria
BHI   IBRD countries classified as high income
BHR   Bahrain
BHS   Bahamas, The
BIH   Bosnia and Herzegovina
BLA   Latin America & the Caribbean (IBRD-only countries)
BLR   Belarus
BLZ   Belize
BMN   Middle East, North Africa, Afghanistan & Pakistan (IBRD only)
BMU   Bermuda
BOL   Bolivia
BRA   Brazil
BRB   Barbados
BRN   Brunei Darussalam
BSS   Sub-Saharan Africa (IBRD-only countries)
BTN  

In [8]:
SOURCE = 40 # "Population estimates and projections

indicators = wbdata.get_indicators(source=SOURCE)
indicators



id                 name
-----------------  -------------------------------------------------------------------
SH.DTH.0509        Number of deaths ages 5-9 years
SH.DTH.0514        Number of deaths ages 5-14 years
SH.DTH.1014        Number of deaths ages 10-14 years
SH.DTH.1019        Number of deaths ages 10-19 years
SH.DTH.1519        Number of deaths ages 15-19 years
SH.DTH.2024        Number of deaths ages 20-24 years
SH.DTH.IMRT        Number of infant deaths
SH.DTH.IMRT.FE     Number of infant deaths, female
SH.DTH.IMRT.MA     Number of infant deaths, male
SH.DTH.MORT        Number of under-five deaths
SH.DTH.MORT.FE     Number of under-five deaths, female
SH.DTH.MORT.MA     Number of under-five deaths, male
SH.DTH.NMRT        Number of neonatal deaths
SH.DYN.0509        Probability of dying among children ages 5-9 years (per 1,000)
SH.DYN.0514        Probability of dying at age 5-14 years (per 1,000 children age 5)
SH.DYN.1014        Probability of dying among adolescents ages 1

In [22]:
import logging
logging.getLogger('shelved_cache').setLevel(logging.ERROR)

In [23]:
def population(year=2020, sex="People", age_range=(0, 100), place="USA"):
    """
    Returns the population count.
    If sex is 'People', it sums Male + Female automatically.
    """
    
    # --- STRATEGY: Recursion for 'People' ---
    # The World Bank often misses 'Total' codes for specific ages.
    # So we calculate it: People = Male + Female
    if sex == "People":
        males = population(year, "Male", age_range, place)
        females = population(year, "Female", age_range, place)
        return males + females

    # --- 1. Set up the Gender Suffix ---
    # We only handle Male/Female here because 'People' is handled above
    if sex == "Male":
        suffix = ".MA"
    elif sex == "Female":
        suffix = ".FE"
    else:
        return 0 # Safety catch
        
    # --- 2. Map Ages to World Bank Codes ---
    # Logic: Start Age -> Code Middle (e.g., 0 -> "0004")
    ages = {
        0: "0004", 5: "0509", 10: "1014", 15: "1519", 20: "2024",
        25: "2529", 30: "3034", 35: "3539", 40: "4044", 45: "4549",
        50: "5054", 55: "5559", 60: "6064", 65: "6569", 70: "7074",
        75: "7579", 80: "80UP"
    }
    
    # --- 3. Build the Request List ---
    variable_labels = {}
    low, high = age_range
    
    for start_age in ages:
        # Check if this bucket is inside our range
        if start_age >= low and start_age < high:
            # Code format: SP.POP + AGE + SUFFIX
            code = "SP.POP." + ages[start_age] + suffix
            variable_labels[code] = "Count"
            
    # If the range was weird (like 22-23) and we found no buckets, return 0
    if not variable_labels:
        return 0

    # --- 4. Fetch Data ---
    # We use source=2 (WDI) to be safe, though wbdata usually guesses right
    df = wbdata.get_dataframe(variable_labels, country=place)
    
    # Clean up dates (convert string "2020" to int 2020)
    df.index = df.index.astype(int)
    
    # Return the sum for that year
    # We use .get() to avoid crashing if the year is missing
    try:
        return int(df.loc[year].sum())
    except KeyError:
        return 0

In [24]:
print(population())

12043345
