In [1]:
!pip install wbdata

import wbdata
import numpy as np
import re 

Collecting wbdata
  Using cached wbdata-0.3.0-py3-none-any.whl (14 kB)
Collecting tabulate>=0.8.5
  Using cached tabulate-0.8.7-py3-none-any.whl (24 kB)
Installing collected packages: tabulate, wbdata
Successfully installed tabulate-0.8.7 wbdata-0.3.0


In [2]:
# List every country/region known to the API as an id/name table.
# The listing mixes individual countries with aggregates (e.g. "Arab World").
wbdata.get_country()

id    name
----  --------------------------------------------------------------------------------
ABW   Aruba
AFG   Afghanistan
AFR   Africa
AGO   Angola
ALB   Albania
AND   Andorra
ARB   Arab World
ARE   United Arab Emirates
ARG   Argentina
ARM   Armenia
ASM   American Samoa
ATG   Antigua and Barbuda
AUS   Australia
AUT   Austria
AZE   Azerbaijan
BDI   Burundi
BEA   East Asia & Pacific (IBRD-only countries)
BEC   Europe & Central Asia (IBRD-only countries)
BEL   Belgium
BEN   Benin
BFA   Burkina Faso
BGD   Bangladesh
BGR   Bulgaria
BHI   IBRD countries classified as high income
BHR   Bahrain
BHS   Bahamas, The
BIH   Bosnia and Herzegovina
BLA   Latin America & the Caribbean (IBRD-only countries)
BLR   Belarus
BLZ   Belize
BMN   Middle East & North Africa (IBRD-only countries)
BMU   Bermuda
BOL   Bolivia
BRA   Brazil
BRB   Barbados
BRN   Brunei Darussalam
BSS   Sub-Saharan Africa (IBRD-only countries)
BTN   Bhutan
BWA   Botswana
CAA   Sub-Saharan Africa (IFC classification)
CAF   Centr

To see which datasets can be accessed through the API, use `wbdata.get_source()`.



In [3]:
# List the data sources exposed by the API as an id/name table.
# A source id is what `wbdata.get_indicator(source=...)` receives in a later cell.
wbdata.get_source()

  id  name
----  --------------------------------------------------------------------
   1  Doing Business
   2  World Development Indicators
   3  Worldwide Governance Indicators
   5  Subnational Malnutrition Database
   6  International Debt Statistics
  11  Africa Development Indicators
  12  Education Statistics
  13  Enterprise Surveys
  14  Gender Statistics
  15  Global Economic Monitor
  16  Health Nutrition and Population Statistics
  18  IDA Results Measurement System
  19  Millennium Development Goals
  20  Quarterly Public Sector Debt
  22  Quarterly External Debt Statistics SDDS
  23  Quarterly External Debt Statistics GDDS
  24  Poverty and Equity
  25  Jobs
  27  Global Economic Prospects
  28  Global Financial Inclusion
  29  The Atlas of Social Protection: Indicators of Resilience and Equity
  30  Exporter Dynamics Database – Indicators at Country-Year Level
  31  Country Policy and Institutional Assessment
  32  Global Financial Development
  33  G20 Financial Inclus

In [4]:
# Id of the data source to query (source ids are listed by wbdata.get_source()).
SOURCE = 40 # "Population estimates and projections"

# Fetch the indicators that belong to that source. Later cells read each
# entry through its 'id' and 'name' keys.
indicators_40 = wbdata.get_indicator(source=SOURCE)
indicators_40

id                 name
-----------------  -------------------------------------------------------------------
SH.DTH.0509        Number of deaths ages 5-9 years
SH.DTH.1014        Number of deaths ages 10-14 years
SH.DTH.1519        Number of deaths ages 15-19 years
SH.DTH.2024        Number of deaths ages 20-24 years
SH.DTH.IMRT        Number of infant deaths
SH.DTH.IMRT.FE     Number of infant deaths, female
SH.DTH.IMRT.MA     Number of infant deaths, male
SH.DTH.MORT        Number of under-five deaths
SH.DTH.MORT.FE     Number of under-five deaths, female
SH.DTH.MORT.MA     Number of under-five deaths, male
SH.DTH.NMRT        Number of neonatal deaths
SH.DYN.0509        Probability of dying among children ages 5-9 years (per 1,000)
SH.DYN.1014        Probability of dying among adolescents ages 10-14 years (per 1,000)
SH.DYN.1519        Probability of dying among adolescents ages 15-19 years (per 1,000)
SH.DYN.2024        Probability of dying among youth ages 20-24 years (per 1,000)

In [5]:
# Takes in an indicator object and returns a dictionary of filtered column labels.
def find_labels(indicators):
    """Return the age/sex population indicators as an ``{id: name}`` dictionary.

    Parameters
    ----------
    indicators : iterable of mappings
        Indicator records, each exposing an ``'id'`` and a ``'name'`` entry.

    Returns
    -------
    dict
        Indicator id mapped to indicator name, restricted to ids of the form
        ``SP.POP.<age band>.<MA|FE>`` (for example ``SP.POP.0004.FE``). The
        age band is two digits followed by two digits or capital letters.
        Entries keep the order in which they appear in ``indicators``.
    """
    # Raw string: "\d" inside a normal string literal is an invalid escape.
    # The dots are escaped so that they only match a literal '.', and the sex
    # suffix is spelled out because "[MAFE]{2}" would also accept "AM", "EE", ...
    pattern = re.compile(r"SP\.POP\.\d{2}[A-Z0-9]{2}\.(?:MA|FE)")

    # fullmatch() anchors both ends, so longer ids such as "....FE.ZS" are rejected.
    return {
        indicator['id']: indicator['name']
        for indicator in indicators
        if pattern.fullmatch(indicator['id'])
    }

In [6]:
# Keep only the age/sex population indicators (SP.POP.* ids) from indicators_40.
variable_labels = find_labels(indicators_40)
variable_labels

{'SP.POP.0004.FE': 'Population ages 00-04, female',
 'SP.POP.0004.MA': 'Population ages 00-04, male',
 'SP.POP.0509.FE': 'Population ages 05-09, female',
 'SP.POP.0509.MA': 'Population ages 05-09, male',
 'SP.POP.1014.FE': 'Population ages 10-14, female',
 'SP.POP.1014.MA': 'Population ages 10-14, male',
 'SP.POP.1519.FE': 'Population ages 15-19, female',
 'SP.POP.1519.MA': 'Population ages 15-19, male',
 'SP.POP.2024.FE': 'Population ages 20-24, female',
 'SP.POP.2024.MA': 'Population ages 20-24, male',
 'SP.POP.2529.FE': 'Population ages 25-29, female',
 'SP.POP.2529.MA': 'Population ages 25-29, male',
 'SP.POP.3034.FE': 'Population ages 30-34, female',
 'SP.POP.3034.MA': 'Population ages 30-34, male',
 'SP.POP.3539.FE': 'Population ages 35-39, female',
 'SP.POP.3539.MA': 'Population ages 35-39, male',
 'SP.POP.4044.FE': 'Population ages 40-44, female',
 'SP.POP.4044.MA': 'Population ages 40-44, male',
 'SP.POP.4549.FE': 'Population ages 45-49, female',
 'SP.POP.4549.MA': 'Population

## Population function

In [7]:
# Function that takes a SEX ("Male" or "Female"), a YEAR, an AGE range given as (low, high) and a COUNTRY code, and returns the population figure for those arguments.

def population(sex, year, age, country):
    """Return the population of one sex and age band for a country and year.

    Parameters
    ----------
    sex : str
        Either "Male" or "Female".
    year : int or str
        Year to look up. Its string form is matched (as a substring) against
        the row labels of the frame returned by ``wbdata.get_dataframe``.
    age : sequence of length 2
        Lower and upper bound of the age band, e.g. ``(15, 19)``. Each bound
        is zero-padded to two characters, so ``(0, 4)`` yields ``0004``.
    country : str
        Country code forwarded to ``wbdata.get_dataframe``.

    Returns
    -------
    The first value of the matching row(s) in the requested series.

    Raises
    ------
    ValueError
        If ``sex`` is neither "Male" nor "Female".
    IndexError
        If no row matches ``year``.
    """
    sex_suffix = {"Male": "MA", "Female": "FE"}
    # Validate up front: previously an unknown value left the indicator
    # dictionary unassigned and surfaced as an UnboundLocalError.
    if sex not in sex_suffix:
        raise ValueError('sex must be "Male" or "Female", got {!r}'.format(sex))

    # Indicator ids use two characters per bound ("0004", "0509", "1519").
    # Without padding, (0, 4) would build the non-existent id "SP.POP.04.FE".
    age_band = str(age[0]).zfill(2) + str(age[1]).zfill(2)
    indicator = {"SP.POP." + age_band + "." + sex_suffix[sex]: sex}

    pop_stats = wbdata.get_dataframe(indicator, country=country)
    pop_stats = pop_stats.filter(like=str(year), axis=0)
    if pop_stats.empty:
        raise IndexError("no data for year {!r}".format(year))

    # .iloc makes the positional lookup explicit; plain [0] on a label-indexed
    # Series relies on a positional fallback.
    return pop_stats[sex].iloc[0]

In [8]:
# Example: female population aged 15-19 for country code "CHN" in 2010.
population("Female", 2010, (15,19), "CHN")

45907253.0

## Population DataFrames


In [9]:
def population_dataframes(indicators):
    """Download every age/sex population series found in ``indicators``.

    Parameters
    ----------
    indicators : iterable of mappings
        Indicator records, each exposing an ``'id'`` and a ``'name'`` entry.

    Returns
    -------
    The result of ``wbdata.get_dataframe`` for the selected indicators, with
    one column per indicator labelled by the indicator name.

    Raises
    ------
    ValueError
        If ``indicators`` contains no id of the form
        ``SP.POP.<age band>.<MA|FE>``.
    """
    # Raw string with escaped dots so '.' only matches a literal dot; the sex
    # suffix is spelled out because "[MAFE]{2}" would also accept "AM", "EE", ...
    pattern = re.compile(r"SP\.POP\.\d{2}[A-Z0-9]{2}\.(?:MA|FE)")

    # Filter the *argument*: the previous version ignored it and always read
    # the notebook-level variable ``indicators_40``.
    variable_labels = {
        indicator['id']: indicator['name']
        for indicator in indicators
        if pattern.fullmatch(indicator['id'])
    }

    # Fail early with a clear message instead of sending an empty request.
    if not variable_labels:
        raise ValueError(
            "no age/sex population indicators (SP.POP.<age band>.<MA|FE>) found"
        )

    return wbdata.get_dataframe(variable_labels)

In [10]:
# Download the age/sex population series; rows are indexed by (country, date).
pop_df = population_dataframes(indicators_40)
pop_df

Unnamed: 0_level_0,Unnamed: 1_level_0,"Population ages 00-04, female","Population ages 00-04, male","Population ages 05-09, female","Population ages 05-09, male","Population ages 10-14, female","Population ages 10-14, male","Population ages 15-19, female","Population ages 15-19, male","Population ages 20-24, female","Population ages 20-24, male",...,"Population ages 60-64, female","Population ages 60-64, male","Population ages 65-69, female","Population ages 65-69, male","Population ages 70-74, female","Population ages 70-74, male","Population ages 75-79, female","Population ages 75-79, male","Population ages 80 and above, female","Population ages 80 and above, male"
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Arab World,2020,,,,,,,,,,,...,,,,,,,,,,
Arab World,2019,24854512.0,26008786.0,23284558.0,24402277.0,20147651.0,21082648.0,18115624.0,18928186.0,17449218.0,18738921.0,...,5379212.0,5608048.0,4114802.0,3992940.0,2772099.0,2503846.0,1867325.0,1530529.0,1840519.0,1307203.0
Arab World,2018,24868488.0,26042050.0,22673632.0,23761021.0,19686652.0,20576105.0,17945291.0,18778592.0,17423893.0,18736142.0,...,5196799.0,5392111.0,3904424.0,3785507.0,2657162.0,2389145.0,1823473.0,1489597.0,1799447.0,1284554.0
Arab World,2017,24842442.0,26028972.0,22056667.0,23103336.0,19290904.0,20147737.0,17827249.0,18664707.0,17393546.0,18663158.0,...,5028670.0,5193784.0,3697402.0,3580807.0,2567872.0,2295115.0,1793010.0,1459539.0,1740291.0,1242957.0
Arab World,2016,24639070.0,25823833.0,21484287.0,22488538.0,18957477.0,19799808.0,17744205.0,18563417.0,17346155.0,18518692.0,...,4861410.0,5002559.0,3511525.0,3392395.0,2497768.0,2217070.0,1770021.0,1435406.0,1665086.0,1184984.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,1964,420464.0,425512.0,340424.0,341934.0,270870.0,272226.0,190024.0,194836.0,176449.0,180409.0,...,41766.0,37141.0,32354.0,27439.0,22607.0,18528.0,13345.0,9977.0,7753.0,5502.0
Zimbabwe,1963,401535.0,406613.0,333137.0,334139.0,251052.0,253308.0,187099.0,192015.0,170774.0,174959.0,...,40969.0,36277.0,31629.0,26891.0,22077.0,17961.0,12917.0,9716.0,7685.0,5451.0
Zimbabwe,1962,383481.0,388511.0,323885.0,324520.0,231145.0,234396.0,187370.0,192241.0,164078.0,168481.0,...,40220.0,35453.0,30988.0,26442.0,21617.0,17382.0,12472.0,9482.0,7463.0,5232.0
Zimbabwe,1961,370358.0,374911.0,310314.0,310962.0,214092.0,218241.0,187305.0,192190.0,157456.0,161950.0,...,39467.0,34663.0,30375.0,25980.0,21147.0,16803.0,11999.0,9214.0,7054.0,4864.0


### Cleaning the data

In [11]:
# Cast the "date" level (level 1 of the MultiIndex) to INT.
year_values = pop_df.index.levels[1].astype(int)
pop_df.index = pop_df.index.set_levels(year_values, level=1)

# Label the index levels 'country' and 'year' (the second one was "date").
pop_df = pop_df.rename_axis(index=['country', 'year'])

# Keep every row whose year is not 2020 (the 2020 row displayed for
# "Arab World" in the fetched frame holds only missing values).
in_2020 = pop_df.index.get_level_values('year').isin([2020])
pop_df = pop_df.loc[~in_2020]
pop_df

Unnamed: 0_level_0,Unnamed: 1_level_0,"Population ages 00-04, female","Population ages 00-04, male","Population ages 05-09, female","Population ages 05-09, male","Population ages 10-14, female","Population ages 10-14, male","Population ages 15-19, female","Population ages 15-19, male","Population ages 20-24, female","Population ages 20-24, male",...,"Population ages 60-64, female","Population ages 60-64, male","Population ages 65-69, female","Population ages 65-69, male","Population ages 70-74, female","Population ages 70-74, male","Population ages 75-79, female","Population ages 75-79, male","Population ages 80 and above, female","Population ages 80 and above, male"
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Arab World,2019,24854512.0,26008786.0,23284558.0,24402277.0,20147651.0,21082648.0,18115624.0,18928186.0,17449218.0,18738921.0,...,5379212.0,5608048.0,4114802.0,3992940.0,2772099.0,2503846.0,1867325.0,1530529.0,1840519.0,1307203.0
Arab World,2018,24868488.0,26042050.0,22673632.0,23761021.0,19686652.0,20576105.0,17945291.0,18778592.0,17423893.0,18736142.0,...,5196799.0,5392111.0,3904424.0,3785507.0,2657162.0,2389145.0,1823473.0,1489597.0,1799447.0,1284554.0
Arab World,2017,24842442.0,26028972.0,22056667.0,23103336.0,19290904.0,20147737.0,17827249.0,18664707.0,17393546.0,18663158.0,...,5028670.0,5193784.0,3697402.0,3580807.0,2567872.0,2295115.0,1793010.0,1459539.0,1740291.0,1242957.0
Arab World,2016,24639070.0,25823833.0,21484287.0,22488538.0,18957477.0,19799808.0,17744205.0,18563417.0,17346155.0,18518692.0,...,4861410.0,5002559.0,3511525.0,3392395.0,2497768.0,2217070.0,1770021.0,1435406.0,1665086.0,1184984.0
Arab World,2015,24167710.0,25332140.0,20973429.0,21943480.0,18688198.0,19524584.0,17679754.0,18449576.0,17280547.0,18340767.0,...,4687002.0,4810284.0,3358490.0,3230460.0,2442790.0,2151768.0,1748502.0,1411565.0,1576085.0,1113708.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,1964,420464.0,425512.0,340424.0,341934.0,270870.0,272226.0,190024.0,194836.0,176449.0,180409.0,...,41766.0,37141.0,32354.0,27439.0,22607.0,18528.0,13345.0,9977.0,7753.0,5502.0
Zimbabwe,1963,401535.0,406613.0,333137.0,334139.0,251052.0,253308.0,187099.0,192015.0,170774.0,174959.0,...,40969.0,36277.0,31629.0,26891.0,22077.0,17961.0,12917.0,9716.0,7685.0,5451.0
Zimbabwe,1962,383481.0,388511.0,323885.0,324520.0,231145.0,234396.0,187370.0,192241.0,164078.0,168481.0,...,40220.0,35453.0,30988.0,26442.0,21617.0,17382.0,12472.0,9482.0,7463.0,5232.0
Zimbabwe,1961,370358.0,374911.0,310314.0,310962.0,214092.0,218241.0,187305.0,192190.0,157456.0,161950.0,...,39467.0,34663.0,30375.0,25980.0,21147.0,16803.0,11999.0,9214.0,7054.0,4864.0


## Population Pyramids

## Population Maps

In [33]:
# Install geopandas and import it under the alias `geo`.
# NOTE(review): the output saved with this cell is `KeyError: 'MISSING'` —
# confirm that the install and import succeed on a fresh kernel.
!pip install geopandas
import geopandas as geo





KeyError: 'MISSING'

In [None]:
# Display the imported geopandas module (alias `geo`); this cell has no recorded execution.
geo