In [24]:
## If import fails with "ModuleNotFoundError"
## uncomment below & try again
!pip install wbdata



In [25]:
# import packages
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as py
import wbdata

In [26]:
## Population stats

def population(year,sex,age_range,place):
    """Print and return the number of people of one sex within an age range.

    Args:
        year: Year to look up; matched against the frame's index as
            ``str(year)``.
        sex: Either ``'Male'`` or ``'Female'``.
        age_range: Pair ``(low, high)`` of ages. The indicators come in
            five-year bins (00-04 ... 75-79) plus an open-ended 80-and-up
            bin, so every bin that overlaps the range is counted in full.
        place: Country/region code passed to ``wbdata`` (e.g. ``'WLD'``);
            also echoed in the printed summary.

    Returns:
        The sum of the selected age bins for ``year``.

    Raises:
        ValueError: if ``sex`` is not recognised, ``low > high``, or the
            range overlaps no age bin.
        IndexError: if the downloaded frame has no row for ``year``.
    """
    # Indicator-code suffix and column-label prefix for each supported sex.
    sex_parts = {'Male': (".MA", "Males "), 'Female': (".FE", "Females ")}
    if sex not in sex_parts:
        raise ValueError(f"sex must be 'Male' or 'Female', got {sex!r}")

    low, high = age_range[0], age_range[1]
    if low > high:
        raise ValueError(f"age_range must be (low, high) with low <= high, got {age_range!r}")

    # A bin [start, start + 4] is selected when it overlaps [low, high]:
    # fully inside the range, or containing either endpoint.
    age_bins = [
        f"{start:02d}" + f"{start + 4:02d}"
        for start in range(0, 80, 5)
        if start <= high and start + 4 >= low
    ]
    if high >= 80:
        age_bins.append("80UP")
    if not age_bins:
        raise ValueError(f"age_range {age_range!r} does not overlap any age bin")

    code_suffix, label_prefix = sex_parts[sex]
    variables = {"SP.POP." + age_bin + code_suffix: label_prefix + age_bin for age_bin in age_bins}

    # Query the requested place rather than a fixed world aggregate.
    df = wbdata.get_dataframe(variables,country=place)

    year_rows = df.filter(items = [str(year)], axis=0)
    if year_rows.empty:
        raise IndexError(f"no population data for year {year} in {place}")
    population_sum = sum(year_rows.iloc[0])

    print('In',year,', there are',population_sum,sex,'aged',age_range[0],'to',age_range[1],'living in',place)
    return population_sum

In [27]:
## Population df

def population_df(region):
    """Build the age-sex population table for one region.

    ``region`` is the 3-letter country/region code handed to ``wbdata``. The
    returned pandas DataFrame is indexed by ("Region", "date") and has one
    column per sex and age bin, labelled "M:00-04", "F:00-04", ...,
    "M:80-UP", "F:80-UP".
    """
    # (indicator-code suffix, column-label prefix) for each sex, males first.
    sexes = ((".MA", "M:"), (".FE", "F:"))

    # Indicator code -> column label for the five-year bins 00-04 ... 75-79.
    variable_labels = {
        "SP.POP." + f"{low:02d}{low + 4:02d}" + suffix: prefix + f"{low:02d}-{low + 4:02d}"
        for low in range(0, 80, 5)
        for suffix, prefix in sexes
    }
    # The open-ended top bin has its own code and label.
    variable_labels.update(
        {"SP.POP." + "80UP" + suffix: prefix + "80-UP" for suffix, prefix in sexes}
    )

    # Fetch the series for the requested region.
    raw = wbdata.get_dataframe(variable_labels, country = region)

    # Tag every row with the region and index by (Region, date).
    return (
        raw.reset_index()
        .assign(Region=region)
        .set_index(["Region", "date"])
    )

In [28]:
## Population df example: full age-sex table for region code 'WLD'
df = population_df(region='WLD')
df

Unnamed: 0_level_0,Unnamed: 1_level_0,M:00-04,F:00-04,M:05-09,F:05-09,M:10-14,F:10-14,M:15-19,F:15-19,M:20-24,F:20-24,...,M:60-64,F:60-64,M:65-69,F:65-69,M:70-74,F:70-74,M:75-79,F:75-79,M:80-UP,F:80-UP
Region,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
WLD,2021,344650023.0,325642464.0,351823148.0,330431921.0,340134768.0,318493196.0,321211465.0,301115793.0,309096185.0,290347797.0,...,154828871.0,165954976.0,129373392.0,145660097.0,91921639.0,109247654.0,56383161.0,71828549.0,58371269.0,95845624.0
WLD,2020,348843527.0,329071642.0,350861843.0,329225029.0,336694403.0,315221758.0,319186586.0,299112617.0,307711749.0,289115754.0,...,154455368.0,165545454.0,126216520.0,141973645.0,87919280.0,103935445.0,55253441.0,70462837.0,57363155.0,94014398.0
WLD,2019,351934193.0,331507104.0,349385852.0,327560200.0,332876020.0,311628516.0,317132294.0,297053915.0,306772989.0,288370783.0,...,153577832.0,164724422.0,122089581.0,137228598.0,83320876.0,98120908.0,54369418.0,69603886.0,55792406.0,91536492.0
WLD,2018,354088174.0,333098585.0,347389724.0,325470037.0,329029796.0,308035159.0,315050425.0,295004074.0,306524173.0,288393154.0,...,151598168.0,162791878.0,117713443.0,132182084.0,79116689.0,92955177.0,53385227.0,68827619.0,53954172.0,88876981.0
WLD,2017,355156429.0,333723909.0,344546700.0,322664475.0,325497715.0,304742832.0,313205095.0,293243362.0,306617594.0,288853402.0,...,148260664.0,159421927.0,113124371.0,126840509.0,75382806.0,88462740.0,52537112.0,68171738.0,52078457.0,86148342.0
WLD,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WLD,1964,239069096.0,228738468.0,208246346.0,199318629.0,184392252.0,176757980.0,148807806.0,142871756.0,127455138.0,122964015.0,...,43478702.0,49896011.0,30943044.0,38267820.0,20060575.0,26880855.0,11898896.0,17371906.0,7760556.0,13009636.0
WLD,1963,230599710.0,220693247.0,206420446.0,197666059.0,178823880.0,171378294.0,142714269.0,136941008.0,127354732.0,122753547.0,...,42405818.0,48676464.0,30136635.0,37307082.0,19767119.0,26387818.0,11597444.0,16790332.0,7593928.0,12587661.0
WLD,1962,224041736.0,214454487.0,202783378.0,194296395.0,172741508.0,165520684.0,137464629.0,131785923.0,127460456.0,122722609.0,...,41315095.0,47453135.0,29309265.0,36251659.0,19564704.0,26019479.0,11276494.0,16202664.0,7473921.0,12237753.0
WLD,1961,222196886.0,212714001.0,198854626.0,190620416.0,166558402.0,159479034.0,133606813.0,128070910.0,127266768.0,122408085.0,...,40297521.0,46346218.0,28589999.0,35283959.0,19386067.0,25648744.0,11014794.0,15739382.0,7361753.0,11915207.0


In [29]:
## Population pyramid func 

def _bin_start_ages(labels):
    """Return the lower age bound encoded in each 'S:LL-HH' column label."""
    return [int(str(label).split(":", 1)[1].split("-", 1)[0]) for label in labels]


def pop_pyramid(df, year):
    """Draw a population pyramid for one year.

    Args:
        df: Frame with one row per date (optionally with an extra 'Region'
            index level) and columns labelled "M:LL-HH" / "F:LL-HH", where LL
            is the two-digit lower age bound of the bin.
        year: Year to plot; looked up in the index as ``str(year)``.

    Returns:
        Whatever ``py.iplot`` returns for the assembled figure.

    Uses the notebook-level names ``go`` (plotly graph objects) and ``py``
    (plotly offline). ``df`` itself is left unmodified, so the function can
    be called repeatedly on the same frame.
    """
    # Work on a copy without the 'Region' level instead of mutating the
    # caller's frame.
    if 'Region' in df.index.names:
        frame = df.reset_index(level='Region', drop=True)
    else:
        frame = df

    row = frame.loc[str(year),:]
    men = row.filter(regex="M:")
    women = row.filter(regex="F:")
    men_bins = men.values
    # Women are plotted as negative values so their bars extend to the left.
    women_bins = -women.values

    layout = go.Layout(yaxis=go.layout.YAxis(title='Age'),
                   xaxis=go.layout.XAxis(
                       title='Number'),
                   barmode='overlay',
                   bargap=0.1)

    # Each bar sits at the lower age bound of its own bin, so there is
    # exactly one y position per plotted value.
    data = [go.Bar(y=_bin_start_ages(men.index),
               x=men_bins,
               orientation='h',
               name='Men',
               hoverinfo='x',
               marker=dict(color='powderblue')
               ),
        go.Bar(y=_bin_start_ages(women.index),
               x=women_bins,
               orientation='h',
               name='Women',
               # Hover text shows the positive count, not the negated value.
               text=-1 * women_bins.astype('int'),
               hoverinfo='text',
               marker=dict(color='pink')
               )]

    return py.iplot(dict(data=data, layout=layout))


In [30]:
## Population pyramid example: region code 'AFE', year 2020
df = population_df(region='AFE')
pop_pyramid(df, year=2020)

In [32]:
### sliders 

# The population indicators used below have codes of the form
# "SP.POP.LLHH.MA" for males and "SP.POP.LLHH.FE" for females, where LL is
# the *low* end of the age range (e.g. "05" for 5-year-olds) and HH is the
# *high* end.
# Build the list of "LLHH" age-range codes: five-year bins starting at
# 0, 5, ..., 75, followed by the open-ended "80UP" bin.
age_ranges = [f"{low:02d}{low + 4:02d}" for low in range(0, 80, 5)] + ["80UP"]

print(age_ranges)


['0004', '0509', '1014', '1519', '2024', '2529', '3034', '3539', '4044', '4549', '5054', '5559', '6064', '6569', '7074', '7579', '80UP']


In [33]:
# Map each indicator code to a readable column label, one dict per sex.
male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}
# Merge into a new dict (males first, then females) so that male_variables
# keeps only the male indicators instead of being extended in place.
variables = {**male_variables, **female_variables}

In [37]:
# Download every series listed in `variables` for country code "WLD",
# passing cache=False to wbdata.
df = wbdata.get_dataframe(
    variables,
    country="WLD",
    cache=False,
)

In [38]:
import plotly.graph_objs as go
import ipywidgets
from ipywidgets import interactive, HBox, VBox

def helper(year):
    """Render the population pyramid for ``year`` (used as the slider callback).

    Reads the notebook-level ``df`` (row looked up by ``str(year)``, columns
    matched by the patterns "Male" / "Female") and ``age_ranges``. Men are
    drawn with positive values, women with negated values so their bars
    point the other way.
    """
    py.init_notebook_mode(connected=True)

    year_row = df.loc[str(year),:]
    # Bar position for each bin: its two-digit lower age bound plus one.
    bar_positions = [int(code[:2])+1 for code in age_ranges]

    men_trace = go.Bar(
        x=year_row.filter(regex="Male").values,
        y=list(bar_positions),
        orientation='h',
        name='Men',
        marker=dict(color='green'),
        hoverinfo='skip',
    )
    women_trace = go.Bar(
        x=-year_row.filter(regex="Female").values,
        y=list(bar_positions),
        orientation='h',
        name='Women',
        marker=dict(color='pink'),
        hoverinfo='skip',
    )

    pyramid_layout = go.Layout(
        barmode='overlay',
        yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
        xaxis=go.layout.XAxis(title='Number'),
    )
    py.iplot(dict(data=[men_trace, women_trace], layout=pyramid_layout))
# Year slider from 1980 to 2018 in steps of one; `helper` is called with the
# selected year.
ipywidgets.interact(
    helper,
    year=(1980, 2018, 1),
)

interactive(children=(IntSlider(value=1999, description='year', max=2018, min=1980), Output()), _dom_classes=(…

<function __main__.helper(year)>

In [40]:
### animated

import plotly.graph_objects as go
# Draw a static pyramid for a single year first, to check the traces and
# layout before animating.
year = 2020
bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
           y = [int(s[:2])+1 for s in age_ranges],
           orientation='h',
           name='Men',
           marker=dict(color='green'),
           hoverinfo='skip'
           ),
    # Women are negated so their bars extend in the opposite direction.
    go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
           y=[int(s[:2])+1 for s in age_ranges],
           orientation='h',
           name='Women',
           marker=dict(color='pink'),
           hoverinfo='skip',
           )
    ]

# Layout without a title: overlay the two traces and label the axes.
layout = go.Layout(barmode='overlay',
               yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
               xaxis=go.layout.XAxis(title='Number'))

# Combine the traces AND the layout into a figure; without `layout=` the
# overlay mode, axis range and axis titles above would not be applied.
fig = go.Figure(data=bins, layout=layout)

# to see what we have so far
fig.show()

In [45]:
# Play button: animates through all frames, 20 ms per frame. It does not
# depend on the year, so it is built once, outside the loop.
button = {
    "type": "buttons",
    "buttons": [
        {
            "label": "Play",
            "method": "animate",
            "args": [None, {"frame": {"duration": 20}}],
        }
    ],
}

# create a list of frames
frames = []
# One frame per year in the data, oldest first so the animation plays
# forward in time.
for year in sorted(df.index):
    # pyramid traces for this year (women negated so their bars point the
    # other way)
    bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='green'),
               hoverinfo='skip'
               ),
        go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
    # create a frame object whose title names the year it shows
    frame = go.Frame(
        data= bins,
        layout=go.Layout(title_text=f"Population Pyramid for {year}")
    )
    # add the frame object to the frames list
    frames.append(frame)

# Layout for the animated figure: the Play button plus overlay mode and
# labelled axes. The title names the last year processed, which is also the
# year held in `bins` after the loop.
layout = go.Layout(updatemenus=[button],
                   barmode='overlay',
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'),
                   title_text=f"Population Pyramid for {year}")

In [46]:
# Assemble the animated figure: `bins` (the traces left over from the last
# loop pass) is the starting view, `frames` holds the animation steps, and
# `layout` carries the Play button.
fig = go.Figure(
    data=bins,
    layout=layout,
    frames=frames,
)

# show our animation!
fig.show()