In [123]:
# install necessary packages 

!pip install wbdata
import wbdata # IF NECESSARY
import cufflinks as cf
cf.go_offline()
import pandas as pd
import numpy as np
from scipy import stats



In [124]:
# DEFINE FUNCTIONS

# this one is to get relevant migration statistics we're interested in
def migration_organizing(data1):
    
    # Make years ints instead of strings
    data1.reset_index(inplace=True)
    data1['date'] = data1['date'].astype(int)

    # index the table by country instead of year 
    data1.set_index(['country'],inplace=True)
    
    #takes all negative values, we want migration AWAY
    mig_table = data1[data1['Net Migration'] < 0] 
    
    #takes absolute value to get magnitude rather than negative
    mig_table['Net Migration'] = mig_table['Net Migration'].abs() 
    
    #creates new column, called 'Migration Per Capita'
    mig_table['Migration per Capita'] = mig_table['Net Migration']/mig_table['Total Population'] 
    
    #takes migration per capita and makes it a rate
    mig_table['Migration Rate (%)'] = mig_table['Migration per Capita']*100   
    
    # This line of code is initialized because we are interested in a population contingent 
    # on trends in the past ten years (rather than since '64). Additionally, we look at data 
    # from the ESG dataset, which only started being compiled in 2014'''
    mig_table = mig_table[mig_table['date'] > 2013]

    return mig_table



''' This function is a manual way of creating a dataframe that is usually performed by 
    a "group_by" function. The reason for this distinction is because we used the group_by
    function, but were really struggling to perform table-operations on the group_by frame, 
    as that function creates a special type of dataframe. This method was much faster.'''

def setup_long(dataframe):
    
    #percentiles = {'values': [50, 75, 80, 90]}
    unique_country_indices = dataframe.index.unique()

    if not unique_country_indices.empty:
        
        # Create an empty DataFrame with columns
        results = pd.DataFrame(columns=['Net Migration', 'Migration Rate (%)'])

        for country_index in unique_country_indices:
            country_data = dataframe.loc[country_index]
            mig_net_avg = country_data['Net Migration'].mean()
            mig_percap_avg = country_data['Migration Rate (%)'].mean()

            # Append the computed averages to the results DataFrame
            results.loc[country_index] = [mig_net_avg, mig_percap_avg]
        
        # calculate the percentiles for the migration rates 
        results['Percentile Rank'] = results['Migration Rate (%)'].apply(
            lambda x: stats.percentileofscore(results['Migration Rate (%)'], x))

        return results 
    
    
    
''' This is the function that we use to retrieve population statistics, as outlined in the 
    [A] deliverables posted on Ed.'''


def population(year, sex, age_low, age_high, country_code):
    
    # reconstruct the strings for population codes associated 
    # as entered in "age_low" and "age_high" arguments
    
    if sex == "Male":
        column_names = {"SP.POP." + str(age_low)+str(age_high) + ".MA": sex}
    elif sex == "Female":
        column_names = {"SP.POP." + str(age_low)+str(age_high) + ".FE": sex}
    
    # construct new dataframe for function to index, isolating 
    # the country by the function's country-code argument
    pop_stats = wbdata.get_dataframe(column_names, country = country_code)
    
    # filter the table by the function's year' argument
    
    pop_stats = pop_stats.filter(like=str(year), axis=0)
    # return population number by indexing the function-generated 
    # dataframe by the function's 'sex' argument, and making it an integer

    return int(pop_stats[sex].iloc[0])



''' This function retrieves a dataframe for specific year, country, and indicators selected. 
    The function assumes that the argument pop_indicators has already been defined with a 
    relevant WBData code dictionary.'''

def population_dataframe(year, country_code, pop_indicators):
        
    pop_df = wbdata.get_dataframe(pop_indicators, country = country_code)
    
    # filter the table by the function's year' argument
    
    pop_df = pop_df.filter(like=str(year), axis=0)
    
    # return population dataframe by indexing  
    # by the function's 'sex' argument

    return pop_df

In [186]:
# this gets the table WE want to use (with the countries we want)

indicators_new = {"SP.POP.TOTL": "Total Population", 
                  # first, pop stats
                      "SP.POP.TOTL.FE.IN": "Population, female",
                      "SP.POP.TOTL.MA.IN": "Population, male",
                      "SP.POP.0004.FE": "Population ages 00-04, female",
                      "SP.POP.0004.MA": "Population ages 00-04, male",
                      "SP.POP.0014.FE.IN": "Population ages 0-14, female",
                      "SP.POP.0014.MA.IN": "Population ages 0-14, male",
                      "SP.POP.0014.TO": "Population ages 0-14, total",
                      "SP.POP.0509.FE": "Population ages 05-09, female",
                      "SP.POP.0509.MA": "Population ages 05-09, male",
                      "SP.POP.1014.FE": "Population ages 10-14, female",
                      "SP.POP.1014.MA": "Population ages 10-14, male",
                      "SP.POP.1519.FE": "Population ages 15-19, female",
                      "SP.POP.1519.MA": "Population ages 15-19, male",
                      "SP.POP.1564.FE.IN": "Population ages 15-64, female",
                      "SP.POP.1564.MA.IN": "Population ages 15-64, male",
                      "SP.POP.1564.TO": "Population ages 15-64, total",
                      "SP.POP.2024.FE": "Population ages 20-24, female",
                      "SP.POP.2024.MA": "Population ages 20-24, male",
                      "SP.POP.2529.FE": "Population ages 25-29, female",
                      "SP.POP.2529.MA": "Population ages 25-29, male",
                      "SP.POP.3034.FE": "Population ages 30-34, female",
                      "SP.POP.3034.MA": "Population ages 30-34, male",
                      "SP.POP.3539.FE": "Population ages 35-39, female",
                      "SP.POP.3539.MA": "Population ages 35-39, male",
                      "SP.POP.4044.FE": "Population ages 40-44, female",
                      "SP.POP.4044.MA": "Population ages 40-44, male",
                      "SP.POP.4549.FE": "Population ages 45-49, female",
                      "SP.POP.4549.MA": "Population ages 45-49, male",
                      "SP.POP.5054.FE": "Population ages 50-54, female",
                      "SP.POP.5054.MA": "Population ages 50-54, male",
                      "SP.POP.5559.FE": "Population ages 55-59, female",
                      "SP.POP.5559.MA": "Population ages 55-59, male",
                      "SP.POP.6064.FE": "Population ages 60-64, female",
                      "SP.POP.6064.MA": "Population ages 60-64, male",
                      "SP.POP.6569.FE": "Population ages 65-69, female",
                      "SP.POP.6569.MA": "Population ages 65-69, male",
                      "SP.POP.65UP.FE.IN": "Population ages 65 and above, female",
                      "SP.POP.65UP.MA.IN": "Population ages 65 and above, male",
                      "SP.POP.65UP.TO": "Population ages 65 and above, total",
                      "SP.POP.7074.FE": "Population ages 70-74, female",
                      "SP.POP.7074.MA": "Population ages 70-74, male",
                      "SP.POP.7579.FE": "Population ages 75-79, female",
                      "SP.POP.7579.MA": "Population ages 75-79, male",
                      "SP.POP.80UP.FE": "Population ages 80 and above, female",
                      "SP.POP.80UP.MA": "Population ages 80 and above, male", 
                  
                  # some environmental variables
                      "AG.PRD.FOOD.XD": "Food Production Index",
                      "EN.POP.DNST": "Population per sq km", 
                      "SN.ITK.DEFC.ZS": "Prevalence of Undernourishment (% of Population)", 
                      "EG.ELC.COAL.ZS": "Electricity production from coal sources", 
                      "SM.POP.NETM": "Net Migration",
                  "EN.H2O.BDYS.ZS": "Proportion of bodies of water with good ambient water quality", 
                  "ER.H2O.FWTL.ZS": "Annual freshwater withdrawals, total (% of internal resources)",
                  "SH.H2O.SMDW.ZS": "People using safely managed drinking water services (% of population)", 
                  "SH.STA.SMSS.ZS": "People using safely managed sanitation services (% of population)", 
                  "SH.MED.BEDS.ZS": "Hospital beds (per 1,000 people)",  
                  "SE.XPD.TOTL.GB.ZS": "Government Expenditure on Education (% of Total Expenditure)", 
                  
                  # some political variables
                      "PV.EST": "Political Stability and Absence of Violence/Terrorism (estimate)", 
                      "IC.LGL.CRED.XQ": "Strength of Legal Rights Index (0-12 Scale)", 
                    "EG.ELC.ACCS.ZS": "Access to electricity (% of population)", 
                    "GE.EST": "Government Effectiveness Estimate", 
                    "CC.EST": "Control of Corruption: Estimate", 
                    "SI.DST.FRST.20": "Income Share Held by Lowest 20%", 
                  "SD.ESR.PERF.XQ": "Economic and Social Rights Performance Score",
                  "RL.EST": "Rule of Law: Estimate",  
                  
                  # more scientific environmental variables
                  "EN.LND.LTMP.DC": "Land Surface Temperature",
                  "EN.ATM.PM25.MC.M3": "PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)", 
                  "EN.CLC.CDDY.XD": "Cooling Degree Days", 
                  "EN.CLC.HEAT.XD": "Heat Index 35"}

# these do not work                
#"EN.ATM.CO2E.PCCO2": "emissions (metric tons per capita)", 
#"EN.ATM.METH.PC": "Methane emissions (kt of CO2 equivalent per capita)",
#"EN.ATM.NOXE.PC": "Nitrous oxide emissions (metric tons of CO2 equivalent per capita)",
                  



indicators_reduced = wbdata.get_dataframe(indicators_new, country = [
    'CUW', 'MHL', 'QAT', 'SYR', 'ASM', 'LBN', 'SSD', 'KWT', 'MAF', 'VEN', 
    'OMN', 'TON', 'SAU', 'XKX', 'MNP', 'MDA', 'CAF', 'BHR', 'GUY', 'WSM', 
    'PRI', 'ERI', 'FJI', 'FSM', 'TUV', 'BIH', 'GUM', 'VCT', 'ARM', 'PSS', 
    'STP', 'ZAF', 'SWZ', 'NCL', 'SLV', 'BDI', 'PAK', 'KIR', 'GRL', 'DMA', 
    'LTU', 'NPL'])

#indicators_reduced

indicators_reduced.index

MultiIndex([('American Samoa', '1960'),
            ('American Samoa', '1961'),
            ('American Samoa', '1962'),
            ('American Samoa', '1963'),
            ('American Samoa', '1964'),
            ('American Samoa', '1965'),
            ('American Samoa', '1966'),
            ('American Samoa', '1967'),
            ('American Samoa', '1968'),
            ('American Samoa', '1969'),
            ...
            ( 'Venezuela, RB', '2014'),
            ( 'Venezuela, RB', '2015'),
            ( 'Venezuela, RB', '2016'),
            ( 'Venezuela, RB', '2017'),
            ( 'Venezuela, RB', '2018'),
            ( 'Venezuela, RB', '2019'),
            ( 'Venezuela, RB', '2020'),
            ( 'Venezuela, RB', '2021'),
            ( 'Venezuela, RB', '2022'),
            ( 'Venezuela, RB', '2023')],
           names=['country', 'date'], length=2679)

In [191]:
mig_data_reduced = migration_organizing(indicators_reduced)
#mig_data_reduced.loc[['Curacao']]

In [127]:
# okay maybe we should make a population table for the 90th percentile, 95th percentile and 100th percentile country 

In [289]:
''' getting the countries with the percentile stuff '''

indicators = {"SM.POP.NETM": "Net Migration", "SP.POP.TOTL": "Total Population"}

mig_pop_data = wbdata.get_dataframe(indicators)
mig_pop_cleaned = migration_organizing(mig_pop_data)
mig_pop_data

Unnamed: 0_level_0,date,Net Migration,Total Population
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa Eastern and Southern,2022,,720859132.0
Africa Eastern and Southern,2021,-179444.0,702977106.0
Africa Eastern and Southern,2020,-48955.0,685112979.0
Africa Eastern and Southern,2019,-187410.0,667242986.0
Africa Eastern and Southern,2018,-366105.0,649757148.0
...,...,...,...
Zimbabwe,1964,-10064.0,4310332.0
Zimbabwe,1963,-9369.0,4177931.0
Zimbabwe,1962,-8931.0,4049778.0
Zimbabwe,1961,-8582.0,3925952.0


In [139]:
avgs = setup_long(mig_pop_cleaned)
avgs = avgs.sort_values(by = ['Migration Rate (%)'], ascending = False)
avgs1 = avgs[avgs['Percentile Rank'] >= 90]
country_array = avgs1.index.tolist()

# THIS is our selected "population"!! 

country_array

['Curacao',
 'Marshall Islands',
 'Qatar',
 'Syrian Arab Republic',
 'American Samoa',
 'Lebanon',
 'South Sudan',
 'Kuwait',
 'St. Martin (French part)',
 'Venezuela, RB',
 'Oman',
 'Tonga',
 'Saudi Arabia',
 'Kosovo',
 'Northern Mariana Islands',
 'Moldova',
 'Central African Republic',
 'Bahrain',
 'Guyana',
 'Samoa']

In [137]:
avgs1

Unnamed: 0,Net Migration,Migration Rate (%),Percentile Rank
Curacao,9592.0,6.092441,100.0
Marshall Islands,1901.0,4.107915,99.481865
Qatar,97400.0,3.543467,98.963731
Syrian Arab Republic,678143.5,3.455758,98.445596
American Samoa,1638.5,3.365898,97.927461
Lebanon,197743.142857,3.292441,97.409326
South Sudan,337661.571429,3.104315,96.891192
Kuwait,116678.75,2.798996,96.373057
St. Martin (French part),909.875,2.696374,95.854922
"Venezuela, RB",578053.125,1.960295,95.336788


In [192]:
# assign variables for percentiles

samoa_90 = mig_data_reduced.loc['Samoa']
samoa_90.set_index('date', inplace = True)
venezuela_95 = mig_data_reduced.loc['Venezuela, RB']
venezuela_95.set_index('date', inplace = True)
curacao_100 = mig_data_reduced.loc[['Curacao']]

In [193]:
samoa_90

Unnamed: 0_level_0,Total Population,"Population, female","Population, male","Population ages 00-04, female","Population ages 00-04, male","Population ages 0-14, female","Population ages 0-14, male","Population ages 0-14, total","Population ages 05-09, female","Population ages 05-09, male",...,Control of Corruption: Estimate,Income Share Held by Lowest 20%,Economic and Social Rights Performance Score,Rule of Law: Estimate,Land Surface Temperature,"PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)",Cooling Degree Days,Heat Index 35,Migration per Capita,Migration Rate (%)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,201757.0,98400.0,103356.0,14075.0,15163.0,37569.0,40027.0,77596.0,12303.0,13115.0,...,0.358954,,2.513174,0.797606,25.344914,11.140807,4794.35,0.0,0.015667,1.566736
2015,203571.0,99370.0,104201.0,14171.0,15274.0,37786.0,40343.0,78129.0,12443.0,13329.0,...,0.298591,,2.515336,0.744272,25.186954,11.016346,4763.78,0.0,0.01541,1.540986
2016,205544.0,100415.0,105129.0,14197.0,15298.0,38040.0,40701.0,78741.0,12641.0,13603.0,...,0.300258,,2.51667,0.869582,25.439488,11.315084,5070.01,0.06,0.013486,1.348616
2017,207630.0,101510.0,106119.0,14169.0,15264.0,38321.0,41090.0,79412.0,12888.0,13919.0,...,0.663342,,2.4793,0.938193,25.820867,10.90095,4953.73,0.0,0.013601,1.360112
2018,209701.0,102603.0,107099.0,14120.0,15210.0,38593.0,41463.0,80056.0,13128.0,14210.0,...,0.647212,,2.492263,0.912801,25.809946,11.044446,4873.81,0.0,0.013061,1.306145
2019,211905.0,103759.0,108147.0,14072.0,15155.0,38865.0,41823.0,80688.0,13321.0,14432.0,...,0.648035,,,1.162507,25.811655,11.014834,5077.3,0.0,0.011651,1.165145
2020,214929.0,105296.0,109634.0,14054.0,15124.0,39239.0,42273.0,81512.0,13488.0,14614.0,...,0.712195,,,1.009464,25.81745,,5004.33,0.0,0.004932,0.493186
2021,218764.0,107213.0,111550.0,14070.0,15128.0,39707.0,42809.0,82516.0,13620.0,14749.0,...,0.592588,,,0.884038,25.688572,,,,0.004032,0.403174


In [201]:
venezuela_95.index

Index([2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021], dtype='int64', name='date')

In [197]:
curacao_100

Unnamed: 0_level_0,date,Total Population,"Population, female","Population, male","Population ages 00-04, female","Population ages 00-04, male","Population ages 0-14, female","Population ages 0-14, male","Population ages 0-14, total","Population ages 05-09, female",...,Control of Corruption: Estimate,Income Share Held by Lowest 20%,Economic and Social Rights Performance Score,Rule of Law: Estimate,Land Surface Temperature,"PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)",Cooling Degree Days,Heat Index 35,Migration per Capita,Migration Rate (%)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Curacao,2019,157441.0,81416.0,76024.0,4107.0,4269.0,13730.0,14313.0,28044.0,4563.0,...,,,,,,,,,0.060924,6.092441


In [180]:
# Data from WDI on age-sex comes in the forms of variables
# which take the form "SP.POP.LLHH.MA" for males
# and "SP.POP.LLHH.FE" for females, where LL is the *low* end of
# age range, like "05" for 5-yo, and HH is the *high* end.

# We construct a list of age-ranges.

# Start with an empty list of age-rages
age_ranges = []

# Ranges top out at 80, and go in five year increments
for i in range(0,80,5):
    age_ranges.append(f"{i:02d}"+f"{i+4:02d}")

age_ranges.append("80UP")

print(age_ranges)

['0004', '0509', '1014', '1519', '2024', '2529', '3034', '3539', '4044', '4549', '5054', '5559', '6064', '6569', '7074', '7579', '80UP']


In [223]:
male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

variables = male_variables
variables.update(female_variables)

samoa_90_pop = wbdata.get_dataframe(variables, country=['WSM'])
venezuela_95_pop = wbdata.get_dataframe(variables, country=['VEN'])
curacao_100_pop = wbdata.get_dataframe(variables, country=['CUW'])

In [210]:
# WLD is the World; substitute your own code or list of codes.
# Remember you can search for the appropriate codes using
# wbdata.search_countries("")

In [227]:
# for plotting a single country  

import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'Samoa Population Pyramid, Year 2019', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = samoa_90_pop.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -samoa_90_pop.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [230]:
py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'Venezuela, RB Population Pyramid, Year 2019',  
                   width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = venezuela_95_pop.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -venezuela_95_pop.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [231]:
py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'Venezuela, RB Population Pyramid, Year 2019',
                    width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = curacao_100_pop.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -curacao_100_pop.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [286]:
codes = ['CUW', 'MHL', 'QAT', 'SYR', 'ASM', 'LBN', 'SSD', 'KWT', 'MAF', 'VEN', 'OMN', 
         'TON', 'SAU', 'XKX', 'MNP', 'MDA', 'CAF', 'BHR', 'GUY', 'WSM']
 
    
age_ranges = []

# Ranges top out at 80, and go in five year increments
for i in range(0,80,5):
    age_ranges.append(f"{i:02d}"+f"{i+4:02d}")
age_ranges.append("80UP")

male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

variables = male_variables
variables.update(female_variables)


#date_range = pd.date_range(start=start_date, end=end_date)
#date_range

#print(start_date)

In [292]:
def pyramid_all(yrstring):
    
    for code in codes: 
        
        pop_df = wbdata.get_dataframe(variables, country=[code])
        
        py.init_notebook_mode(connected=True)

        layout = go.Layout(barmode='overlay', title = code + ' Population Pyramid, 2019',
                   width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

        bins = [go.Bar(x = pop_df.loc[yrstring,:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

               go.Bar(x = -pop_df.loc[yrstring,:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )]
        
        py.iplot(dict(data=bins, layout=layout))

In [293]:
# writing a for-loop to plot all of the pyramids 


pyramid_all('2019')

In [254]:
year_begin = pd.to_datetime('2019')
year_end = pd.to_datetime('2020')
date_range = pd.Series([year_begin, year_end])
date_range

0   2019-01-01
1   2020-01-01
dtype: datetime64[ns]

In [27]:
df = df.reset_index().set_index(['country','date'])


In [32]:
l =[]
for x in range(3):
    l += [x**2]
l

[0, 1, 4]

In [30]:
[x**2 for x in range(3)]

[0, 1, 4]

In [25]:
#countries_for_plot = df.index[0]

#for country in countries_for_plot


import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay',
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

year = '2016'

bins = [go.Bar(x = df.xs(year,level='date').query(f"country=={country}").filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men {:d}'.format(year),
               #marker=dict(color='purple'),
               hoverinfo='skip',
               opacity=0.5
               )
             for country in countries_for_plot]

bins += [go.Bar(x = -df.xs(year,level='date'.query(f"country=={country}").filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
            for country in countries_for_plot

py.iplot(dict(data=bins, layout=layout))