In [1]:
# install necessary packages 

!pip install wbdata
import wbdata # IF NECESSARY
import cufflinks as cf
cf.go_offline()
import pandas as pd
import numpy as np
from scipy import stats



In [2]:
# DEFINE FUNCTIONS

# this one is to get relevant migration statistics we're interested in
def migration_organizing(data1):
    
    # Make years ints instead of strings
    data1.reset_index(inplace=True)
    data1['date'] = data1['date'].astype(int)

    # index the table by country instead of year 
    data1.set_index(['country'],inplace=True)
    
    #takes all negative values, we want migration AWAY
    mig_table = data1[data1['Net Migration'] < 0] 
    
    #takes absolute value to get magnitude rather than negative
    mig_table['Net Migration'] = mig_table['Net Migration'].abs() 
    
    #creates new column, called 'Migration Per Capita'
    mig_table['Migration per Capita'] = mig_table['Net Migration']/mig_table['Total Population'] 
    
    #takes migration per capita and makes it a rate
    mig_table['Migration Rate (%)'] = mig_table['Migration per Capita']*100   
    
    # This line of code is initialized because we are interested in a population contingent 
    # on trends in the past ten years (rather than since '64). Additionally, we look at data 
    # from the ESG dataset, which only started being compiled in 2014'''
    mig_table = mig_table[mig_table['date'] > 2013]

    return mig_table



''' This function is a manual way of creating a dataframe that is usually performed by 
    a "group_by" function. The reason for this distinction is because we used the group_by
    function, but were really struggling to perform table-operations on the group_by frame, 
    as that function creates a special type of dataframe. This method was much faster.'''

def setup_long(dataframe):
    
    #percentiles = {'values': [50, 75, 80, 90]}
    unique_country_indices = dataframe.index.unique()

    if not unique_country_indices.empty:
        
        # Create an empty DataFrame with columns
        results = pd.DataFrame(columns=['Net Migration', 'Migration Rate (%)'])

        for country_index in unique_country_indices:
            country_data = dataframe.loc[country_index]
            mig_net_avg = country_data['Net Migration'].mean()
            mig_percap_avg = country_data['Migration Rate (%)'].mean()

            # Append the computed averages to the results DataFrame
            results.loc[country_index] = [mig_net_avg, mig_percap_avg]
        
        # calculate the percentiles for the migration rates 
        results['Percentile Rank'] = results['Migration Rate (%)'].apply(
            lambda x: stats.percentileofscore(results['Migration Rate (%)'], x))

        return results 
    
    
    
''' This is the function that we use to retrieve population statistics, as outlined in the 
    [A] deliverables posted on Ed.'''


def population(year, sex, age_low, age_high, country_code):
    
    # reconstruct the strings for population codes associated 
    # as entered in "age_low" and "age_high" arguments
    
    if sex == "Male":
        column_names = {"SP.POP." + str(age_low)+str(age_high) + ".MA": sex}
    elif sex == "Female":
        column_names = {"SP.POP." + str(age_low)+str(age_high) + ".FE": sex}
    
    # construct new dataframe for function to index, isolating 
    # the country by the function's country-code argument
    pop_stats = wbdata.get_dataframe(column_names, country = country_code)
    
    # filter the table by the function's year' argument
    
    pop_stats = pop_stats.filter(like=str(year), axis=0)
    # return population number by indexing the function-generated 
    # dataframe by the function's 'sex' argument, and making it an integer

    return int(pop_stats[sex].iloc[0])



''' This function retrieves a dataframe for specific year, country, and indicators selected. 
    The function assumes that the argument pop_indicators has already been defined with a 
    relevant WBData code dictionary.'''

def population_dataframe(year, country_code, pop_indicators):
        
    pop_df = wbdata.get_dataframe(pop_indicators, country = country_code)
    
    # filter the table by the function's year' argument
    
    pop_df = pop_df.filter(like=str(year), axis=0)
    
    # return population dataframe by indexing  
    # by the function's 'sex' argument

    return pop_df

In [3]:
# this gets the table WE want to use (with the countries we want)

indicators_new = {"SP.POP.TOTL": "Total Population", 
                  # first, pop stats
                      "SP.POP.TOTL.FE.IN": "Population, female",
                      "SP.POP.TOTL.MA.IN": "Population, male",
                      "SP.POP.0004.FE": "Population ages 00-04, female",
                      "SP.POP.0004.MA": "Population ages 00-04, male",
                      "SP.POP.0014.FE.IN": "Population ages 0-14, female",
                      "SP.POP.0014.MA.IN": "Population ages 0-14, male",
                      "SP.POP.0014.TO": "Population ages 0-14, total",
                      "SP.POP.0509.FE": "Population ages 05-09, female",
                      "SP.POP.0509.MA": "Population ages 05-09, male",
                      "SP.POP.1014.FE": "Population ages 10-14, female",
                      "SP.POP.1014.MA": "Population ages 10-14, male",
                      "SP.POP.1519.FE": "Population ages 15-19, female",
                      "SP.POP.1519.MA": "Population ages 15-19, male",
                      "SP.POP.1564.FE.IN": "Population ages 15-64, female",
                      "SP.POP.1564.MA.IN": "Population ages 15-64, male",
                      "SP.POP.1564.TO": "Population ages 15-64, total",
                      "SP.POP.2024.FE": "Population ages 20-24, female",
                      "SP.POP.2024.MA": "Population ages 20-24, male",
                      "SP.POP.2529.FE": "Population ages 25-29, female",
                      "SP.POP.2529.MA": "Population ages 25-29, male",
                      "SP.POP.3034.FE": "Population ages 30-34, female",
                      "SP.POP.3034.MA": "Population ages 30-34, male",
                      "SP.POP.3539.FE": "Population ages 35-39, female",
                      "SP.POP.3539.MA": "Population ages 35-39, male",
                      "SP.POP.4044.FE": "Population ages 40-44, female",
                      "SP.POP.4044.MA": "Population ages 40-44, male",
                      "SP.POP.4549.FE": "Population ages 45-49, female",
                      "SP.POP.4549.MA": "Population ages 45-49, male",
                      "SP.POP.5054.FE": "Population ages 50-54, female",
                      "SP.POP.5054.MA": "Population ages 50-54, male",
                      "SP.POP.5559.FE": "Population ages 55-59, female",
                      "SP.POP.5559.MA": "Population ages 55-59, male",
                      "SP.POP.6064.FE": "Population ages 60-64, female",
                      "SP.POP.6064.MA": "Population ages 60-64, male",
                      "SP.POP.6569.FE": "Population ages 65-69, female",
                      "SP.POP.6569.MA": "Population ages 65-69, male",
                      "SP.POP.65UP.FE.IN": "Population ages 65 and above, female",
                      "SP.POP.65UP.MA.IN": "Population ages 65 and above, male",
                      "SP.POP.65UP.TO": "Population ages 65 and above, total",
                      "SP.POP.7074.FE": "Population ages 70-74, female",
                      "SP.POP.7074.MA": "Population ages 70-74, male",
                      "SP.POP.7579.FE": "Population ages 75-79, female",
                      "SP.POP.7579.MA": "Population ages 75-79, male",
                      "SP.POP.80UP.FE": "Population ages 80 and above, female",
                      "SP.POP.80UP.MA": "Population ages 80 and above, male", 
                  
                  # some environmental variables
                      "AG.PRD.FOOD.XD": "Food Production Index",
                      "EN.POP.DNST": "Population per sq km", 
                      "SN.ITK.DEFC.ZS": "Prevalence of Undernourishment (% of Population)", 
                      "EG.ELC.COAL.ZS": "Electricity production from coal sources", 
                      "SM.POP.NETM": "Net Migration",
                  "EN.H2O.BDYS.ZS": "Proportion of bodies of water with good ambient water quality", 
                  "ER.H2O.FWTL.ZS": "Annual freshwater withdrawals, total (% of internal resources)",
                  "SH.H2O.SMDW.ZS": "People using safely managed drinking water services (% of population)", 
                  "SH.STA.SMSS.ZS": "People using safely managed sanitation services (% of population)", 
                  "SH.MED.BEDS.ZS": "Hospital beds (per 1,000 people)",  
                  "SE.XPD.TOTL.GB.ZS": "Government Expenditure on Education (% of Total Expenditure)", 
                  
                  # some political variables
                      "PV.EST": "Political Stability and Absence of Violence/Terrorism (estimate)", 
                      "IC.LGL.CRED.XQ": "Strength of Legal Rights Index (0-12 Scale)", 
                    "EG.ELC.ACCS.ZS": "Access to electricity (% of population)", 
                    "GE.EST": "Government Effectiveness Estimate", 
                    "CC.EST": "Control of Corruption: Estimate", 
                    "SI.DST.FRST.20": "Income Share Held by Lowest 20%", 
                  "SD.ESR.PERF.XQ": "Economic and Social Rights Performance Score",
                  "RL.EST": "Rule of Law: Estimate",  
                  
                  # more scientific environmental variables
                  "EN.LND.LTMP.DC": "Land Surface Temperature",
                  "EN.ATM.PM25.MC.M3": "PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)", 
                  "EN.CLC.CDDY.XD": "Cooling Degree Days", 
                  "EN.CLC.HEAT.XD": "Heat Index 35"}

# these do not work                
#"EN.ATM.CO2E.PCCO2": "emissions (metric tons per capita)", 
#"EN.ATM.METH.PC": "Methane emissions (kt of CO2 equivalent per capita)",
#"EN.ATM.NOXE.PC": "Nitrous oxide emissions (metric tons of CO2 equivalent per capita)",
                  



indicators_reduced = wbdata.get_dataframe(indicators_new, country = [
    'CUW', 'MHL', 'QAT', 'SYR', 'ASM', 'LBN', 'SSD', 'KWT', 'MAF', 'VEN', 
    'OMN', 'TON', 'SAU', 'XKX', 'MNP', 'MDA', 'CAF', 'BHR', 'GUY', 'WSM', 
    'PRI', 'ERI', 'FJI', 'FSM', 'TUV', 'BIH', 'GUM', 'VCT', 'ARM', 'PSS', 
    'STP', 'ZAF', 'SWZ', 'NCL', 'SLV', 'BDI', 'PAK', 'KIR', 'GRL', 'DMA', 
    'LTU', 'NPL'])

#indicators_reduced

indicators_reduced.index

MultiIndex([('American Samoa', '1960'),
            ('American Samoa', '1961'),
            ('American Samoa', '1962'),
            ('American Samoa', '1963'),
            ('American Samoa', '1964'),
            ('American Samoa', '1965'),
            ('American Samoa', '1966'),
            ('American Samoa', '1967'),
            ('American Samoa', '1968'),
            ('American Samoa', '1969'),
            ...
            ( 'Venezuela, RB', '2014'),
            ( 'Venezuela, RB', '2015'),
            ( 'Venezuela, RB', '2016'),
            ( 'Venezuela, RB', '2017'),
            ( 'Venezuela, RB', '2018'),
            ( 'Venezuela, RB', '2019'),
            ( 'Venezuela, RB', '2020'),
            ( 'Venezuela, RB', '2021'),
            ( 'Venezuela, RB', '2022'),
            ( 'Venezuela, RB', '2023')],
           names=['country', 'date'], length=2679)

In [4]:
mig_data_reduced = migration_organizing(indicators_reduced)
#mig_data_reduced.loc[['Curacao']]

In [5]:
# okay maybe we should make a population table for the 90th percentile, 95th percentile and 100th percentile country 

In [213]:
''' getting the countries with the percentile stuff '''

indicators = {"SM.POP.NETM": "Net Migration", "SP.POP.TOTL": "Total Population"}

mig_pop_data = wbdata.get_dataframe(indicators)
mig_pop_cleaned = migration_organizing(mig_pop_data)
mig_pop_data
mig_pop_cleaned

Unnamed: 0_level_0,date,Net Migration,Total Population,Migration per Capita,Migration Rate (%)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Africa Eastern and Southern,2021,179444.0,702977106.0,0.000255,0.025526
Africa Eastern and Southern,2020,48955.0,685112979.0,0.000071,0.007146
Africa Eastern and Southern,2019,187410.0,667242986.0,0.000281,0.028087
Africa Eastern and Southern,2018,366105.0,649757148.0,0.000563,0.056345
Africa Eastern and Southern,2017,343075.0,632746570.0,0.000542,0.054220
...,...,...,...,...,...
Zimbabwe,2018,59918.0,15052184.0,0.003981,0.398068
Zimbabwe,2017,59918.0,14751101.0,0.004062,0.406193
Zimbabwe,2016,59918.0,14452704.0,0.004146,0.414580
Zimbabwe,2015,59918.0,14154937.0,0.004233,0.423301


In [7]:
avgs = setup_long(mig_pop_cleaned)
avgs = avgs.sort_values(by = ['Migration Rate (%)'], ascending = False)
avgs1 = avgs[avgs['Percentile Rank'] >= 90]
country_array = avgs1.index.tolist()

# THIS is our selected "population"!! 

country_array

['Curacao',
 'Marshall Islands',
 'Qatar',
 'Syrian Arab Republic',
 'American Samoa',
 'Lebanon',
 'South Sudan',
 'Kuwait',
 'St. Martin (French part)',
 'Venezuela, RB',
 'Oman',
 'Tonga',
 'Saudi Arabia',
 'Kosovo',
 'Northern Mariana Islands',
 'Moldova',
 'Central African Republic',
 'Bahrain',
 'Guyana',
 'Samoa']

In [8]:
avgs1

Unnamed: 0,Net Migration,Migration Rate (%),Percentile Rank
Curacao,9592.0,6.092441,100.0
Marshall Islands,1901.0,4.107915,99.481865
Qatar,97400.0,3.543467,98.963731
Syrian Arab Republic,678143.5,3.455758,98.445596
American Samoa,1638.5,3.365898,97.927461
Lebanon,197743.142857,3.292441,97.409326
South Sudan,337661.571429,3.104315,96.891192
Kuwait,116678.75,2.798996,96.373057
St. Martin (French part),909.875,2.696374,95.854922
"Venezuela, RB",578053.125,1.960295,95.336788


In [9]:
# assign variables for percentiles

samoa_90 = mig_data_reduced.loc['Samoa']
samoa_90.set_index('date', inplace = True)
venezuela_95 = mig_data_reduced.loc['Venezuela, RB']
venezuela_95.set_index('date', inplace = True)
curacao_100 = mig_data_reduced.loc[['Curacao']]

In [10]:
samoa_90

Unnamed: 0_level_0,Total Population,"Population, female","Population, male","Population ages 00-04, female","Population ages 00-04, male","Population ages 0-14, female","Population ages 0-14, male","Population ages 0-14, total","Population ages 05-09, female","Population ages 05-09, male",...,Control of Corruption: Estimate,Income Share Held by Lowest 20%,Economic and Social Rights Performance Score,Rule of Law: Estimate,Land Surface Temperature,"PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)",Cooling Degree Days,Heat Index 35,Migration per Capita,Migration Rate (%)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,201757.0,98400.0,103356.0,14075.0,15163.0,37569.0,40027.0,77596.0,12303.0,13115.0,...,0.358954,,2.513174,0.797606,25.344914,11.140807,4794.35,0.0,0.015667,1.566736
2015,203571.0,99370.0,104201.0,14171.0,15274.0,37786.0,40343.0,78129.0,12443.0,13329.0,...,0.298591,,2.515336,0.744272,25.186954,11.016346,4763.78,0.0,0.01541,1.540986
2016,205544.0,100415.0,105129.0,14197.0,15298.0,38040.0,40701.0,78741.0,12641.0,13603.0,...,0.300258,,2.51667,0.869582,25.439488,11.315084,5070.01,0.06,0.013486,1.348616
2017,207630.0,101510.0,106119.0,14169.0,15264.0,38321.0,41090.0,79412.0,12888.0,13919.0,...,0.663342,,2.4793,0.938193,25.820867,10.90095,4953.73,0.0,0.013601,1.360112
2018,209701.0,102603.0,107099.0,14120.0,15210.0,38593.0,41463.0,80056.0,13128.0,14210.0,...,0.647212,,2.492263,0.912801,25.809946,11.044446,4873.81,0.0,0.013061,1.306145
2019,211905.0,103759.0,108147.0,14072.0,15155.0,38865.0,41823.0,80688.0,13321.0,14432.0,...,0.648035,,,1.162507,25.811655,11.014834,5077.3,0.0,0.011651,1.165145
2020,214929.0,105296.0,109634.0,14054.0,15124.0,39239.0,42273.0,81512.0,13488.0,14614.0,...,0.712195,,,1.009464,25.81745,,5004.33,0.0,0.004932,0.493186
2021,218764.0,107213.0,111550.0,14070.0,15128.0,39707.0,42809.0,82516.0,13620.0,14749.0,...,0.592588,,,0.884038,25.688572,,,,0.004032,0.403174


In [11]:
venezuela_95.index

Index([2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021], dtype='int64', name='date')

In [12]:
curacao_100

Unnamed: 0_level_0,date,Total Population,"Population, female","Population, male","Population ages 00-04, female","Population ages 00-04, male","Population ages 0-14, female","Population ages 0-14, male","Population ages 0-14, total","Population ages 05-09, female",...,Control of Corruption: Estimate,Income Share Held by Lowest 20%,Economic and Social Rights Performance Score,Rule of Law: Estimate,Land Surface Temperature,"PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)",Cooling Degree Days,Heat Index 35,Migration per Capita,Migration Rate (%)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Curacao,2019,157441.0,81416.0,76024.0,4107.0,4269.0,13730.0,14313.0,28044.0,4563.0,...,,,,,,,,,0.060924,6.092441


In [216]:
# Data from WDI on age-sex comes in the forms of variables
# which take the form "SP.POP.LLHH.MA" for males
# and "SP.POP.LLHH.FE" for females, where LL is the *low* end of
# age range, like "05" for 5-yo, and HH is the *high* end.

# We construct a list of age-ranges.



age_ranges = []

for i in range(0,80,5):
    age_ranges.append(f"{i:02d}"+f"{i+4:02d}")

age_ranges.append("80UP")

male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

variables = male_variables
variables.update(female_variables)

In [15]:
# WLD is the World; substitute your own code or list of codes.
# Remember you can search for the appropriate codes using
# wbdata.search_countries("")

In [208]:
df = wbdata.get_dataframe(variables,country="WLD")

In [212]:
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'World Population, Year 2019', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

year = 2019

bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

        go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))

In [16]:
# for plotting a single country  

import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'Samoa Population Pyramid, Year 2019', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = samoa_90_pop.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -samoa_90_pop.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [17]:
py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'Venezuela, RB Population Pyramid, Year 2019',  
                   width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = venezuela_95_pop.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -venezuela_95_pop.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [18]:
py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'Venezuela, RB Population Pyramid, Year 2019',
                    width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = curacao_100_pop.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -curacao_100_pop.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [77]:
codes = ['CUW', 'MHL', 'QAT', 'SYR', 'ASM', 'LBN', 'SSD', 'KWT', 'MAF', 'VEN', 'OMN', 
         'TON', 'SAU', 'XKX', 'MNP', 'MDA', 'CAF', 'BHR', 'GUY', 'WSM']
 
    
age_ranges = []

# Ranges top out at 80, and go in five year increments
for i in range(0,80,5):
    age_ranges.append(f"{i:02d}"+f"{i+4:02d}")
age_ranges.append("80UP")

male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

variables = male_variables
variables.update(female_variables)


#date_range = pd.date_range(start=start_date, end=end_date)
#date_range

#print(start_date)

In [218]:
#pop_df = wbdata.get_dataframe(variables, country='CUW')
#pop_df.index

countries_4plot = {
    'ASM': 'American Samoa',
    'BHR': 'Bahrain',
    'CAF': 'Central African Republic',
    'CUW': 'Curacao',
    'GUY': 'Guyana',
    'KWT': 'Kuwait',
    'LBN': 'Lebanon',
    'MAF': 'St. Martin (French part)',
    'MDA': 'Moldova',
    'MHL': 'Marshall Islands',
    'MNP': 'Northern Mariana Islands',
    'OMN': 'Oman',
    'QAT': 'Qatar',
    'SAU': 'Saudi Arabia',
    'SSD': 'South Sudan',
    'SYR': 'Syrian Arab Republic',
    'TON': 'Tonga',
    'VEN': 'Venezuela, RB',
    'WSM': 'Samoa',
    'XKX': 'Kosovo'}

In [219]:
py.init_notebook_mode(connected=True)

def pyramid_all(yrstring):
    
    py.init_notebook_mode(connected=True)
    
    for code in codes: 
        
        pop_df = wbdata.get_dataframe(variables, country=[code])
        

        layout = go.Layout(barmode='overlay', title = countries_4plot[code] + ' Population, 2019',
                   width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

        bins = [go.Bar(x = pop_df.loc[yrstring,:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

               go.Bar(x = -pop_df.loc[yrstring,:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )]
        
        py.iplot(dict(data=bins, layout=layout))

In [198]:
pwd

'/Users/leayamashiro/Documents/GitHub/Kitagawa_EEP153'

In [201]:
# writing a for-loop to plot all of the pyramids 

import plotly.io as pio

pyramids = pyramid_all('2019')

In [22]:
# getting sum data

total_90th_pop = wbdata.get_dataframe(variables, country=codes)
total_90th_pop

Unnamed: 0_level_0,Unnamed: 1_level_0,Males 0004,Males 0509,Males 1014,Males 1519,Males 2024,Males 2529,Males 3034,Males 3539,Males 4044,Males 4549,...,Females 3539,Females 4044,Females 4549,Females 5054,Females 5559,Females 6064,Females 6569,Females 7074,Females 7579,Females 80UP
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
American Samoa,2022,2001.0,2125.0,2060.0,1903.0,1620.0,1863.0,1561.0,1063.0,1223.0,1361.0,...,1161.0,1275.0,1347.0,1357.0,1355.0,1134.0,806.0,537.0,275.0,208.0
American Samoa,2021,2079.0,2189.0,2111.0,1954.0,1687.0,2003.0,1427.0,1130.0,1278.0,1417.0,...,1192.0,1342.0,1356.0,1396.0,1358.0,1084.0,783.0,500.0,257.0,202.0
American Samoa,2020,2185.0,2303.0,2223.0,2038.0,1802.0,2078.0,1307.0,1207.0,1336.0,1473.0,...,1247.0,1401.0,1380.0,1437.0,1356.0,1042.0,763.0,471.0,241.0,197.0
American Samoa,2019,2302.0,2420.0,2334.0,2107.0,1955.0,2069.0,1240.0,1281.0,1395.0,1518.0,...,1312.0,1448.0,1420.0,1472.0,1342.0,1003.0,739.0,445.0,229.0,195.0
American Samoa,2018,2423.0,2539.0,2444.0,2166.0,2137.0,1980.0,1234.0,1347.0,1461.0,1549.0,...,1383.0,1488.0,1460.0,1496.0,1319.0,972.0,711.0,415.0,230.0,189.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Kosovo,1964,93701.0,80570.0,69476.0,48565.0,43183.0,39129.0,35370.0,25572.0,18429.0,14783.0,...,26171.0,18716.0,15887.0,17371.0,14975.0,13187.0,9855.0,7660.0,4923.0,3526.0
Kosovo,1963,91664.0,79270.0,66998.0,45655.0,43138.0,38274.0,34446.0,23446.0,17826.0,14758.0,...,24597.0,17862.0,16014.0,17428.0,14393.0,13181.0,9407.0,7749.0,4654.0,3482.0
Kosovo,1962,89546.0,77916.0,63275.0,44324.0,42728.0,37654.0,32939.0,21761.0,17139.0,15212.0,...,23093.0,17194.0,16320.0,17158.0,14066.0,12959.0,9131.0,7705.0,4455.0,3438.0
Kosovo,1961,87487.0,76150.0,58932.0,43870.0,42116.0,37303.0,30899.0,20580.0,16406.0,16112.0,...,21746.0,16774.0,16809.0,16663.0,13966.0,12425.0,9042.0,7504.0,4344.0,3394.0


In [172]:
pyramids[0]

TypeError: 'NoneType' object is not subscriptable

In [147]:
wbdata.get_country?

In [23]:
total_90th_pop.loc['American Samoa']
#total_90th_pop['date'] = total_90th_pop['date'].astype(int)
#sum_90th = total_90th_pop[total_90th_pop.loc(level=1) > 2013]

Unnamed: 0_level_0,Males 0004,Males 0509,Males 1014,Males 1519,Males 2024,Males 2529,Males 3034,Males 3539,Males 4044,Males 4549,...,Females 3539,Females 4044,Females 4549,Females 5054,Females 5559,Females 6064,Females 6569,Females 7074,Females 7579,Females 80UP
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022,2001.0,2125.0,2060.0,1903.0,1620.0,1863.0,1561.0,1063.0,1223.0,1361.0,...,1161.0,1275.0,1347.0,1357.0,1355.0,1134.0,806.0,537.0,275.0,208.0
2021,2079.0,2189.0,2111.0,1954.0,1687.0,2003.0,1427.0,1130.0,1278.0,1417.0,...,1192.0,1342.0,1356.0,1396.0,1358.0,1084.0,783.0,500.0,257.0,202.0
2020,2185.0,2303.0,2223.0,2038.0,1802.0,2078.0,1307.0,1207.0,1336.0,1473.0,...,1247.0,1401.0,1380.0,1437.0,1356.0,1042.0,763.0,471.0,241.0,197.0
2019,2302.0,2420.0,2334.0,2107.0,1955.0,2069.0,1240.0,1281.0,1395.0,1518.0,...,1312.0,1448.0,1420.0,1472.0,1342.0,1003.0,739.0,445.0,229.0,195.0
2018,2423.0,2539.0,2444.0,2166.0,2137.0,1980.0,1234.0,1347.0,1461.0,1549.0,...,1383.0,1488.0,1460.0,1496.0,1319.0,972.0,711.0,415.0,230.0,189.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1964,2123.0,1835.0,1611.0,1298.0,1016.0,698.0,545.0,536.0,502.0,428.0,...,585.0,454.0,366.0,295.0,194.0,175.0,103.0,87.0,57.0,41.0
1963,2063.0,1792.0,1575.0,1292.0,965.0,633.0,528.0,528.0,493.0,411.0,...,567.0,437.0,355.0,275.0,192.0,165.0,101.0,90.0,51.0,41.0
1962,2010.0,1752.0,1552.0,1282.0,892.0,576.0,517.0,525.0,480.0,394.0,...,548.0,415.0,349.0,253.0,192.0,153.0,103.0,91.0,47.0,40.0
1961,1964.0,1716.0,1544.0,1257.0,806.0,532.0,510.0,527.0,462.0,376.0,...,524.0,392.0,345.0,232.0,194.0,140.0,109.0,91.0,44.0,38.0


In [24]:
#sum_90th.set_index(['date'], inplace=True)
#sum_90th

In [25]:

total_90th_pop.reset_index(inplace=True)
total_90th_pop

Unnamed: 0,country,date,Males 0004,Males 0509,Males 1014,Males 1519,Males 2024,Males 2529,Males 3034,Males 3539,...,Females 3539,Females 4044,Females 4549,Females 5054,Females 5559,Females 6064,Females 6569,Females 7074,Females 7579,Females 80UP
0,American Samoa,2022,2001.0,2125.0,2060.0,1903.0,1620.0,1863.0,1561.0,1063.0,...,1161.0,1275.0,1347.0,1357.0,1355.0,1134.0,806.0,537.0,275.0,208.0
1,American Samoa,2021,2079.0,2189.0,2111.0,1954.0,1687.0,2003.0,1427.0,1130.0,...,1192.0,1342.0,1356.0,1396.0,1358.0,1084.0,783.0,500.0,257.0,202.0
2,American Samoa,2020,2185.0,2303.0,2223.0,2038.0,1802.0,2078.0,1307.0,1207.0,...,1247.0,1401.0,1380.0,1437.0,1356.0,1042.0,763.0,471.0,241.0,197.0
3,American Samoa,2019,2302.0,2420.0,2334.0,2107.0,1955.0,2069.0,1240.0,1281.0,...,1312.0,1448.0,1420.0,1472.0,1342.0,1003.0,739.0,445.0,229.0,195.0
4,American Samoa,2018,2423.0,2539.0,2444.0,2166.0,2137.0,1980.0,1234.0,1347.0,...,1383.0,1488.0,1460.0,1496.0,1319.0,972.0,711.0,415.0,230.0,189.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1255,Kosovo,1964,93701.0,80570.0,69476.0,48565.0,43183.0,39129.0,35370.0,25572.0,...,26171.0,18716.0,15887.0,17371.0,14975.0,13187.0,9855.0,7660.0,4923.0,3526.0
1256,Kosovo,1963,91664.0,79270.0,66998.0,45655.0,43138.0,38274.0,34446.0,23446.0,...,24597.0,17862.0,16014.0,17428.0,14393.0,13181.0,9407.0,7749.0,4654.0,3482.0
1257,Kosovo,1962,89546.0,77916.0,63275.0,44324.0,42728.0,37654.0,32939.0,21761.0,...,23093.0,17194.0,16320.0,17158.0,14066.0,12959.0,9131.0,7705.0,4455.0,3438.0
1258,Kosovo,1961,87487.0,76150.0,58932.0,43870.0,42116.0,37303.0,30899.0,20580.0,...,21746.0,16774.0,16809.0,16663.0,13966.0,12425.0,9042.0,7504.0,4344.0,3394.0


In [26]:
total_90th_pop.index

RangeIndex(start=0, stop=1260, step=1)

In [27]:
total_90th_pop.columns

Index(['country', 'date', 'Males 0004', 'Males 0509', 'Males 1014',
       'Males 1519', 'Males 2024', 'Males 2529', 'Males 3034', 'Males 3539',
       'Males 4044', 'Males 4549', 'Males 5054', 'Males 5559', 'Males 6064',
       'Males 6569', 'Males 7074', 'Males 7579', 'Males 80UP', 'Females 0004',
       'Females 0509', 'Females 1014', 'Females 1519', 'Females 2024',
       'Females 2529', 'Females 3034', 'Females 3539', 'Females 4044',
       'Females 4549', 'Females 5054', 'Females 5559', 'Females 6064',
       'Females 6569', 'Females 7074', 'Females 7579', 'Females 80UP'],
      dtype='object')

In [28]:
totals = total_90th_pop.groupby(['date']).sum()
totals.reset_index(inplace=True)
totals.rename(columns={'index': 'country'}, inplace=True)
totals.index = pd.Index(['Total'] * len(totals), name='country')

totals

Unnamed: 0,date,country,Males 0004,Males 0509,Males 1014,Males 1519,Males 2024,Males 2529,Males 3034,Males 3539,...,Females 3539,Females 4044,Females 4549,Females 5054,Females 5559,Females 6064,Females 6569,Females 7074,Females 7579,Females 80UP
0,1960,American SamoaBahrainCentral African RepublicC...,2603202.0,2070929.0,1636371.0,1325889.0,1160432.0,1032666.0,910774.0,764957.0,...,752785.0,602820.0,528653.0,445833.0,371385.0,298050.0,221502.0,160041.0,101205.0,76831.0
1,1961,American SamoaBahrainCentral African RepublicC...,2677734.0,2150097.0,1715019.0,1366725.0,1185794.0,1053638.0,933954.0,789884.0,...,780017.0,623713.0,535701.0,458622.0,379167.0,307073.0,226798.0,162526.0,102687.0,78220.0
2,1962,American SamoaBahrainCentral African RepublicC...,2750432.0,2231191.0,1795995.0,1415632.0,1212956.0,1077102.0,956290.0,815645.0,...,807156.0,648894.0,541907.0,472457.0,387498.0,315761.0,233257.0,165116.0,104380.0,79626.0
3,1963,American SamoaBahrainCentral African RepublicC...,2820219.0,2312996.0,1878590.0,1472580.0,1241559.0,1101776.0,977466.0,841946.0,...,834192.0,676686.0,549322.0,485855.0,396875.0,323811.0,240391.0,167827.0,106175.0,81001.0
4,1964,American SamoaBahrainCentral African RepublicC...,2890107.0,2394279.0,1961701.0,1537627.0,1273149.0,1127784.0,998752.0,869011.0,...,861191.0,704824.0,560813.0,497393.0,407805.0,331430.0,248190.0,170939.0,108202.0,82460.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,2018,American SamoaBahrainCentral African RepublicC...,6377424.0,6792146.0,6311633.0,5637390.0,5112813.0,5346241.0,5557217.0,5791971.0,...,4195049.0,3900643.0,3227955.0,2568354.0,2064747.0,1623131.0,1185005.0,817958.0,572417.0,558677.0
59,2019,American SamoaBahrainCentral African RepublicC...,6223131.0,6772902.0,6428844.0,5773922.0,5179597.0,5240132.0,5499600.0,5780903.0,...,4182213.0,3930729.0,3283836.0,2632220.0,2108447.0,1672158.0,1224644.0,849280.0,585185.0,576151.0
60,2020,American SamoaBahrainCentral African RepublicC...,6102637.0,6695862.0,6538443.0,5841646.0,5135325.0,5053046.0,5316292.0,5662687.0,...,4146907.0,3961742.0,3362767.0,2712782.0,2164325.0,1729632.0,1274188.0,888060.0,599281.0,593245.0
61,2021,American SamoaBahrainCentral African RepublicC...,6010369.0,6566326.0,6634871.0,5880058.0,5075069.0,4857009.0,5087635.0,5487964.0,...,4091153.0,3988524.0,3465198.0,2808754.0,2233212.0,1790247.0,1331617.0,928302.0,614222.0,605928.0


In [29]:
#totals.rename(columns={'index': 'country'}, inplace=True)

totals.rename(columns={'index': 'country'}, inplace=True)
totals.index = pd.Index(['Total'] * len(totals), name='country')

In [30]:
totals.drop(columns=['country'])

Unnamed: 0_level_0,date,Males 0004,Males 0509,Males 1014,Males 1519,Males 2024,Males 2529,Males 3034,Males 3539,Males 4044,...,Females 3539,Females 4044,Females 4549,Females 5054,Females 5559,Females 6064,Females 6569,Females 7074,Females 7579,Females 80UP
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Total,1960,2603202.0,2070929.0,1636371.0,1325889.0,1160432.0,1032666.0,910774.0,764957.0,620071.0,...,752785.0,602820.0,528653.0,445833.0,371385.0,298050.0,221502.0,160041.0,101205.0,76831.0
Total,1961,2677734.0,2150097.0,1715019.0,1366725.0,1185794.0,1053638.0,933954.0,789884.0,640210.0,...,780017.0,623713.0,535701.0,458622.0,379167.0,307073.0,226798.0,162526.0,102687.0,78220.0
Total,1962,2750432.0,2231191.0,1795995.0,1415632.0,1212956.0,1077102.0,956290.0,815645.0,664050.0,...,807156.0,648894.0,541907.0,472457.0,387498.0,315761.0,233257.0,165116.0,104380.0,79626.0
Total,1963,2820219.0,2312996.0,1878590.0,1472580.0,1241559.0,1101776.0,977466.0,841946.0,689693.0,...,834192.0,676686.0,549322.0,485855.0,396875.0,323811.0,240391.0,167827.0,106175.0,81001.0
Total,1964,2890107.0,2394279.0,1961701.0,1537627.0,1273149.0,1127784.0,998752.0,869011.0,715381.0,...,861191.0,704824.0,560813.0,497393.0,407805.0,331430.0,248190.0,170939.0,108202.0,82460.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Total,2018,6377424.0,6792146.0,6311633.0,5637390.0,5112813.0,5346241.0,5557217.0,5791971.0,5403623.0,...,4195049.0,3900643.0,3227955.0,2568354.0,2064747.0,1623131.0,1185005.0,817958.0,572417.0,558677.0
Total,2019,6223131.0,6772902.0,6428844.0,5773922.0,5179597.0,5240132.0,5499600.0,5780903.0,5460704.0,...,4182213.0,3930729.0,3283836.0,2632220.0,2108447.0,1672158.0,1224644.0,849280.0,585185.0,576151.0
Total,2020,6102637.0,6695862.0,6538443.0,5841646.0,5135325.0,5053046.0,5316292.0,5662687.0,5481803.0,...,4146907.0,3961742.0,3362767.0,2712782.0,2164325.0,1729632.0,1274188.0,888060.0,599281.0,593245.0
Total,2021,6010369.0,6566326.0,6634871.0,5880058.0,5075069.0,4857009.0,5087635.0,5487964.0,5488802.0,...,4091153.0,3988524.0,3465198.0,2808754.0,2233212.0,1790247.0,1331617.0,928302.0,614222.0,605928.0


In [31]:
total_90th_pop.set_index('country', inplace=True)
total_90th_pop

Unnamed: 0_level_0,date,Males 0004,Males 0509,Males 1014,Males 1519,Males 2024,Males 2529,Males 3034,Males 3539,Males 4044,...,Females 3539,Females 4044,Females 4549,Females 5054,Females 5559,Females 6064,Females 6569,Females 7074,Females 7579,Females 80UP
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
American Samoa,2022,2001.0,2125.0,2060.0,1903.0,1620.0,1863.0,1561.0,1063.0,1223.0,...,1161.0,1275.0,1347.0,1357.0,1355.0,1134.0,806.0,537.0,275.0,208.0
American Samoa,2021,2079.0,2189.0,2111.0,1954.0,1687.0,2003.0,1427.0,1130.0,1278.0,...,1192.0,1342.0,1356.0,1396.0,1358.0,1084.0,783.0,500.0,257.0,202.0
American Samoa,2020,2185.0,2303.0,2223.0,2038.0,1802.0,2078.0,1307.0,1207.0,1336.0,...,1247.0,1401.0,1380.0,1437.0,1356.0,1042.0,763.0,471.0,241.0,197.0
American Samoa,2019,2302.0,2420.0,2334.0,2107.0,1955.0,2069.0,1240.0,1281.0,1395.0,...,1312.0,1448.0,1420.0,1472.0,1342.0,1003.0,739.0,445.0,229.0,195.0
American Samoa,2018,2423.0,2539.0,2444.0,2166.0,2137.0,1980.0,1234.0,1347.0,1461.0,...,1383.0,1488.0,1460.0,1496.0,1319.0,972.0,711.0,415.0,230.0,189.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Kosovo,1964,93701.0,80570.0,69476.0,48565.0,43183.0,39129.0,35370.0,25572.0,18429.0,...,26171.0,18716.0,15887.0,17371.0,14975.0,13187.0,9855.0,7660.0,4923.0,3526.0
Kosovo,1963,91664.0,79270.0,66998.0,45655.0,43138.0,38274.0,34446.0,23446.0,17826.0,...,24597.0,17862.0,16014.0,17428.0,14393.0,13181.0,9407.0,7749.0,4654.0,3482.0
Kosovo,1962,89546.0,77916.0,63275.0,44324.0,42728.0,37654.0,32939.0,21761.0,17139.0,...,23093.0,17194.0,16320.0,17158.0,14066.0,12959.0,9131.0,7705.0,4455.0,3438.0
Kosovo,1961,87487.0,76150.0,58932.0,43870.0,42116.0,37303.0,30899.0,20580.0,16406.0,...,21746.0,16774.0,16809.0,16663.0,13966.0,12425.0,9042.0,7504.0,4344.0,3394.0


In [32]:
pop90th_w_totals = pd.concat([total_90th_pop, totals])

In [33]:
pop90th_w_totals

Unnamed: 0_level_0,date,Males 0004,Males 0509,Males 1014,Males 1519,Males 2024,Males 2529,Males 3034,Males 3539,Males 4044,...,Females 4044,Females 4549,Females 5054,Females 5559,Females 6064,Females 6569,Females 7074,Females 7579,Females 80UP,country
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
American Samoa,2022,2001.0,2125.0,2060.0,1903.0,1620.0,1863.0,1561.0,1063.0,1223.0,...,1275.0,1347.0,1357.0,1355.0,1134.0,806.0,537.0,275.0,208.0,
American Samoa,2021,2079.0,2189.0,2111.0,1954.0,1687.0,2003.0,1427.0,1130.0,1278.0,...,1342.0,1356.0,1396.0,1358.0,1084.0,783.0,500.0,257.0,202.0,
American Samoa,2020,2185.0,2303.0,2223.0,2038.0,1802.0,2078.0,1307.0,1207.0,1336.0,...,1401.0,1380.0,1437.0,1356.0,1042.0,763.0,471.0,241.0,197.0,
American Samoa,2019,2302.0,2420.0,2334.0,2107.0,1955.0,2069.0,1240.0,1281.0,1395.0,...,1448.0,1420.0,1472.0,1342.0,1003.0,739.0,445.0,229.0,195.0,
American Samoa,2018,2423.0,2539.0,2444.0,2166.0,2137.0,1980.0,1234.0,1347.0,1461.0,...,1488.0,1460.0,1496.0,1319.0,972.0,711.0,415.0,230.0,189.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Total,2018,6377424.0,6792146.0,6311633.0,5637390.0,5112813.0,5346241.0,5557217.0,5791971.0,5403623.0,...,3900643.0,3227955.0,2568354.0,2064747.0,1623131.0,1185005.0,817958.0,572417.0,558677.0,American SamoaBahrainCentral African RepublicC...
Total,2019,6223131.0,6772902.0,6428844.0,5773922.0,5179597.0,5240132.0,5499600.0,5780903.0,5460704.0,...,3930729.0,3283836.0,2632220.0,2108447.0,1672158.0,1224644.0,849280.0,585185.0,576151.0,American SamoaBahrainCentral African RepublicC...
Total,2020,6102637.0,6695862.0,6538443.0,5841646.0,5135325.0,5053046.0,5316292.0,5662687.0,5481803.0,...,3961742.0,3362767.0,2712782.0,2164325.0,1729632.0,1274188.0,888060.0,599281.0,593245.0,American SamoaBahrainCentral African RepublicC...
Total,2021,6010369.0,6566326.0,6634871.0,5880058.0,5075069.0,4857009.0,5087635.0,5487964.0,5488802.0,...,3988524.0,3465198.0,2808754.0,2233212.0,1790247.0,1331617.0,928302.0,614222.0,605928.0,American SamoaBahrainCentral African RepublicC...


In [34]:
# for figure, actually just gonna use the totals table 

totals.set_index('date', inplace=True)

In [202]:
# woohoo! now get figure

import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'Total High Migration Countries Population, Year 2019', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = totals.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

        go.Bar(x = -totals.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [36]:
# countries taking in highest number of refugees 

mig_pop_data

Unnamed: 0_level_0,date,Net Migration,Total Population
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa Eastern and Southern,2022,,720859132.0
Africa Eastern and Southern,2021,-179444.0,702977106.0
Africa Eastern and Southern,2020,-48955.0,685112979.0
Africa Eastern and Southern,2019,-187410.0,667242986.0
Africa Eastern and Southern,2018,-366105.0,649757148.0
...,...,...,...
Zimbabwe,1964,-10064.0,4310332.0
Zimbabwe,1963,-9369.0,4177931.0
Zimbabwe,1962,-8931.0,4049778.0
Zimbabwe,1961,-8582.0,3925952.0


In [37]:
def migration_in(data1):
    
    # Make years ints instead of strings
    data1.reset_index(inplace=True)
    data1['date'] = data1['date'].astype(int)

    # index the table by country instead of year 
    data1.set_index(['country'],inplace=True)
    
    #takes all negative values, we want migration AWAY
    mig_table = data1[data1['Net Migration'] > 0] 
    
    #takes absolute value to get magnitude rather than negative
    mig_table['Net Migration'] = mig_table['Net Migration'].abs() 
    
    #creates new column, called 'Migration Per Capita'
    mig_table['Migration per Capita'] = mig_table['Net Migration']/mig_table['Total Population'] 
    
    #takes migration per capita and makes it a rate
    mig_table['Immigration Rate (%)'] = mig_table['Migration per Capita']*100   
    
    # This line of code is initialized because we are interested in a population contingent 
    # on trends in the past ten years (rather than since '64). Additionally, we look at data 
    # from the ESG dataset, which only started being compiled in 2014'''
    mig_table = mig_table[mig_table['date'] > 2013]

    return mig_table

In [38]:
mig_in = migration_in(mig_pop_data).sort_values(by = ['Immigration Rate (%)'], ascending = False)
mig_in

Unnamed: 0_level_0,date,Net Migration,Total Population,Migration per Capita,Immigration Rate (%)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Curacao,2018,25591.0,1.593360e+05,0.160610,16.061028
Jordan,2014,803008.0,8.658026e+06,0.092747,9.274724
Qatar,2014,179768.0,2.214465e+06,0.081179,8.117898
Qatar,2015,178133.0,2.414573e+06,0.073774,7.377412
Kuwait,2015,259114.0,3.908743e+06,0.066291,6.629088
...,...,...,...,...,...
Madagascar,2018,485.0,2.684654e+07,0.000018,0.001807
Thailand,2021,1133.0,7.160110e+07,0.000016,0.001582
Arab World,2016,3140.0,4.150780e+08,0.000008,0.000756
East Asia & Pacific,2021,9227.0,2.370190e+09,0.000004,0.000389


In [39]:
avgs2 = mig_in.groupby('country').mean()
avgs2.reset_index(inplace=True)
avgs2

#in_avgd.set_index('date', inplace=True)
#in_avgd

#in_avgd.set_index(['country','date'], inplace=True)
#in_avgd

Unnamed: 0,country,date,Net Migration,Total Population,Migration per Capita,Immigration Rate (%)
0,Afghanistan,2017.000000,2.112160e+05,3.584422e+07,0.006047,0.604674
1,Africa Eastern and Southern,2015.000000,3.949250e+05,6.000084e+08,0.000658,0.065820
2,Andorra,2018.500000,1.243833e+03,7.574450e+04,0.016395,1.639482
3,Angola,2017.500000,6.066525e+04,3.077235e+07,0.002041,0.204147
4,Antigua and Barbuda,2015.000000,6.133333e+01,8.991367e+04,0.000685,0.068468
...,...,...,...,...,...,...
144,Ukraine,2021.000000,8.310000e+02,4.382290e+07,0.000019,0.001896
145,United Kingdom,2017.500000,2.333009e+05,6.609915e+07,0.003534,0.353390
146,United States,2017.500000,1.112018e+06,3.257538e+08,0.003424,0.342417
147,Upper middle income,2017.714286,4.107523e+05,2.725101e+09,0.000152,0.015170


In [40]:
avgs2['Percentile Rank'] = avgs2['Immigration Rate (%)'].apply(
            lambda x: stats.percentileofscore(mig_in['Immigration Rate (%)'], x))

In [41]:
avgs2.sort_values(by = ['Percentile Rank'], ascending = False)

Unnamed: 0,country,date,Net Migration,Total Population,Migration per Capita,Immigration Rate (%),Percentile Rank
78,Lebanon,2014.000000,301113.000000,6.274342e+06,0.047991,4.799117,99.183197
114,Qatar,2016.500000,95068.500000,2.584988e+06,0.039222,3.922222,98.949825
74,Kuwait,2017.250000,138463.500000,4.197983e+06,0.034006,3.400563,98.483081
142,Turks and Caicos Islands,2017.500000,1191.250000,4.044625e+04,0.030021,3.002135,97.899650
33,Curacao,2017.285714,4311.571429,1.571971e+05,0.027129,2.712930,97.316219
...,...,...,...,...,...,...,...
131,Sub-Saharan Africa (IDA & IBRD countries),2015.000000,34532.000000,1.008699e+09,0.000034,0.003423,2.042007
130,Sub-Saharan Africa,2015.000000,34532.000000,1.008699e+09,0.000034,0.003423,2.042007
132,Sub-Saharan Africa (excluding high income),2015.000000,34287.000000,1.008605e+09,0.000034,0.003399,1.866978
84,Madagascar,2016.000000,485.000000,2.551698e+07,0.000019,0.001903,0.933489


In [42]:
avgs2_90th = avgs2[avgs2['Percentile Rank'] >= 90]
#country_array_mig_in = mig_in_90th.index.tolist()
avgs2_90th.sort_values(by=['Percentile Rank'], ascending=False)
avgs2_90th = avgs2_90th.set_index('country')

In [43]:
avgs2_90th.index

Index(['Andorra', 'Bahrain', 'Cayman Islands', 'Curacao', 'Guyana', 'Jordan',
       'Kuwait', 'Lebanon', 'Luxembourg', 'Maldives', 'Malta', 'Monaco',
       'Oman', 'Qatar', 'Sint Maarten (Dutch part)', 'Syrian Arab Republic',
       'Turks and Caicos Islands'],
      dtype='object', name='country')

In [44]:
unique_countries_in = avgs1.index.unique()

# Drop rows from df1 where the country names appear in df2
filtered = avgs2_90th[~avgs2_90th.index.isin(unique_countries_in)]

In [45]:
country_array_mig_in = filtered.index.tolist()
print(country_array_mig_in)
filtered

['Andorra', 'Cayman Islands', 'Jordan', 'Luxembourg', 'Maldives', 'Malta', 'Monaco', 'Sint Maarten (Dutch part)', 'Turks and Caicos Islands']


Unnamed: 0_level_0,date,Net Migration,Total Population,Migration per Capita,Immigration Rate (%),Percentile Rank
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Andorra,2018.5,1243.833333,75744.5,0.016395,1.639482,92.648775
Cayman Islands,2017.5,913.875,64096.38,0.014406,1.440641,91.715286
Jordan,2017.5,184997.875,10195980.0,0.020294,2.029375,94.749125
Luxembourg,2017.5,9876.75,600338.4,0.016606,1.660586,92.882147
Maldives,2017.5,8563.5,476146.9,0.018713,1.871344,93.815636
Malta,2017.5,10335.125,478190.8,0.021609,2.160919,95.682614
Monaco,2017.5,510.375,36832.0,0.013899,1.389934,91.365228
Sint Maarten (Dutch part),2017.571429,680.571429,40565.71,0.017061,1.706113,93.115519
Turks and Caicos Islands,2017.5,1191.25,40446.25,0.030021,3.002135,97.89965


In [46]:
filtered['date'].mean()

2017.6190476190475

In [47]:
# okay, now, going to make population pyramid for the 95th, 97th, 98th

In [48]:
mig_in_codes = ['AND', 'CYM', 'JOR', 'LUX', 'MDV', 'MLT', 'MCO', 'SXM', 'TCA']

In [49]:
def pyramid_all_mig_in(yrstring):
    
    for code in mig_in_codes: 
        
        pop_df = wbdata.get_dataframe(variables, country=[code])
        
        py.init_notebook_mode(connected=True)

        layout = go.Layout(barmode='overlay', title = code + ' Population Pyramid, Average > 2014',
                   width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

        bins = [go.Bar(x = pop_df.loc[yrstring,:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

               go.Bar(x = -pop_df.loc[yrstring,:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )]
        
        py.iplot(dict(data=bins, layout=layout))

In [50]:
# getting sum data

total_90th_in = wbdata.get_dataframe(variables, country= mig_in_codes)


total_90th_in.reset_index(inplace=True)


totals_in = total_90th_in.groupby(['date']).sum()
#totals.set_index('country', inplace = False)
totals_in.reset_index(inplace=True)
#totals.set_index(['date', ['Totals'] * len(totals)])
totals_in

#totals.rename(columns={'index': 'country'}, inplace=True)

totals_in.rename(columns={'index': 'country'}, inplace=True)
totals_in.index = pd.Index(['Total'] * len(totals), name='country')

In [51]:
totals_in['date'] = totals_in['date'].astype(int)
totals_in = totals_in[totals_in['date'] > 2013]

In [54]:
totals_in.set_index('date', inplace=True)
totals_in

Unnamed: 0_level_0,country,Males 0004,Males 0509,Males 1014,Males 1519,Males 2024,Males 2529,Males 3034,Males 3539,Males 4044,...,Females 3539,Females 4044,Females 4549,Females 5054,Females 5559,Females 6064,Females 6569,Females 7074,Females 7579,Females 80UP
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,AndorraCayman IslandsJordanLuxembourgMonacoMal...,615474.0,574471.0,513724.0,522921.0,519999.0,474462.0,413564.0,366281.0,333852.0,...,325904.0,292283.0,250735.0,201669.0,152378.0,111092.0,87080.0,64953.0,48882.0,53209.0
2015,AndorraCayman IslandsJordanLuxembourgMonacoMal...,655446.0,630436.0,555764.0,552382.0,573459.0,528207.0,462050.0,404395.0,363363.0,...,355636.0,316572.0,271598.0,219377.0,165918.0,119412.0,92104.0,68303.0,51351.0,56603.0
2016,AndorraCayman IslandsJordanLuxembourgMonacoMal...,667327.0,661862.0,580305.0,560637.0,595727.0,555706.0,488980.0,426005.0,380029.0,...,374468.0,331646.0,287710.0,234683.0,178870.0,129078.0,97192.0,72328.0,53369.0,60040.0
2017,AndorraCayman IslandsJordanLuxembourgMonacoMal...,665920.0,673281.0,598459.0,558449.0,601894.0,572605.0,507976.0,441121.0,390104.0,...,387489.0,340916.0,298947.0,246444.0,189699.0,138132.0,101090.0,76046.0,54574.0,63149.0
2018,AndorraCayman IslandsJordanLuxembourgMonacoMal...,665487.0,678151.0,618771.0,562266.0,600571.0,588869.0,527155.0,456725.0,400878.0,...,400524.0,350679.0,309349.0,257975.0,200856.0,147557.0,105674.0,79827.0,56141.0,66510.0
2019,AndorraCayman IslandsJordanLuxembourgMonacoMal...,664066.0,682463.0,637802.0,571270.0,593696.0,603664.0,546233.0,472697.0,412472.0,...,413454.0,361024.0,319256.0,269228.0,212423.0,157021.0,111380.0,83553.0,58200.0,69952.0
2020,AndorraCayman IslandsJordanLuxembourgMonacoMal...,663178.0,682705.0,656309.0,580741.0,586132.0,614815.0,563432.0,488394.0,424366.0,...,425595.0,371449.0,328271.0,279776.0,223741.0,166350.0,117613.0,87046.0,60267.0,72754.0
2021,AndorraCayman IslandsJordanLuxembourgMonacoMal...,665609.0,678108.0,672453.0,591162.0,578390.0,621626.0,578199.0,504378.0,436502.0,...,436988.0,382163.0,336239.0,289970.0,234484.0,175736.0,124441.0,89970.0,62403.0,74679.0
2022,AndorraCayman IslandsJordanLuxembourgMonacoMal...,664725.0,670458.0,677586.0,603148.0,568314.0,618928.0,588238.0,518117.0,447097.0,...,445660.0,391634.0,342560.0,298629.0,244178.0,185040.0,132053.0,92780.0,65060.0,76569.0


In [207]:
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'High Immigration Country Populations, Year 2017', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = totals_in.loc[2017,:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

        go.Bar(x = -totals_in.loc[2017,:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')


layout = go.Layout(barmode='overlay', title = 'Total High Migration Countries Population, Year 2017', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = totals.loc[str(2017),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

        go.Bar(x = -totals.loc[str(2017),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [205]:
px.line_dash?

Object `px.line_dash` not found.
