In [None]:
# install necessary packages 

!pip install wbdata
import wbdata # IF NECESSARY
import cufflinks as cf
cf.go_offline()
import pandas as pd
import numpy as np
from scipy import stats

In [None]:
# DEFINE FUNCTIONS

# this one is to get relevant migration statistics we're interested in
def migration_organizing(data1):
    """Build a table of net out-migration for country-years after 2013.

    Parameters
    ----------
    data1 : pandas.DataFrame
        Frame from which ``reset_index()`` exposes a ``country`` and a
        ``date`` column, and which has ``'Net Migration'`` and
        ``'Total Population'`` columns. The frame is NOT modified; all work
        happens on copies, so the same frame can safely be passed to other
        functions afterwards.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``country``. Holds only the rows whose net migration was
        negative and whose ``date`` is later than 2013, with:

        * ``'Net Migration'`` replaced by its absolute value,
        * ``'Migration per Capita'`` = net migration / total population,
        * ``'Migration Rate (%)'`` = migration per capita * 100.
    """
    # reset_index() (without inplace) returns a new frame, leaving the
    # caller's index untouched. Years are turned into ints for comparison.
    table = data1.reset_index()
    table['date'] = table['date'].astype(int)

    # Index the table by country instead of year.
    table = table.set_index('country')

    # Keep negative values only: we want migration AWAY. The explicit copy
    # makes the column assignments below act on an independent frame rather
    # than on a slice of `table`.
    mig_table = table[table['Net Migration'] < 0].copy()

    # Magnitude rather than a negative number.
    mig_table['Net Migration'] = mig_table['Net Migration'].abs()

    mig_table['Migration per Capita'] = (
        mig_table['Net Migration'] / mig_table['Total Population']
    )

    # Express migration per capita as a percentage.
    mig_table['Migration Rate (%)'] = mig_table['Migration per Capita'] * 100

    # We are interested in trends over the past ten years (rather than since
    # '64); the ESG dataset we also look at only started being compiled in 2014.
    mig_table = mig_table[mig_table['date'] > 2013]

    return mig_table



def setup_long(dataframe):
    """Average each country's migration figures and rank the averages.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame indexed by country (one row per country-year) with
        ``'Net Migration'`` and ``'Migration Rate (%)'`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per country, in order of first appearance, with the mean of
        ``'Net Migration'`` and ``'Migration Rate (%)'`` plus a
        ``'Percentile Rank'`` column: the percentile of the country's mean
        rate among all countries' mean rates
        (``scipy.stats.percentileofscore``). An empty frame with those three
        columns is returned when ``dataframe`` has no rows, so callers can
        chain ``.sort_values`` without a ``None`` check.
    """
    value_columns = ['Net Migration', 'Migration Rate (%)']

    if dataframe.index.unique().empty:
        return pd.DataFrame(columns=value_columns + ['Percentile Rank'])

    # Selecting the columns and calling .mean() on the group-by yields an
    # ordinary DataFrame, so regular table operations work on the result.
    # sort=False keeps countries in the order they first appear.
    results = dataframe.groupby(level=0, sort=False)[value_columns].mean()

    # Rank each country's mean rate against the distribution of mean rates.
    rates = results['Migration Rate (%)']
    results['Percentile Rank'] = rates.apply(
        lambda rate: stats.percentileofscore(rates, rate))

    return results
    
    
    
''' This is the function that we use to retrieve population statistics, as outlined in the 
    [A] deliverables posted on Ed.'''


def population(year, sex, age_low, age_high, country_code):
    """Return the population count for one sex, age band, country and year.

    The indicator code is assembled as ``SP.POP.<low><high>.<MA|FE>`` and
    fetched with ``wbdata.get_dataframe``.

    Parameters
    ----------
    year : int or str
        Year to report; rows are selected by substring match on the index.
    sex : str
        Either ``"Male"`` or ``"Female"``.
    age_low, age_high : int or str
        Bounds of the age band. Integers are zero-padded to two digits
        (``5`` -> ``"05"``); strings such as ``"80"`` / ``"UP"`` are used
        verbatim.
    country_code : str or list of str
        Passed straight through as the ``country`` argument of
        ``wbdata.get_dataframe``.

    Returns
    -------
    int
        Value of the first row that matches ``year``.

    Raises
    ------
    ValueError
        If ``sex`` is not ``"Male"`` or ``"Female"``.
    IndexError
        If no row matches ``year``.
    """
    sex_suffixes = {"Male": "MA", "Female": "FE"}
    if sex not in sex_suffixes:
        # Previously an unknown value fell through both branches and failed
        # later with an unrelated "unbound local variable" error.
        raise ValueError(f"sex must be 'Male' or 'Female', got {sex!r}")

    def _age_token(bound):
        # The age-band indicator codes use two-digit bounds such as "0509".
        return f"{bound:02d}" if isinstance(bound, int) else str(bound)

    indicator = ("SP.POP." + _age_token(age_low) + _age_token(age_high)
                 + "." + sex_suffixes[sex])
    column_names = {indicator: sex}

    # Fetch the series for the requested country only.
    pop_stats = wbdata.get_dataframe(column_names, country=country_code)

    # Keep only the rows whose index label contains the requested year.
    pop_stats = pop_stats.filter(like=str(year), axis=0)
    if pop_stats.empty:
        raise IndexError(f"no {indicator} data for year {year}")

    return int(pop_stats[sex].iloc[0])



''' This function retrieves a dataframe for specific year, country, and indicators selected. 
    The function assumes that the argument pop_indicators has already been defined with a 
    relevant WBData code dictionary.'''

def population_dataframe(year, country_code, pop_indicators):
    """Fetch the given indicators for a country, restricted to one year.

    Parameters
    ----------
    year : int or str
        Rows are kept when their index label contains ``str(year)``.
    country_code : str or list of str
        Forwarded as the ``country`` argument of ``wbdata.get_dataframe``.
    pop_indicators : dict
        Mapping of indicator code -> column label, defined by the caller.

    Returns
    -------
    pandas.DataFrame
        The fetched frame filtered down to the rows matching ``year``.
    """
    fetched = wbdata.get_dataframe(pop_indicators, country = country_code)

    # Substring match on the row labels selects the requested year.
    return fetched.filter(like=str(year), axis=0)

In [None]:
# this gets the table WE want to use (with the countries we want)

indicators_new = {"SP.POP.TOTL": "Total Population", 
                  # first, pop stats
                      "SP.POP.TOTL.FE.IN": "Population, female",
                      "SP.POP.TOTL.MA.IN": "Population, male",
                      "SP.POP.0004.FE": "Population ages 00-04, female",
                      "SP.POP.0004.MA": "Population ages 00-04, male",
                      "SP.POP.0014.FE.IN": "Population ages 0-14, female",
                      "SP.POP.0014.MA.IN": "Population ages 0-14, male",
                      "SP.POP.0014.TO": "Population ages 0-14, total",
                      "SP.POP.0509.FE": "Population ages 05-09, female",
                      "SP.POP.0509.MA": "Population ages 05-09, male",
                      "SP.POP.1014.FE": "Population ages 10-14, female",
                      "SP.POP.1014.MA": "Population ages 10-14, male",
                      "SP.POP.1519.FE": "Population ages 15-19, female",
                      "SP.POP.1519.MA": "Population ages 15-19, male",
                      "SP.POP.1564.FE.IN": "Population ages 15-64, female",
                      "SP.POP.1564.MA.IN": "Population ages 15-64, male",
                      "SP.POP.1564.TO": "Population ages 15-64, total",
                      "SP.POP.2024.FE": "Population ages 20-24, female",
                      "SP.POP.2024.MA": "Population ages 20-24, male",
                      "SP.POP.2529.FE": "Population ages 25-29, female",
                      "SP.POP.2529.MA": "Population ages 25-29, male",
                      "SP.POP.3034.FE": "Population ages 30-34, female",
                      "SP.POP.3034.MA": "Population ages 30-34, male",
                      "SP.POP.3539.FE": "Population ages 35-39, female",
                      "SP.POP.3539.MA": "Population ages 35-39, male",
                      "SP.POP.4044.FE": "Population ages 40-44, female",
                      "SP.POP.4044.MA": "Population ages 40-44, male",
                      "SP.POP.4549.FE": "Population ages 45-49, female",
                      "SP.POP.4549.MA": "Population ages 45-49, male",
                      "SP.POP.5054.FE": "Population ages 50-54, female",
                      "SP.POP.5054.MA": "Population ages 50-54, male",
                      "SP.POP.5559.FE": "Population ages 55-59, female",
                      "SP.POP.5559.MA": "Population ages 55-59, male",
                      "SP.POP.6064.FE": "Population ages 60-64, female",
                      "SP.POP.6064.MA": "Population ages 60-64, male",
                      "SP.POP.6569.FE": "Population ages 65-69, female",
                      "SP.POP.6569.MA": "Population ages 65-69, male",
                      "SP.POP.65UP.FE.IN": "Population ages 65 and above, female",
                      "SP.POP.65UP.MA.IN": "Population ages 65 and above, male",
                      "SP.POP.65UP.TO": "Population ages 65 and above, total",
                      "SP.POP.7074.FE": "Population ages 70-74, female",
                      "SP.POP.7074.MA": "Population ages 70-74, male",
                      "SP.POP.7579.FE": "Population ages 75-79, female",
                      "SP.POP.7579.MA": "Population ages 75-79, male",
                      "SP.POP.80UP.FE": "Population ages 80 and above, female",
                      "SP.POP.80UP.MA": "Population ages 80 and above, male", 
                  
                  # some environmental variables
                      "AG.PRD.FOOD.XD": "Food Production Index",
                      "EN.POP.DNST": "Population per sq km", 
                      "SN.ITK.DEFC.ZS": "Prevalence of Undernourishment (% of Population)", 
                      "EG.ELC.COAL.ZS": "Electricity production from coal sources", 
                      "SM.POP.NETM": "Net Migration",
                  "EN.H2O.BDYS.ZS": "Proportion of bodies of water with good ambient water quality", 
                  "ER.H2O.FWTL.ZS": "Annual freshwater withdrawals, total (% of internal resources)",
                  "SH.H2O.SMDW.ZS": "People using safely managed drinking water services (% of population)", 
                  "SH.STA.SMSS.ZS": "People using safely managed sanitation services (% of population)", 
                  "SH.MED.BEDS.ZS": "Hospital beds (per 1,000 people)",  
                  "SE.XPD.TOTL.GB.ZS": "Government Expenditure on Education (% of Total Expenditure)", 
                  
                  # some political variables
                      "PV.EST": "Political Stability and Absence of Violence/Terrorism (estimate)", 
                      "IC.LGL.CRED.XQ": "Strength of Legal Rights Index (0-12 Scale)", 
                    "EG.ELC.ACCS.ZS": "Access to electricity (% of population)", 
                    "GE.EST": "Government Effectiveness Estimate", 
                    "CC.EST": "Control of Corruption: Estimate", 
                    "SI.DST.FRST.20": "Income Share Held by Lowest 20%", 
                  "SD.ESR.PERF.XQ": "Economic and Social Rights Performance Score",
                  "RL.EST": "Rule of Law: Estimate",  
                  
                  # more scientific environmental variables
                  "EN.LND.LTMP.DC": "Land Surface Temperature",
                  "EN.ATM.PM25.MC.M3": "PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)", 
                  "EN.CLC.CDDY.XD": "Cooling Degree Days", 
                  "EN.CLC.HEAT.XD": "Heat Index 35"}

# these do not work                
#"EN.ATM.CO2E.PCCO2": "emissions (metric tons per capita)", 
#"EN.ATM.METH.PC": "Methane emissions (kt of CO2 equivalent per capita)",
#"EN.ATM.NOXE.PC": "Nitrous oxide emissions (metric tons of CO2 equivalent per capita)",
                  



indicators_reduced = wbdata.get_dataframe(indicators_new, country = [
    'CUW', 'MHL', 'QAT', 'SYR', 'ASM', 'LBN', 'SSD', 'KWT', 'MAF', 'VEN', 
    'OMN', 'TON', 'SAU', 'XKX', 'MNP', 'MDA', 'CAF', 'BHR', 'GUY', 'WSM', 
    'PRI', 'ERI', 'FJI', 'FSM', 'TUV', 'BIH', 'GUM', 'VCT', 'ARM', 'PSS', 
    'STP', 'ZAF', 'SWZ', 'NCL', 'SLV', 'BDI', 'PAK', 'KIR', 'GRL', 'DMA', 
    'LTU', 'NPL'])

#indicators_reduced

indicators_reduced.index

In [None]:
mig_data_reduced = migration_organizing(indicators_reduced)
#mig_data_reduced.loc[['Curacao']]

In [None]:
# okay maybe we should make a population table for the 90th percentile, 95th percentile and 100th percentile country 

In [None]:
''' getting the countries with the percentile stuff '''

indicators = {"SM.POP.NETM": "Net Migration", "SP.POP.TOTL": "Total Population"}

mig_pop_data = wbdata.get_dataframe(indicators)
mig_pop_cleaned = migration_organizing(mig_pop_data)
mig_pop_data
mig_pop_cleaned

In [None]:
avgs = setup_long(mig_pop_cleaned)
avgs = avgs.sort_values(by = ['Migration Rate (%)'], ascending = False)
avgs1 = avgs[avgs['Percentile Rank'] >= 90]
country_array = avgs1.index.tolist()

# THIS is our selected "population"!! 

country_array

In [None]:
avgs1

In [None]:
# assign variables for percentiles

# Double-bracket .loc always yields a DataFrame (even if a country has a
# single row), and set_index without inplace builds a new frame instead of
# mutating a slice of mig_data_reduced.
samoa_90 = mig_data_reduced.loc[['Samoa']].set_index('date')
venezuela_95 = mig_data_reduced.loc[['Venezuela, RB']].set_index('date')
curacao_100 = mig_data_reduced.loc[['Curacao']]

In [None]:
samoa_90

In [None]:
venezuela_95.index

In [None]:
curacao_100

In [None]:
# Data from WDI on age-sex comes in the forms of variables
# which take the form "SP.POP.LLHH.MA" for males
# and "SP.POP.LLHH.FE" for females, where LL is the *low* end of
# age range, like "05" for 5-yo, and HH is the *high* end.

# We construct a list of age-ranges.



# Five-year bands "0004", "0509", ..., "7579", plus the open-ended "80UP".
age_ranges = [f"{low:02d}{low + 4:02d}" for low in range(0, 80, 5)]
age_ranges.append("80UP")

male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

# Merge into a NEW dict. Aliasing male_variables and calling update() on the
# alias would silently add the female indicators to male_variables as well.
variables = {**male_variables, **female_variables}

In [None]:
# WLD is the World; substitute your own code or list of codes.
# Remember you can search for the appropriate codes using
# wbdata.search_countries("")

In [None]:
df = wbdata.get_dataframe(variables,country="WLD")

In [None]:
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

# Define the year first so the title and the plotted row cannot drift apart.
year = 2019

layout = go.Layout(barmode='overlay', title = f'World Population, Year {year}', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

# Bars are positioned from the lower bound of each age band; the female
# counts are negated so the two sexes extend in opposite directions.
bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

        go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))

In [None]:
# for plotting a single country  

import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

# samoa_90_pop is not defined anywhere else in the notebook, so a fresh
# kernel would fail here. Fetch the age/sex indicators for Samoa (WSM) the
# same way the world frame is fetched for "WLD".
samoa_90_pop = wbdata.get_dataframe(variables, country='WSM')

layout = go.Layout(barmode='overlay', title = 'Samoa Population Pyramid, Year 2019', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

# Female counts are negated so the two sexes extend in opposite directions.
bins = [go.Bar(x = samoa_90_pop.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -samoa_90_pop.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [None]:
py.init_notebook_mode(connected=True)

# venezuela_95_pop is not defined anywhere else in the notebook, so a fresh
# kernel would fail here. Fetch the age/sex indicators for Venezuela (VEN).
venezuela_95_pop = wbdata.get_dataframe(variables, country='VEN')

layout = go.Layout(barmode='overlay', title = 'Venezuela, RB Population Pyramid, Year 2019',  
                   width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

# Female counts are negated so the two sexes extend in opposite directions.
bins = [go.Bar(x = venezuela_95_pop.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -venezuela_95_pop.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [None]:
py.init_notebook_mode(connected=True)

# curacao_100_pop is not defined anywhere else in the notebook, so a fresh
# kernel would fail here. Fetch the age/sex indicators for Curacao (CUW).
curacao_100_pop = wbdata.get_dataframe(variables, country='CUW')

# The title previously said "Venezuela, RB" (copy-paste from the cell above)
# although the data plotted is Curacao's.
layout = go.Layout(barmode='overlay', title = 'Curacao Population Pyramid, Year 2019',
                    width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

# Female counts are negated so the two sexes extend in opposite directions.
bins = [go.Bar(x = curacao_100_pop.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -curacao_100_pop.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [None]:
# ISO3 codes of the countries to plot.
codes = ['CUW', 'MHL', 'QAT', 'SYR', 'ASM', 'LBN', 'SSD', 'KWT', 'MAF', 'VEN', 'OMN', 
         'TON', 'SAU', 'XKX', 'MNP', 'MDA', 'CAF', 'BHR', 'GUY', 'WSM']

# Ranges top out at 80, and go in five year increments:
# "0004", "0509", ..., "7579", then the open-ended "80UP".
age_ranges = [f"{low:02d}{low + 4:02d}" for low in range(0, 80, 5)]
age_ranges.append("80UP")

male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

# Merge into a NEW dict. Aliasing male_variables and calling update() on the
# alias would silently add the female indicators to male_variables as well.
variables = {**male_variables, **female_variables}


#date_range = pd.date_range(start=start_date, end=end_date)
#date_range

#print(start_date)

In [None]:
#pop_df = wbdata.get_dataframe(variables, country='CUW')
#pop_df.index

countries_4plot = {
    'ASM': 'American Samoa',
    'BHR': 'Bahrain',
    'CAF': 'Central African Republic',
    'CUW': 'Curacao',
    'GUY': 'Guyana',
    'KWT': 'Kuwait',
    'LBN': 'Lebanon',
    'MAF': 'St. Martin (French part)',
    'MDA': 'Moldova',
    'MHL': 'Marshall Islands',
    'MNP': 'Northern Mariana Islands',
    'OMN': 'Oman',
    'QAT': 'Qatar',
    'SAU': 'Saudi Arabia',
    'SSD': 'South Sudan',
    'SYR': 'Syrian Arab Republic',
    'TON': 'Tonga',
    'VEN': 'Venezuela, RB',
    'WSM': 'Samoa',
    'XKX': 'Kosovo'}

In [None]:
py.init_notebook_mode(connected=True)

def pyramid_all(yrstring):
    """Plot a population pyramid for every country in ``codes`` for one year.

    For each code the age/sex indicators in ``variables`` are fetched with
    ``wbdata.get_dataframe`` and drawn as overlaid horizontal bars (male
    counts positive, female counts negated).

    Parameters
    ----------
    yrstring : str
        Row label of the year to plot, e.g. ``'2019'``.

    Returns
    -------
    list
        The ``go.Figure`` objects that were displayed, in the order of
        ``codes``, so callers can index or re-display individual pyramids.
    """
    py.init_notebook_mode(connected=True)

    # The bar positions depend only on age_ranges; compute them once.
    age_positions = [int(s[:2])+1 for s in age_ranges]
    figures = []

    for code in codes:

        pop_df = wbdata.get_dataframe(variables, country=[code])
        year_row = pop_df.loc[yrstring,:]

        # The title reflects the year actually plotted rather than a fixed 2019.
        layout = go.Layout(barmode='overlay',
                   title = countries_4plot[code] + ' Population, ' + str(yrstring),
                   width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

        bins = [go.Bar(x = year_row.filter(regex="Male").values,
               y = age_positions,
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

               go.Bar(x = -year_row.filter(regex="Female").values,
               y = age_positions,
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )]

        figure = go.Figure(data=bins, layout=layout)
        py.iplot(figure)
        figures.append(figure)

    return figures

In [None]:
pwd

In [None]:
# writing a for-loop to plot all of the pyramids 

import plotly.io as pio

pyramids = pyramid_all('2019')

In [None]:
# getting sum data

total_90th_pop = wbdata.get_dataframe(variables, country=codes)
total_90th_pop

In [None]:
pyramids[0]

In [None]:
wbdata.get_country?

In [None]:
total_90th_pop.loc['American Samoa']
#total_90th_pop['date'] = total_90th_pop['date'].astype(int)
#sum_90th = total_90th_pop[total_90th_pop.loc(level=1) > 2013]

In [None]:
#sum_90th.set_index(['date'], inplace=True)
#sum_90th

In [None]:

total_90th_pop.reset_index(inplace=True)
total_90th_pop

In [None]:
total_90th_pop.index

In [None]:
total_90th_pop.columns

In [None]:
totals = total_90th_pop.groupby(['date']).sum()
totals.reset_index(inplace=True)
totals.rename(columns={'index': 'country'}, inplace=True)
totals.index = pd.Index(['Total'] * len(totals), name='country')

totals

In [None]:
#totals.rename(columns={'index': 'country'}, inplace=True)

totals.rename(columns={'index': 'country'}, inplace=True)
totals.index = pd.Index(['Total'] * len(totals), name='country')

In [None]:
# drop() returns a new frame, so the result must be assigned back for the
# 'country' column to actually be removed from totals. errors='ignore' keeps
# the cell re-runnable once the column is already gone.
totals = totals.drop(columns=['country'], errors='ignore')
totals

In [None]:
total_90th_pop.set_index('country', inplace=True)
total_90th_pop

In [None]:
pop90th_w_totals = pd.concat([total_90th_pop, totals])

In [None]:
pop90th_w_totals

In [None]:
# for figure, actually just gonna use the totals table 

totals.set_index('date', inplace=True)

In [None]:
# woohoo! now get figure

import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'Total High Migration Countries Population, Year 2019', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = totals.loc[str(2019),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

        go.Bar(x = -totals.loc[str(2019),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [None]:
# countries taking in highest number of refugees 

mig_pop_data

In [None]:
def migration_in(data1):
    """Build a table of net in-migration for country-years after 2013.

    Parameters
    ----------
    data1 : pandas.DataFrame
        Frame from which ``reset_index()`` exposes a ``country`` and a
        ``date`` column, and which has ``'Net Migration'`` and
        ``'Total Population'`` columns. The frame is NOT modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``country``. Holds only the rows whose net migration was
        positive and whose ``date`` is later than 2013, with:

        * ``'Migration per Capita'`` = net migration / total population,
        * ``'Immigration Rate (%)'`` = migration per capita * 100.
    """
    # reset_index() (without inplace) returns a new frame, leaving the
    # caller's index untouched. Years are turned into ints for comparison.
    table = data1.reset_index()
    table['date'] = table['date'].astype(int)

    # Index the table by country instead of year.
    table = table.set_index('country')

    # Keep positive values only: we want migration INTO the country. The
    # values are already magnitudes, so no abs() is needed. The explicit copy
    # makes the assignments below act on an independent frame.
    mig_table = table[table['Net Migration'] > 0].copy()

    mig_table['Migration per Capita'] = (
        mig_table['Net Migration'] / mig_table['Total Population']
    )

    # Express migration per capita as a percentage.
    mig_table['Immigration Rate (%)'] = mig_table['Migration per Capita'] * 100

    # We are interested in trends over the past ten years (rather than since
    # '64); the ESG dataset we also look at only started being compiled in 2014.
    mig_table = mig_table[mig_table['date'] > 2013]

    return mig_table

In [None]:
mig_in = migration_in(mig_pop_data).sort_values(by = ['Immigration Rate (%)'], ascending = False)
mig_in

In [None]:
avgs2 = mig_in.groupby('country').mean()
avgs2.reset_index(inplace=True)
avgs2

#in_avgd.set_index('date', inplace=True)
#in_avgd

#in_avgd.set_index(['country','date'], inplace=True)
#in_avgd

In [None]:
# Percentile of each country's average immigration rate.
# NOTE(review): the reference distribution here is mig_in (one row per
# country-year), whereas setup_long ranks each average against the other
# country averages. Confirm which distribution is intended; switching to
# avgs2['Immigration Rate (%)'] would change which countries clear the
# 90th-percentile cut below.
avgs2['Percentile Rank'] = avgs2['Immigration Rate (%)'].apply(
            lambda x: stats.percentileofscore(mig_in['Immigration Rate (%)'], x))

In [None]:
avgs2.sort_values(by = ['Percentile Rank'], ascending = False)

In [None]:
# Countries at or above the 90th percentile, highest rank first, indexed by
# country. sort_values returns a new frame, so it is chained here rather
# than called and discarded.
avgs2_90th = (
    avgs2[avgs2['Percentile Rank'] >= 90]
    .sort_values(by=['Percentile Rank'], ascending=False)
    .set_index('country')
)

In [None]:
avgs2_90th.index

In [None]:
unique_countries_in = avgs1.index.unique()

# Drop rows from avgs2_90th whose country also appears in avgs1 (the high out-migration set)
filtered = avgs2_90th[~avgs2_90th.index.isin(unique_countries_in)]

In [None]:
country_array_mig_in = filtered.index.tolist()
print(country_array_mig_in)
filtered

In [None]:
filtered['date'].mean()

In [None]:
# okay, now, going to make population pyramid for the 95th, 97th, 98th

In [None]:
mig_in_codes = ['AND', 'CYM', 'JOR', 'LUX', 'MDV', 'MLT', 'MCO', 'SXM', 'TCA']

In [None]:
def pyramid_all_mig_in(yrstring):
    """Plot a population pyramid for every country in ``mig_in_codes``.

    For each code the age/sex indicators in ``variables`` are fetched with
    ``wbdata.get_dataframe`` and drawn as overlaid horizontal bars (male
    counts positive, female counts negated).

    Parameters
    ----------
    yrstring : str
        Row label of the year to plot, e.g. ``'2019'``.

    Returns
    -------
    list
        The ``go.Figure`` objects that were displayed, in the order of
        ``mig_in_codes``.
    """
    # Notebook mode only needs to be initialised once, not once per country.
    py.init_notebook_mode(connected=True)

    # The bar positions depend only on age_ranges; compute them once.
    age_positions = [int(s[:2])+1 for s in age_ranges]
    figures = []

    for code in mig_in_codes:

        pop_df = wbdata.get_dataframe(variables, country=[code])
        year_row = pop_df.loc[yrstring,:]

        # A single year is plotted (not an average), so the title names it.
        layout = go.Layout(barmode='overlay',
                   title = code + ' Population Pyramid, Year ' + str(yrstring),
                   width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

        bins = [go.Bar(x = year_row.filter(regex="Male").values,
               y = age_positions,
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

               go.Bar(x = -year_row.filter(regex="Female").values,
               y = age_positions,
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )]

        figure = go.Figure(data=bins, layout=layout)
        py.iplot(figure)
        figures.append(figure)

    return figures

In [None]:
# getting sum data

# Age/sex indicators for the high-immigration countries, flattened so that
# 'country' and 'date' are ordinary columns.
total_90th_in = wbdata.get_dataframe(variables, country= mig_in_codes).reset_index()

# Sum every age/sex column across the selected countries for each year.
# 'country' is dropped first so the country labels are not passed through
# sum() along with the population counts.
totals_in = (
    total_90th_in
    .drop(columns=['country'])
    .groupby(['date'])
    .sum()
    .reset_index()
)

# Label every row 'Total'. The length must come from totals_in itself; it
# was previously taken from the separate `totals` frame, which only works
# while both frames happen to have the same number of rows.
totals_in.index = pd.Index(['Total'] * len(totals_in), name='country')

In [None]:
totals_in['date'] = totals_in['date'].astype(int)
totals_in = totals_in[totals_in['date'] > 2013]

In [None]:
totals_in.set_index('date', inplace=True)
totals_in

In [None]:
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay', title = 'High Immigration Country Populations, Year 2017', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = totals_in.loc[2017,:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

        go.Bar(x = -totals_in.loc[2017,:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')


layout = go.Layout(barmode='overlay', title = 'Total High Migration Countries Population, Year 2017', width=600, height=600,
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

bins = [go.Bar(x = totals.loc[str(2017),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='orange'),
               hoverinfo='skip'
               ),

        go.Bar(x = -totals.loc[str(2017),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='skyblue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))
#go.title('Samoa Population Pyramid')

In [None]:
px.line_dash?