In [2]:
%%capture
!pip install wbdata
import wbdata # IF NECESSARY
import cufflinks as cf
cf.go_offline()
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [33]:
# DEFINE FUNCTIONS

# this one is to get relevant migration statistics we're interested in

def migration_cleaning(data1):
    
    # Make years ints instead of strings
    data1.reset_index(inplace=True)
    data1['date'] = data1['date'].astype(int)

    # index the table by country instead of year 
    data1.set_index(['country'],inplace=True)
    
    mig_table = data1[data1['Net Migration'] < 0] 
    #takes all negative values, we want migration AWAY
    
    mig_table['Net Migration'] = mig_table['Net Migration'].abs() 
    #takes absolute value to get magnitude rather than negative 
    
    mig_table['Migration per Capita'] = mig_table['Net Migration']/mig_table['Total Population'] 
    #creates new column, called 'Migration Per Capita'
    
    mig_table['Migration Rate (%)'] = mig_table['Migration per Capita']*100  
    #takes migration per capita and makes it a rate 
    
    mig_table = mig_table[mig_table['date'] > 2013]

    return mig_table


# 

In [34]:
# this gets the table WE want to use (with the countries we want)

indicators_new = {"SP.POP.TOTL": "Total Population", 
                  # first, pop stats
                      "SP.POP.TOTL.FE.IN": "Population, female",
                      "SP.POP.TOTL.MA.IN": "Population, male",
                      "SP.POP.0004.FE": "Population ages 00-04, female",
                      "SP.POP.0004.MA": "Population ages 00-04, male",
                      "SP.POP.0014.FE.IN": "Population ages 0-14, female",
                      "SP.POP.0014.MA.IN": "Population ages 0-14, male",
                      "SP.POP.0014.TO": "Population ages 0-14, total",
                      "SP.POP.0509.FE": "Population ages 05-09, female",
                      "SP.POP.0509.MA": "Population ages 05-09, male",
                      "SP.POP.1014.FE": "Population ages 10-14, female",
                      "SP.POP.1014.MA": "Population ages 10-14, male",
                      "SP.POP.1519.FE": "Population ages 15-19, female",
                      "SP.POP.1519.MA": "Population ages 15-19, male",
                      "SP.POP.1564.FE.IN": "Population ages 15-64, female",
                      "SP.POP.1564.MA.IN": "Population ages 15-64, male",
                      "SP.POP.1564.TO": "Population ages 15-64, total",
                      "SP.POP.2024.FE": "Population ages 20-24, female",
                      "SP.POP.2024.MA": "Population ages 20-24, male",
                      "SP.POP.2529.FE": "Population ages 25-29, female",
                      "SP.POP.2529.MA": "Population ages 25-29, male",
                      "SP.POP.3034.FE": "Population ages 30-34, female",
                      "SP.POP.3034.MA": "Population ages 30-34, male",
                      "SP.POP.3539.FE": "Population ages 35-39, female",
                      "SP.POP.3539.MA": "Population ages 35-39, male",
                      "SP.POP.4044.FE": "Population ages 40-44, female",
                      "SP.POP.4044.MA": "Population ages 40-44, male",
                      "SP.POP.4549.FE": "Population ages 45-49, female",
                      "SP.POP.4549.MA": "Population ages 45-49, male",
                      "SP.POP.5054.FE": "Population ages 50-54, female",
                      "SP.POP.5054.MA": "Population ages 50-54, male",
                      "SP.POP.5559.FE": "Population ages 55-59, female",
                      "SP.POP.5559.MA": "Population ages 55-59, male",
                      "SP.POP.6064.FE": "Population ages 60-64, female",
                      "SP.POP.6064.MA": "Population ages 60-64, male",
                      "SP.POP.6569.FE": "Population ages 65-69, female",
                      "SP.POP.6569.MA": "Population ages 65-69, male",
                      "SP.POP.65UP.FE.IN": "Population ages 65 and above, female",
                      "SP.POP.65UP.MA.IN": "Population ages 65 and above, male",
                      "SP.POP.65UP.TO": "Population ages 65 and above, total",
                      "SP.POP.7074.FE": "Population ages 70-74, female",
                      "SP.POP.7074.MA": "Population ages 70-74, male",
                      "SP.POP.7579.FE": "Population ages 75-79, female",
                      "SP.POP.7579.MA": "Population ages 75-79, male",
                      "SP.POP.80UP.FE": "Population ages 80 and above, female",
                      "SP.POP.80UP.MA": "Population ages 80 and above, male", 
                  
                  # some environmental variables
                      "AG.PRD.FOOD.XD": "Food Production Index",
                      "EN.POP.DNST": "Population per sq km", 
                      "SN.ITK.DEFC.ZS": "Prevalence of Undernourishment (% of Population)", 
                      "EG.ELC.COAL.ZS": "Electricity production from coal sources", 
                      "SM.POP.NETM": "Net Migration",
                  "EN.H2O.BDYS.ZS": "Proportion of bodies of water with good ambient water quality", 
                  "ER.H2O.FWTL.ZS": "Annual freshwater withdrawals, total (% of internal resources)",
                  "SH.H2O.SMDW.ZS": "People using safely managed drinking water services (% of population)", 
                  "SH.STA.SMSS.ZS": "People using safely managed sanitation services (% of population)", 
                  "SH.MED.BEDS.ZS": "Hospital beds (per 1,000 people)",  
                  "SE.XPD.TOTL.GB.ZS": "Government Expenditure on Education (% of Total Expenditure)", 
                  
                  # some political variables
                      "PV.EST": "Political Stability and Absence of Violence/Terrorism (estimate)", 
                      "IC.LGL.CRED.XQ": "Strength of Legal Rights Index (0-12 Scale)", 
                    "EG.ELC.ACCS.ZS": "Access to electricity (% of population)", 
                    "GE.EST": "Government Effectiveness Estimate", 
                    "CC.EST": "Control of Corruption: Estimate", 
                    "SI.DST.FRST.20": "Income Share Held by Lowest 20%", 
                  "SD.ESR.PERF.XQ": "Economic and Social Rights Performance Score",
                  "RL.EST": "Rule of Law: Estimate",  
                  
                  # more scientific environmental variables
                  "EN.LND.LTMP.DC": "Land Surface Temperature",
                  "EN.ATM.PM25.MC.M3": "PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)", 
                  "EN.CLC.CDDY.XD": "Cooling Degree Days", 
                  "EN.CLC.HEAT.XD": "Heat Index 35"}

# these do not work                
#"EN.ATM.CO2E.PCCO2": "emissions (metric tons per capita)", 
#"EN.ATM.METH.PC": "Methane emissions (kt of CO2 equivalent per capita)",
#"EN.ATM.NOXE.PC": "Nitrous oxide emissions (metric tons of CO2 equivalent per capita)",
                  



indicators_reduced = wbdata.get_dataframe(indicators_new, country = [
    'CUW', 'MHL', 'QAT', 'SYR', 'ASM', 'LBN', 'SSD', 'KWT', 'MAF', 'VEN', 
    'OMN', 'TON', 'SAU', 'XKX', 'MNP', 'MDA', 'CAF', 'BHR', 'GUY', 'WSM', 
    'PRI', 'ERI', 'FJI', 'FSM', 'TUV', 'BIH', 'GUM', 'VCT', 'ARM', 'PSS', 
    'STP', 'ZAF', 'SWZ', 'NCL', 'SLV', 'BDI', 'PAK', 'KIR', 'GRL', 'DMA', 
    'LTU', 'NPL'])

#indicators_reduced

In [43]:
mig_data_reduced = migration_cleaning(indicators_reduced)
#mig_data_reduced.set_index([['country'], ['date']], inplace = True)
mig_data_reduced

plot_df = mig_data_reduced.loc['American Samoa']
plot_df

Unnamed: 0_level_0,date,Total Population,"Population, female","Population, male","Population ages 00-04, female","Population ages 00-04, male","Population ages 0-14, female","Population ages 0-14, male","Population ages 0-14, total","Population ages 05-09, female",...,Control of Corruption: Estimate,Income Share Held by Lowest 20%,Economic and Social Rights Performance Score,Rule of Law: Estimate,Land Surface Temperature,"PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)",Cooling Degree Days,Heat Index 35,Migration per Capita,Migration Rate (%)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
American Samoa,2014,52217.0,26017.0,26199.0,2730.0,2893.0,8070.0,8679.0,16750.0,2784.0,...,1.220709,,,1.40708,,5.977969,,,0.03068,3.067966
American Samoa,2015,51368.0,25629.0,25739.0,2602.0,2778.0,7789.0,8390.0,16180.0,2721.0,...,1.151811,,,1.285363,,6.029517,,,0.031712,3.171235
American Samoa,2016,50448.0,25208.0,25240.0,2494.0,2661.0,7504.0,8080.0,15585.0,2627.0,...,1.159556,,,1.279536,,6.54329,,,0.032529,3.252854
American Samoa,2017,49463.0,24762.0,24701.0,2384.0,2543.0,7215.0,7751.0,14966.0,2522.0,...,1.762151,,,1.294769,,6.339296,,,0.033399,3.33987
American Samoa,2018,48424.0,24290.0,24134.0,2272.0,2423.0,6920.0,7406.0,14326.0,2402.0,...,1.78091,,,1.296713,,6.359077,,,0.034115,3.411531
American Samoa,2019,47321.0,23786.0,23535.0,2158.0,2302.0,6617.0,7055.0,13672.0,2263.0,...,1.78071,,,1.26846,,6.300155,,,0.034868,3.486824
American Samoa,2020,46189.0,23268.0,22921.0,2049.0,2185.0,6313.0,6711.0,13025.0,2135.0,...,1.266005,,,1.11943,,,,,0.03542,3.541969
American Samoa,2021,45035.0,22746.0,22289.0,1949.0,2079.0,6012.0,6378.0,12390.0,2027.0,...,1.279872,,,1.138902,,,,,0.036549,3.654935


In [36]:
# Data from WDI on age-sex comes in the forms of variables
# which take the form "SP.POP.LLHH.MA" for males
# and "SP.POP.LLHH.FE" for females, where LL is the *low* end of
# age range, like "05" for 5-yo, and HH is the *high* end.

# We construct a list of age-ranges.

# Start with an empty list of age-rages
age_ranges = []

# Ranges top out at 80, and go in five year increments
for i in range(0,80,5):
    age_ranges.append(f"{i:02d}"+f"{i+4:02d}")

age_ranges.append("80UP")

print(age_ranges)

['0004', '0509', '1014', '1519', '2024', '2529', '3034', '3539', '4044', '4549', '5054', '5559', '6064', '6569', '7074', '7579', '80UP']


In [37]:
male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

variables = male_variables
variables.update(female_variables)

print(variables)

{'SP.POP.0004.MA': 'Males 0004', 'SP.POP.0509.MA': 'Males 0509', 'SP.POP.1014.MA': 'Males 1014', 'SP.POP.1519.MA': 'Males 1519', 'SP.POP.2024.MA': 'Males 2024', 'SP.POP.2529.MA': 'Males 2529', 'SP.POP.3034.MA': 'Males 3034', 'SP.POP.3539.MA': 'Males 3539', 'SP.POP.4044.MA': 'Males 4044', 'SP.POP.4549.MA': 'Males 4549', 'SP.POP.5054.MA': 'Males 5054', 'SP.POP.5559.MA': 'Males 5559', 'SP.POP.6064.MA': 'Males 6064', 'SP.POP.6569.MA': 'Males 6569', 'SP.POP.7074.MA': 'Males 7074', 'SP.POP.7579.MA': 'Males 7579', 'SP.POP.80UP.MA': 'Males 80UP', 'SP.POP.0004.FE': 'Females 0004', 'SP.POP.0509.FE': 'Females 0509', 'SP.POP.1014.FE': 'Females 1014', 'SP.POP.1519.FE': 'Females 1519', 'SP.POP.2024.FE': 'Females 2024', 'SP.POP.2529.FE': 'Females 2529', 'SP.POP.3034.FE': 'Females 3034', 'SP.POP.3539.FE': 'Females 3539', 'SP.POP.4044.FE': 'Females 4044', 'SP.POP.4549.FE': 'Females 4549', 'SP.POP.5054.FE': 'Females 5054', 'SP.POP.5559.FE': 'Females 5559', 'SP.POP.6064.FE': 'Females 6064', 'SP.POP.6569.

In [9]:
# WLD is the World; substitute your own code or list of codes.
# Remember you can search for the appropriate codes using
# wbdata.search_countries("")

#df1 = wbdata.get_dataframe(variables,country=['TON', 'CUW'])
#print(df1.query("date=='2015'").sum(axis=0))

Males 0004      12156.0
Males 0509      12417.0
Males 1014      12384.0
Males 1519      11285.0
Males 2024      10238.0
Males 2529       7759.0
Males 3034       7110.0
Males 3539       6622.0
Males 4044       7235.0
Males 4549       7701.0
Males 5054       7506.0
Males 5559       6478.0
Males 6064       5489.0
Males 6569       4611.0
Males 7074       3222.0
Males 7579       2094.0
Males 80UP       1524.0
Females 0004    11469.0
Females 0509    11627.0
Females 1014    11796.0
Females 1519    10562.0
Females 2024    10115.0
Females 2529     8323.0
Females 3034     8353.0
Females 3539     8076.0
Females 4044     8693.0
Females 4549     9097.0
Females 5054     9054.0
Females 5559     8160.0
Females 6064     6914.0
Females 6569     5713.0
Females 7074     4346.0
Females 7579     3003.0
Females 80UP     2972.0
dtype: float64


In [42]:
# for plotting a single country  

import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay',
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

year = 2016

bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))

KeyError: '2016'

In [None]:
df = wbdata.get_dataframe(variables,country=['TON'])
print(df.query("date=='2015'").sum(axis=0))

In [19]:
df.reset_index(inplace = True)
df.set_index(['country'], inplace = True)
df.index

Index(['Curacao', 'Curacao', 'Curacao', 'Curacao', 'Curacao', 'Curacao',
       'Curacao', 'Curacao', 'Curacao', 'Curacao',
       ...
       'Tonga', 'Tonga', 'Tonga', 'Tonga', 'Tonga', 'Tonga', 'Tonga', 'Tonga',
       'Tonga', 'Tonga'],
      dtype='object', name='country', length=126)

In [27]:
df = df.reset_index().set_index(['country','date'])


In [32]:
l =[]
for x in range(3):
    l += [x**2]
l

[0, 1, 4]

In [30]:
[x**2 for x in range(3)]

[0, 1, 4]

In [25]:
countries_for_plot = df.index[0]

#for country in countries_for_plot


import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay',
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

year = '2016'

bins = [go.Bar(x = df.xs(year,level='date').query(f"country=={country}").filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men {:d}'.format(year),
               #marker=dict(color='purple'),
               hoverinfo='skip',
               opacity=0.5
               )
             for country in countries_for_plot]

bins += [go.Bar(x = -df.xs(year,level='date'.query(f"country=={country}").filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
            for country in countries_for_plot

py.iplot(dict(data=bins, layout=layout))


(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men {:d}'.format(year),
               hoverinfo='skip',
               opacity=0.5