## Project 1: World Migration, Causes and Associations

###### Group Evelyn Kitagawa: Neha Lala, Jackie Schneider, Himalia Joshi, Monica Wilson, Lea Yamashiro, Kevin Dunn

In [1]:
%%capture
!pip install wbdata
import wbdata
!pip install cufflinks # IF NECESSARY
import cufflinks as cf
cf.go_offline()
!pip install pandas
import pandas as pd
import numpy as np
import statsmodels.api as sm

#### Cleaning Data

In [2]:
SOURCE = 75
esg_indic = wbdata.get_indicators(source=SOURCE)
esg_indic

id                 name
-----------------  ---------------------------------------------------------------------------------------------------------
AG.LND.AGRI.ZS     Agricultural land (% of land area)
AG.LND.FRLS.HA     Tree Cover Loss (hectares)
AG.LND.FRST.ZS     Forest area (% of land area)
AG.PRD.FOOD.XD     Food production index (2014-2016 = 100)
CC.EST             Control of Corruption: Estimate
EG.CFT.ACCS.ZS     Access to clean fuels and technologies for cooking (% of population)
EG.EGY.PRIM.PP.KD  Energy intensity level of primary energy (MJ/$2017 PPP GDP)
EG.ELC.ACCS.ZS     Access to electricity (% of population)
EG.ELC.COAL.ZS     Electricity production from coal sources (% of total)
EG.ELC.RNEW.ZS     Renewable electricity output (% of total electricity output)
EG.FEC.RNEW.ZS     Renewable energy consumption (% of total final energy consumption)
EG.IMP.CONS.ZS     Energy imports, net (% of energy use)
EG.USE.COMM.FO.ZS  Fossil fuel energy consumption (% of total)
EG.USE.P

In [4]:
mig_away = esg_indic[mig_away['Net Migration'] < 0]
mig_away['Net Migration'] = mig_away['Net Migration'].abs()
mig_away['Migration per Capita'] = mig_away['Net Migration']/mig_away['Total Population']
mig_away['Migration Rate (%)'] = mig_away['Migration per Capita']*100
mig_away

NameError: name 'mig_away' is not defined

In [5]:
mig_env_indicators = {"SP.POP.TOTL": "Total Population",
                      "SP.POP.TOTL.FE.IN": "Population, female",
                      "SP.POP.TOTL.MA.IN": "Population, male",
                      "SP.POP.0004.FE": "Population ages 00-04, female",
                      "SP.POP.0004.MA": "Population ages 00-04, male",
                      "SP.POP.0014.FE.IN": "Population ages 0-14, female",
                      "SP.POP.0014.MA.IN": "Population ages 0-14, male",
                      "SP.POP.0014.TO": "Population ages 0-14, total",
                      "SP.POP.0509.FE": "Population ages 05-09, female",
                      "SP.POP.0509.MA": "Population ages 05-09, male",
                      "SP.POP.1014.FE": "Population ages 10-14, female",
                      "SP.POP.1014.MA": "Population ages 10-14, male",
                      "SP.POP.1519.FE": "Population ages 15-19, female",
                      "SP.POP.1519.MA": "Population ages 15-19, male",
                      "SP.POP.1564.FE.IN": "Population ages 15-64, female",
                      "SP.POP.1564.MA.IN": "Population ages 15-64, male",
                      "SP.POP.1564.TO": "Population ages 15-64, total",
                      "SP.POP.2024.FE": "Population ages 20-24, female",
                      "SP.POP.2024.MA": "Population ages 20-24, male",
                      "SP.POP.2529.FE": "Population ages 25-29, female",
                      "SP.POP.2529.MA": "Population ages 25-29, male",
                      "SP.POP.3034.FE": "Population ages 30-34, female",
                      "SP.POP.3034.MA": "Population ages 30-34, male",
                      "SP.POP.3539.FE": "Population ages 35-39, female",
                      "SP.POP.3539.MA": "Population ages 35-39, male",
                      "SP.POP.4044.FE": "Population ages 40-44, female",
                      "SP.POP.4044.MA": "Population ages 40-44, male",
                      "SP.POP.4549.FE": "Population ages 45-49, female",
                      "SP.POP.4549.MA": "Population ages 45-49, male",
                      "SP.POP.5054.FE": "Population ages 50-54, female",
                      "SP.POP.5054.MA": "Population ages 50-54, male",
                      "SP.POP.5559.FE": "Population ages 55-59, female",
                      "SP.POP.5559.MA": "Population ages 55-59, male",
                      "SP.POP.6064.FE": "Population ages 60-64, female",
                      "SP.POP.6064.MA": "Population ages 60-64, male",
                      "SP.POP.6569.FE": "Population ages 65-69, female",
                      "SP.POP.6569.MA": "Population ages 65-69, male",
                      "SP.POP.65UP.FE.IN": "Population ages 65 and above, female",
                      "SP.POP.65UP.MA.IN": "Population ages 65 and above, male",
                      "SP.POP.65UP.TO": "Population ages 65 and above, total",
                      "SP.POP.7074.FE": "Population ages 70-74, female",
                      "SP.POP.7074.MA": "Population ages 70-74, male",
                      "SP.POP.7579.FE": "Population ages 75-79, female",
                      "SP.POP.7579.MA": "Population ages 75-79, male",
                      "SP.POP.80UP.FE": "Population ages 80 and above, female",
                      "SP.POP.80UP.MA": "Population ages 80 and above, male",
                      "SM.POP.NETM":"Net Migration", 
                      #"ER.H2O.FWST.ZS": "Fresh Water Stress (withdrawal prop. avail. resource)", 
                      "EN.LND.LTMP.DC": "Land Surface Temperature", 
                      "AG.PRD.FOOD.XD": "Food Production Index", 
                      "EN.POP.DNST": "Population per sq km", 
                      "SN.ITK.DEFC.ZS": "Prevalence of Undernourishment (% of Population)", 
                      "EG.ELC.COAL.ZS": "Electricity production from coal sources"}
mig_env = wbdata.get_dataframe(mig_env_indicators)

# Make years ints instead of strings
mig_env.reset_index(inplace=True)
mig_env['date'] = mig_env['date'].astype(int)

# index the table by country instead of year 
mig_env.set_index(['country'],inplace=True)
mig_env

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
mig_pol_indicators = {"SP.POP.TOTL": "Total Population",
                      "SP.POP.TOTL.FE.IN": "Population, female",
                      "SP.POP.TOTL.MA.IN": "Population, male",
                      "SP.POP.0004.FE": "Population ages 00-04, female",
                      "SP.POP.0004.MA": "Population ages 00-04, male",
                      "SP.POP.0014.FE.IN": "Population ages 0-14, female",
                      "SP.POP.0014.MA.IN": "Population ages 0-14, male",
                      "SP.POP.0014.TO": "Population ages 0-14, total",
                      "SP.POP.0509.FE": "Population ages 05-09, female",
                      "SP.POP.0509.MA": "Population ages 05-09, male",
                      "SP.POP.1014.FE": "Population ages 10-14, female",
                      "SP.POP.1014.MA": "Population ages 10-14, male",
                      "SP.POP.1519.FE": "Population ages 15-19, female",
                      "SP.POP.1519.MA": "Population ages 15-19, male",
                      "SP.POP.1564.FE.IN": "Population ages 15-64, female",
                      "SP.POP.1564.MA.IN": "Population ages 15-64, male",
                      "SP.POP.1564.TO": "Population ages 15-64, total",
                      "SP.POP.2024.FE": "Population ages 20-24, female",
                      "SP.POP.2024.MA": "Population ages 20-24, male",
                      "SP.POP.2529.FE": "Population ages 25-29, female",
                      "SP.POP.2529.MA": "Population ages 25-29, male",
                      "SP.POP.3034.FE": "Population ages 30-34, female",
                      "SP.POP.3034.MA": "Population ages 30-34, male",
                      "SP.POP.3539.FE": "Population ages 35-39, female",
                      "SP.POP.3539.MA": "Population ages 35-39, male",
                      "SP.POP.4044.FE": "Population ages 40-44, female",
                      "SP.POP.4044.MA": "Population ages 40-44, male",
                      "SP.POP.4549.FE": "Population ages 45-49, female",
                      "SP.POP.4549.MA": "Population ages 45-49, male",
                      "SP.POP.5054.FE": "Population ages 50-54, female",
                      "SP.POP.5054.MA": "Population ages 50-54, male",
                      "SP.POP.5559.FE": "Population ages 55-59, female",
                      "SP.POP.5559.MA": "Population ages 55-59, male",
                      "SP.POP.6064.FE": "Population ages 60-64, female",
                      "SP.POP.6064.MA": "Population ages 60-64, male",
                      "SP.POP.6569.FE": "Population ages 65-69, female",
                      "SP.POP.6569.MA": "Population ages 65-69, male",
                      "SP.POP.65UP.FE.IN": "Population ages 65 and above, female",
                      "SP.POP.65UP.MA.IN": "Population ages 65 and above, male",
                      "SP.POP.65UP.TO": "Population ages 65 and above, total",
                      "SP.POP.7074.FE": "Population ages 70-74, female",
                      "SP.POP.7074.MA": "Population ages 70-74, male",
                      "SP.POP.7579.FE": "Population ages 75-79, female",
                      "SP.POP.7579.MA": "Population ages 75-79, male",
                      "SP.POP.80UP.FE": "Population ages 80 and above, female",
                      "SP.POP.80UP.MA": "Population ages 80 and above, male",
                      "SM.POP.NETM":"Net Migration",
                      "SE.XPD.TOTL.GB.ZS": "Government Expenditure on Education (% of Total Expenditure)", 
                      "PV.EST": "Political Stability and Absence of Violence/Terrorism (estimate)", 
                      "IC.LGL.CRED.XQ": "Strength of Legal Rights Index (0-12 Scale)", 
                      "EG.ELC.ACCS.ZS": "Access to electricity (% of population)", 
                      "GE.EST": "Government Effectiveness Estimate", 
                      "CC.EST": "Control of Corruption: Estimate", 
                      "GB.XPD.RSDV.GD.ZS": "Research and development expenditure (% of GDP)", 
                      "SI.DST.FRST.20": "Income Share Held by Lowest 20%"} 
mig_pol = wbdata.get_dataframe(mig_pol_indicators)

# Make years ints instead of strings
mig_pol.reset_index(inplace=True)
mig_pol['date'] = mig_pol['date'].astype(int)

# index the table by country instead of year 
mig_pol.set_index(['country'],inplace=True)
mig_pol

In [None]:
# getting population dataset 

indicators = {"SP.POP.TOTL": "Total Population", "SM.POP.NETM":"Net Migration"}

data = wbdata.get_dataframe(indicators)

# Make years ints instead of strings
data.reset_index(inplace=True)
data['date'] = data['date'].astype(int)

# index the table by country instead of year 
data.set_index(['country'],inplace=True)
data

In [None]:
var_labels = {"SM.POP.NETM":"Net Migration"}
world_migration = wbdata.get_dataframe(var_labels)
world_migration_np = world_migration.dropna()
population_statistics = world_migration_np.groupby('country').agg({
    'Net Migration': ['sum', 'mean']
}).reset_index()
world_migration_np = population_statistics[population_statistics['Net Migration']<0]
world_migration_np

In [None]:
population_statistics = world_migration_np.groupby('country').agg({
    'Net Migration': ['sum', 'mean']
}).reset_index()
population_statistics

In [None]:
mig_away = mig_away.dropna()
population_statistics = mig_away.groupby('country').agg({
    'Migration per Capita': ['sum', 'mean']
}).reset_index()
population_statistics

In [None]:
data = wbdata.get_dataframe(indicators)

# Make years ints instead of strings
data.reset_index(inplace=True)
data['date'] = data['date'].astype(int)

# index the table by country instead of year 
data.set_index(['country'],inplace=True)

mig_away = data[data['Net Migration'] < 0]
mig_away['Net Migration'] = mig_away['Net Migration'].abs()
mig_away['Migration per Capita'] = mig_away['Net Migration']/mig_away['Total Population']
mig_away['Migration Rate (%)'] = mig_away['Migration per Capita']*100

#### Population Statistics

In [None]:
def population(year=1966, sex='Male', age_range=(18,26), place='wrld'):
    print("Migration Rate", Migration Rate (%), "Total Population", SP.POP.TOTL)