## Project 1: World Migration, Causes and Associations

###### Group Evelyn Kitagawa: Neha Lala, Jackie Schneider, Himalia Joshi, Monica Wilson, Lea Yamashiro, Kevin Dunn

In [1]:
%%capture
!pip install wbdata
import wbdata
!pip install cufflinks # IF NECESSARY
import cufflinks as cf
cf.go_offline()
!pip install pandas
import pandas as pd
import numpy as np
import statsmodels.api as sm

#### Cleaning Data

In [23]:
indicators = {"NY.GDP.PCAP.CD":"GDP per capita", 
              "SH.DYN.1019": "Death Probability (5-9)",
              "SH.DYN.2024": 'Death Probability (20-24)',
              "SP.DYN.TFRT.IN":"Total Fertility Rate",
              "SP.POP.GROW":"Population Growth Rate",
              "SP.DYN.AMRT.MA":"Male Mortality",
              "SP.DYN.AMRT.FE":"Female Mortality",
              "SP.POP.1564.FE.ZS":"% Adult Female",
              "SP.POP.TOTL.FE.ZS":"% Female", 
              "SM.POP.NETM": "Net Migration", 
              "SP.POP.TOTL": "Total Population"}
data = wbdata.get_dataframe(indicators)

SOURCE=75
esg_indic = wbdata.get_indicator(source=SOURCE)
esg_indic

id                 name
-----------------  ---------------------------------------------------------------------------------------------------------
AG.LND.AGRI.ZS     Agricultural land (% of land area)
AG.LND.FRLS.HA     Tree Cover Loss (hectares)
AG.LND.FRST.ZS     Forest area (% of land area)
AG.PRD.FOOD.XD     Food production index (2014-2016 = 100)
CC.EST             Control of Corruption: Estimate
EG.CFT.ACCS.ZS     Access to clean fuels and technologies for cooking (% of population)
EG.EGY.PRIM.PP.KD  Energy intensity level of primary energy (MJ/$2017 PPP GDP)
EG.ELC.ACCS.ZS     Access to electricity (% of population)
EG.ELC.COAL.ZS     Electricity production from coal sources (% of total)
EG.ELC.RNEW.ZS     Renewable electricity output (% of total electricity output)
EG.FEC.RNEW.ZS     Renewable energy consumption (% of total final energy consumption)
EG.IMP.CONS.ZS     Energy imports, net (% of energy use)
EG.USE.COMM.FO.ZS  Fossil fuel energy consumption (% of total)
EG.USE.P

In [25]:
mig_away = data[data['Net Migration'] < 0]
mig_away['Net Migration'] = mig_away['Net Migration'].abs()
mig_away['Migration per Capita'] = mig_away['Net Migration']/mig_away['Total Population']
mig_away['Migration Rate (%)'] = mig_away['Migration per Capita']*100
mig_away

Unnamed: 0_level_0,Unnamed: 1_level_0,GDP per capita,Death Probability (5-9),Death Probability (20-24),Total Fertility Rate,Population Growth Rate,Male Mortality,Female Mortality,% Adult Female,% Female,Net Migration,Total Population,Migration per Capita,Migration Rate (%)
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Africa Eastern and Southern,2021,1545.613215,15.237635,13.303625,4.354709,2.607472,332.823489,237.602676,55.774440,50.478916,179444.0,702977106.0,0.000255,0.025526
Africa Eastern and Southern,2020,1355.805923,15.549840,13.464270,4.416900,2.678184,311.485254,224.232001,55.549322,50.486544,48955.0,685112979.0,0.000071,0.007146
Africa Eastern and Southern,2019,1507.982881,15.878293,13.663853,4.482898,2.691134,302.772370,218.643251,55.348700,50.495993,187410.0,667242986.0,0.000281,0.028087
Africa Eastern and Southern,2018,1558.307482,16.214853,13.879416,4.527705,2.688371,309.165595,224.119316,55.174578,50.504887,366105.0,649757148.0,0.000563,0.056345
Africa Eastern and Southern,2017,1625.286236,16.545213,14.129777,4.570409,2.655672,314.493882,231.688216,55.018897,50.515859,343075.0,632746570.0,0.000542,0.054220
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,1964,282.376856,,,7.261000,3.119878,358.796000,289.764000,48.293704,50.592112,10064.0,4310332.0,0.002335,0.233485
Zimbabwe,1963,277.532515,,,7.253000,3.115408,361.354000,292.298000,48.477738,50.571874,9369.0,4177931.0,0.002242,0.224250
Zimbabwe,1962,275.966139,,,7.255000,3.105320,366.062000,295.259000,48.906566,50.550845,8931.0,4049778.0,0.002205,0.220531
Zimbabwe,1961,279.332656,,,7.233000,3.094865,369.918000,298.708000,49.274670,50.528305,8582.0,3925952.0,0.002186,0.218597


In [29]:
# getting population dataset 

indicators = {"SP.POP.TOTL": "Total Population", "SM.POP.NETM":"Net Migration"}

data = wbdata.get_dataframe(indicators)

# Make years ints instead of strings
data.reset_index(inplace=True)
data['date'] = data['date'].astype(int)

# index the table by country instead of year 
data.set_index(['country'],inplace=True)
data

Unnamed: 0_level_0,date,Total Population,Net Migration
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa Eastern and Southern,2022,720859132.0,
Africa Eastern and Southern,2021,702977106.0,-179444.0
Africa Eastern and Southern,2020,685112979.0,-48955.0
Africa Eastern and Southern,2019,667242986.0,-187410.0
Africa Eastern and Southern,2018,649757148.0,-366105.0
...,...,...,...
Zimbabwe,1964,4310332.0,-10064.0
Zimbabwe,1963,4177931.0,-9369.0
Zimbabwe,1962,4049778.0,-8931.0
Zimbabwe,1961,3925952.0,-8582.0


In [21]:
var_labels = {"SM.POP.NETM":"Net Migration"}
world_migration = wbdata.get_dataframe(var_labels)
world_migration_np = world_migration.dropna()
population_statistics = world_migration_np.groupby('country').agg({
    'Net Migration': ['sum', 'mean']
}).reset_index()
world_migration_np = population_statistics[population_statistics['Net Migration']<0]
world_migration_np

ValueError: cannot join with no overlapping index names

In [None]:
population_statistics = world_migration_np.groupby('country').agg({
    'Net Migration': ['sum', 'mean']
}).reset_index()
population_statistics

In [28]:
mig_away = mig_away.dropna()
population_statistics = mig_away.groupby('country').agg({
    'Migration per Capita': ['sum', 'mean']
}).reset_index()
population_statistics

Unnamed: 0_level_0,country,Migration per Capita,Migration per Capita
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean
0,Afghanistan,0.072475,0.007247
1,Africa Eastern and Southern,0.025730,0.000858
2,Africa Western and Central,0.019213,0.000640
3,Albania,0.424827,0.013276
4,Algeria,0.029802,0.000961
...,...,...,...
192,Viet Nam,0.017016,0.000608
193,West Bank and Gaza,0.155771,0.006490
194,"Yemen, Rep.",0.075420,0.002357
195,Zambia,0.046117,0.004192


In [5]:
data = wbdata.get_dataframe(indicators)

# Make years ints instead of strings
data.reset_index(inplace=True)
data['date'] = data['date'].astype(int)

# index the table by country instead of year 
data.set_index(['country'],inplace=True)

mig_away = data[data['Net Migration'] < 0]
mig_away['Net Migration'] = mig_away['Net Migration'].abs()
mig_away['Migration per Capita'] = mig_away['Net Migration']/mig_away['Total Population']
mig_away['Migration Rate (%)'] = mig_away['Migration per Capita']*100

NameError: name 'indicators' is not defined

#### Population Statistics

In [None]:
def population(year=1966, sex='Male', age_range=(18,26), place='wrld'):
    print("Migration Rate", Migration Rate (%), "Total Population", SP.POP.TOTL)