## Project 1: World Migration, Causes and Associations

###### Group Evelyn Kitagawa: Neha Lala, Jackie Schneider, Himalia Joshi, Monica Wilson, Lea Yamashiro, Kevin Dunn

In [1]:
%%capture
!pip install wbdata
!pip install pandas
import wbdata
!pip install cufflinks # IF NECESSARY
import cufflinks as cf
cf.go_offline()
!pip install pandas
import pandas as pd
import numpy as np
import statsmodels.api as sm

#### Cleaning Data

In [2]:
# List the indicators published under World Bank source id 75
# (presumably the ESG dataset, judging by the variable name -- TODO confirm).
SOURCE=75
esg_indic = wbdata.get_indicator(source=SOURCE)
# Last expression of the cell: renders the id/name listing of the indicators.
esg_indic

id                 name
-----------------  ---------------------------------------------------------------------------------------------------------
AG.LND.AGRI.ZS     Agricultural land (% of land area)
AG.LND.FRLS.HA     Tree Cover Loss (hectares)
AG.LND.FRST.ZS     Forest area (% of land area)
AG.PRD.FOOD.XD     Food production index (2014-2016 = 100)
CC.EST             Control of Corruption: Estimate
EG.CFT.ACCS.ZS     Access to clean fuels and technologies for cooking (% of population)
EG.EGY.PRIM.PP.KD  Energy intensity level of primary energy (MJ/$2017 PPP GDP)
EG.ELC.ACCS.ZS     Access to electricity (% of population)
EG.ELC.COAL.ZS     Electricity production from coal sources (% of total)
EG.ELC.RNEW.ZS     Renewable electricity output (% of total electricity output)
EG.FEC.RNEW.ZS     Renewable energy consumption (% of total final energy consumption)
EG.IMP.CONS.ZS     Energy imports, net (% of energy use)
EG.USE.COMM.FO.ZS  Fossil fuel energy consumption (% of total)
EG.USE.P

In [3]:
# Population dataset: total population and net migration per country and year.
indicators = {
    "SP.POP.TOTL": "Total Population",
    "SM.POP.NETM": "Net Migration",
}

data = wbdata.get_dataframe(indicators)

# Move the index levels into ordinary columns so the year can be converted
# from a string label to an integer.
data = data.reset_index()
data['date'] = data['date'].astype(int)

# Key the table on country instead of on year.
data = data.set_index(['country'])
data

Unnamed: 0_level_0,date,Total Population,Net Migration
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa Eastern and Southern,2022,720859132.0,
Africa Eastern and Southern,2021,702977106.0,-179444.0
Africa Eastern and Southern,2020,685112979.0,-48955.0
Africa Eastern and Southern,2019,667242986.0,-187410.0
Africa Eastern and Southern,2018,649757148.0,-366105.0
...,...,...,...
Zimbabwe,1964,4310332.0,-10064.0
Zimbabwe,1963,4177931.0,-9369.0
Zimbabwe,1962,4049778.0,-8931.0
Zimbabwe,1961,3925952.0,-8582.0


In [4]:
# Keep only the rows with negative net migration (more people leaving than
# arriving). The explicit .copy() makes data_mig an independent frame, so the
# column assignments below do not raise pandas' SettingWithCopyWarning.
data_mig = data[data['Net Migration'] < 0].copy()

# Work with the magnitude of the outflow rather than a negative number.
data_mig['Net Migration'] = data_mig['Net Migration'].abs()

# Outflow relative to population size, as a fraction and as a percentage.
data_mig['Migration per Capita'] = data_mig['Net Migration'] / data_mig['Total Population']
data_mig['Migration Rate (%)'] = data_mig['Migration per Capita'] * 100

In [5]:
# Display the emigration-only table (rows of `data` with negative net migration).
data_mig

Unnamed: 0_level_0,date,Total Population,Net Migration,Migration per Capita,Migration Rate (%)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Africa Eastern and Southern,2021,702977106.0,179444.0,0.000255,0.025526
Africa Eastern and Southern,2020,685112979.0,48955.0,0.000071,0.007146
Africa Eastern and Southern,2019,667242986.0,187410.0,0.000281,0.028087
Africa Eastern and Southern,2018,649757148.0,366105.0,0.000563,0.056345
Africa Eastern and Southern,2017,632746570.0,343075.0,0.000542,0.054220
...,...,...,...,...,...
Zimbabwe,1964,4310332.0,10064.0,0.002335,0.233485
Zimbabwe,1963,4177931.0,9369.0,0.002242,0.224250
Zimbabwe,1962,4049778.0,8931.0,0.002205,0.220531
Zimbabwe,1961,3925952.0,8582.0,0.002186,0.218597


In [12]:
# World Bank indicator codes -> column labels for the environmental table.
# Population and net migration are included so a migration rate can be derived.
indicators = {
    "SP.POP.TOTL": "Total Population",
    "SM.POP.NETM": "Net Migration",
    "EN.LND.LTMP.DC": "Land Surface Temperature",
    "AG.PRD.FOOD.XD": "Food Production Index",
    "EN.POP.DNST": "Population per sq km",
    "SN.ITK.DEFC.ZS": "Prevalence of Undernourishment (% of Population)",
    "EG.ELC.COAL.ZS": "Electricity production from coal sources",
}

# Left out for now:
#   "ER.H20.FWST.ZS": "Fresh Water Stress (withdrawal prop. avail. resource)"
# NOTE(review): that code is typed with the digit zero ("H20"); the World Bank
# code presumably uses the letter O ("ER.H2O.FWST.ZS") -- verify before re-adding.

mig_env = wbdata.get_dataframe(indicators)

# The year-to-int conversion and re-indexing by country are not done here;
# migration_cleaning() performs both steps on the frame it is given.

In [58]:
# Table cleaning for the environmental data.

# Keep only the rows with negative net migration. The explicit .copy() makes
# data_mig_env an independent frame, so the column assignments below do not
# raise pandas' SettingWithCopyWarning.
data_mig_env = mig_env[mig_env['Net Migration'] < 0].copy()

# Work with the magnitude of the outflow rather than a negative number.
data_mig_env['Net Migration'] = data_mig_env['Net Migration'].abs()

# Outflow relative to population size, as a fraction and as a percentage.
data_mig_env['Migration per Capita'] = data_mig_env['Net Migration'] / data_mig_env['Total Population']
data_mig_env['Migration Rate (%)'] = data_mig_env['Migration per Capita'] * 100

In [15]:
def migration_cleaning(data1):
    """Return the net-emigration rows of a World Bank table with rate columns added.

    Parameters
    ----------
    data1 : pandas.DataFrame
        Table that already has columns named "Net Migration" and
        "Total Population", and whose index (or columns, after a previous
        call) provides "country" and "date". "date" values must be
        convertible to int.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by country containing only the rows where
        "Net Migration" was negative. "Net Migration" is replaced by its
        absolute value, and two columns are added:
        "Migration per Capita" (Net Migration / Total Population) and
        "Migration Rate (%)" (Migration per Capita * 100).

    Notes
    -----
    Side effect kept for backward compatibility: ``data1`` itself is
    re-indexed in place (index reset, "date" cast to int, "country" set as
    the index). Its values are otherwise left untouched; the sign flip and
    the derived columns are written only to the returned copy.
    """
    # Make years ints instead of strings.
    data1.reset_index(inplace=True)
    data1['date'] = data1['date'].astype(int)

    # Index the table by country instead of year.
    data1.set_index(['country'], inplace=True)

    # Negative net migration means migration AWAY, which is what we study.
    # .copy() detaches the result from data1 so the assignments below operate
    # on an independent frame instead of a slice (no SettingWithCopyWarning).
    mig_table = data1[data1['Net Migration'] < 0].copy()

    # Magnitude of the outflow rather than a negative number.
    mig_table['Net Migration'] = mig_table['Net Migration'].abs()

    # Outflow relative to population size, as a fraction and as a percentage.
    mig_table['Migration per Capita'] = mig_table['Net Migration'] / mig_table['Total Population']
    mig_table['Migration Rate (%)'] = mig_table['Migration per Capita'] * 100

    return mig_table

In [16]:
# Display the cleaned emigration table for the environmental indicators.
# NOTE: the returned frame is not bound to a name here, and migration_cleaning
# re-indexes `mig_env` in place (reset_index / set_index with inplace=True).
migration_cleaning(mig_env)

Unnamed: 0_level_0,date,Total Population,Net Migration,Land Surface Temperature,Food Production Index,Population per sq km,Prevalence of Undernourishment (% of Population),Electricity production from coal sources,Migration per Capita,Migration Rate (%)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Afghanistan,1971,11015857.0,12801.0,,48.72,16.889528,,,0.001162,0.116205
Afghanistan,1972,11286753.0,4050.0,,51.98,17.304866,,,0.000359,0.035883
Afghanistan,1973,11575305.0,3030.0,,55.88,17.747275,,,0.000262,0.026176
Afghanistan,1974,11869879.0,20009.0,,57.56,18.198916,,,0.001686,0.168570
Afghanistan,1975,12157386.0,44418.0,,59.20,18.639722,,,0.003654,0.365358
...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,2017,14751101.0,59918.0,30.437847,106.55,38.131320,36.3,,0.004062,0.406193
Zimbabwe,2018,15052184.0,59918.0,32.686932,108.86,38.909614,38.2,,0.003981,0.398068
Zimbabwe,2019,15354608.0,59918.0,33.812884,105.55,39.691374,38.9,,0.003902,0.390228
Zimbabwe,2020,15669666.0,29955.0,32.436873,109.60,40.505793,39.1,,0.001912,0.191166


In [None]:
# NOTE: this cell originally called migration_cleaning() with no argument, which
# raises TypeError because the function requires a DataFrame (`data1`).
# TODO: pass the frame to clean, e.g. migration_cleaning(mig_env), or delete this cell.

In [54]:
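#"ER.H20.FWST.ZS": "Fresh Water Stress (withdrawal prop. avail. resource)", 
# waterstress = wbdata.get_dataframe('ER.H20.FWST.ZS')
# This indicator could not be retrieved, so it is left out for now.
# NOTE: the code above is spelled with the digit zero ("H20"); World Bank water
# indicators use the letter O ("ER.H2O.FWST.ZS") -- likely the cause; verify and retry.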

In [18]:
# World Bank indicator codes -> column labels for the `mig_pol` table.
# Population and net migration are included so a migration rate can be derived.
indicators_pol = {
    "SP.POP.TOTL": "Total Population",
    "SM.POP.NETM": "Net Migration",
    "SE.XPD.TOTL.GB.ZS": "Government Expenditure on Education (% of Total Expenditure)",
    "PV.EST": "Political Stability and Absence of Violence/Terrorism (estimate)",
    "IC.LGL.CRED.XQ": "Strength of Legal Rights Index (0-12 Scale)",
    "EG.ELC.ACCS.ZS": "Access to electricity (% of population)",
    "GE.EST": "Government Effectiveness Estimate",
    "CC.EST": "Control of Corruption: Estimate",
    "GB.XPD.RSDV.GD.ZS": "Research and development expenditure (% of GDP)",
    "SI.DST.FRST.20": "Income Share Held by Lowest 20%",
}

mig_pol = wbdata.get_dataframe(indicators_pol)

In [36]:
# IPython help lookup for wbdata.get_dataframe (the trailing `?` is IPython
# syntax, not Python). Leftover from development; it computes nothing.
wbdata.get_dataframe?

In [19]:
# Display the cleaned emigration table for the `indicators_pol` columns.
# NOTE: the returned frame is not bound to a name here, and migration_cleaning
# re-indexes `mig_pol` in place (reset_index / set_index with inplace=True).
migration_cleaning(mig_pol)

Unnamed: 0_level_0,date,Total Population,Net Migration,Government Expenditure on Education (% of Total Expenditure),Political Stability and Absence of Violence/Terrorism (estimate),Strength of Legal Rights Index (0-12 Scale),Access to electricity (% of population),Government Effectiveness Estimate,Control of Corruption: Estimate,Research and development expenditure (% of GDP),Income Share Held by Lowest 20%,Migration per Capita,Migration Rate (%)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Africa Eastern and Southern,2021,702977106.0,179444.0,15.516190,,,48.103609,,,,,0.000255,0.025526
Africa Eastern and Southern,2020,685112979.0,48955.0,15.501787,,,46.268621,,,,,0.000071,0.007146
Africa Eastern and Southern,2019,667242986.0,187410.0,15.291865,,4.538462,44.389773,,,,,0.000281,0.028087
Africa Eastern and Southern,2018,649757148.0,366105.0,17.221012,,4.538462,43.028332,,,,,0.000563,0.056345
Africa Eastern and Southern,2017,632746570.0,343075.0,16.480368,,4.307692,40.197332,,,,,0.000542,0.054220
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,1964,4310332.0,10064.0,,,,,,,,,0.002335,0.233485
Zimbabwe,1963,4177931.0,9369.0,,,,,,,,,0.002242,0.224250
Zimbabwe,1962,4049778.0,8931.0,,,,,,,,,0.002205,0.220531
Zimbabwe,1961,3925952.0,8582.0,,,,,,,,,0.002186,0.218597


In [None]:
# World Bank indicator codes -> column labels for the pollution analysis.
# The original literal ended in a bare `...` placeholder, which is a
# SyntaxError inside a dict display; it is replaced by the TODO below.
# The label now spells "micrograms" correctly and gives the unit per cubic
# metre, matching the "M3" suffix of the indicator code.
# TODO: add the remaining pollution indicators.
mig_pollution_indicators = {
    "EN.ATM.PM25.MC.M3": "PM 2.5 Mean Annual Exposure, micrograms/m^3",
}

In [3]:
# Fetch net migration only, then drop the rows that have no reported value.
var_labels = {"SM.POP.NETM": "Net Migration"}
world_migration = wbdata.get_dataframe(var_labels)
world_migration_np = world_migration.dropna()

# Total and average net migration per country across all reported years.
population_statistics = (
    world_migration_np
    .groupby('country')
    .agg({'Net Migration': ['sum', 'mean']})
    .reset_index()
)
population_statistics



Unnamed: 0_level_0,country,Net Migration,Net Migration
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean
0,Afghanistan,-2438634.0,-39332.806452
1,Africa Eastern and Southern,-10018348.0,-161586.258065
2,Africa Western and Central,-8981611.0,-144864.693548
3,Albania,-1395489.0,-22507.887097
4,Algeria,-853556.0,-13767.032258
...,...,...,...
260,West Bank and Gaza,-1052497.0,-16975.758065
261,World,0.0,0.000000
262,"Yemen, Rep.",-2206494.0,-35588.612903
263,Zambia,-542949.0,-8757.241935


In [5]:
# Narrow the table to the rows with negative net migration (net outflow).
world_migration_np = world_migration_np.loc[world_migration_np['Net Migration'].lt(0)]
world_migration_np

Unnamed: 0_level_0,Unnamed: 1_level_0,Net Migration
country,date,Unnamed: 2_level_1
Africa Eastern and Southern,2021,-179444.0
Africa Eastern and Southern,2020,-48955.0
Africa Eastern and Southern,2019,-187410.0
Africa Eastern and Southern,2018,-366105.0
Africa Eastern and Southern,2017,-343075.0
...,...,...
Zimbabwe,1964,-10064.0
Zimbabwe,1963,-9369.0
Zimbabwe,1962,-8931.0
Zimbabwe,1961,-8582.0


In [4]:
# IPython help lookup (trailing `?` is IPython syntax, not Python).
# NOTE(review): no variable named `df` is defined in the visible cells, so the
# lookup has nothing to resolve; leftover from development.
df.sort_values?

Object `df.sort_values` not found.


In [5]:
# NOTE: this cell relies on `indicators` having been defined by an earlier
# cell; run the notebook top to bottom (or re-run that cell) before this one.
data = wbdata.get_dataframe(indicators)

# Make years ints instead of strings
data.reset_index(inplace=True)
data['date'] = data['date'].astype(int)

# index the table by country instead of year 
data.set_index(['country'],inplace=True)

# Keep only the rows with negative net migration (migration away). The explicit
# .copy() makes mig_away an independent frame, so the column assignments below
# do not raise pandas' SettingWithCopyWarning.
mig_away = data[data['Net Migration'] < 0].copy()

# Magnitude of the outflow, then the outflow relative to population size as a
# fraction and as a percentage.
mig_away['Net Migration'] = mig_away['Net Migration'].abs()
mig_away['Migration per Capita'] = mig_away['Net Migration']/mig_away['Total Population']
mig_away['Migration Rate (%)'] = mig_away['Migration per Capita']*100

NameError: name 'indicators' is not defined

#### Population Statistics

In [None]:
def population(year=1966, sex='Male', age_range=(18,26), place='wrld'):
    print("Migration Rate", Migration Rate (%), "Total Population", SP.POP.TOTL)