#### Getting data from the World Bank to build a dataframe

In [79]:
# here's where our code goes

In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot
import seaborn as sns

from functools import reduce

-----
## Read in and Aggregate Agriculture/Rural Development Data

In [25]:
def world_bank_csv(name, csv_filepath, indicator_name, year='2017'):
    '''
    Read one World Bank indicator CSV and keep a single year of data.

    The first four lines of the file are skipped before the header row is
    parsed. Only the country identifiers and the requested year column are
    kept, and the year column is renamed so the result can be merged with
    other indicator frames on 'Country Name' / 'Country Code'.

    Parameters
    ----------
    name : str
        Unused. Kept only so existing calls keep working; the returned
        frame is bound by the caller's assignment, not by this argument.
    csv_filepath : str or path-like
        Location of the World Bank CSV export.
    indicator_name : str
        Column name to give the selected year's values.
    year : str or int, default '2017'
        Header of the year column to extract.

    Returns
    -------
    pandas.DataFrame
        Columns: 'Country Name', 'Country Code', indicator_name.

    Raises
    ------
    KeyError
        If the file lacks 'Country Name', 'Country Code' or the year column.
    '''
    year_col = str(year)
    raw = pd.read_csv(csv_filepath, skiprows=4)

    # Selecting the three needed columns already discards any trailing
    # 'Unnamed: N' column, so no explicit drop is required (and the load no
    # longer fails on exports where that column has a different index).
    selected = raw[['Country Name', 'Country Code', year_col]]

    # rename returns a new frame, so nothing is mutated in place
    return selected.rename(columns={year_col: indicator_name})

In [26]:
# SP.RUR.TOTL.ZS export -> 'rural_pop_percent_2017'
rural_pop_17 = world_bank_csv(
    name='rural_pop_17',
    csv_filepath='../datasets/API_SP.RUR.TOTL.ZS_DS2_en_csv_v2_4261416.csv',
    indicator_name='rural_pop_percent_2017',
)

In [27]:
# Sanity-check the load: show the frame (pandas elides the middle rows).
rural_pop_17

Unnamed: 0,Country Name,Country Code,rural_pop_percent_2017
0,Aruba,ABW,56.707000
1,Africa Eastern and Southern,AFE,64.667627
2,Afghanistan,AFG,74.750000
3,Africa Western and Central,AFW,53.980666
4,Angola,AGO,35.161000
...,...,...,...
261,Kosovo,XKX,
262,"Yemen, Rep.",YEM,63.984000
263,South Africa,ZAF,34.150000
264,Zambia,ZMB,57.024000


In [28]:
# AG.PRD.FOOD.XD export -> 'food_prod_index_2017'
food_production_index_17 = world_bank_csv(
    name='food_production_index_17',
    csv_filepath='../datasets/API_AG.PRD.FOOD.XD_DS2_en_csv_v2_4254742.csv',
    indicator_name='food_prod_index_2017',
)

In [29]:
# AG.LND.AGRI.ZS export -> 'ag_land_percent_2017'
ag_land_area_17 = world_bank_csv(
    name='ag_land_area_17',
    csv_filepath='../datasets/API_AG.LND.AGRI.ZS_DS2_en_csv_v2_4254639.csv',
    indicator_name='ag_land_percent_2017',
)

In [30]:
# AG.LND.ARBL.ZS export -> 'arable_land_percent_2017'
arable_land_area_17 = world_bank_csv(
    name='arable_land_area_17',
    csv_filepath='../datasets/API_AG.LND.ARBL.ZS_DS2_en_csv_v2_4252676.csv',
    indicator_name='arable_land_percent_2017',
)

In [31]:
# Fold the agriculture indicator frames into one table, outer-joining on the
# country keys so a country absent from one export is still kept.
# reduce-based multi-frame merge adapted from everestial007's Stack Overflow answer:
# https://stackoverflow.com/questions/44327999/python-pandas-merge-multiple-dataframes
data_frames = [arable_land_area_17, ag_land_area_17, food_production_index_17, rural_pop_17]

ag_merged = reduce(
    lambda merged, nxt: merged.merge(nxt, on=['Country Code', 'Country Name'], how='outer'),
    data_frames,
)

In [47]:
# preview the first five merged agriculture rows
ag_merged.head(n=5)

Unnamed: 0,Country Name,Country Code,arable_land_percent_2017,ag_land_percent_2017,food_prod_index_2017,rural_pop_percent_2017
0,Aruba,ABW,11.111111,11.111111,,56.707
1,Africa Eastern and Southern,AFE,8.231181,44.042629,,64.667627
2,Afghanistan,AFG,11.792727,58.06758,99.169998,74.75
3,Africa Western and Central,AFW,11.224018,39.930699,,53.980666
4,Angola,AGO,3.930376,45.237371,103.419998,35.161


--------
## Read in and Aggregate Aid Data

In [40]:
# SM.POP.NETM export -> 'net_migration_2017'
net_migration = world_bank_csv(
    name='net_migration',
    csv_filepath='../datasets/API_SM.POP.NETM_DS2_en_csv_v2_4250793.csv',
    indicator_name='net_migration_2017',
)

In [41]:
# SH.DYN.AIDS.ZS export -> 'hiv_prevalence_2017'
hiv_prevalence = world_bank_csv(
    name='hiv_prevalence',
    csv_filepath='../datasets/API_SH.DYN.AIDS.ZS_DS2_en_csv_v2_4250791.csv',
    indicator_name='hiv_prevalence_2017',
)

In [42]:
# SH.STA.MMRT export -> 'mat_mortality_ratio_2017'
maternal_mort_ratio = world_bank_csv(
    name='maternal_mort_ratio',
    csv_filepath='../datasets/API_SH.STA.MMRT_DS2_en_csv_v2_4252399.csv',
    indicator_name='mat_mortality_ratio_2017',
)

In [43]:
# SH.DYN.MORT export -> 'under5_mortality_ratio_2017'
u5_mort_ratio = world_bank_csv(
    name='u5_mort_ratio',
    csv_filepath='../datasets/API_SH.DYN.MORT_DS2_en_csv_v2_4252415.csv',
    indicator_name='under5_mortality_ratio_2017',
)

In [44]:
# SH.TBS.INCD export -> 'tubercul_incidence_2017'
tuberculosis = world_bank_csv(
    name='tuberculosis',
    csv_filepath='../datasets/API_SH.TBS.INCD_DS2_en_csv_v2_4250622.csv',
    indicator_name='tubercul_incidence_2017',
)

In [45]:
# Combine the aid-category indicator frames into one table, outer-joining on
# the country keys so no country is dropped when an indicator is missing.
data_frames = [net_migration, hiv_prevalence, maternal_mort_ratio, u5_mort_ratio, tuberculosis]

aid_merged = reduce(
    lambda merged, nxt: merged.merge(nxt, on=['Country Code', 'Country Name'], how='outer'),
    data_frames,
)

In [46]:
# preview the first five merged aid rows
aid_merged.head(n=5)

Unnamed: 0,Country Name,Country Code,net_migration_2017,hiv_prevalence_2017,mat_mortality_ratio_2017,under5_mortality_ratio_2017,tubercul_incidence_2017
0,Aruba,ABW,1004.0,,,,8.7
1,Africa Eastern and Southern,AFE,-791282.0,5.371653,398.0,64.243994,
2,Afghanistan,AFG,-314602.0,0.1,638.0,64.8,189.0
3,Africa Western and Central,AFW,-1022839.0,1.563411,717.0,102.032086,
4,Angola,AGO,32066.0,1.9,241.0,80.6,359.0


---------
## Read in and Aggregate Energy and Environment Data

In [48]:
# FIXME(review): this path is the SP.RUR.TOTL.ZS (rural population) export, the
# same file loaded into `rural_pop_17`, so 'elec_access_2017' currently holds
# rural-population values. Presumably the access-to-electricity export
# (World Bank code EG.ELC.ACCS.ZS) was intended — confirm the filename in ../datasets.
electric_access = world_bank_csv('electric_access', '../datasets/API_SP.RUR.TOTL.ZS_DS2_en_csv_v2_4261416.csv', 'elec_access_2017')

In [49]:
# EG.FEC.RNEW.ZS export -> 'ren_energy_percent_2017'
renew_consumption = world_bank_csv(
    name='renew_consumption',
    csv_filepath='../datasets/API_EG.FEC.RNEW.ZS_DS2_en_csv_v2_4251598.csv',
    indicator_name='ren_energy_percent_2017',
)

In [50]:
# EG.USE.COMM.FO.ZS export -> 'ffuel_energy_percent_2017'
ff_consumption = world_bank_csv(
    name='ff_consumption',
    csv_filepath='../datasets/API_EG.USE.COMM.FO.ZS_DS2_en_csv_v2_4250919.csv',
    indicator_name='ffuel_energy_percent_2017',
)

In [51]:
# EN.ATM.CO2E.PC export -> 'co2_emissions_2017'
co2_emissions = world_bank_csv(
    name='co2_emissions',
    csv_filepath='../datasets/API_EN.ATM.CO2E.PC_DS2_en_csv_v2_4251354.csv',
    indicator_name='co2_emissions_2017',
)

In [52]:
# EN.ATM.PM25.MC.M3 export -> 'pop_air_pollution_2017'
pop_exposed_air_pollution = world_bank_csv(
    name='pop_exposed_air_pollution',
    csv_filepath='../datasets/API_EN.ATM.PM25.MC.M3_DS2_en_csv_v2_4251710.csv',
    indicator_name='pop_air_pollution_2017',
)

In [53]:
# Combine the energy & environment indicator frames into one table,
# outer-joining on the country keys so no country is dropped.
data_frames = [electric_access, renew_consumption, ff_consumption, co2_emissions, pop_exposed_air_pollution]

engy_env_merged = reduce(
    lambda merged, nxt: merged.merge(nxt, on=['Country Code', 'Country Name'], how='outer'),
    data_frames,
)

In [54]:
# preview the first five merged energy & environment rows
engy_env_merged.head(n=5)

Unnamed: 0,Country Name,Country Code,elec_access_2017,ren_energy_percent_2017,ffuel_energy_percent_2017,co2_emissions_2017,pop_air_pollution_2017
0,Aruba,ABW,56.707,6.7,,,
1,Africa Eastern and Southern,AFE,64.667627,,,0.937926,35.512988
2,Afghanistan,AFG,74.75,19.21,,0.131695,56.910808
3,Africa Western and Central,AFW,53.980666,,,0.48314,58.064482
4,Angola,AGO,35.161,56.25,,0.813301,32.388505


------
## Read in and Aggregate Financial Data

In [63]:
# BX.KLT.DINV.CD.WD export -> 'foreign_dir_inv_2017'
foreign_dir_inv = world_bank_csv(
    name='foreign_dir_inv',
    csv_filepath='../datasets/API_BX.KLT.DINV.CD.WD_DS2_en_csv_v2_4250821.csv',
    indicator_name='foreign_dir_inv_2017',
)

In [64]:
# FB.ATM.TOTL.P5 export -> 'atm_access_2017'
atms = world_bank_csv(
    name='atms',
    csv_filepath='../datasets/API_FB.ATM.TOTL.P5_DS2_en_csv_v2_4260830.csv',
    indicator_name='atm_access_2017',
)

In [65]:
# IC.LGL.CRED.XQ export -> 'legal_rights_index_2017'
legal_rights_strength = world_bank_csv(
    name='legal_rights_strength',
    csv_filepath='../datasets/API_IC.LGL.CRED.XQ_DS2_en_csv_v2_4261026.csv',
    indicator_name='legal_rights_index_2017',
)

In [66]:
# Combine the financial indicator frames into one table, outer-joining on the
# country keys so no country is dropped.
data_frames = [foreign_dir_inv, atms, legal_rights_strength]

finance_merged = reduce(
    lambda merged, nxt: merged.merge(nxt, on=['Country Code', 'Country Name'], how='outer'),
    data_frames,
)

------
## Read in and Aggregate Gender Data

In [69]:
# SP.ADO.TFRT export -> 'adol_fertility_rate_2017'
adol_fertility_rate = world_bank_csv(
    name='adol_fertility_rate',
    csv_filepath='../datasets/API_SP.ADO.TFRT_DS2_en_csv_v2_4252410.csv',
    indicator_name='adol_fertility_rate_2017',
)

In [70]:
# SL.TLF.CACT.FE.ZS export -> 'fem_labor_part_rate_2017'
fem_labor_participation = world_bank_csv(
    name='fem_labor_participation',
    csv_filepath='../datasets/API_SL.TLF.CACT.FE.ZS_DS2_en_csv_v2_4250833.csv',
    indicator_name='fem_labor_part_rate_2017',
)

In [71]:
# SL.TLF.CACT.MA.ZS export -> 'male_labor_part_rate_2017'
male_labor_participation = world_bank_csv(
    name='male_labor_participation',
    csv_filepath='../datasets/API_SL.TLF.CACT.MA.ZS_DS2_en_csv_v2_4251123.csv',
    indicator_name='male_labor_part_rate_2017',
)

In [72]:
# SP.DYN.TFRT.IN export -> 'fertility_rate_2017'
fertility_rate = world_bank_csv(
    name='fertility_rate',
    csv_filepath='../datasets/API_SP.DYN.TFRT.IN_DS2_en_csv_v2_4252390.csv',
    indicator_name='fertility_rate_2017',
)

In [73]:
# Combine the gender indicator frames into one table, outer-joining on the
# country keys so no country is dropped.
data_frames = [adol_fertility_rate, fertility_rate, fem_labor_participation, male_labor_participation]

gender_merged = reduce(
    lambda merged, nxt: merged.merge(nxt, on=['Country Code', 'Country Name'], how='outer'),
    data_frames,
)

In [74]:
# preview the first five merged gender rows
gender_merged.head(n=5)

Unnamed: 0,Country Name,Country Code,adol_fertility_rate_2017,fertility_rate_2017,fem_labor_part_rate_2017,male_labor_part_rate_2017
0,Aruba,ABW,22.674,1.886,,
1,Africa Eastern and Southern,AFE,96.146492,4.493744,66.125922,77.021661
2,Afghanistan,AFG,68.957,4.633,20.886999,72.182999
3,Africa Western and Central,AFW,112.847046,5.182391,54.2955,66.625131
4,Angola,AGO,150.526,5.6,74.912003,79.755997


------
## Read in and Aggregate Health Data

In [81]:
# SH.IMM.IDPT export -> 'dpt_immuniz_rate_2017'
dpt_immuniz_rate = world_bank_csv(
    name='dpt_immuniz_rate',
    csv_filepath='../datasets/API_SH.IMM.IDPT_DS2_en_csv_v2_4261936.csv',
    indicator_name='dpt_immuniz_rate_2017',
)

In [82]:
# SN.ITK.DEFC.ZS export -> 'undernourished_rate_2017'
undernourishment = world_bank_csv(
    name='undernourishment',
    csv_filepath='../datasets/API_SN.ITK.DEFC.ZS_DS2_en_csv_v2_4251163.csv',
    indicator_name='undernourished_rate_2017',
)

In [83]:
# Combine the health indicator frames into one table, outer-joining on the
# country keys so no country is dropped.
data_frames = [dpt_immuniz_rate, undernourishment]

health_merged = reduce(
    lambda merged, nxt: merged.merge(nxt, on=['Country Code', 'Country Name'], how='outer'),
    data_frames,
)

------
## Read in and Aggregate Infrastructure Data

In [86]:
# IT.CEL.SETS.P2 export -> 'cell_subscriptions_per100'
# NOTE(review): every other indicator column ends in '_2017'; this one does not.
# Consider 'cell_subscriptions_per100_2017' once downstream uses are checked.
cell_subscriptions_per100 = world_bank_csv('cell_subscriptions_per100', '../datasets/API_IT.CEL.SETS.P2_DS2_en_csv_v2_4251970.csv', 'cell_subscriptions_per100')

In [87]:
# FIXME(review): this path is the IT.CEL.SETS.P2 export, the same file loaded
# into `cell_subscriptions_per100`, so 'internet_per_mil_2017' duplicates the
# cell-subscription values. Presumably the secure-internet-servers export
# (World Bank code IT.NET.SECR.P6) was intended — confirm the filename in ../datasets.
secure_internet_per_mil = world_bank_csv('secure_internet_per_mil', '../datasets/API_IT.CEL.SETS.P2_DS2_en_csv_v2_4251970.csv', 'internet_per_mil_2017')

In [88]:
# Combine the infrastructure indicator frames into one table, outer-joining on
# the country keys so no country is dropped.
data_frames = [cell_subscriptions_per100, secure_internet_per_mil]

infra_merged = reduce(
    lambda merged, nxt: merged.merge(nxt, on=['Country Code', 'Country Name'], how='outer'),
    data_frames,
)

------
## Read in and Aggregate Private Sector Data

------
## Read in and Aggregate Public Sector Data

----
## Read in and Aggregate Science, Technology and Social Development Data

----
## Read in and Aggregate Social Protection & Labor Data

----
## Read in and Aggregate Urban Development Data

-----
*Aggregate into one dataframe for further cleaning*