# This code reads out various data sets and compares data availabily across several data sets

In [1]:
import os
import pandas as pd
import numpy as np
from pandas_datareader import wb
%load_ext autoreload
%autoreload 2

In [2]:
cwd = os.getcwd()
os.chdir("../git/wb_api_wrapper/")
from  wb_api_wrapper import * #Import functions to obtain World Bank data from the web and code to rename country names
os.chdir(cwd) #Get back to initial working directory

Get a dictionaty translating country spellings to the spelling usually used by the World Bank data sets

In [3]:
any_name_to_wb_name = pd.read_csv("../git/country_names/out/any_name_to_wb_name.csv",index_col="any",squeeze=True, encoding="utf-8")

The goal of this Notebook is to create a matrix of countries and the availability of indicators relevant for assessing resilience. We start off with loading some work which has been done already, contained in the file have_and_have_nots.csv.

In [5]:
have_nothave = pd.read_csv('have_and_have_nots.csv', index_col=0, squeeze=True, encoding="utf-8")
have_nothave = have_nothave.rename(index=any_name_to_wb_name) # This is a sweet one-line command to rename the index using a dictionary-like translation, in this case an_name_to_wb_name
have_nothave.head(2)

Unnamed: 0_level_0,Code,Region,Income group,Lending category,HIPC,Data,missing_data
Economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,AFG,South Asia,Low income,IDA,HIPC,0.0,"plgp, avg_prod_k"
Albania,ALB,Europe & Central Asia,Upper middle income,IBRD,,1.0,


# Now obtain further indicators from locally saved csv files. See the Notebook pull_data_wb

In [6]:
wb_data_inc20 = pd.read_csv('wb_data_raw/WDI_SI.DST.FRST.20.csv', encoding='utf-8')
wb_data_inc20.head(3)

Unnamed: 0,country,Income share of bootom 20% [SI.DST.FRST.20],year
0,Albania,0.0885,2012
1,Algeria,0.0696,1995
2,Angola,0.0543,2008


Concatenate, i.e. add, the new data to the existing data matrix

In [7]:
data_matrix = pd.concat([have_nothave, wb_data_inc20], axis=1)
data_matrix.head(8)

  result = result.union(other)


Unnamed: 0,Code,Region,Income group,Lending category,HIPC,Data,missing_data,country,Income share of bootom 20% [SI.DST.FRST.20],year
Afghanistan,AFG,South Asia,Low income,IDA,HIPC,0.0,"plgp, avg_prod_k",,,
Albania,ALB,Europe & Central Asia,Upper middle income,IBRD,,1.0,,,,
Algeria,DZA,Middle East & North Africa,Upper middle income,IBRD,,0.0,"share1, social_p, social_r, avg_prod_k, share1...",,,
American Samoa,ASM,East Asia & Pacific,Upper middle income,,,0.0,"gdp_pc_pp, share1, plgp, unemp, axfin_p, axfin...",,,
Andorra,ADO,..,High income: nonOECD,,,0.0,"gdp_pc_pp, share1, plgp, unemp, axfin_p, axfin...",,,
Angola,AGO,Sub-Saharan Africa,Upper middle income,IBRD,,0.0,"social_p, social_r",,,
Antigua and Barbuda,ATG,..,High income: nonOECD,IBRD,,0.0,"share1, unemp, axfin_p, axfin_r, social_p, soc...",,,
Argentina,ARG,Latin America & Caribbean,Upper middle income,IBRD,,1.0,,,,


## Now load finance access data from FINDEX (Global Financial Inclusion Database)

In [None]:
wb_findex = pd.read_csv('wb_data_raw/FINDEX_selection.csv', index_col=0, encoding='utf-8')

Change the index to country

In [None]:
wb_findex

In [None]:
mrv(wb_findex)

### Export the list of data availability to Excel and csv

In [None]:
data_matrix.to_excel("output/data_availability.xlsx")
data_matrix.to_csv("output/data_availability.csv")

# Now select only IDA countries from data_matrix

Choose the IDA countries, which means dropping the non-IDA countries.
The cell below chooses all IDA countries, Blend countries and India and puts it into a new df called ida_countries

In [None]:
ida_countries = data_matrix.loc[data_matrix['Lending category'] == "Blend"]
ida_countries = ida_countries.append(data_matrix.loc[data_matrix['Lending category'] == "IDA"])
ida_countries = ida_countries.append(data_matrix.loc['India'])
ida_countries = ida_countries.sort_index()

Drop some of the columns which are not necessary for the IDA comparison

In [None]:
ida_countries.head(1)

In [None]:
ida_countries.drop(['Income group', 'missing_data', 'Data'], axis=1, inplace=True)

In [None]:
ida_countries.head(1)

Save the ida_countries file

Export the list of data availability to Excel and csv

In [None]:
ida_countries.to_excel("output/IDA_countries_data_availability.xlsx")
ida_countries.to_csv("output/IDA_countries_data_availability.csv")