In [1]:
from data_dictionary import query_dataDict
from fileUtils import fileDownload
from sdmx import sdmx_struc
import os
import re
import pandas as pd
import numpy as np

#### Read all API sources from the data dictionary (so far only two are populated across all indicators)

In [2]:
# Excel data dictionary shipped with the repo (relative path)
data_dict_file = "./populate_data_dictionary/indicator_dictionary_TM_v2.xlsx"

In [3]:
# Get the indicators that are extracted by API from the Excel data dictionary.
# The helper is given the dictionary path and returns a pandas dataframe; the
# rendered output of this cell shows the columns Code, Address, Data_Source
# and Obs_Footnote (one row per indicator).
# NOTE(review): which dictionary rows count as "API" is decided inside
# query_dataDict and is not visible here -- confirm against that module.
api_code_addr_df = query_dataDict.get_API_code_address_etc(data_dict_file)
# bare last expression: let the notebook render the dataframe
api_code_addr_df

Unnamed: 0,Code,Address,Data_Source,Obs_Footnote
5,DM_BRTS,https://sdmx.data.unicef.org/ws/public/sdmxapi...,Helix: DM_BRTS,Only available for 2018
16,DM_POP_URBN,https://sdmx.data.unicef.org/ws/public/sdmxapi...,Helix: DM_POP_URBN,
17,DM_FRATE_TOT,https://sdmx.data.unicef.org/ws/public/sdmxapi...,Helix: DM_FRATE_TOT,
18,MNCH_ABR,https://sdmx.data.unicef.org/ws/public/sdmxapi...,Helix: MNCH_ABR,
19,NT_BW_LBW,https://sdmx.data.unicef.org/ws/public/sdmxapi...,Helix: NT_BW_LBW,
...,...,...,...,...
167,PT_CHLD_1-14_PS-PSY-V_CGVR,https://sdmx.data.unicef.org/ws/public/sdmxapi...,Helix: PT_CHLD_1-14_PS-PSY-V_CGVR,Given as PERCENTAGE in Helix
168,ECD_CHLD_U5_LFT-ALN,https://sdmx.data.unicef.org/ws/public/sdmxapi...,Helix: ECD_CHLD_U5_LFT-ALN,Given as PERCENTAGE in Helix
169,GN_MTNTY_LV_BNFTS,https://sdmx.data.unicef.org/ws/public/sdmxapi...,Helix: GN_MTNTY_LV_BNFTS,In Helix: gender female
170,GN_PTNTY_LV_BNFTS,https://sdmx.data.unicef.org/ws/public/sdmxapi...,Helix: GN_PTNTY_LV_BNFTS,In Helix: gender male


#### Get the country list from the UNICEF TransMonEE dataflow

In [4]:
# REST data endpoint of UNICEF's SDMX API for the TransMonEE dataflow
# (base service URL + "agency,dataflow,version/" path segment)
url_endpoint = (
    "https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/"
    "ECARO,TRANSMONEE,1.0/"
)

In [5]:
# Structure request for the TransMonEE dataflow: the endpoint is queried with
# the 'all' key and the 'structureOnly' detail level, answered as SDMX-JSON.
api_address = f"{url_endpoint}all"
api_params = dict(format="sdmx-json", detail="structureOnly")

# send the request; the response is kept so its JSON body can be read later
d_flow_struc = fileDownload.api_request(api_address, api_params)

In [6]:
# The TransMonEE country codes come straight from the dataflow structure
# downloaded above (requested by Eduard).
country_codes = sdmx_struc.get_all_country_codes(d_flow_struc.json())
# SDMX requests take several countries as one '+'-separated key
country_call = "+".join(code for code in country_codes.values())

In [7]:
# Lookup table with the country code equivalences, read from the
# 'all_countrynames_list.xlsx' workbook
country_codes_file = 'all_countrynames_list.xlsx'
country_codes_df = pd.read_excel(io=country_codes_file)

In [8]:
# Map the TransMonEE three-letter country codes to their two-letter
# equivalents using the lookup table in `country_codes_df`
# (columns CountryIso3 / CountryIso2).
#
# One vectorised `isin` mask replaces a full boolean scan of the table per
# country, and it also works when the country list is empty (np.concatenate
# raises on an empty sequence of arrays).
iso3_in_dataflow = country_codes_df.CountryIso3.isin(list(country_codes.values()))
# dropna(): a missing two-letter code is a float NaN, which can neither be
# sorted against strings by np.unique nor joined into the request key below.
# (pandas' default NA parsing can produce NaN from cell text such as "NA".)
country_codes_2 = np.unique(
    country_codes_df.loc[iso3_in_dataflow, 'CountryIso2'].dropna().to_numpy()
)
# Join string of all countries for SDMX requests
country_call_2 = '+'.join(country_codes_2)

In [9]:
# TransMonEE UIS API key.
# The key is a credential, so it is read from the UIS_API_KEY environment
# variable instead of being committed with the notebook. The key that was
# previously hardcoded here should be considered exposed and be rotated.
uis_key = os.environ.get("UIS_API_KEY", "")
if not uis_key:
    # do not stop the notebook: requests that do not use the key are unaffected
    print("Warning: environment variable UIS_API_KEY is not set; "
          "UIS requests need a valid subscription key")

#### Loop over the pandas dataframe `api_code_addr_df` and download raw data for all API sources therein

In [10]:
# Folder that receives the raw downloads (one CSV file per indicator)
raw_path = "./data_raw/"

In [11]:
# Download the raw data of every API-sourced indicator listed in the
# `api_code_addr_df` pandas dataframe and write one CSV file per indicator
# into `raw_path`. Indicators whose CSV already exists are skipped.

# make sure the destination folder exists before the first file is written
os.makedirs(raw_path, exist_ok=True)

# the same CSV representation is requested from every source
api_headers = {'Accept':'application/vnd.sdmx.data+csv;version=1.0.0', 'Accept-Encoding':'gzip'}

# number of dimensions already read per Helix dataflow endpoint, so the
# structure of a dataflow is requested once instead of once per indicator
n_dim_by_endpoint = {}

for index, row in api_code_addr_df.iterrows():

    # sanity check on strings: strip leading and ending spaces
    # (loop-local name on purpose: the TransMonEE `url_endpoint` defined
    # earlier in the notebook must not be overwritten by the loop)
    indicator_endpoint = row['Address'].strip()
    indicator_code = row['Code'].strip()
    indicator_source = row['Data_Source'].strip()
    # source_key is the text before the first colon ("Helix: DM_BRTS" -> "Helix");
    # without a colon the whole string is used instead of raising IndexError
    source_key = indicator_source.partition(':')[0].strip()

    print(f"Dealing with indicator: {indicator_code}")

    # destination file of this indicator (used for the skip test and the write)
    dest_file = f"{raw_path}{indicator_code}.csv"

    # Skip extraction if indicator already downloaded
    # This skip would need extra info to be executed for update purposes!
    # File names could include the year of execution?
    if os.path.exists(dest_file):
        print(f"Indicator {indicator_code} skipped (already downloaded)")
        # skip loop to next file
        continue

    # separate here how addresses are built:
    # source_key: helix (reads dataflows DSD)
    # source_key: UIS (no dataflow DSD read, only countries added to address / api key to parameters)

    if source_key.lower() == 'helix':

        # first get dataflow number of dimensions (one structure request per endpoint)
        if indicator_endpoint not in n_dim_by_endpoint:
            # loop-local response: the TransMonEE `d_flow_struc` stays untouched
            dsd_response = fileDownload.api_request(
                indicator_endpoint + 'all',
                {'format':'sdmx-json', 'detail':'structureOnly'},
            )
            n_dim_by_endpoint[indicator_endpoint] = sdmx_struc.get_sdmx_dim(dsd_response.json())
        n_dim = n_dim_by_endpoint[indicator_endpoint]

        # prepare to download data (assumes all dataflows have the same country codes)
        # key layout: countries '.' indicator, then one '.' for each of the
        # remaining (n_dim - 2) dimensions, left empty
        api_address = indicator_endpoint + country_call + '.' + indicator_code + '.'*(n_dim-2)
        api_params = {'startPeriod':'1950', 'endPeriod':'2050', 'locale':'en'}

    else:

        # prepare to download data (already know UIS have two-letter country codes)
        api_address = indicator_endpoint + country_call_2
        api_params = {'startPeriod':'1950', 'endPeriod':'2050', 'locale':'en', 'subscription-key':uis_key}

    # now request indicator raw data
    indicator_raw = fileDownload.api_request(api_address,api_params,api_headers)

    # write raw data to destination file if requests satisfactory
    if indicator_raw.status_code == 200:
        with open(dest_file, 'wb') as f:
            f.write(indicator_raw.content)
        print(f"Indicator {indicator_code} succesfully downloaded")
    else:
        # make every non-200 outcome visible, including ones that are not HTTP errors
        print(f"Indicator {indicator_code} not downloaded (HTTP status {indicator_raw.status_code})")

Dealing with indicator: DM_BRTS
Indicator DM_BRTS skipped (already downloaded)
Dealing with indicator: DM_POP_URBN
Indicator DM_POP_URBN skipped (already downloaded)
Dealing with indicator: DM_FRATE_TOT
Indicator DM_FRATE_TOT skipped (already downloaded)
Dealing with indicator: MNCH_ABR
HTTP error occurred: 404 Client Error: 404 for url: https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/UNICEF,MNCH,1.0/ALB+ARM+AZE+BLR+BIH+BGR+HRV+CZE+EST+GEO+HUN+KAZ+KGZ+LVA+LTU+MNE+MKD+POL+MDA+ROU+RUS+SRB+SVK+SVN+TJK+TUR+TKM+UKR+UZB.MNCH_ABR........?startPeriod=1950&endPeriod=2050&locale=en
Dealing with indicator: NT_BW_LBW
Indicator NT_BW_LBW skipped (already downloaded)
Dealing with indicator: DM_LIFE_EXP
Indicator DM_LIFE_EXP skipped (already downloaded)
Dealing with indicator: CME_MRY0
Indicator CME_MRY0 skipped (already downloaded)
Dealing with indicator: CME_MRY0T4
Indicator CME_MRY0T4 skipped (already downloaded)
Dealing with indicator: CME_MRM0
Indicator CME_MRM0 skipped (already downloa

Indicator EDU_FIN_PPP_CONST succesfully downloaded
Dealing with indicator: EDU_FIN_GOV_EXP_L02
Indicator EDU_FIN_GOV_EXP_L02 skipped (already downloaded)
Dealing with indicator: EDU_FIN_GOV_EXP_L1
Indicator EDU_FIN_GOV_EXP_L1 skipped (already downloaded)
Dealing with indicator: EDU_FIN_GOV_EXP_EDU_L2
Indicator EDU_FIN_GOV_EXP_EDU_L2 skipped (already downloaded)
Dealing with indicator: EDU_FIN_GOV_EXP_EDU_L3
Indicator EDU_FIN_GOV_EXP_EDU_L3 skipped (already downloaded)
Dealing with indicator: EDU_FIN_GOV_EXP_EDU_L4
Indicator EDU_FIN_GOV_EXP_EDU_L4 skipped (already downloaded)
Dealing with indicator: EDU_FIN_GOV_EXP_EDU_L5T8
Indicator EDU_FIN_GOV_EXP_EDU_L5T8 skipped (already downloaded)
Dealing with indicator: EDU_SDG_SCH_L1
Indicator EDU_SDG_SCH_L1 skipped (already downloaded)
Dealing with indicator: EDU_SDG_SCH_L2
Indicator EDU_SDG_SCH_L2 skipped (already downloaded)
Dealing with indicator: EDU_SDG_SCH_L3
Indicator EDU_SDG_SCH_L3 skipped (already downloaded)
Dealing with indicator: ED