# DHIS2 Data Download Web API - Central Server V2
## Downloading Health Indicators

In [1]:
import os
import time
import requests
import pandas as pd

In [2]:
# Project root that every relative path below (e.g. 'data/dhis2/...') is resolved against.
# Set the WB_BANGLADESH_DIR environment variable to run the notebook on another machine;
# the original author's location is kept as the fallback.
WD = os.environ.get('WB_BANGLADESH_DIR', '/Users/edinhamzic/Symphony/wb_bangladesh/')
os.chdir(WD)
os.getcwd()

'/Users/edinhamzic/Symphony/wb_bangladesh'

## Organisation Units
### Organisation Unit Levels

In [3]:
# Fetch the organisation unit levels (unpaged) and show them as a table.
# Credentials are read from the DHIS2_USERNAME / DHIS2_PASSWORD environment variables
# instead of being embedded in the notebook.
# NOTE(review): the credentials previously committed in this cell should be rotated.
organisationUnitsLevels = 'https://centraldhis.mohfw.gov.bd/dhismohfw/api/26/organisationUnitLevels.json?paging=false'
with requests.Session() as s:
    organisationUnitsLevels_response = s.get(
        organisationUnitsLevels,
        auth=(os.environ['DHIS2_USERNAME'], os.environ['DHIS2_PASSWORD']),
    )
    # Fail on 4xx/5xx here rather than with an opaque JSON decoding error below.
    organisationUnitsLevels_response.raise_for_status()
    organisationUnitsLevels_json = organisationUnitsLevels_response.json()
pd.DataFrame.from_dict(organisationUnitsLevels_json['organisationUnitLevels'])

Unnamed: 0,displayName,id
0,Community Clinic and Union Level HF,P3xaO9CNrVY
1,Country,kv8G6cp7G8n
2,District and National/Divisional level HF,TY67fOo5ywC
3,Division,hrLzNYoLILu
4,Level 7,sVti5YayDAf
5,Union and Upazila level HF,IWh6WrGXOyN
6,Upazila and District level HF,WDBz0AfvG8C


### Organisation Unit Groups

In [4]:
# Fetch the organisation unit groups (unpaged) into a DataFrame.
# Credentials are read from the DHIS2_USERNAME / DHIS2_PASSWORD environment variables
# instead of being embedded in the notebook.
# NOTE(review): the credentials previously committed in this cell should be rotated.
organisationUnitGroups = 'https://centraldhis.mohfw.gov.bd/dhismohfw/api/26/organisationUnitGroups.json?paging=false'
with requests.Session() as s:
    organisationUnitGroups_response = s.get(
        organisationUnitGroups,
        auth=(os.environ['DHIS2_USERNAME'], os.environ['DHIS2_PASSWORD']),
    )
    # Fail on 4xx/5xx here rather than with an opaque JSON decoding error below.
    organisationUnitGroups_response.raise_for_status()
    organisationUnitGroups_payload = organisationUnitGroups_response.json()
# The historical name is kept for the final DataFrame so later cells keep working,
# but the raw payload now has its own name instead of being overwritten.
organisationUnitGroups_json = pd.DataFrame.from_dict(organisationUnitGroups_payload['organisationUnitGroups'])

In [5]:
# Single table of organisation-unit selections: (label, selector type, identifier).
# The selector type and identifier are later combined into `ou:OU_<type>-<identifier>`.
# Keeping both facts on one row guarantees `geos` and `geo_types` share the same keys.
geo_units = (
    ('District and National_Divisional level HF', 'LEVEL', '3'),
    ('Union and Upazila level HF', 'LEVEL', '5'),
    ('Upazila and District level HF', 'LEVEL', '4'),
    ('Upazila', 'GROUP', 'XpE4AlQZRkV'),
    ('Upazila Health Complex', 'GROUP', 'TwFxBSq8iAg'),
    ('District', 'GROUP', 'PyK69cVqr5l'),
    ('District Hospital_General Hospital', 'GROUP', 'yiUFzYnD1f9'),
    ('District NGO & Private Total', 'GROUP', 'Uey6NprfqrE'),
)

# label -> identifier (a level number for LEVEL rows, a group id for GROUP rows)
geos = {label: identifier for label, _, identifier in geo_units}
# label -> 'LEVEL' or 'GROUP'
geo_types = {label: selector for label, selector, _ in geo_units}

print(geos)
print(geo_types)

{'District and National_Divisional level HF': '3', 'Union and Upazila level HF': '5', 'Upazila and District level HF': '4', 'Upazila': 'XpE4AlQZRkV', 'Upazila Health Complex': 'TwFxBSq8iAg', 'District': 'PyK69cVqr5l', 'District Hospital_General Hospital': 'yiUFzYnD1f9', 'District NGO & Private Total': 'Uey6NprfqrE'}
{'District and National_Divisional level HF': 'LEVEL', 'Union and Upazila level HF': 'LEVEL', 'Upazila and District level HF': 'LEVEL', 'Upazila': 'GROUP', 'Upazila Health Complex': 'GROUP', 'District': 'GROUP', 'District Hospital_General Hospital': 'GROUP', 'District NGO & Private Total': 'GROUP'}


## Predefined Health Indicators

In [6]:
# Predefined indicator groups: one row per group, with a display label
# ('Health Indicators') and a ready-made `?dimension=dx:...` query prefix ('IDs').
# The path is relative to the working directory set at the top of the notebook.
health_indicators_csv = 'data/dhis2/health_indicators/health_indicators.csv'
health_indicators = pd.read_csv(filepath_or_buffer=health_indicators_csv, index_col=None)
health_indicators


Unnamed: 0,Health Indicators,IDs
0,01. Maternal Health,?dimension=dx:sjorO3gQABy;nHvaVDaRcQU;llFRYYZL...
1,02. Child Health,?dimension=dx:LwxiPaHQwif;IQ1WyeMu4PS;Pd6AH5ko...
2,03. Immunization,?dimension=dx:a2UI5138tBe;w0rSItvQO65;vHWCFOMx...
3,04. Newborn,?dimension=dx:f46jAdwYWDD;FzTEiMtiSQN;n8sW5SK2...
4,05. Logistics,?dimension=dx:t3oOvEIdZuw;gCIHrhpVM3p
5,06. Vaccine & Logistics stock of District,?dimension=dx:EI3rYMbue2l;x8f1fsbgspb;Hxwqpvzm...
6,07. Vaccine & Logistics stock of Upazila/Mun/CC,?dimension=dx:oWkwB8F0w3f;cdqjn347Zbe;MXblPCxm...
7,Antenatal Care (ANC),?dimension=dx:lqGV3xf4MnW;EMN9ubIGpGh;wpDTeNQUuEb


## Time Periods

In [7]:
# Calendar years to request, keyed by the string used in the `pe` filter
# and mapped to the same year as an integer (2009 through 2018 inclusive).
years = {str(year): year for year in range(2009, 2019)}
years

{'2009': 2009,
 '2010': 2010,
 '2011': 2011,
 '2012': 2012,
 '2013': 2013,
 '2014': 2014,
 '2015': 2015,
 '2016': 2016,
 '2017': 2017,
 '2018': 2018}

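## Building the API queries

One analytics URL is built for every combination of indicator group, year and organisation-unit selection. Each URL ends with `outputIdScheme=` so that it can be requested twice: once with `CODE` and once with `NAME` appended.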


## Functions

In [8]:
def create_api_queries(indicators, period, geos, geo_types):
    """Build one DHIS2 analytics CSV URL per indicator group, period and geography.

    Parameters
    ----------
    indicators : pandas.DataFrame or mapping of column name -> sequence
        Needs a 'Health Indicators' column (group label, used in the file name)
        and an 'IDs' column holding the query prefix that is appended directly
        to the endpoint (e.g. '?dimension=dx:...'). Rows are paired by position,
        so the frame may carry any index.
    period : dict
        Keys are the period identifiers placed in the `pe` filter and in the
        file name. Values are not used.
    geos : dict
        Geography label -> identifier placed after `OU_<type>-`.
    geo_types : dict
        Geography label -> selector type (e.g. 'LEVEL' or 'GROUP'). Must contain
        every key of `geos`.

    Returns
    -------
    dict
        File-name stem -> URL. Every URL ends with 'outputIdScheme=' so the
        caller can append the desired scheme. Stems have spaces and dots
        removed and path separators replaced by '_', so they are always a
        single path component.

    Raises
    ------
    KeyError
        If a geography in `geos` has no entry in `geo_types`, or a required
        column is missing from `indicators`.
    """
    dhis_main = 'https://centraldhis.mohfw.gov.bd/dhismohfw/api/29/analytics.csv'
    api_queries = {}

    # Pair the two columns by position instead of by label lookup, so a filtered
    # or re-indexed frame does not raise KeyError or pick the wrong row.
    indicator_rows = zip(indicators['Health Indicators'], indicators['IDs'])

    for indicator_name, data_dimension in indicator_rows:
        for year_item in period:
            for geo_item, geo_value in geos.items():
                geo_variables = f'&dimension=ou:OU_{geo_types[geo_item]}-{geo_value};'
                # 'dNLjKwsVjod' completes the `ou` dimension started above
                # (presumably the parent organisation unit; confirm against the server).
                # NOTE(review): the `pe` filter lists both the requested year and
                # LAST_12_MONTHS; confirm that combining the two is intended.
                time_variables = f'dNLjKwsVjod&filter=pe:{year_item};LAST_12_MONTHS&displayProperty=NAME&outputIdScheme='
                url = dhis_main + data_dimension + geo_variables + time_variables

                file_name = f'{indicator_name}_{year_item}_{geo_item}'
                file_name = file_name.replace(" ", "").replace(".", "")
                # Labels such as '...Upazila/Mun/CC' would otherwise turn the stem
                # into a nested path that cannot be opened for writing.
                file_name = file_name.replace("/", "_").replace("\\", "_")
                api_queries[file_name] = url
    return api_queries

def _dhis2_auth_from_env():
    """Return (username, password) read from DHIS2_USERNAME / DHIS2_PASSWORD."""
    try:
        return (os.environ['DHIS2_USERNAME'], os.environ['DHIS2_PASSWORD'])
    except KeyError as missing:
        raise RuntimeError(
            "DHIS2 credentials not found: set the DHIS2_USERNAME and DHIS2_PASSWORD "
            "environment variables or pass auth=(username, password)"
        ) from missing


def _stream_to_file(session, url, target_path, auth, timeout):
    """Stream `url` into `target_path`, leaving no file behind on failure."""
    partial_path = target_path + '.part'
    try:
        with session.get(url, auth=auth, stream=True, timeout=timeout) as response:
            # Without this check an error page (e.g. 401/500) would be saved as the CSV
            # and then skipped as "already downloaded" on every later run.
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        # Publish only complete downloads under the final name.
        os.replace(partial_path, target_path)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)


def data_download(api_queries, pause_length, dest_dir, auth=None, session=None, timeout=None):
    """Download every query twice (outputIdScheme CODE and NAME) as CSV files.

    For each `file_name -> url` entry, the files `<file_name>_CODE.csv` and
    `<file_name>_NAME.csv` are written to `dest_dir`. A file that already exists
    is skipped, so an interrupted run can simply be started again. Failed or
    interrupted downloads never leave a file under the final name.

    Parameters
    ----------
    api_queries : dict
        File-name stem -> URL ending in 'outputIdScheme=' (the scheme is appended here).
    pause_length : int or float
        Seconds to sleep after each download.
    dest_dir : str
        Target directory; created if it does not exist.
    auth : tuple, optional
        (username, password). When omitted, the DHIS2_USERNAME and DHIS2_PASSWORD
        environment variables are used. Credentials are no longer embedded in the code.
    session : optional
        Object with a requests-compatible `get(url, auth=, stream=, timeout=)`.
        When omitted, one `requests.Session` is created for the whole run and
        closed at the end.
    timeout : optional
        Passed through to `session.get`; `None` keeps the previous behaviour
        of not setting a timeout.

    Raises
    ------
    RuntimeError
        If a download is needed, `auth` is omitted and the environment variables are unset.
    Exception
        Whatever `session.get` or `raise_for_status()` raises (for a
        `requests.Session`, subclasses of `requests.RequestException`);
        the run stops at the first failing download.
    """
    os.makedirs(dest_dir, exist_ok=True)

    # (outputIdScheme suffix, progress message, whether the bare path is echoed as well)
    variants = (
        ('CODE', "Getting API query for {} file", True),
        ('NAME', "Restarting the process for {} file", False),
    )

    owns_session = session is None
    try:
        for file_name, api_query in api_queries.items():
            print(f"Starting the process for {file_name} files: with geo codes and names respectively")
            print(f"Constructing file path for {file_name} file")
            for id_scheme, start_message, echo_path in variants:
                target_path = os.path.join(dest_dir, f"{file_name}_{id_scheme}.csv")
                if os.path.exists(target_path):
                    print(f"{file_name} file already downloaded")
                    continue

                # Resolve credentials and open the connection only when there is
                # actually something to fetch.
                if auth is None:
                    auth = _dhis2_auth_from_env()
                if session is None:
                    session = requests.Session()

                print(start_message.format(target_path))
                if echo_path:
                    print(target_path)
                print(f"Writing in chunks {target_path} file")
                _stream_to_file(session, api_query + id_scheme, target_path, auth, timeout)

                print(f"Pausing the process for {pause_length} seconds")
                time.sleep(pause_length)
    finally:
        if owns_session and session is not None:
            session.close()


### Periods

In [9]:
years

{'2009': 2009,
 '2010': 2010,
 '2011': 2011,
 '2012': 2012,
 '2013': 2013,
 '2014': 2014,
 '2015': 2015,
 '2016': 2016,
 '2017': 2017,
 '2018': 2018}

### Geos

In [10]:
geos

{'District': 'PyK69cVqr5l',
 'District Hospital_General Hospital': 'yiUFzYnD1f9',
 'District NGO & Private Total': 'Uey6NprfqrE',
 'District and National_Divisional level HF': '3',
 'Union and Upazila level HF': '5',
 'Upazila': 'XpE4AlQZRkV',
 'Upazila Health Complex': 'TwFxBSq8iAg',
 'Upazila and District level HF': '4'}

### Geo Types

In [11]:
geo_types

{'District': 'GROUP',
 'District Hospital_General Hospital': 'GROUP',
 'District NGO & Private Total': 'GROUP',
 'District and National_Divisional level HF': 'LEVEL',
 'Union and Upazila level HF': 'LEVEL',
 'Upazila': 'GROUP',
 'Upazila Health Complex': 'GROUP',
 'Upazila and District level HF': 'LEVEL'}

### Downloading data

In [12]:
health_indicators

Unnamed: 0,Health Indicators,IDs
0,01. Maternal Health,?dimension=dx:sjorO3gQABy;nHvaVDaRcQU;llFRYYZL...
1,02. Child Health,?dimension=dx:LwxiPaHQwif;IQ1WyeMu4PS;Pd6AH5ko...
2,03. Immunization,?dimension=dx:a2UI5138tBe;w0rSItvQO65;vHWCFOMx...
3,04. Newborn,?dimension=dx:f46jAdwYWDD;FzTEiMtiSQN;n8sW5SK2...
4,05. Logistics,?dimension=dx:t3oOvEIdZuw;gCIHrhpVM3p
5,06. Vaccine & Logistics stock of District,?dimension=dx:EI3rYMbue2l;x8f1fsbgspb;Hxwqpvzm...
6,07. Vaccine & Logistics stock of Upazila/Mun/CC,?dimension=dx:oWkwB8F0w3f;cdqjn347Zbe;MXblPCxm...
7,Antenatal Care (ANC),?dimension=dx:lqGV3xf4MnW;EMN9ubIGpGh;wpDTeNQUuEb


In [13]:
# Expand the predefined indicator groups over every year and organisation-unit
# selection; the result maps a file-name stem to its analytics URL.
dghis_queries = create_api_queries(health_indicators, years, geos, geo_types)

In [14]:
dghis_queries

{'01MaternalHealth_2009_District': 'https://centraldhis.mohfw.gov.bd/dhismohfw/api/29/analytics.csv?dimension=dx:sjorO3gQABy;nHvaVDaRcQU;llFRYYZLkur;IVpv2Tj5rbj;UeBuoOKVSt9;DA4SHBTcotS;EN6VvWHuz6H;suIr6bcCKVv;ejt6ia79ixB;IQEbbjwChym;CRstBHmZrRi;pqxYwxpF3yV;eykgM4I5jQF&dimension=ou:OU_GROUP-PyK69cVqr5l;dNLjKwsVjod&filter=pe:2009;LAST_12_MONTHS&displayProperty=NAME&outputIdScheme=',
 '01MaternalHealth_2009_DistrictHospital_GeneralHospital': 'https://centraldhis.mohfw.gov.bd/dhismohfw/api/29/analytics.csv?dimension=dx:sjorO3gQABy;nHvaVDaRcQU;llFRYYZLkur;IVpv2Tj5rbj;UeBuoOKVSt9;DA4SHBTcotS;EN6VvWHuz6H;suIr6bcCKVv;ejt6ia79ixB;IQEbbjwChym;CRstBHmZrRi;pqxYwxpF3yV;eykgM4I5jQF&dimension=ou:OU_GROUP-yiUFzYnD1f9;dNLjKwsVjod&filter=pe:2009;LAST_12_MONTHS&displayProperty=NAME&outputIdScheme=',
 '01MaternalHealth_2009_DistrictNGO&PrivateTotal': 'https://centraldhis.mohfw.gov.bd/dhismohfw/api/29/analytics.csv?dimension=dx:sjorO3gQABy;nHvaVDaRcQU;llFRYYZLkur;IVpv2Tj5rbj;UeBuoOKVSt9;DA4SHBTcotS;EN6VvWH

In [15]:
len(dghis_queries)

640

In [None]:
# Long-running cell: every query is fetched twice (CODE and NAME) and each
# download is followed by a 45-second pause. Files already present in the
# destination directory are skipped, so the cell can be re-run to resume.
data_download(
    api_queries=dghis_queries,
    pause_length=45,
    dest_dir='data/dhis2/health_indicators/',
)

Starting the process for 01MaternalHealth_2009_DistrictandNational_DivisionallevelHF files: with geo codes and names respectively
Constructing file path for 01MaternalHealth_2009_DistrictandNational_DivisionallevelHF file
Getting API query for data/dhis2/health_indicators/01MaternalHealth_2009_DistrictandNational_DivisionallevelHF_UID.csv file
data/dhis2/health_indicators/01MaternalHealth_2009_DistrictandNational_DivisionallevelHF_UID.csv
Writing in chunks data/dhis2/health_indicators/01MaternalHealth_2009_DistrictandNational_DivisionallevelHF_UID.csv file
Pausing the process for 45 seconds
Getting API query for data/dhis2/health_indicators/01MaternalHealth_2009_DistrictandNational_DivisionallevelHF_CODE.csv file
data/dhis2/health_indicators/01MaternalHealth_2009_DistrictandNational_DivisionallevelHF_CODE.csv
Writing in chunks data/dhis2/health_indicators/01MaternalHealth_2009_DistrictandNational_DivisionallevelHF_CODE.csv file
Pausing the process for 45 seconds
Restarting the process 