In [1]:
# Retrieve DHIS2 data for a specified time range using the
# dhis2.py Python wrapper, and optionally export it to a CSV or Excel file.
# by Ali Tobah - atobah@gmail.com

# Resources:
# https://pypi.org/project/dhis2.py/
# https://docs.dhis2.org/en/develop/using-the-api/dhis-core-version-240/introduction.html
# https://play.dhis2.org/40.2.0/api/resources

# Only need to run this once for each session.
!pip install dhis2.py

Collecting dhis2.py
  Downloading dhis2.py-2.3.0-py2.py3-none-any.whl (17 kB)
Collecting logzero>=1.5.0 (from dhis2.py)
  Downloading logzero-1.7.0-py2.py3-none-any.whl (16 kB)
Installing collected packages: logzero, dhis2.py
Successfully installed dhis2.py-2.3.0 logzero-1.7.0


In [2]:
import pandas as pd
import getpass
from dhis2 import Api

In [3]:
def retrieveNameID(dataParam):
    '''
        Fetch the name/ID table for one API resource collection.

        Input: Parameter for the API request (the endpoint name,
               e.g. 'dataSets'). Also used as the key of the response.
        Output: Prints DF shape and head.
        Return: Dataframe with 'displayName' and 'id' renamed to
                '<prefix>Name' and '<prefix>ID', where <prefix> is
                dataParam without its last character.
    '''
    # The merged, paged request comes back as a dictionary whose
    # single key (the endpoint name) holds a list of dictionaries;
    # that list becomes the rows of the dataframe.
    # NOTE: relies on the notebook-level `api` object existing.
    records = api.get_paged(dataParam, merge=True)[dataParam]

    # Drop the last character of the endpoint name to build column
    # names that are unique per resource ('dataSets' -> 'dataSet').
    prefix = dataParam[:-1]
    uniqueCols = {
        'displayName': prefix + 'Name',
        'id': prefix + 'ID',
    }
    dataDF = pd.DataFrame(records).rename(columns=uniqueCols)

    # Show shape and first rows so the user can eyeball the result
    print("Dataframe shape: ", dataDF.shape, '\n')
    print(dataDF.head())

    return dataDF

In [4]:
# Build the API object that every later request goes through.

# Ask where the data lives.
# The demo data is normally at "https://play.dhis2.org/demo".
# NOTE: While writing this (JAN 2024), demo was not up,
# so "https://play.dhis2.org/2.39.4.1" was used instead.
dhisURL = input("Data location (URL): ")

# Ask for the credentials as part of constructing the object.
# For the demo data the user name is "admin" and the password is
# "district". getpass keeps the typed password off the screen, and
# passing it straight through avoids keeping it in a variable.
api = Api(
    dhisURL,
    input("Username: "),
    getpass.getpass("Password: "),
)

Data location (URL): https://play.dhis2.org/2.39.4.1
Username: admin
Password: ··········


In [5]:
# Fetch the dataset name/ID dataframe, then pull the ID
# column out as a plain list for use in the data request.
dataSetDF = retrieveNameID('dataSets')
dataSetIDList = list(dataSetDF['dataSetID'])

Dataframe shape:  (26, 2) 

                      dataSetName    dataSetID
0             ART monthly summary  lyLU2wR22tC
1                    Child Health  BfMAe6Itzgt
2  Clinical Monitoring Checklist   VTdjfLXXmoi
3                       EPI Stock  TuL8IOPzpHh
4              Emergency Response  Lpw6GcnTrmS


In [6]:
# Fetch the organisational unit name/ID dataframe.
# The ID list feeds the data request; the ID -> name
# dictionary is used to label the final result.
orgUnitDF = retrieveNameID('organisationUnits')
orgUnitIDList = list(orgUnitDF['organisationUnitID'])
orgUnitDict = dict(zip(orgUnitDF['organisationUnitID'],
                       orgUnitDF['organisationUnitName']))

Dataframe shape:  (1332, 2) 

   organisationUnitName organisationUnitID
0          Panderu MCHP        ueuQlqb8ccl
1           Adonkia CHP        Rp268JB6Ne4
2      Afro Arab Clinic        cDw53Ej8rju
3             Agape CHP        GvFqTavdpGE
4  Ahamadyya Mission Cl        plnHVbJR6p4


In [7]:
# Fetch the data element name/ID dataframe and build the
# ID -> name dictionary used to label the final result.
dataElementDF = retrieveNameID('dataElements')
dataElementDict = dict(zip(dataElementDF['dataElementID'],
                           dataElementDF['dataElementName']))

Dataframe shape:  (1036, 2) 

                                     dataElementName dataElementID
0                                      ANC 1st visit   fbfJHSPpUQD
1                                      ANC 2nd visit   cYeuwXTCPkU
2                                      ANC 3rd visit   Jtf34kNZhzP
3                             ANC 4th or more visits   hfdmMSPBgLG
4  ARI treated with antibiotics (pneumonia) follo...   FHD3wiSM7Sn


In [8]:
# Fetch the category option combo name/ID dataframe and build
# the ID -> name dictionary used to label the final result.
categoryOptiontDF = retrieveNameID('categoryOptionCombos')
categOptDict = dict(zip(categoryOptiontDF['categoryOptionComboID'],
                        categoryOptiontDF['categoryOptionComboName']))

Dataframe shape:  (225, 2) 

  categoryOptionComboName categoryOptionComboID
0                   0-11m           S34ULMcHMca
1                   0-11m           sqGRzCziswD
2                    0-4y           o2gxEt6Ek2C
3                  12-59m           LEDQQXEpWUl
4                  12-59m           wHBMVthqIX4


In [9]:
# Prompt for start and end dates.
# When testing, use 2022-01-01 and 2022-01-31
def promptForDate(promptText):
    '''
        Input: Prompt text shown to the user.
        Output: Prints a hint and asks again while the entry is
                not a real calendar date in yyyy-mm-dd form.
        Return: The date as a zero-padded yyyy-mm-dd string.
    '''
    from datetime import datetime

    while True:
        entry = input(promptText).strip()
        try:
            # Round-trip through a date object so the value used in
            # the request and in the export file name is normalised.
            return datetime.strptime(entry, '%Y-%m-%d').date().isoformat()
        except ValueError:
            print("Not a valid yyyy-mm-dd date:", entry)


startDate = promptForDate("Start date (yyyy-mm-dd): ")
endDate = promptForDate("End date (yyyy-mm-dd): ")

# Zero-padded ISO dates sort chronologically as plain strings,
# so a string comparison is enough to catch a reversed range.
while endDate < startDate:
    print("End date must not be before start date.")
    endDate = promptForDate("End date (yyyy-mm-dd): ")

Start date (yyyy-mm-dd): 2022-01-01
End date (yyyy-mm-dd): 2022-01-31


In [10]:
# Request data for all datasets in dataset list,
# and all organisational units in organisational unit list,
# for given start and end dates.

# Since data is too large for one request, break organisational
# unit ID list into a list of lists of organisational units, each
# holding at most orgChunkSize IDs.
orgChunkSize = 100
orgNestList = [orgUnitIDList[i:i + orgChunkSize]
               for i in range(0, len(orgUnitIDList), orgChunkSize)]

# Create an empty list to hold the responses.
# This will be a list of dictionaries, each dictionary
# representing one data element, as seen in the next cell.
responseList = []
print("Expect", len(orgNestList), "iterations:")

# Retrieve data for each chunk of orgUnits and add to
# response list using extend (as opposed to append), so the
# result stays one flat list rather than a list of lists.
# enumerate supplies the 1-based counter printed for user patience.
for i, eaOrgList in enumerate(orgNestList, start=1):
    response = api.get('dataValueSets', params={
        'dataSet': dataSetIDList,
        'orgUnit': eaOrgList,
        'startDate': startDate,
        'endDate': endDate
    })

    # Guard against a response that has no 'dataValues' key (or a
    # null one), e.g. when a chunk has nothing recorded for the
    # period: treat it as an empty batch instead of a KeyError.
    addnlElements = response.json().get('dataValues') or []
    responseList.extend(addnlElements)

    print(i, "..", len(addnlElements), "data elements")

print("Total data elements:", len(responseList))


Expect 14 iterations:
1 .. 787 data elements
2 .. 674 data elements
3 .. 688 data elements
4 .. 906 data elements
5 .. 559 data elements
6 .. 425 data elements
7 .. 369 data elements
8 .. 648 data elements
9 .. 629 data elements
10 .. 875 data elements
11 .. 942 data elements
12 .. 759 data elements
13 .. 727 data elements
14 .. 292 data elements
Total data elements: 9280


In [11]:
# Print first value in the list (i.e the first data element)
# for visual confirmation. Shows nothing, instead of raising
# IndexError, when the request returned no data at all.
responseList[0] if responseList else None

{'dataElement': 's46m5MS0hxu',
 'period': '202201',
 'orgUnit': 'o0BgK1dLhF8',
 'categoryOptionCombo': 'Prlt0C1RF0s',
 'attributeOptionCombo': 'HllvX50cXC0',
 'value': '15',
 'storedBy': 'anaam',
 'created': '2022-09-05T13:06:21.000+0000',
 'lastUpdated': '2022-05-29T22:06:10.000+0000',
 'comment': None,
 'followup': False}

In [12]:
# Turn the list of data element dictionaries into a dataframe:
# one row per dictionary, one column per key.
dhisDF = pd.DataFrame(data=responseList)
print("Dataframe shape: ", dhisDF.shape, '\n')
dhisDF.head(5)

Dataframe shape:  (9280, 11) 



Unnamed: 0,dataElement,period,orgUnit,categoryOptionCombo,attributeOptionCombo,value,storedBy,created,lastUpdated,comment,followup
0,s46m5MS0hxu,202201,o0BgK1dLhF8,Prlt0C1RF0s,HllvX50cXC0,15,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False
1,x3Do5e7g4Qo,202201,o0BgK1dLhF8,Prlt0C1RF0s,HllvX50cXC0,10,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False
2,O05mAByOgAv,202201,o0BgK1dLhF8,Prlt0C1RF0s,HllvX50cXC0,9,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False
3,vI2csg55S9C,202201,o0BgK1dLhF8,Prlt0C1RF0s,HllvX50cXC0,5,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False
4,I78gJm4KBo7,202201,o0BgK1dLhF8,Prlt0C1RF0s,HllvX50cXC0,9,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False


In [13]:
# Replace IDs with names using dictionaries from above.
# Note that the attributeOptionCombo uses the same
# dictionary as categoryOptionCombo. This is not a typo.
idLookups = {
    'dataElement': dataElementDict,
    'orgUnit': orgUnitDict,
    'categoryOptionCombo': categOptDict,      # Same dictionary.
    'attributeOptionCombo': categOptDict,     # Not a typo.
}

# Work on a copy so the dataframe built in the previous cell
# is not modified through a shared reference.
dhisDF = dhisDF.copy()
for colName, lookup in idLookups.items():
    # Fall back to the existing value when it is not a key of the
    # dictionary. This keeps unknown IDs visible instead of turning
    # them into NaN, and makes re-running this cell harmless (the
    # already-substituted names are simply left as they are).
    dhisDF[colName] = dhisDF[colName].map(
        lambda val, lookup=lookup: lookup.get(val, val)
    )

print("Dataframe shape: ", dhisDF.shape, '\n')
dhisDF.head()

Dataframe shape:  (9280, 11) 



Unnamed: 0,dataElement,period,orgUnit,categoryOptionCombo,attributeOptionCombo,value,storedBy,created,lastUpdated,comment,followup
0,BCG doses given,202201,Bendugu CHC,"Fixed, <1y",default,15,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False
1,OPV0 doses given,202201,Bendugu CHC,"Fixed, <1y",default,10,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False
2,OPV2 doses given,202201,Bendugu CHC,"Fixed, <1y",default,9,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False
3,OPV3 doses given,202201,Bendugu CHC,"Fixed, <1y",default,5,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False
4,Penta2 doses given,202201,Bendugu CHC,"Fixed, <1y",default,9,anaam,2022-09-05T13:06:21.000+0000,2022-05-29T22:06:10.000+0000,,False


In [16]:
# Request file type and export.
# CSV, Excel or no export.

# Each accepted answer maps to (file extension, writer method of the
# dataframe); 'nox' maps to None, meaning nothing is written.
exportOptions = {
    'csv': ('.csv', dhisDF.to_csv),
    'excel': ('.xlsx', dhisDF.to_excel),
    'nox': None,
}

print("Export to CSV, Excel or no export.")
# Strip and lower-case the answer so entries such as "CSV" or
# "Excel " (as spelled in the line printed above) are accepted.
outFileType = input("csv, excel, nox: ").strip().lower()
while outFileType not in exportOptions:
    outFileType = input("csv, excel, nox: ").strip().lower()

# The file name carries the requested date range without dashes.
newStartDate = startDate.replace('-', '')
newEndDate = endDate.replace('-', '')
dhisFileName = "DHIS-Demo-Data" + "-" + newStartDate + "-" + newEndDate

chosenExport = exportOptions[outFileType]
if chosenExport is None:
    print("Data not exported.")
else:
    fileExtension, writeFile = chosenExport
    dhisFileName = dhisFileName + fileExtension
    print("Exporting ...")
    writeFile(dhisFileName)
    print("Data exported to:", dhisFileName)


Export to CSV, Excel or no export.
csv, excel, nox: nox
Data not exported.
