# Overview
This notebook uses the *new* ESPEN Forecast API. There are 3 data elements that are being imported:

* MDA Forecasted 
* IA Forecasted
* IA Type


In [2]:
import requests
import csv

TOKEN="" # you can obtain an API token from ESPEN Portal Team


In [5]:
# looks at the json result and determines if the estimate has been provided (for any of the years)
def has_ia_estimates(result):
    if 'IA_2020' in result:
        if result['IA_2020'] is not None \
        or result['IA_2021'] is not None \
        or result['IA_2022'] is not None \
        or result['IA_2023'] is not None \
        or result['IA_2024'] is not None \
        or result['IA_2025'] is not None \
        or result['IA_2026'] is not None \
        or result['IA_2027'] is not None \
        or result['IA_2028'] is not None \
        or result['IA_2029'] is not None \
        or result['IA_2030'] is not None :
            return True
        else:
            return False
    # funny thing is the sch and trachoma return ia keys without an underscore, so checking here 
    # TODO: ask standard code
    elif 'IA2020' in result:
        if result['IA2020'] is not None \
        or result['IA2021'] is not None \
        or result['IA2022'] is not None \
        or result['IA2023'] is not None \
        or result['IA2024'] is not None \
        or result['IA2025'] is not None \
        or result['IA2026'] is not None \
        or result['IA2027'] is not None \
        or result['IA2028'] is not None \
        or result['IA2029'] is not None \
        or result['IA2030'] is not None :
            return True
        else:
            return False
    else:
        return False

# see if any of the years has estimates for mda
def has_mda_estimates(result):
    if result['MDA_2020'] is not None \
    or result['MDA_2021'] is not None \
    or result['MDA_2022'] is not None \
    or result['MDA_2023'] is not None \
    or result['MDA_2024'] is not None \
    or result['MDA_2025'] is not None \
    or result['MDA_2026'] is not None \
    or result['MDA_2027'] is not None \
    or result['MDA_2028'] is not None \
    or result['MDA_2029'] is not None \
    or result['MDA_2030'] is not None:
        return True
    else:
        return False

# look up cateogry option, these are from what is set up in D
def get_disease_category_option(disease):
    
    if disease == "lf":
        return "V0BMspy4wZa"
    
    elif disease == "sth":
        return "kZdL7Ru8FXQ"
    
    elif disease == "sch":
        return "VvPBXlaJZU7"
    
    elif disease == "oncho":
        return "tTPp1nAkGP7"
    
    elif disease == "trachoma":
        return "HSnNEBpKtmU"
    
    else:
        return "missing"


In [6]:
# from here: https://www.mikulskibartosz.name/how-to-display-a-progress-bar-in-jupyter-notebook/
import time, sys
from IPython.display import clear_output

def update_progress(progress):
    bar_length = 20
    if isinstance(progress, int):
        progress = float(progress)
    if not isinstance(progress, float):
        progress = 0
    if progress < 0:
        progress = 0
    if progress >= 1:
        progress = 1
        
    block = int(round(bar_length * progress))
    
    clear_output(wait = True)
    text = "Progress: [{0}] {1:.1f}%".format( "#" * block + "-" * (bar_length - block), progress * 100)
    print(text)

### Step 1: Import data from ESPEN API
Not all responses from the Forecast API will include the MDA and IA information, which is why only certain records are retained here as *total_mda_results* and *total_ia_results* respectively.

In [7]:
diseases = ['lf','sth', 'sch', 'oncho','trachoma'] # this can be lf, sth, sch, trachoma, oncho

total_results = []
total_ia_results = []
total_mda_results = []

for disease in diseases: # for each of the diseases
   
    r1 = requests.get('https://admin.espen.afro.who.int/api/data?iso2=ET&disease='+ disease +'&level=iu&type=forecast')
    iu_results = r1.json()
    
    # add to total
    total_results = total_results + iu_results
    
    # initialize disease specific estimates
    ia_estimates = []
    mda_estimates = []
    
    # go through each IU
    for index, iu_result in enumerate(iu_results):
        
        # add disease variable
        iu_result['disease'] = disease
        
        # keep track of impact assessments and MDA estimates seperatey
        if has_ia_estimates(iu_result): 
            ia_estimates.append(iu_result)
        
        if has_mda_estimates(iu_result): 
            mda_estimates.append(iu_result)
            
    print("there are {}".format(len(iu_results))+" records for "+ disease +" where {}".format(len(mda_estimates)) + " districts have MDA \
estimates and {}".format(len(ia_estimates)) +" districts have IU estimates")
    
    total_ia_results = total_ia_results + ia_estimates
    total_mda_results = total_mda_results + mda_estimates 

print("--------")
print("there are {}".format(len(total_results))+" records across diseases")
print("there are {}".format(len(total_mda_results))+" total_mda_results and {}".format(len(total_ia_results)) + " total_ia_results results")


there are 937 records for lf where 937 districts have MDA estimates and 79 districts have IU estimates
there are 937 records for sth where 937 districts have MDA estimates and 937 districts have IU estimates
there are 937 records for sch where 937 districts have MDA estimates and 937 districts have IU estimates
there are 937 records for oncho where 937 districts have MDA estimates and 937 districts have IU estimates
there are 937 records for trachoma where 63 districts have MDA estimates and 57 districts have IU estimates
--------
there are 4685 records across diseases
there are 3811 total_mda_results and 2947 total_ia_results results


In [8]:
import numpy as np
import pandas as pd

# write out to these to files
total_mda_df = pd.DataFrame(total_mda_results)
total_mda_df.to_csv('mda-export.csv', index=True)

total_ia_df = pd.DataFrame(total_ia_results)
total_ia_df.to_csv('ia-export.csv')  

## Step 2: Load in current orgs from database.
 This will be used later to determine there is a match in the [NTDDB](https://ethiopia.integratedntddb.org/)

In [1]:
import numpy as np
import pandas as pd

#ntd_orgs = pd.read_excel('../integrated_db_org_list.xlsx')
ntd_orgs = pd.read_csv('../ntddb_orgs.csv')

In [2]:
ntd_orgs.head(20)

Unnamed: 0,woreda,woreda uid,zone,zone id,region,region id
0,Abadir,ORG00000302,Harare,ORG00000301,Harare,ORG00000299
1,Abala,ORG00000029,Awsa,ORG00000014,Afar,ORG00000013
2,Abaya,unZLOLtOisg,Borena,ORG00000361,Oromia,ORG00000311
3,Abaya,ORG00000635,West Guji,ORG00000631,Oromia,ORG00000311
4,Abay Comen,ORG00000484,Horo Guduru,ORG00000482,Oromia,ORG00000311
5,Abe Dongoro,ORG00000483,Horo Guduru,ORG00000482,Oromia,ORG00000311
6,Abergele,ORG00000194,Wag Hemra,ORG00000193,Amhara,ORG00000053
7,Abeshge,ORG00000723,Gurage,ORG00000719,SNNPR,ORG00000668
8,Abi Adi,ORG00000942,Central Tigray,ORG00000935,Tigray,ORG00000934
9,Abichugna,ORG00000549,North Shoa Zone,ORG00000543,Oromia,ORG00000311


In [3]:
# the old way
def find_admin3(district):
    for index, ntd_row in ntd_orgs.iterrows():
        if ntd_row['woreda'].lower() == district.lower():
            return ntd_row['woreda uid']

In [4]:
# the newer way. exact match by woreda name. Use a dictionary for looking up woredas
org_dictionary = {}

for index, ntd_row in ntd_orgs.iterrows():
    org_dictionary[ntd_row['woreda'].lower()] = ntd_row['woreda uid']
    
    
def find_admin3(district):
    
    if district.lower() in org_dictionary:
        return org_dictionary[district.lower()]


In [5]:
# fuzzy match by woreda name
# returns a dict with the matched ou and the highest score 

from fuzzywuzzy import fuzz
from fuzzywuzzy import process


# This is REALLY imported used in step 4 and 5 below
ou_match_threshold = 86

# using fuzzy
def find_admin3(district):
    highscore = {"uid": "00000000","score":0}
    for index, ntd_row in ntd_orgs.iterrows():   
        score = fuzz.ratio(ntd_row['woreda'].lower(), district.lower())
        if score >= highscore['score']:
            highscore = {"uid":ntd_row['woreda uid'],"score":score}
    
    return highscore


In [6]:
# test this out
a=find_admin3("Gumer")
if a:
    print('there with a match score of {}'.format(a['score']))
else:
    print('not there')


there with a match score of 91


In [7]:
fuzz.ratio("Annalemo".lower(), "Anlemo".lower())

86

## Step 3: Write out OU mapping file
Determine how many IUs can be mapped to org units in the database

In [16]:
# Write out to a CSV file
with open('espen-forecasted-ous.csv', 'w') as analysis_data_export:
    
    fieldnames = ['admin1_name','admin2_name', 'admin3_name', 'disease', 'NTD UID','Match Ratio']
    writer = csv.DictWriter(analysis_data_export, fieldnames=fieldnames)
    writer.writeheader()
    
    for index,record in enumerate(total_results):
        
        match = find_admin3(record['IU_Name'])
        
        writer.writerow({
            'admin1_name': record['Country'],
            'admin2_name': record['Province_Region'],
            'admin3_name' : record['IU_Name'],
            'disease' : record['disease'],
            'NTD UID': match['uid'],
            'Match Ratio' : match['score']
        })
        
        update_progress( index / len(total_results))

Progress: [####################] 100.0%


## Step 4: Create the import file for MDA Forecasted
 The idea is to create a file that looks like this:


```xml
{
  "dataValues": [
    {
      "dataElement": "hygwN3AetL9", // MDA Forecasted
      "period": "2020", 
      "orgUnit": "lgZ6HfZaj3f", 
      "value": "True",   
      "categoryOptionCombo" : "pepMNQRIOA0" 
    }, 
    {
      "dataElement": "hygwN3AetL9", // MDA Forecasted
      "period": "2021", 
      "orgUnit": "zHa2ohFrpPM", 
      "value": "high-endemicity", 
      "categoryOptionCombo" : "pepMNQRIOA0" 
    }, 
 ```

In [14]:
# these are the currently forecasted years
mda_lables = ['MDA_2020','MDA_2021','MDA_2022','MDA_2023','MDA_2024','MDA_2025','MDA_2026','MDA_2027','MDA_2028','MDA_2029','MDA_2030']

In [15]:
# Get all of the possible MDA values
temp = set()
for n in total_mda_results:
    for i in mda_lables:
        if i in n : #if 'IA2024' in n and n['IA2024'] is not '':
            temp.add(n[i])
        
temp

{'MDA', 'NE', None}

In [16]:
datavalues = []
unmaped_ous = set()

for index, record in enumerate(total_mda_results):
    
    # only import data for woredas that can be found
    match = find_admin3(record['IU_Name'])
    if match['score'] >= ou_match_threshold:
        
        for label in mda_lables:
             
            # make sure that this record includes this data, because sometimes it doesn't
            if label in record and record[label] is not None: # and ignore empty values (i.e. IU_NAME: None)

                # MDA is forecasted when set to 'MDA', ignore for NE
                if record[label]=="MDA":
                    
                    element = {
                        "dataElement": "hygwN3AetL9",
                        "period": label[-4:], 
                        "orgUnit": match['uid'],
                        "value": "True",
                        "categoryOptionCombo" : get_disease_category_option(record['disease'] )
                                   }
                    datavalues.append(element)
                    
                    element = {
                        "dataElement": "JTceXcQc0HL",
                        "period": label[-4:], 
                        "orgUnit": match['uid'],
                        "value": "MDA",
                        "categoryOptionCombo" : get_disease_category_option(record['disease'] )
                                   }
                    datavalues.append(element)

                    
    else:
        unmaped_ous.add(record['IU_Name'])
        
    update_progress(index / len(total_mda_results))
        

Progress: [####################] 100.0%


In [17]:
# print what we know
print("Of the potential {}".format(len(total_mda_results)) +" records from the API to be loaded in, there are {}"\
      .format(len(datavalues)) + " yearly forecasts to be imported and {}"\
      .format(len(unmaped_ous)) + " woredas that could not be mapped")



Of the potential 3811 records from the API to be loaded in, there are 17720 yearly forecasts to be imported and 127 woredas that could not be mapped


In [18]:
import json

# Add these things
data = {}
data['dataValues'] = datavalues

with open('mda-results-importing.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=False, indent=4)

### Import the file
In the Import Export app, select Data Import and this select this file generated above

## Step 5: Create the import file for IA Forecasted
 The idea is to create a file that looks like this:


```xml
{
  "dataValues": [
    {
      "dataElement": "nAEO6eb7MdX", // IA Forecasted
      "period": "2020", 
      "orgUnit": "lgZ6HfZaj3f", 
      "value": "True",   
      "categoryOptionCombo" : "V0BMspy4wZa" 
    }, 
    {
      "dataElement": "nAEO6eb7MdX", // IA Forecasted
      "period": "2021", 
      "orgUnit": "lgZ6HfZaj3f", 
      "value": "True", 
      "categoryOptionCombo" : "V0BMspy4wZa" 
    }, 
 ```
 
 

In [12]:
# these are the currently forecasted years
ia_lables = ['IA_2020','IA_2021','IA_2022','IA_2023','IA_2024','IA_2025','IA_2026','IA_2027','IA_2028','IA_2029','IA_2030','IA2020','IA2021','IA2022','IA2023','IA2024','IA2025','IA2026','IA2027','IA2028','IA2029','IA2030']




In [15]:
# IA_2020 should return 2020 and IA2020 should also return 2020
print("{}".format('IA2020'[-4:])+" should be 2020")
print("{}".format('IA_2020'[-4:])+" should also be 2020")

2020 should be 2020
2020 should also be 2020


In [32]:
# Get all of the possible IA values
temp = set()
for n in total_ia_results:
    for i in ia_lables:
        if i in n \
        and n[i] is not None \
        and n[i] is not '': #if 'IA2024' in n and n['IA2024'] is not '':
            temp.add(n[i])
        
temp

{'IA', 'Mapping', 'Pre-TAS', 'Surveillance', 'TAS 1', 'TAS 2', 'TAS 3', 'TIS'}

In [47]:
datavalues = []
unmaped_ous = set()

for index, record in enumerate(total_ia_results):
    
    # only import data for woredas that can be found
    match = find_admin3(record['IU_Name'])
    if match['score'] >= ou_match_threshold:
    
        for label in ia_lables:

            # make sure that this record includes this data, because sometimes it doesn't
            if label in record and record[label] is not None: # and ignore empty values (i.e. IU_NAME: None)

                    element = {
                        "dataElement": "nAEO6eb7MdX",
                        "period": label[-4:], 
                        "orgUnit": match['uid'],
                        "value": "True",
                        "categoryOptionCombo" : get_disease_category_option(record['disease'] )
                                   }
                    datavalues.append(element)
           
    else:
        unmaped_ous.add(record['IU_Name'])
        
    update_progress(index / len(total_ia_results))
        



Progress: [####################] 100.0%


In [49]:
# print what we know
print("Of the potential {}".format(len(total_ia_results)) +" records from the API to be loaded in, there are {}"\
      .format(len(datavalues)) + " yearly forecasts to be imported and {}"\
      .format(len(unmaped_ous)) + " woredas that could not be mapped")



Of the potential 2947 records from the API to be loaded in, there are 25635 yearly forecasts to be imported and 127 woredas that could not be mapped


In [50]:
import json

# Add these things
data = {}
data['dataValues'] = datavalues

with open('ia-results-importing.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=False, indent=4)

### Import the file
In the Import Export app, select Data Import and this select this file generated above

## Step 6: Create the import file for IA Forecasted Type
 The idea is to create a file that looks like this:


```xml
{
  "dataValues": [
    {
      "dataElement": "nAEO6eb7MdX", // IA Forecasted
      "period": "2020", 
      "orgUnit": "lgZ6HfZaj3f", 
      "value": "True",   
      "categoryOptionCombo" : "V0BMspy4wZa" 
    }, 
    {
      "dataElement": "nAEO6eb7MdX", // IA Forecasted
      "period": "2021", 
      "orgUnit": "lgZ6HfZaj3f", 
      "value": "True", 
      "categoryOptionCombo" : "V0BMspy4wZa" 
    }, 
 ```
 
 

In [19]:
# returns the option value that DHIS2 needs
def mapAssessmentType(value):
    if value == "IA":
        return "IA"
    elif value == "Mapping":
        return "Mapping"
    elif value == "Pre-TAS":
        return "Pre TAS"
    elif value == "TAS 1":
        return "TAS 1"
    elif value == "TAS 2":
        return "TAS 2"
    elif value == "TAS 3":
        return "TAS 3"
    elif value == "Surveillance":
        return "Surveillance"
    elif value == "TIS":
        return "TIS"
    else:
        return value+" does not exist"
    
         

In [22]:
datavalues = []
unmaped_ous = set()

for index, record in enumerate(total_ia_results):
    
    # only import data for woredas that can be found
    match = find_admin3(record['IU_Name'])
    if match['score'] >= ou_match_threshold:
    
        for label in ia_lables:
              
            # make sure that this record includes this data, because sometimes it doesn't
            if label in record \
            and record[label] is not None \
            and record[label] is not '': # and ignore empty values (i.e. IU_NAME: '')

                element = {
                    "dataElement": "ja31hcRGTci",
                    "period": label[-4:], 
                    "orgUnit": match['uid'],
                    "value": mapAssessmentType(record[label]),
                    "categoryOptionCombo" : get_disease_category_option(record['disease'] )
                               }
                datavalues.append(element)
           
    else:
        unmaped_ous.add(record['IU_Name'])
        
    update_progress(index / len(total_ia_results))
        

print("there are {}".format(len(datavalues))+ " elements added")

Progress: [####################] 100.0%
there are 2984 elements added


In [23]:
import json

# Add these things
data = {}
data['dataValues'] = datavalues

with open('ia-types-importing.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=False, indent=4)

### Import the file
In the Import Export app, select Data Import and this select this file generated above