In [2]:
import pandas as pd
import sqlite3
import re
from pathlib import Path
import requests
from glob import glob

# Make sure the folder that receives the per-indicator CSV exports exists;
# missing parent folders are created and an existing folder is left alone.
Path("data/indicators").mkdir(exist_ok=True, parents=True)

In [3]:
# Automatically download the data for each indicator and filter/modify its structure
indicators = ["NCD_DIABETES_PREVALENCE_AGESTD", "HIV_0000000026"]

GHO_API = "https://ghoapi.azureedge.net/api"
REQUEST_TIMEOUT = 60  # seconds; requests waits indefinitely when no timeout is given
DIMENSION_COLUMNS = ("SpatialDim", "TimeDim", "Dim1", "Dim2", "Dim3")

# Code -> lower-cased title lookups per dimension type, shared by all indicators
# so that each dimension is only downloaded once per run of this cell.
dimension_titles = {}


def fetch_gho_values(url):
    """Download ``url`` and return the list stored under the response's "value" key.

    Raises ``requests.HTTPError`` on a 4xx/5xx answer instead of failing later
    with a less helpful JSON/KeyError.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()["value"]


for indicator in indicators:
    df = pd.DataFrame(fetch_gho_values(f"{GHO_API}/{indicator}"))
    if df.empty:
        raise ValueError(f"GHO API returned no rows for indicator {indicator!r}")

    no_numeric = df["NumericValue"].isna().all()

    # Get dimension types: source column -> lower-cased dimension type (None = unused)
    dim_type_map = dict.fromkeys(DIMENSION_COLUMNS)
    dim_value_map = {}
    for dim in DIMENSION_COLUMNS:
        # The most frequent type of a dimension column is taken as "the" type of that column.
        dim_type = df[dim + "Type"].mode(dropna=False).iloc[0]
        # pd.isna covers both None and NaN; either one means the dimension is unused.
        if pd.isna(dim_type) or dim_type == "":
            continue
        if dim_type not in dimension_titles:
            dim_values = fetch_gho_values(f"{GHO_API}/DIMENSION/{dim_type}/DimensionValues")
            dimension_titles[dim_type] = {d["Code"]: d["Title"].lower() for d in dim_values}
        dim_value_map[dim_type.lower()] = dimension_titles[dim_type]
        dim_type_map[dim] = dim_type.lower()

    # if there is a numeric value we use that as the main value, otherwise we use the
    # general 'Value' column, which could also contain strings
    dim_type_map["Value" if no_numeric else "NumericValue"] = "value"

    # Remove unused dimensions
    dim_map = {k: v for k, v in dim_type_map.items() if v is not None}

    # Get indicator name
    indicator_name = fetch_gho_values(
        f"{GHO_API}/Indicator?$filter=IndicatorCode eq '{indicator}'"
    )[0]["IndicatorName"]

    # Remove rows where the spatial/time dim type differs from the one picked above.
    # A dimension without a type is skipped: comparing against None is False for
    # every row and would silently drop all the data.
    for dim in ("SpatialDim", "TimeDim"):
        if dim_type_map[dim] is not None:
            df = df.loc[df[dim + "Type"].str.lower() == dim_type_map[dim]]

    # Filter data to specific columns and rename to their actual names
    filtered_df = df[list(dim_map)].rename(columns=dim_map)

    # Map value codes to their textual values; codes without a title are kept as they are
    for dim, value_map in dim_value_map.items():
        filtered_df[dim] = filtered_df[dim].map(value_map).fillna(filtered_df[dim])

    # Collapse runs of whitespace inside string cells. Column-wise Series.map is used
    # because DataFrame.applymap is deprecated in recent pandas versions.
    filtered_df = filtered_df.apply(
        lambda column: column.map(lambda x: ' '.join(x.split()) if isinstance(x, str) else x)
    )

    # Save data to csv file, named after the indicator with non-alphanumerics replaced by "_"
    file_name = re.sub('[^0-9a-zA-Z]+', '_', indicator_name.lower())
    filtered_df.to_csv(f"data/indicators/{file_name}.csv", index=False, sep=";")

In [4]:
# Connect to sqlite database.
# NOTE(review): the connection is left open, as before, in case later cells query
# through `conn`; call conn.close() once it is no longer needed.
db_path = "data/gho.db"
conn = sqlite3.connect(db_path)

# Convert every exported indicator csv file into its own table in the sqlite database
for indicator_file in sorted(glob("data/indicators/*.csv")):
    df = pd.read_csv(indicator_file, sep=";")
    # Explicit 1-based row number stored as the first column of the table
    df.insert(0, 'rowid', range(1, 1 + len(df)))
    # Path.stem gives the file name without directory and extension on every OS;
    # splitting on "/" fails on Windows, where glob returns backslash separators.
    table_name = Path(indicator_file).stem
    df.to_sql(table_name, conn, if_exists="replace", index=False)