# WHO Global Health Observatory Data Exploration

This notebook explores health data from the WHO GHO OData API for Sustainable Development Goals (SDGs).

In [1]:
# Import Required Libraries
import pandas as pd
import requests
import plotly.express as px
import plotly.graph_objects as go

# WHO GHO API Base URL
API_BASE_URL = "https://ghoapi.azureedge.net/api"

print("Libraries loaded successfully!")

Libraries loaded successfully!


## 1. Fetch Available Dimensions

Let's see what dimensions are available in the WHO API.

In [2]:
# Fetch available dimensions
response = requests.get(f"{API_BASE_URL}/Dimension")
dimensions = pd.DataFrame(response.json()["value"])
print(f"Available Dimensions: {len(dimensions)}")
dimensions

Available Dimensions: 192


Unnamed: 0,Code,Title
0,ADVERTISINGTYPE,SUBSTANCE_ABUSE_ADVERTISING_TYPES
1,AGEGROUP,Age Group
2,ALCOHOLTYPE,Beverage Types
3,AMRGLASSCATEGORY,AMR GLASS Category
4,ANTIBIOTIC,Antibiotic
...,...,...
187,WPR_DATASOURCE,WPRO Data Source
188,WPR_DEMO_IND,WPRO Demo Indicator
189,WRS_OUTSIDE_REPORT,WRs from outside the region year report
190,WRS_STATUS,WHO Representative Status


## 2. Fetch Countries List

Get the list of all countries available in the dataset.

In [3]:
# Fetch countries
response = requests.get(f"{API_BASE_URL}/DIMENSION/COUNTRY/DimensionValues")
countries = pd.DataFrame(response.json()["value"])
print(f"Total Countries: {len(countries)}")
countries[["Code", "Title"]].head(20)

Total Countries: 234


Unnamed: 0,Code,Title
0,ABW,Aruba
1,AFG,Afghanistan
2,AGO,Angola
3,AIA,Anguilla
4,ALB,Albania
5,AND,Andorra
6,ARE,United Arab Emirates
7,ARG,Argentina
8,ARM,Armenia
9,ASM,American Samoa


## 3. Fetch WHO Regions

In [4]:
# Fetch WHO regions
response = requests.get(f"{API_BASE_URL}/DIMENSION/REGION/DimensionValues")
regions = pd.DataFrame(response.json()["value"])
print(f"WHO Regions: {len(regions)}")
regions[["Code", "Title"]]

WHO Regions: 43


Unnamed: 0,Code,Title
0,AFR,Africa
1,AMR,Americas
2,EMR,Eastern Mediterranean
3,EUR,Europe
4,GBD_REG14_AFRD,"Africa region, stratum D (AFR D)"
5,GBD_REG14_AFRE,"Africa region, stratum E(AFR E)"
6,GBD_REG14_AMRA,"Americas region, stratum A (AMR A)"
7,GBD_REG14_AMRB,"Americas region, stratum B (AMR B)"
8,GBD_REG14_AMRD,"Americas region, stratum D (AMR D)"
9,GBD_REG14_EMRB,"Eastern Mediterranean region, stratum B (EMR B)"


## 4. Search for SDG Health Indicators

Let's search for indicators related to Sustainable Development Goals.

In [5]:
# Fetch all indicators (limited)
response = requests.get(f"{API_BASE_URL}/Indicator")
all_indicators = pd.DataFrame(response.json()["value"])
print(f"Total Indicators: {len(all_indicators)}")
all_indicators[["IndicatorCode", "IndicatorName"]].head(20)

Total Indicators: 3055


Unnamed: 0,IndicatorCode,IndicatorName
0,EMFLIMITPOWERDENSITY900,Power density limit at 900 MHz (W/m^2)
1,EMFPOWERFREQUENCY,Power frequency (Hz)
2,FINPROTECTION_CATA_ESTIMATE_AVAILABLE,Availability of estimates on large health exp...
3,FINPROTECTION_CATA_TOT_25_LEVEL_SH,Population with household spending on health g...
4,FINPROTECTION_IMP_NP_190_LEVEL_SH,Total population pushed below the $1.90 a day...
5,HIV_0000000009,Reported number of people receiving antiretrov...
6,GOE_Q203,Lack of integration between various health ser...
7,HCF_REL_ELECTRICITY,Percentage of health-care facilities with reli...
8,HIV_0000000007,"Testing and counselling facilities, reported n..."
9,HIV_0000000008,"Testing and counselling facilities, estimated ..."


In [6]:
# Search for SDG-related indicators
sdg_indicators = all_indicators[all_indicators["IndicatorName"].str.contains("SDG|mortality|life expectancy", case=False, na=False)]
print(f"SDG-related Indicators: {len(sdg_indicators)}")
sdg_indicators[["IndicatorCode", "IndicatorName"]].head(30)

SDG-related Indicators: 49


Unnamed: 0,IndicatorCode,IndicatorName
3,FINPROTECTION_CATA_TOT_25_LEVEL_SH,Population with household spending on health g...
38,FINPROTECTION_CATA_TOT_10_LEVEL_MILLION,Population with household spending on health g...
39,FINPROTECTION_CATA_TOT_10_LEVEL_SH,Population with household spending on health g...
40,FINPROTECTION_CATA_TOT_10_POP,Population with household spending on health g...
41,FINPROTECTION_CATA_TOT_25_LEVEL_MILLION,Population with household spending on health g...
44,FINPROTECTION_CATA_TOT_25_POP,Population with household spending on health g...
214,imr,Infant mortality rate (deaths per 1000 live bi...
295,MORTADO,Adolescent mortality rate (per 1 000 age speci...
310,MDG_0000000007,Under-five mortality rate (probability of dyin...
317,MDG_0000000001,Infant mortality rate (probability of dying be...


## 5. Fetch Life Expectancy Data

Let's fetch and explore Life Expectancy at Birth data.

In [7]:
# Fetch Life Expectancy data
indicator_code = "WHOSIS_000001"  # Life expectancy at birth
response = requests.get(f"{API_BASE_URL}/{indicator_code}")
life_expectancy = pd.DataFrame(response.json()["value"])

print(f"Records: {len(life_expectancy)}")
print(f"\nColumns: {life_expectancy.columns.tolist()}")
life_expectancy.head(10)

Records: 12936

Columns: ['Id', 'IndicatorCode', 'SpatialDimType', 'SpatialDim', 'ParentLocationCode', 'TimeDimType', 'ParentLocation', 'Dim1Type', 'TimeDim', 'Dim1', 'Dim2Type', 'Dim2', 'Dim3Type', 'Dim3', 'DataSourceDimType', 'DataSourceDim', 'Value', 'NumericValue', 'Low', 'High', 'Comments', 'Date', 'TimeDimensionValue', 'TimeDimensionBegin', 'TimeDimensionEnd']


Unnamed: 0,Id,IndicatorCode,SpatialDimType,SpatialDim,ParentLocationCode,TimeDimType,ParentLocation,Dim1Type,TimeDim,Dim1,...,DataSourceDim,Value,NumericValue,Low,High,Comments,Date,TimeDimensionValue,TimeDimensionBegin,TimeDimensionEnd
0,9768193,WHOSIS_000001,COUNTRY,TJK,EUR,YEAR,Europe,SEX,2009,SEX_BTSX,...,,69.3 [68.8-69.9],69.345311,68.838784,69.886027,,2024-08-02T09:43:39.193+02:00,2009,2009-01-01T00:00:00+01:00,2009-12-31T00:00:00+01:00
1,9768267,WHOSIS_000001,WORLDBANKINCOMEGROUP,WB_LMI,,YEAR,,SEX,2014,SEX_BTSX,...,,68.0 [67.5-68.6],68.039758,67.486191,68.608364,,2024-08-02T09:43:39.193+02:00,2014,2014-01-01T00:00:00+01:00,2014-12-31T00:00:00+01:00
2,9768536,WHOSIS_000001,COUNTRY,BLZ,AMR,YEAR,Americas,SEX,2009,SEX_BTSX,...,,73.7 [73.5-73.9],73.670099,73.477567,73.919821,,2024-08-02T09:43:39.193+02:00,2009,2009-01-01T00:00:00+01:00,2009-12-31T00:00:00+01:00
3,9769054,WHOSIS_000001,COUNTRY,CRI,AMR,YEAR,Americas,SEX,2019,SEX_BTSX,...,,80.3 [80.2-80.4],80.300732,80.204039,80.423448,,2024-08-02T09:43:39.193+02:00,2019,2019-01-01T00:00:00+01:00,2019-12-31T00:00:00+01:00
4,9769581,WHOSIS_000001,COUNTRY,PNG,WPR,YEAR,Western Pacific,SEX,2009,SEX_MLE,...,,63.8 [63.1-64.7],63.817352,63.127693,64.671811,,2024-08-02T09:43:39.193+02:00,2009,2009-01-01T00:00:00+01:00,2009-12-31T00:00:00+01:00
5,9771412,WHOSIS_000001,COUNTRY,MDA,EUR,YEAR,Europe,SEX,2011,SEX_BTSX,...,,69.8 [69.6-70.0],69.820704,69.642998,70.030446,,2024-08-02T09:43:39.193+02:00,2011,2011-01-01T00:00:00+01:00,2011-12-31T00:00:00+01:00
6,9772444,WHOSIS_000001,COUNTRY,LUX,EUR,YEAR,Europe,SEX,2005,SEX_FMLE,...,,82.1 [82.0-82.2],82.073297,82.007442,82.235889,,2024-08-02T09:43:39.193+02:00,2005,2005-01-01T00:00:00+01:00,2005-12-31T00:00:00+01:00
7,9773020,WHOSIS_000001,COUNTRY,SVK,EUR,YEAR,Europe,SEX,2005,SEX_MLE,...,,70.2 [70.1-70.3],70.187024,70.128752,70.315256,,2024-08-02T09:43:39.193+02:00,2005,2005-01-01T00:00:00+01:00,2005-12-31T00:00:00+01:00
8,9773516,WHOSIS_000001,COUNTRY,BHR,EMR,YEAR,Eastern Mediterranean,SEX,2020,SEX_FMLE,...,,76.8 [76.5-77.1],76.769973,76.513988,77.059442,,2024-08-02T09:43:39.193+02:00,2020,2020-01-01T00:00:00+01:00,2020-12-31T00:00:00+01:00
9,9773569,WHOSIS_000001,COUNTRY,CHN,WPR,YEAR,Western Pacific,SEX,2000,SEX_FMLE,...,,73.3 [73.0-73.8],73.333632,73.007768,73.767402,,2024-08-02T09:43:39.193+02:00,2000,2000-01-01T00:00:00+01:00,2000-12-31T00:00:00+01:00


In [8]:
# Clean and process the data
life_exp_clean = life_expectancy[["SpatialDim", "TimeDim", "Dim1", "NumericValue"]].copy()
life_exp_clean.columns = ["Country", "Year", "Sex", "LifeExpectancy"]
life_exp_clean["Year"] = pd.to_numeric(life_exp_clean["Year"], errors="coerce")
life_exp_clean["LifeExpectancy"] = pd.to_numeric(life_exp_clean["LifeExpectancy"], errors="coerce")
life_exp_clean = life_exp_clean.dropna()

print(f"Cleaned Records: {len(life_exp_clean)}")
print(f"Year Range: {life_exp_clean['Year'].min():.0f} - {life_exp_clean['Year'].max():.0f}")
print(f"Countries: {life_exp_clean['Country'].nunique()}")
life_exp_clean.head(10)

Cleaned Records: 12936
Year Range: 2000 - 2021
Countries: 196


Unnamed: 0,Country,Year,Sex,LifeExpectancy
0,TJK,2009,SEX_BTSX,69.345311
1,WB_LMI,2014,SEX_BTSX,68.039758
2,BLZ,2009,SEX_BTSX,73.670099
3,CRI,2019,SEX_BTSX,80.300732
4,PNG,2009,SEX_MLE,63.817352
5,MDA,2011,SEX_BTSX,69.820704
6,LUX,2005,SEX_FMLE,82.073297
7,SVK,2005,SEX_MLE,70.187024
8,BHR,2020,SEX_FMLE,76.769973
9,CHN,2000,SEX_FMLE,73.333632


In [9]:
# Visualize Life Expectancy by Year (Global Average)
global_avg = life_exp_clean.groupby(["Year", "Sex"])["LifeExpectancy"].mean().reset_index()

fig = px.line(global_avg, x="Year", y="LifeExpectancy", color="Sex",
              title="Global Average Life Expectancy at Birth by Sex",
              labels={"LifeExpectancy": "Life Expectancy (years)"})
fig.update_layout(template="plotly_white")
fig.show()

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

## 6. Fetch Maternal Mortality Data (SDG 3.1)

In [None]:
# Fetch Maternal Mortality data
indicator_code = "MDG_0000000001"  # Maternal mortality ratio
response = requests.get(f"{API_BASE_URL}/{indicator_code}")
maternal_mortality = pd.DataFrame(response.json()["value"])

print(f"Maternal Mortality Records: {len(maternal_mortality)}")
maternal_mortality.head()

Maternal Mortality Records: 43513


Unnamed: 0,Id,IndicatorCode,SpatialDimType,SpatialDim,ParentLocationCode,TimeDimType,ParentLocation,Dim1Type,Dim1,TimeDim,...,DataSourceDim,Value,NumericValue,Low,High,Comments,Date,TimeDimensionValue,TimeDimensionBegin,TimeDimensionEnd
0,9768968,MDG_0000000001,COUNTRY,AZE,EUR,YEAR,Europe,SEX,SEX_MLE,2009,...,,33.7 [28.0-40.5],33.710192,27.979396,40.470787,,2025-04-15T15:49:27.583+02:00,2009,2009-01-01T00:00:00+01:00,2009-12-31T00:00:00+01:00
1,9768988,MDG_0000000001,COUNTRY,SWE,EUR,YEAR,Europe,SEX,SEX_FMLE,1983,...,,5.9 [5.7-6.1],5.936449,5.731761,6.147711,,2025-04-15T15:49:27.583+02:00,1983,1983-01-01T00:00:00+01:00,1983-12-31T00:00:00+01:00
2,9769157,MDG_0000000001,MGHEREG,UNICEF_LACRO,,YEAR,,SEX,SEX_BTSX,2013,...,,16.3 [16.0-16.8],16.309493,15.959494,16.780849,,2025-04-15T15:49:27.583+02:00,2013,2013-01-01T00:00:00+01:00,2013-12-31T00:00:00+01:00
3,9769329,MDG_0000000001,COUNTRY,DMA,AMR,YEAR,Americas,SEX,SEX_MLE,2015,...,,28.4 [25.7-31.3],28.378631,25.694694,31.265511,,2025-04-15T15:49:27.583+02:00,2015,2015-01-01T00:00:00+01:00,2015-12-31T00:00:00+01:00
4,9769480,MDG_0000000001,COUNTRY,ATG,AMR,YEAR,Americas,SEX,SEX_BTSX,1999,...,,12.4 [11.1-13.9],12.41565,11.149507,13.856991,,2025-04-15T15:49:27.583+02:00,1999,1999-01-01T00:00:00+01:00,1999-12-31T00:00:00+01:00


In [None]:
# Process maternal mortality data
mm_clean = maternal_mortality[["SpatialDim", "TimeDim", "NumericValue"]].copy()
mm_clean.columns = ["Country", "Year", "MaternalMortalityRatio"]
mm_clean["Year"] = pd.to_numeric(mm_clean["Year"], errors="coerce")
mm_clean["MaternalMortalityRatio"] = pd.to_numeric(mm_clean["MaternalMortalityRatio"], errors="coerce")
mm_clean = mm_clean.dropna()

# Get latest year data
latest_year = mm_clean["Year"].max()
mm_latest = mm_clean[mm_clean["Year"] == latest_year].sort_values("MaternalMortalityRatio", ascending=False)

print(f"Latest Year: {latest_year:.0f}")
print(f"\nTop 10 Countries with Highest Maternal Mortality:")
mm_latest.head(10)

In [None]:
# Visualize Maternal Mortality on World Map
fig = px.choropleth(mm_latest, 
                    locations="Country",
                    color="MaternalMortalityRatio",
                    hover_name="Country",
                    color_continuous_scale="Reds",
                    title=f"Maternal Mortality Ratio per 100,000 Live Births ({latest_year:.0f})")
fig.update_layout(geo=dict(showframe=False, projection_type="natural earth"))
fig.show()

## 7. Fetch Under-5 Mortality Data (SDG 3.2)

In [None]:
# Fetch Under-5 Mortality data
indicator_code = "MDG_0000000007"  # Under-five mortality rate
response = requests.get(f"{API_BASE_URL}/{indicator_code}")
under5_mortality = pd.DataFrame(response.json()["value"])

# Process data
u5_clean = under5_mortality[["SpatialDim", "TimeDim", "NumericValue"]].copy()
u5_clean.columns = ["Country", "Year", "Under5MortalityRate"]
u5_clean["Year"] = pd.to_numeric(u5_clean["Year"], errors="coerce")
u5_clean["Under5MortalityRate"] = pd.to_numeric(u5_clean["Under5MortalityRate"], errors="coerce")
u5_clean = u5_clean.dropna()

print(f"Under-5 Mortality Records: {len(u5_clean)}")
u5_clean.head(10)

In [None]:
# Global trend of Under-5 Mortality
u5_trend = u5_clean.groupby("Year")["Under5MortalityRate"].mean().reset_index()

fig = px.line(u5_trend, x="Year", y="Under5MortalityRate",
              title="Global Average Under-5 Mortality Rate (per 1,000 live births)",
              markers=True)
fig.update_layout(template="plotly_white")
fig.show()

## 8. Fetch UHC Service Coverage Index (SDG 3.8)

In [None]:
# Fetch UHC Service Coverage data
indicator_code = "UHC_INDEX_REPORTED"  # UHC service coverage index
response = requests.get(f"{API_BASE_URL}/{indicator_code}")
uhc_data = pd.DataFrame(response.json()["value"])

# Process data
uhc_clean = uhc_data[["SpatialDim", "TimeDim", "NumericValue"]].copy()
uhc_clean.columns = ["Country", "Year", "UHC_Index"]
uhc_clean["Year"] = pd.to_numeric(uhc_clean["Year"], errors="coerce")
uhc_clean["UHC_Index"] = pd.to_numeric(uhc_clean["UHC_Index"], errors="coerce")
uhc_clean = uhc_clean.dropna()

print(f"UHC Index Records: {len(uhc_clean)}")
uhc_clean.describe()

In [None]:
# Distribution of UHC Index (latest year)
latest_year = uhc_clean["Year"].max()
uhc_latest = uhc_clean[uhc_clean["Year"] == latest_year]

fig = px.histogram(uhc_latest, x="UHC_Index", nbins=30,
                   title=f"Distribution of UHC Service Coverage Index ({latest_year:.0f})",
                   labels={"UHC_Index": "UHC Index (0-100)"})
fig.update_layout(template="plotly_white")
fig.show()

## 9. Summary Statistics

In [None]:
# Create a summary of all fetched data
summary = {
    "Dataset": ["Life Expectancy", "Maternal Mortality", "Under-5 Mortality", "UHC Index"],
    "Records": [len(life_exp_clean), len(mm_clean), len(u5_clean), len(uhc_clean)],
    "Countries": [life_exp_clean["Country"].nunique(), mm_clean["Country"].nunique(), 
                  u5_clean["Country"].nunique(), uhc_clean["Country"].nunique()],
    "Year Range": [
        f"{life_exp_clean['Year'].min():.0f}-{life_exp_clean['Year'].max():.0f}",
        f"{mm_clean['Year'].min():.0f}-{mm_clean['Year'].max():.0f}",
        f"{u5_clean['Year'].min():.0f}-{u5_clean['Year'].max():.0f}",
        f"{uhc_clean['Year'].min():.0f}-{uhc_clean['Year'].max():.0f}"
    ]
}

summary_df = pd.DataFrame(summary)
print("📊 Data Summary")
print("="*50)
summary_df

## 10. Export Data to CSV

In [None]:
# Export cleaned datasets to CSV files
life_exp_clean.to_csv("life_expectancy_data.csv", index=False)
mm_clean.to_csv("maternal_mortality_data.csv", index=False)
u5_clean.to_csv("under5_mortality_data.csv", index=False)
uhc_clean.to_csv("uhc_index_data.csv", index=False)

print("✅ Data exported successfully!")
print("- life_expectancy_data.csv")
print("- maternal_mortality_data.csv")
print("- under5_mortality_data.csv")
print("- uhc_index_data.csv")