In [1]:
import pandas as pd

In [2]:
# Step 1: Read every IHME GBD HAQ extract used by the rest of the notebook.
# The file names are relative, so they are resolved against the notebook's
# current working directory.
HAQ_2016_UNSCALED_CSV = "IHME_GBD_2016_HAQ_INDEX_1990_2016_UNSCALED_CAUSE_VALUES_Y2018M05D23.CSV"
HAQ_2016_SCALED_CSV = "IHME_GBD_2016_HAQ_INDEX_1990_2016_SCALED_CAUSE_VALUES_Y2018M05D23.CSV"
HAQ_2015_MORTALITY_CSV = "IHME_GBD_2015_HAQ_INDEX_1990_2015_AGE_RISK_STANDARDIZED_MORTALITY_RATES_Y2017M05D18.CSV"
HAQ_2015_INDEX_CSV = "IHME_GBD_2015_HAQ_INDEX_1990_2015_HAQ_INDEX_AND_VALUES_Y2017M05D18.CSV"
HAQ_2019_CSV = "IHME_GBD_2019_HAQ_1990_2019_DATA.CSV"

# One raw frame per file; the *_rsd_df frames come from the unscaled /
# mortality-rate files, the others from the scaled / index files.
haq_2016_rsd_df = pd.read_csv(HAQ_2016_UNSCALED_CSV)
haq_2016_df = pd.read_csv(HAQ_2016_SCALED_CSV)
haq_2015_rsd_df = pd.read_csv(HAQ_2015_MORTALITY_CSV)
haq_2015_df = pd.read_csv(HAQ_2015_INDEX_CSV)
haq_2019_df = pd.read_csv(HAQ_2019_CSV)

In [3]:

# Step 2: Build the African country list in the spelling used for filtering.

# Spelling used in this notebook -> spelling substituted before the list is
# matched against `location_name`.
# NOTE(review): presumably these are the spellings found in the 2015/2016
# CSVs — confirm against the files.
country_name_mapping = {
    "Cabo Verde": "Cape Verde",
    "Democratic Republic of Congo": "Democratic Republic of the Congo",
    "Eswatini": "Swaziland",
    "Gambia": "The Gambia",
}

# 54 countries, one initial letter per line (order is unchanged).
african_countries = [
    'Algeria', 'Angola',
    'Benin', 'Botswana', 'Burkina Faso', 'Burundi',
    'Cabo Verde', 'Cameroon', 'Central African Republic', 'Chad', 'Comoros',
    'Congo', "Cote d'Ivoire",
    'Democratic Republic of Congo', 'Djibouti',
    'Egypt', 'Equatorial Guinea', 'Eritrea', 'Eswatini', 'Ethiopia',
    'Gabon', 'Gambia', 'Ghana', 'Guinea', 'Guinea-Bissau',
    'Kenya',
    'Lesotho', 'Liberia', 'Libya',
    'Madagascar', 'Malawi', 'Mali', 'Mauritania', 'Mauritius', 'Morocco',
    'Mozambique',
    'Namibia', 'Niger', 'Nigeria',
    'Rwanda',
    'Sao Tome and Principe', 'Senegal', 'Seychelles', 'Sierra Leone',
    'Somalia', 'South Africa', 'South Sudan', 'Sudan',
    'Togo', 'Tunisia',
    'Uganda', 'Tanzania',
    'Zambia', 'Zimbabwe',
]

# Swap in the alternate spelling where one is defined; every other name
# passes through unchanged.
mapped_african_countries = list(
    map(lambda name: country_name_mapping.get(name, name), african_countries)
)

In [4]:
# Step 3: Filter the 2015 and 2016 extracts with one shared rule set.
# (The 2019 extract is filtered in its own cell further down.)

# Columns kept from every filtered extract.
OUTPUT_COLUMNS = ["location_name", "year_id", "indicator_name", "val"]


def _african_rows(frame, indicator, **exact_matches):
    """Select one indicator's rows for the African countries.

    Parameters
    ----------
    frame : pandas.DataFrame
        Raw extract; must contain the OUTPUT_COLUMNS columns and every
        column named in ``exact_matches``.
    indicator : str
        Value that ``indicator_name`` has to equal.
    **exact_matches
        Extra ``column=value`` equality conditions (e.g. ``year_id=2015``).

    Returns
    -------
    pandas.DataFrame
        Matching rows whose ``location_name`` is in
        ``mapped_african_countries``, restricted to OUTPUT_COLUMNS.
    """
    keep = (frame["indicator_name"] == indicator) & (
        frame["location_name"].isin(mapped_african_countries)
    )
    for column, wanted in exact_matches.items():
        keep = keep & (frame[column] == wanted)
    return frame[keep][OUTPUT_COLUMNS]


# 2016 release, chronic respiratory death rate (unscaled file).
# No year_id condition is applied here, so every year present in the file
# is kept. NOTE(review): the 2015 filters below pin year_id — confirm that
# keeping all years is intended for the 2016 release.
haq_2016_rsd = _african_rows(
    haq_2016_rsd_df,
    "Chronic respiratory diseases",
    measure="Age- and risk-standardized death rate per 100,000",
)

# 2016 release, HAQ Index (scaled file); likewise not restricted by year.
haq_2016 = _african_rows(haq_2016_df, "Healthcare Access and Quality Index")

# 2015 release, chronic respiratory rows for year 2015 only.
haq_2015_rsd = _african_rows(
    haq_2015_rsd_df, "Chronic respiratory diseases", year_id=2015
)

# 2015 release, HAQ Index rows for year 2015 only.
haq_2015 = _african_rows(
    haq_2015_df, "Healthcare Access and Quality", year_id=2015
)






In [5]:
# Filter the 2019 HAQ extract.
#
# This cell matches `location_name` against its own list of spellings
# (e.g. "Côte d'Ivoire", "United Republic of Tanzania") and afterwards
# renames three of them so they agree with the spellings used in the
# notebook's main country list.

africa_countries = [
    'Algeria', 'Angola', 'Benin', 'Botswana', 'Burkina Faso', 'Burundi',
    'Cabo Verde', 'Cameroon', 'Central African Republic', 'Chad', 'Comoros',
    'Congo', "Côte d'Ivoire", 'Democratic Republic of the Congo', 'Djibouti',
    'Egypt', 'Equatorial Guinea', 'Eritrea', 'Eswatini', 'Ethiopia', 'Gabon',
    'Gambia', 'Ghana', 'Guinea', 'Guinea-Bissau', 'Kenya', 'Lesotho', 'Liberia',
    'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania', 'Mauritius', 'Morocco',
    'Mozambique', 'Namibia', 'Niger', 'Nigeria', 'Rwanda', 'Sao Tome and Principe',
    'Senegal', 'Seychelles', 'Sierra Leone', 'Somalia', 'South Africa',
    'South Sudan', 'Sudan', 'Togo', 'Tunisia', 'Uganda', 'United Republic of Tanzania',
    'Zambia', 'Zimbabwe'
]

# Spellings rewritten after filtering, for uniformity while merging.
_haq_2019_renames = {
    'Democratic Republic of the Congo': 'Democratic Republic of Congo',
    "Côte d'Ivoire": "Cote d'Ivoire",
    'United Republic of Tanzania': 'Tanzania'
}

# Row mask: African country, age-standardized, one of the two indicators of
# interest, and year 2000 or later.
_haq_2019_rows = (
    haq_2019_df['location_name'].isin(africa_countries)
    & (haq_2019_df['age_group_name'] == 'Age-standardized')
    & haq_2019_df['indicator_name'].isin(['Chronic respiratory diseases', 'HAQ Index'])
    & (haq_2019_df['year_id'] >= 2000)
)

# A single .loc selection followed by .assign builds a fresh frame, instead
# of assigning a column onto the result of chained indexing (which is what
# triggers pandas' SettingWithCopyWarning). The resulting values are the same.
haq_2019_df_filtered = (
    haq_2019_df
    .loc[_haq_2019_rows, ['location_name', 'year_id', 'indicator_name', 'val']]
    .assign(location_name=lambda rows: rows['location_name'].replace(_haq_2019_renames))
)

In [6]:
# Step 4: Concatenate all filtered datasets into a single one
haq_combined_df = pd.concat(
    [haq_2016_rsd, haq_2016, haq_2015_rsd, haq_2015, haq_2019_df_filtered],
    ignore_index=True,
)

# The 2015/2016 frames were filtered with the aliased spellings from
# country_name_mapping (e.g. "Cape Verde", "Swaziland"), whereas the 2019
# frame carries the notebook's own spellings ("Cabo Verde", "Eswatini").
# Inverting the mapping puts every row back on one spelling per country, so
# the same country no longer appears under two names in the combined table.
_alias_to_canonical = {
    alias: canonical for canonical, alias in country_name_mapping.items()
}

# Each release labels the indicators differently; collapse them onto two
# short, stable labels.
_indicator_labels = {
    'Chronic respiratory diseases': 'Chronic_Respiratory(RSD)',
    'HAQ Index': 'HAQ_Index',
    'Healthcare Access and Quality Index': 'HAQ_Index',
    'Healthcare Access and Quality': 'HAQ_Index'
}

# NOTE(review): the combined table has no column identifying which GBD
# release a row came from; if the releases cover the same years, rows will
# share (location_name, year_id, indicator_name) — confirm this is intended.
haq_combined_df = haq_combined_df.assign(
    location_name=haq_combined_df['location_name'].replace(_alias_to_canonical),
    indicator_name=haq_combined_df['indicator_name'].replace(_indicator_labels),
)

In [7]:
# Write the combined table to disk first, then end the cell on the preview:
# a notebook only renders the value of a cell's last expression, so a
# head() call placed before to_csv() was never displayed.
haq_combined_df.to_csv("combined_haq_df.csv", index=False)
haq_combined_df.head()
