In [1]:
import requests
import pandas as pd

# === WHO GHO OData API Example (ghoapi.azureedge.net) ===
def fetch_who_disease_data(disease_keyword='malaria'):
    """Print the first three WHO indicators whose name contains a keyword.

    Queries the indicator catalogue at ghoapi.azureedge.net with an OData
    ``contains(IndicatorName, ...)`` filter and prints one numbered line per
    match in the form ``<n>. <IndicatorCode>: <IndicatorName>``.

    Args:
        disease_keyword: Text to search for inside ``IndicatorName``.
            NOTE(review): whether the match is case-sensitive is decided by
            the server -- confirm before relying on lowercase keywords.

    Returns:
        None. Results (or a failure message) are written to stdout.
    """
    # OData string literals escape an embedded single quote by doubling it;
    # without this a keyword containing an apostrophe breaks the filter.
    safe_keyword = disease_keyword.replace("'", "''")
    url = (
        "https://ghoapi.azureedge.net/api/Indicator"
        f"?$filter=contains(IndicatorName,'{safe_keyword}')"
    )
    # A timeout keeps the cell from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=30)
    if not response.ok:
        print("Failed to fetch WHO data")
        return

    indicators = response.json()['value']
    # Only the first three matches are shown.
    for position, ind in enumerate(indicators[:3], start=1):
        print(f"{position}. {ind['IndicatorCode']}: {ind['IndicatorName']}")

# === CDC Socrata API Example ===
def fetch_cdc_notifiable_diseases():
    """Download up to 1000 rows from the CDC dataset and preview them.

    Requests the ``x9gk-5huc`` resource on data.cdc.gov as JSON, loads the
    payload into a DataFrame and prints its first five rows. When the HTTP
    response is not OK, a failure message is printed instead.

    Returns:
        None. All output goes to stdout.
    """
    endpoint = "https://data.cdc.gov/resource/x9gk-5huc.json?$limit=1000"
    reply = requests.get(endpoint)

    # Bail out early on any non-success HTTP status.
    if not reply.ok:
        print("Failed to fetch CDC data")
        return

    records = pd.DataFrame(reply.json())
    print("Top 5 CDC Records:")
    print(records.head())

if __name__ == "__main__":
    # Run both demos in order, each under its own banner.
    print("=== WHO Data (Athena API) ===")
    fetch_who_disease_data(disease_keyword='malaria')

    print("\n=== CDC Notifiable Diseases (Socrata API) ===")
    fetch_cdc_notifiable_diseases()

=== WHO Data (Athena API) ===
1. MALARIA001: Malaria - number of reported deaths
2. MALARIA004: Number of under-five deaths from malaria
3. MALARIA002: Estimated number of malaria cases

=== CDC Notifiable Diseases (Socrata API) ===
Top 5 CDC Records:
          states  year week    label m1_flag m2 m2_flag m3_flag m4_flag  \
0   US RESIDENTS  2022    1  Anthrax       -  0       -       -       -   
1    NEW ENGLAND  2022    1  Anthrax       -  0       -       -       -   
2    CONNECTICUT  2022    1  Anthrax       -  0       -       -       -   
3          MAINE  2022    1  Anthrax       -  0       -       -       -   
4  MASSACHUSETTS  2022    1  Anthrax       -  0       -       -       -   

      location2   sort_order      location1  \
0  US RESIDENTS  20220100001            NaN   
1   NEW ENGLAND  20220100002            NaN   
2           NaN  20220100003    CONNECTICUT   
3           NaN  20220100004          MAINE   
4           NaN  20220100005  MASSACHUSETTS   

              

In [None]:
import requests
import pandas as pd
import os

# Disease keywords to look up in the WHO indicator catalogue
diseases = [
    "malaria",
    "tuberculosis",
    "hiv",
    "cholera",
    "measles",
]

# Make sure the CSV output folder is present before anything is written to it
os.makedirs("who_disease_data", exist_ok=True)

# Function to get matching indicator codes for a disease
def get_indicator_codes(keyword):
    """Return the code of the first WHO indicator whose name contains ``keyword``.

    Args:
        keyword: Text to search for inside ``IndicatorName``.
            NOTE(review): whether the match is case-sensitive is decided by
            the server -- confirm before relying on lowercase keywords.

    Returns:
        A list holding at most one ``IndicatorCode`` string; an empty list
        when the request is not OK or nothing matches.
    """
    # OData string literals escape an embedded single quote by doubling it;
    # without this a keyword containing an apostrophe breaks the filter.
    safe_keyword = keyword.replace("'", "''")
    url = (
        "https://ghoapi.azureedge.net/api/Indicator"
        f"?$filter=contains(IndicatorName,'{safe_keyword}')"
    )
    # A timeout keeps the loop over diseases from stalling on one slow request.
    response = requests.get(url, timeout=30)
    if not response.ok:
        return []

    # Tolerate a payload without "value" instead of raising KeyError.
    matches = response.json().get("value", [])
    # Limit to the first indicator before building the list.
    return [entry["IndicatorCode"] for entry in matches[:1]]

# Function to fetch and save data for each indicator
def fetch_and_save_data(indicator_code, disease_name):
    """Download one WHO indicator's records and save selected columns as CSV.

    Fetches ``https://ghoapi.azureedge.net/api/<indicator_code>``, flattens
    the ``value`` records into a DataFrame, keeps the known columns that are
    present, renames them to friendlier labels and writes the result to
    ``who_disease_data/<disease_name>_data.csv``.

    Args:
        indicator_code: Indicator code appended to the API base URL.
        disease_name: Used as the prefix of the output CSV file name.

    Returns:
        None. A status line (saved / no data / failed) is printed to stdout.
    """
    url = f"https://ghoapi.azureedge.net/api/{indicator_code}"
    # A timeout keeps the loop over diseases from stalling on one slow request.
    response = requests.get(url, timeout=60)
    if not response.ok:
        print(f"❌ Failed to fetch data for {indicator_code}")
        return

    df = pd.json_normalize(response.json()["value"])
    if df.empty:
        print(f"⚠️ No data for {indicator_code}")
        return

    # Rename by source column name, not by position: if a column such as
    # "NumericValue" is absent, a positional rename would shift every later
    # label onto the wrong column.
    column_labels = {
        "IndicatorCode": "Indicator",
        "SpatialDim": "Country",
        "TimeDim": "Year",
        "NumericValue": "Value",
        "Dim1": "Dim1",
        "Dim1Value": "Dim1Value",
    }
    available = [col for col in column_labels if col in df.columns]
    df = df[available].rename(columns=column_labels)

    # Create the target folder here so the function works on its own.
    output_dir = "who_disease_data"
    os.makedirs(output_dir, exist_ok=True)

    # Save to CSV
    file_path = f"{output_dir}/{disease_name}_data.csv"
    df.to_csv(file_path, index=False)
    print(f"✅ Saved: {file_path}")

# === Main Script ===
if __name__ == "__main__":
    # For every configured disease: announce the search, then download and
    # save the data of each indicator code the lookup returned.
    for disease in diseases:
        print(f"\n🔍 Searching WHO indicators for: {disease}")
        for code in get_indicator_codes(disease):
            fetch_and_save_data(code, disease)


NameError: name 'data' is not defined