In [1]:
import os

import pandas as pd
import requests

# World Bank indicators: output column name -> World Bank indicator code.
indicators = {
    "GDP_per_capita": "NY.GDP.PCAP.CD",
    "Unemployment": "SL.UEM.TOTL.ZS",
    "School_enrollment": "SE.SEC.ENRR",
}

# Long-format accumulator: one dict per (country, year, indicator) observation.
wb_rows = []

for name, code in indicators.items():
    url = f"https://api.worldbank.org/v2/country/all/indicator/{code}"
    page = 1

    while True:
        # A timeout keeps a stalled connection from hanging the notebook, and
        # raise_for_status surfaces HTTP errors instead of parsing an error body.
        response = requests.get(
            url,
            params={"format": "json", "per_page": 20000, "page": page},
            timeout=60,
        )
        response.raise_for_status()
        data = response.json()

        # A successful reply is a two-element list: [paging metadata, records].
        # Anything else (e.g. a one-element error payload) is reported
        # explicitly rather than failing with an opaque IndexError.
        if not isinstance(data, list) or len(data) < 2:
            raise RuntimeError(
                f"Unexpected World Bank response for {name} ({code}): {data!r}"
            )

        metadata = data[0] or {}
        records = data[1] or []  # Actual data; treated as empty when null

        for item in records:
            wb_rows.append({
                "country_name": item["country"]["value"],  # use country name
                "year": int(item["date"]),
                "indicator": name,
                "value": item["value"],  # keep raw value, even if None
            })

        # Keep requesting pages until the reported page count is exhausted, so
        # rows are not silently truncated when a result exceeds per_page.
        if page >= int(metadata.get("pages") or 1):
            break
        page += 1

# Build a long-format frame from the collected rows, then keep only the
# years that overlap the WHO data (2000-2023, both ends inclusive).
wb_df_raw = pd.DataFrame(wb_rows)
wb_df_raw = wb_df_raw.loc[wb_df_raw["year"].between(2000, 2023)]

# Reshape to wide format: one row per (country_name, year) and one column per
# indicator, then flatten the index and drop the leftover "indicator" axis label.
# NOTE(review): pivot_table runs with its defaults here (mean aggregation,
# dropna=True), so country/year pairs with no non-null value may be dropped -
# confirm this is intended for a "raw" extract.
wb_df_raw = (
    wb_df_raw
    .pivot_table(
        values="value",
        index=["country_name", "year"],
        columns="indicator",
    )
    .reset_index()
    .rename_axis(None, axis="columns")
)

# Save raw data for later cleaning. The output directory is created first so
# the writes do not fail on a fresh checkout where data/raw does not exist yet.
os.makedirs(os.path.join("data", "raw"), exist_ok=True)
wb_df_raw.to_csv("data//raw//world_bank_data_raw.csv", index=False)
wb_df_raw.to_json("data//raw//world_bank_data_raw.json", orient="records", indent=4)

# Report the (rows, columns) shape of the saved table.
print("Raw World Bank data fetched:", wb_df_raw.shape)


Raw World Bank data fetched: (6301, 5)
