# Country GDP Dataset (2000–2025)

This notebook fetches GDP (current US$) for all countries from the World Bank API, maps each country to one of the 7 classic continents (Asia, Africa, Europe, North America, South America, Australia, Antarctica), and outputs a wide-format CSV. Antarctica has no countries with GDP data, so it never appears in the output.


GDP by country (current US$) 2000–2025, wide format, 7-continent mapping.

Input: "https://api.worldbank.org/v2"

Output CSV: gdp_2000_2025.csv

Columns: Name of country, Continent, 2000, 2001, ..., 2025


In [None]:
import requests
import pandas as pd

# Configuration

Here we define the API base URL, the indicator (NY.GDP.MKTP.CD → GDP in current US$), and the time range. The continent mapping rules follow in the next section.

In [None]:
# Root of the World Bank v2 REST API; endpoint paths are appended to it.
WB_BASE = "https://api.worldbank.org/v2"

# World Bank indicator code for GDP in current US$.
GDP_INDICATOR = "NY.GDP.MKTP.CD"

# First and last year of the requested series (both inclusive).
START_YEAR = 2000
END_YEAR = 2025

# Continent Mapping

World Bank groups (e.g., East Asia & Pacific) don’t match the 7 continents. We fix that with explicit ISO3 sets and rules.

In [None]:
# Oceania (map to "Australia")
OCEANIA_SET = {
    # sovereigns
    "AUS","NZL","FJI","PNG","SLB","VUT","WSM","TON","KIR","TUV","NRU","PLW","FSM","MHL",
    # territories & affiliates often in WB country list
    "ASM","GUM","MNP","PYF","COK","NIU","NCL","WLF","TKL"
}

# Central Asia + Caucasus (WB puts these in "Europe & Central Asia", but we want Asia)
CENTRAL_ASIA_SET = {"KAZ","KGZ","TJK","TKM","UZB"}
CAUCASUS_SET = {"ARM","AZE","GEO"}  # Treat as Asia for this use case

# African members of the WB "Middle East & North Africa" region (-> Africa).
# Djibouti (DJI) lies on the Horn of Africa rather than in North Africa proper,
# but the WB files it under MENA, so without this entry it would be routed to
# Asia together with the rest of that region.
NORTH_AFRICA_SET = {"DZA","EGY","LBY","MAR","TUN","ESH","DJI"}

# Some known geographic overrides (just in case WB region suggests otherwise)
ISO_OVERRIDE = {
    "GRL": "North America",  # Greenland is geographically NA
    "TUR": "Asia",           # treat Turkey as Asia here
    "CYP": "Asia",           # Cyprus geographically in Asia (politically EU)
    "MLT": "Europe",         # Malta is filed under MENA by the WB but lies in Europe
}

# Helper Functions

- fetch_all_pages: handles pagination in World Bank API
- map_continent7: maps WB regions + ISO3 → 7 continents

In [None]:
def fetch_all_pages(url, params):
    """Collect the records from every page of a paginated World Bank API query.

    Pages are requested one after another, starting at page 1, with ``params``
    plus a ``page`` entry as the query string and a 60 second timeout.

    Args:
        url: Endpoint to query.
        params: Query parameters sent with every request; not modified.

    Returns:
        A list with the records of all pages, in the order received. Whatever
        was collected so far is returned as soon as a response is not a list
        of at least two elements or carries no records.

    Raises:
        requests.HTTPError: If a response has an error status code.
    """
    collected = []
    page_no = 1
    has_more = True
    while has_more:
        response = requests.get(url, params={**params, "page": page_no}, timeout=60)
        response.raise_for_status()
        payload = response.json()

        # A usable payload is a two-part list: paging header, then the records.
        if not (isinstance(payload, list) and len(payload) >= 2):
            return collected
        header, records = payload[0], payload[1]
        if not records:
            return collected

        collected.extend(records)
        # The header reports the total page count; assume one page if absent.
        has_more = page_no < int(header.get("pages", 1))
        page_no += 1
    return collected

In [None]:
# South American countries. The World Bank files them under
# "Latin America & Caribbean" together with Central America and the Caribbean,
# so they have to be singled out by ISO3 code to get their own continent.
SOUTH_AMERICA_SET = {
    "ARG", "BOL", "BRA", "CHL", "COL", "ECU",
    "GUY", "PRY", "PER", "SUR", "URY", "VEN",
}


def map_continent7(iso3: str, wb_region_value: str) -> str | None:
    """
    Map World Bank region + ISO3 to one of:
    Asia, North America, Africa, South America, Europe, Australia

    ISO3-based rules take precedence over the region label. Antarctica is part
    of the 7-continent scheme but is never returned by this function.

    Args:
        iso3: ISO 3166-1 alpha-3 country code.
        wb_region_value: World Bank region label of the country. Surrounding
            whitespace is ignored; ``None`` is treated as "no region".

    Returns:
        The continent name, or ``None`` for aggregates and for any region
        label that is not recognised (callers are expected to skip those).
    """
    # Hard overrides first
    if iso3 in ISO_OVERRIDE:
        return ISO_OVERRIDE[iso3]
    if iso3 in OCEANIA_SET:
        return "Australia"
    if iso3 in CENTRAL_ASIA_SET or iso3 in CAUCASUS_SET:
        return "Asia"
    if iso3 in NORTH_AFRICA_SET:
        return "Africa"
    if iso3 in SOUTH_AMERICA_SET:
        return "South America"

    # Normalise defensively so a padded or missing label cannot defeat the
    # exact string comparisons below.
    region = wb_region_value.strip() if isinstance(wb_region_value, str) else ""

    # WB region routing for everything not caught by an ISO3 rule above.
    region_to_continent = {
        # non-Oceania EAP -> Asia
        "East Asia & Pacific": "Asia",
        "South Asia": "Asia",
        # remaining MENA (not North Africa) -> Asia
        "Middle East & North Africa": "Asia",
        # NOTE(review): the World Bank appears to have renamed the MENA region
        # in 2025 - confirm the exact label the API returns.
        "Middle East, North Africa, Afghanistan & Pakistan": "Asia",
        # Central Asia handled; rest -> Europe by default
        "Europe & Central Asia": "Europe",
        "Sub-Saharan Africa": "Africa",
        # South America handled; remaining LAC (Central America, Caribbean,
        # Mexico) -> North America
        "Latin America & Caribbean": "North America",
        "North America": "North America",
    }

    # "Aggregates" and unknown labels are deliberately absent -> None (skip).
    return region_to_continent.get(region)

# Fetch Country Metadata

Get all countries from the World Bank API, map them to the 7 continents, and store in a DataFrame.

In [None]:
def get_countries():
    """Fetch the World Bank country list and attach a continent to each entry.

    Aggregate entries and entries that ``map_continent7`` cannot place are
    left out.

    Returns:
        A DataFrame with the columns ``iso3c``, ``Name of country`` and
        ``Continent``, one row per ISO3 code and without missing values. The
        columns are present even when no country was retrieved.
    """
    url = f"{WB_BASE}/country"
    rows = fetch_all_pages(url, params={"format": "json", "per_page": 500})
    clean = []
    for r in rows:
        # "region" may be absent or null; both mean "no region information".
        region_val = (r.get("region") or {}).get("value")
        if isinstance(region_val, str):
            # Guard against padded labels before the exact comparison below.
            region_val = region_val.strip()
        if region_val == "Aggregates":
            continue
        iso3 = r.get("id")
        name = r.get("name")
        cont7 = map_continent7(iso3, region_val)
        if cont7 is None:
            # If truly unmapped or Antarctica (no GDP countries), skip
            continue
        clean.append({
            "iso3c": iso3,
            "Name of country": name,
            "Continent": cont7,
        })
    # Naming the columns explicitly keeps the frame well-formed when `clean`
    # is empty; otherwise drop_duplicates would fail on the missing "iso3c".
    columns = ["iso3c", "Name of country", "Continent"]
    frame = pd.DataFrame(clean, columns=columns)
    return frame.dropna().drop_duplicates(subset=["iso3c"])

# Fetch GDP Data

Download GDP (current US$) for 2000–2025 for all real countries.

In [None]:
def get_gdp(iso_filter):
    """Download the GDP series for the configured years in long format.

    Args:
        iso_filter: Iterable of ISO3 codes to keep; records for any other
            code are discarded.

    Returns:
        A DataFrame with the columns ``iso3c``, ``Year`` (int) and ``GDP``
        (the value as delivered by the API, which may be missing). The columns
        are present even when no record matched.
    """
    # Materialise the filter once: membership tests become O(1), and a pandas
    # Series is matched by its values rather than by its index labels.
    wanted = set(iso_filter)

    url = f"{WB_BASE}/country/all/indicator/{GDP_INDICATOR}"
    rows = fetch_all_pages(
        url,
        params={"format": "json", "per_page": 20000, "date": f"{START_YEAR}:{END_YEAR}"}
    )
    tidy = []
    for r in rows:
        iso = r.get("countryiso3code")
        if iso not in wanted:
            continue
        try:
            y = int(r.get("date"))
        except (TypeError, ValueError):
            # Skip records whose date is missing or not a plain year.
            continue
        tidy.append({"iso3c": iso, "Year": y, "GDP": r.get("value")})
    # Explicit columns so an empty result can still be merged on "iso3c".
    return pd.DataFrame(tidy, columns=["iso3c", "Year", "GDP"])

# Main

In [None]:
def main():
    """Build the wide GDP table and write it to ``gdp_2000_2025.csv``.

    Country metadata and the long GDP series are fetched, joined on the ISO3
    code and pivoted to one row per country with one column per year. The
    result is sorted by continent and country name, saved as CSV, and a small
    sample of rows is printed as a sanity check.
    """
    country_meta = get_countries()
    wanted_iso = set(country_meta["iso3c"])
    long_gdp = get_gdp(wanted_iso)

    # Attach name and continent to every observation, then go long -> wide.
    id_cols = ["Name of country", "Continent"]
    merged = long_gdp.merge(country_meta, on="iso3c", how="left")
    wide = merged.pivot_table(
        index=id_cols,
        columns="Year",
        values="GDP",
        aggfunc="first"
    )
    wide = wide.reset_index()

    # Add an empty column for every requested year the pivot did not produce.
    year_cols = list(range(START_YEAR, END_YEAR + 1))
    absent_years = [year for year in year_cols if year not in wide.columns]
    for year in absent_years:
        wide[year] = pd.NA

    # Fixed column order, then rows grouped by continent and ordered by name.
    wide = wide[id_cols + year_cols]
    wide = wide.sort_values(["Continent", "Name of country"])
    wide = wide.reset_index(drop=True)

    # Antarctica gets no rows: there are no Antarctic countries to list.

    # Write the result to disk.
    out_path = "gdp_2000_2025.csv"
    wide.to_csv(out_path, index=False)
    print(f"Saved {out_path}")

    # Quick sanity peek at a few well-known entries.
    sample_names = ["Australia","American Samoa","India","United States"]
    is_sample = wide["Name of country"].isin(sample_names)
    print(wide[is_sample].head())


if __name__ == "__main__":
    main()