<a href="https://www.kaggle.com/code/naveenapaleti/data-fetch-worldbank-population-2000-2024?scriptVersionId=264757182" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# World Population from 2000 to 2024

### Import Libraries

In [1]:
import requests
import pandas as pd
import pycountry_convert as pc


### Parameters

In [2]:
# Year window, passed verbatim as the API's `date` query parameter
YEARS = "2000:2024"
# Root URL of the World Bank v2 API
BASE = "https://api.worldbank.org/v2"
# Country selector placed in the request path
COUNTRIES = "all"

# Friendly column name -> World Bank indicator code
INDICATORS = dict(
    total_population="SP.POP.TOTL",
    male_population="SP.POP.TOTL.MA.IN",
    female_population="SP.POP.TOTL.FE.IN",
    children_population="SP.POP.0014.TO",  # ages 0–14
)

# Two-letter continent code -> continent display name
continent_map = dict(
    AF="Africa",
    AS="Asia",
    EU="Europe",
    NA="North America",
    SA="South America",
    OC="Oceania",
)

### Function to pull indicators

In [3]:
def wb_pull(indicator_code: str) -> pd.DataFrame:
    """Fetch a World Bank indicator for all countries and years.

    Requests every result page for ``indicator_code`` from the World Bank API,
    using the notebook-level ``BASE``, ``COUNTRIES`` and ``YEARS`` settings,
    and keeps only the observations that carry a value.

    Parameters
    ----------
    indicator_code : str
        World Bank indicator id, e.g. ``"SP.POP.TOTL"``.

    Returns
    -------
    pd.DataFrame
        One row per retained observation with columns ``iso3``, ``country``,
        ``year`` (int) and a float column named after ``indicator_code``.
        These columns are present even when no observation is returned.

    Raises
    ------
    requests.HTTPError
        If any page request (not only the first) returns an HTTP error status.
    ValueError
        If a response body is not the expected two-item ``[meta, data]`` list.
    """
    url = f"{BASE}/country/{COUNTRIES}/indicator/{indicator_code}?date={YEARS}&format=json&per_page=20000"

    def _fetch_page(page: int):
        """Return ``(meta, rows)`` for one result page, failing loudly on errors."""
        resp = requests.get(f"{url}&page={page}", timeout=60)
        resp.raise_for_status()
        payload = resp.json()
        # Anything other than a [meta, data] pair (e.g. an API error message)
        # is reported with context instead of a bare unpacking error.
        if not isinstance(payload, list) or len(payload) != 2:
            raise ValueError(
                f"Unexpected World Bank response for {indicator_code!r} "
                f"(page {page}): {str(payload)[:200]}"
            )
        meta, data = payload
        return meta or {}, data or []

    meta, rows = _fetch_page(1)
    for p in range(2, int(meta.get("pages", 1)) + 1):
        rows.extend(_fetch_page(p)[1])

    out = []
    for d in rows:
        # Skip empty entries and observations without a value.
        if not d or d.get("value") is None:
            continue
        out.append({
            "iso3": d["countryiso3code"],
            "country": d["country"]["value"],
            "year": int(d["date"]),
            indicator_code: float(d["value"])
        })
    # Explicit columns keep the schema stable when `out` is empty, so the
    # downstream rename/merge still finds its key columns.
    return pd.DataFrame(out, columns=["iso3", "country", "year", indicator_code])


### Merge indicators into one table

In [4]:
# One frame per indicator, with the API code column renamed to its friendly name
dfs = [
    wb_pull(code).rename(columns={code: nice_name})
    for nice_name, code in INDICATORS.items()
]

# Outer-join the frames one by one so a row missing from one indicator is kept
merge_keys = ["iso3", "country", "year"]
pop = dfs[0]
for df in dfs[1:]:
    pop = pd.merge(pop, df, on=merge_keys, how="outer")

# Filter by year (both bounds inclusive), then order rows by country and year
in_range = pop["year"].between(2000, 2024)
pop = (
    pop.loc[in_range]
    .copy()
    .sort_values(["country", "year"])
    .reset_index(drop=True)
)


In [5]:
# Preview the first rows of the merged table (sorted by country, then year)
pop.head()

Unnamed: 0,iso3,country,year,total_population,male_population,female_population,children_population
0,AFG,Afghanistan,2000,20130327.0,10094645.0,10035682.0,10070182.0
1,AFG,Afghanistan,2001,20284307.0,10179793.0,10104514.0,10176371.0
2,AFG,Afghanistan,2002,21378117.0,10737198.0,10640919.0,10739916.0
3,AFG,Afghanistan,2003,22733049.0,11426685.0,11306364.0,11421150.0
4,AFG,Afghanistan,2004,23560654.0,11851534.0,11709120.0,11822009.0


In [6]:
# Preview the last rows to check the end of the year range
pop.tail()

Unnamed: 0,iso3,country,year,total_population,male_population,female_population,children_population
6620,ZWE,Zimbabwe,2020,15526888.0,7370588.0,8156300.0,6549831.0
6621,ZWE,Zimbabwe,2021,15797210.0,7505482.0,8291728.0,6614596.0
6622,ZWE,Zimbabwe,2022,16069056.0,7642717.0,8426339.0,6686582.0
6623,ZWE,Zimbabwe,2023,16340822.0,7780934.0,8559888.0,6752564.0
6624,ZWE,Zimbabwe,2024,16634373.0,7929088.0,8705285.0,6799094.0


### Add Continent column

In [7]:
def iso3_to_continent(iso3):
    """Map an ISO alpha-3 country code to a continent name.

    The code is converted to alpha-2 and then to a continent code with
    ``pycountry_convert``; the continent code is looked up in
    ``continent_map``.

    Parameters
    ----------
    iso3 : str
        Three-letter country code taken from the ``iso3`` column.

    Returns
    -------
    str
        The continent name, or ``"Other"`` when the code cannot be converted
        or the continent code is not in ``continent_map``.
    """
    try:
        country_alpha2 = pc.country_alpha3_to_country_alpha2(iso3)
        cont_code = pc.country_alpha2_to_continent_code(country_alpha2)
    except Exception:
        # Best-effort lookup: any conversion failure maps to "Other".
        # `Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate, so a long `.apply` can still be interrupted.
        return "Other"
    return continent_map.get(cont_code, "Other")

# Derive each row's continent from its ISO alpha-3 code
pop["continent"] = pop["iso3"].map(iso3_to_continent)


In [8]:
# Check that the new continent column is present and populated
pop.tail()

Unnamed: 0,iso3,country,year,total_population,male_population,female_population,children_population,continent
6620,ZWE,Zimbabwe,2020,15526888.0,7370588.0,8156300.0,6549831.0,Africa
6621,ZWE,Zimbabwe,2021,15797210.0,7505482.0,8291728.0,6614596.0,Africa
6622,ZWE,Zimbabwe,2022,16069056.0,7642717.0,8426339.0,6686582.0,Africa
6623,ZWE,Zimbabwe,2023,16340822.0,7780934.0,8559888.0,6752564.0,Africa
6624,ZWE,Zimbabwe,2024,16634373.0,7929088.0,8705285.0,6799094.0,Africa


In [9]:
# Final column order: identifiers, continent, year, then one column per
# indicator. The indicator columns come from the INDICATORS keys (the friendly
# names the frames were renamed to), in declaration order, so an indicator
# added to INDICATORS is not silently dropped here.
cols = ["iso3", "country", "continent", "year", *INDICATORS]

pop = pop[cols]

### Save CSV file

In [10]:
# Write the final table to disk, leaving out the positional index
output_file = "population_2000_2024.csv"
pop.to_csv(path_or_buf=output_file, index=False)