In [1]:
from owid.catalog import Dataset
from etl.paths import DATA_DIR
import pandas as pd

# Open the dataset stored under DATA_DIR/garden/demography/2022-12-08/population
# and show which tables it contains.
population_dataset_path = DATA_DIR / "garden" / "demography" / "2022-12-08" / "population"
ds = Dataset(population_dataset_path)
print(ds.table_names)

['population']


In [2]:
# Take the "population" table from the dataset and call reset_index() on it so
# that the index levels become ordinary columns.
df = ds["population"].reset_index()

In [3]:
# Show every distinct value of the `source` column as a plain list.
list(df.source.unique())

['HYDE v3.2 (https://dataportaal.pbl.nl/downloads/HYDE/)',
 'Gapminder v6 (https://www.gapminder.org/data/documentation/gd003/)',
 'United Nations - Population Division 2022 (https://population.un.org/wpp/Download/Standard/Population/)',
 'Gapminder - Systema Globalis (https://github.com/open-numbers/ddf--gapminder--systema_globalis); Gapminder v6 (https://www.gapminder.org/data/documentation/gd003/); HYDE v3.2 (https://dataportaal.pbl.nl/downloads/HYDE/); United Nations - Population Division 2022 (https://population.un.org/wpp/Download/Standard/Population/)',
 'Gapminder - Systema Globalis (https://github.com/open-numbers/ddf--gapminder--systema_globalis)',
 'Gapminder v6 (https://www.gapminder.org/data/documentation/gd003/); HYDE v3.2 (https://dataportaal.pbl.nl/downloads/HYDE/); United Nations - Population Division 2022 (https://population.un.org/wpp/Download/Standard/Population/)']

In [4]:
def print_source_details(source_name, data=None):
    """Print a source label followed by the first and last year it covers.

    Args:
        source_name: Exact value of the ``source`` column to select.
        data: Optional table with ``source`` and ``year`` columns. When
            omitted, the notebook-level ``df`` is used, so existing
            one-argument calls behave exactly as before.

    Output (stdout), two lines:
        1. ``source_name`` itself.
        2. The minimum and maximum ``year`` among the rows whose ``source``
           equals ``source_name``, separated by a single space.
    """
    # Fall back to the notebook-global table only when no table is supplied,
    # so the function no longer depends on hidden state when one is passed.
    table = df if data is None else data
    rows = table[table.source == source_name]
    print(source_name)
    print(rows.year.min(), rows.year.max())

In [5]:
# Source labels to inspect one by one. Each entry names a single origin; the
# `source` column also holds combined labels (several origins joined with
# "; "), which are not listed here.
source_names = [
    "Gapminder - Systema Globalis (https://github.com/open-numbers/ddf--gapminder--systema_globalis)",
    "Gapminder v6 (https://www.gapminder.org/data/documentation/gd003/)",
    "HYDE v3.2 (https://dataportaal.pbl.nl/downloads/HYDE/)",
    "United Nations - Population Division 2022 (https://population.un.org/wpp/Download/Standard/Population/)",
]

In [6]:
# For each label: print its year span, then the distinct countries that carry
# exactly that label in rows with year > 1950, then a blank separator line.
for source in source_names:
    print_source_details(source)
    has_this_source = df.source == source
    is_after_1950 = df.year > 1950
    print(df[has_this_source & is_after_1950].country.unique())
    print()

Gapminder - Systema Globalis (https://github.com/open-numbers/ddf--gapminder--systema_globalis)
1555 2008
['Akrotiri and Dhekelia', 'Czechoslovakia', 'East Germany', 'Eritrea and Ethiopia', 'Serbia and Montenegro', 'USSR', 'West Germany', 'Yemen Arab Republic', 'Yemen People's Republic', 'Yugoslavia']
Categories (267, object): ['Afghanistan', 'Africa', 'Africa (UN)', 'Akrotiri and Dhekelia', ..., 'Yemen People's Republic', 'Yugoslavia', 'Zambia', 'Zimbabwe']

Gapminder v6 (https://www.gapminder.org/data/documentation/gd003/)
1800 2100
['Vatican']
Categories (267, object): ['Afghanistan', 'Africa', 'Africa (UN)', 'Akrotiri and Dhekelia', ..., 'Yemen People's Republic', 'Yugoslavia', 'Zambia', 'Zimbabwe']

HYDE v3.2 (https://dataportaal.pbl.nl/downloads/HYDE/)
-10000 2017
['Netherlands Antilles', 'Svalbard and Jan Mayen']
Categories (267, object): ['Afghanistan', 'Africa', 'Africa (UN)', 'Akrotiri and Dhekelia', ..., 'Yemen People's Republic', 'Yugoslavia', 'Zambia', 'Zimbabwe']

United 

In [7]:
# Display all rows whose country is "Vatican".
df[df.country == "Vatican"]

Unnamed: 0,country,year,population,world_pop_share,source
75456,Vatican,1800,905,0.0,Gapminder v6 (https://www.gapminder.org/data/d...
75457,Vatican,1801,905,0.0,Gapminder v6 (https://www.gapminder.org/data/d...
75458,Vatican,1802,905,0.0,Gapminder v6 (https://www.gapminder.org/data/d...
75459,Vatican,1803,905,0.0,Gapminder v6 (https://www.gapminder.org/data/d...
75460,Vatican,1804,905,0.0,Gapminder v6 (https://www.gapminder.org/data/d...
...,...,...,...,...,...
75752,Vatican,2096,793,0.0,Gapminder v6 (https://www.gapminder.org/data/d...
75753,Vatican,2097,796,0.0,Gapminder v6 (https://www.gapminder.org/data/d...
75754,Vatican,2098,796,0.0,Gapminder v6 (https://www.gapminder.org/data/d...
75755,Vatican,2099,795,0.0,Gapminder v6 (https://www.gapminder.org/data/d...


In [8]:
# Display the metadata object attached to the `population` column.
df.population.metadata

VariableMeta(title='Population', description='Population by country, available from 10,000 BCE to 2100 based on Gapminder data, HYDE, and UN Population Division (2022) estimates.\n\n* 10,000 BCE - 1799: Historical estimates by HYDE (v3.2). Includes some datapoints from Gapminder (Systema Globalis).\n* 1800-1949: Historical estimates by Gapminder. Includes some datapoints from HYDE (v3.2) and Gapminder (Systema Globalis).\n* 1950-2021: Population records by the United Nations - Population Division (2022). Includes some datapoints from HYDE (v3.2), Gapminder (Systema Globalis) and Gapminder (v6).\n* 2022-2100: Projections based on Medium variant by the United Nations - Population Division (2022).\n', sources=[Source(name='Gapminder (v6)', description=None, url='https://www.gapminder.org/data/documentation/gd003/', source_data_url=None, owid_data_url=None, date_accessed='October 8, 2021', publication_date=None, publication_year=None, published_by='Gapminder (v6)', publisher_source=None), 

In [9]:
# Display the metadata object attached to the `source` column.
df.source.metadata

VariableMeta(title=None, description=None, sources=[], licenses=[], unit=None, short_unit=None, display=None, additional_info=None)

In [10]:
import re

In [11]:
# Keep the description text from the `population` column's metadata.
description = df.population.metadata.description

In [12]:
# For every line of `description`, find the run of digits, "-", "B", "C", "E",
# "," and whitespace that directly precedes a ":" and print that run.
new_description = []  # initialised here; this cell does not append to it
for line in description.split("\n"):
    # Raw string: "\d", "\-" and "\s" are not valid escapes in an ordinary
    # string literal and raise an invalid-escape warning on recent Python
    # versions. The pattern value itself is unchanged.
    match = re.search(r"([\d\-BCE,\s]*):.*", line)
    if match:
        print(match.group(1))

 10,000 BCE - 1799
 1800-1949
 1950-2021
 2022-2100


In [57]:
# A single sample line: period label, ":", then free text.
s = "2022-2100: Projections based on Medium variant by the United Nations - Population Division (2022)."

In [72]:
# Apply the period pattern to the sample string `s`. Written as a raw string
# because "\d", "\-" and "\s" are invalid escapes in an ordinary literal and
# raise a warning on recent Python versions; the pattern value is unchanged.
match = re.search(r"([\d\-BCE,\s]*):.*", s)

In [14]:
# Display all rows whose country is "Netherlands Antilles".
df[df.country == "Netherlands Antilles"]

Unnamed: 0,country,year,population,world_pop_share,source
48654,Netherlands Antilles,200,0,0.0,HYDE v3.2 (https://dataportaal.pbl.nl/download...
48655,Netherlands Antilles,300,0,0.0,HYDE v3.2 (https://dataportaal.pbl.nl/download...
48656,Netherlands Antilles,400,0,0.0,HYDE v3.2 (https://dataportaal.pbl.nl/download...
48657,Netherlands Antilles,500,1,0.0,HYDE v3.2 (https://dataportaal.pbl.nl/download...
48658,Netherlands Antilles,600,3,0.0,HYDE v3.2 (https://dataportaal.pbl.nl/download...
...,...,...,...,...,...
48712,Netherlands Antilles,2013,315716,0.0,HYDE v3.2 (https://dataportaal.pbl.nl/download...
48713,Netherlands Antilles,2014,317110,0.0,HYDE v3.2 (https://dataportaal.pbl.nl/download...
48714,Netherlands Antilles,2015,318504,0.0,HYDE v3.2 (https://dataportaal.pbl.nl/download...
48715,Netherlands Antilles,2016,319898,0.0,HYDE v3.2 (https://dataportaal.pbl.nl/download...
