In [1]:
from owid.catalog import Dataset
from owid.catalog.meta import *
from owid.catalog.variables import Variable
from owid.catalog.tables import Table

In [2]:
import pandas as pd

In [30]:
# Target directory handed to Dataset.create_empty() further down (relative path).
target_dataset_path = "../../../data/reference"

In [3]:
# Load the intermediate countries-with-continents table; the path is relative to the
# working directory the notebook is executed from.
df = pd.read_feather("intermediate/04-countries-with-continents.feather")

In [4]:
# Show the raw frame: region rows (e.g. OWID_ASI) carry a `members` list, country rows leave it empty.
df

Unnamed: 0,code,name,iso_alpha2,iso_alpha3,imf_code,cow_letter,cow_code,unctad_code,marc_code,ncd_code,kansas_code,penn_code,wikidata_uri,wikidata_label,legacy_entity_id,legacy_country_id,members
0,AFG,Afghanistan,AF,AFG,512,AFG,700,AFG,AF,AFGN,AFG,AFG,http://www.wikidata.org/entity/Q889,Afghanistan,15,562,
1,ALA,Aland Islands,AX,ALA,,,,,,,,,http://www.wikidata.org/entity/Q5689,Åland,296,791,
2,ALB,Albania,AL,ALB,914,ALB,339,ALB,AA,ALBN,ALB,ALB,http://www.wikidata.org/entity/Q222,Albania,16,565,
3,DZA,Algeria,DZ,DZA,612,ALG,615,ALG,AE,ALGR,DZA,DZA,http://www.wikidata.org/entity/Q262,Algeria,17,619,
4,ASM,American Samoa,AS,ASM,859,,,,AS,,ASM,ASM,http://www.wikidata.org/entity/Q16641,American Samoa,246,571,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
291,OWID_ASI,Asia,,,,,,,,,,,,,275,,"[""AFG"", ""ARM"", ""AZE"", ""BHR"", ""BGD"", ""BTN"", ""IO..."
292,OWID_EUR,Europe,,,,,,,,,,,,,276,,"[""ALA"", ""ALB"", ""AND"", ""AUT"", ""OWID_AUH"", ""OWID..."
293,OWID_LAM,Latin America,,,,,,,,,,,,,5403,,"[""ARG"", ""BOL"", ""BRA"", ""OWID_NLC"", ""CHL"", ""COL""..."
294,OWID_NAM,North America,,,,,,,,,,,,,294,,"[""AIA"", ""ATG"", ""ABW"", ""BHS"", ""BRB"", ""BLZ"", ""BM..."


In [5]:
# Wrap the DataFrame in a catalog Table so that column-level and table-level
# metadata can be attached in the cells below.
countries_regions = Table(df)

In [6]:
# Sanity check: the wrapped object is a catalog Table.
isinstance(countries_regions, Table)

True

In [7]:
# Sanity check: accessing a column of a Table yields a catalog Variable.
isinstance(countries_regions.name, Variable)

True

In [8]:
# Preview the first rows of the Table to confirm the data survived the wrapping.
countries_regions.head()

Unnamed: 0,code,name,iso_alpha2,iso_alpha3,imf_code,cow_letter,cow_code,unctad_code,marc_code,ncd_code,kansas_code,penn_code,wikidata_uri,wikidata_label,legacy_entity_id,legacy_country_id,members
0,AFG,Afghanistan,AF,AFG,512.0,AFG,700.0,AFG,AF,AFGN,AFG,AFG,http://www.wikidata.org/entity/Q889,Afghanistan,15,562,
1,ALA,Aland Islands,AX,ALA,,,,,,,,,http://www.wikidata.org/entity/Q5689,Åland,296,791,
2,ALB,Albania,AL,ALB,914.0,ALB,339.0,ALB,AA,ALBN,ALB,ALB,http://www.wikidata.org/entity/Q222,Albania,16,565,
3,DZA,Algeria,DZ,DZA,612.0,ALG,615.0,ALG,AE,ALGR,DZA,DZA,http://www.wikidata.org/entity/Q262,Algeria,17,619,
4,ASM,American Samoa,AS,ASM,859.0,,,,AS,,ASM,ASM,http://www.wikidata.org/entity/Q16641,American Samoa,246,571,


In [9]:
# List the columns that need metadata.
# NOTE(review): `ncd_code` is listed here, but none of the cells visible in this
# notebook attach metadata to it — confirm whether that omission is intentional.
countries_regions.columns

Index(['code', 'name', 'iso_alpha2', 'iso_alpha3', 'imf_code', 'cow_letter',
       'cow_code', 'unctad_code', 'marc_code', 'ncd_code', 'kansas_code',
       'penn_code', 'wikidata_uri', 'wikidata_label', 'legacy_entity_id',
       'legacy_country_id', 'members'],
      dtype='object')

In [10]:
def add_variable_metadata(
    table: Table, column_name: str, title: str, description: str
) -> None:
    """Attach name, title and description metadata to one column of ``table``.

    Args:
        table: Table whose column is annotated; its column metadata is updated.
        column_name: Column to annotate; also stored as the metadata ``name``.
        title: Value stored as the column's metadata ``title``.
        description: Value stored as the column's metadata ``description``.
    """
    new_values = {
        "name": column_name,
        "title": title,
        "description": description,
    }
    # Look the column up anew for every attribute, setting one attribute at a time.
    for attribute, value in new_values.items():
        setattr(table[column_name].metadata, attribute, value)

In [11]:
# `code` is the identifier column; it becomes the table index further down.
add_variable_metadata(
    table=countries_regions,
    column_name="code",
    title="OWID geo code",
    description="The primary identifier for geographic entities at Our World In Data. Based on ISO alpha 3 country codes but extended with custom identifiers where necessary.",
)

In [12]:
# Human-readable name of each entity.
add_variable_metadata(
    table=countries_regions,
    column_name="name",
    title="Name",
    description="The preferred name for a country/region",
)

In [13]:
# Two-letter ISO code column.
add_variable_metadata(
    table=countries_regions,
    column_name="iso_alpha2",
    title="ISO 3166-1 alpha-2 code",
    description="International 2 letter ISO country code",
)

In [14]:
# Three-letter ISO code column.
add_variable_metadata(
    table=countries_regions,
    column_name="iso_alpha3",
    title="ISO 3166-1 alpha-3 code",
    description="International 3 letter ISO country code",
)

In [15]:
# IMF country code column.
add_variable_metadata(
    table=countries_regions,
    column_name="imf_code",
    title="IMF code",
    description="IMF country code",
)

In [16]:
# Correlates of War letter code (StateAbb).
add_variable_metadata(
    table=countries_regions,
    column_name="cow_letter",
    title="Correlates of War country code",
    description="Correlates of War project country code (StateAbb)",
)

In [17]:
# Correlates of War numeric identifier (CCode).
add_variable_metadata(
    table=countries_regions,
    column_name="cow_code",
    title="Correlates of War country id",
    description="Correlates of War project country identifier (CCode)",
)

In [18]:
# The title uses the organisation's acronym, UNCTAD, written as a single word
# (the description below spells out the full name).
add_variable_metadata(
    countries_regions,
    "unctad_code",
    "UNCTAD code",
    """United Nations Conference on Trade and Development country code""",
)

In [19]:
# Library of Congress MARC country code column.
add_variable_metadata(
    table=countries_regions,
    column_name="marc_code",
    title="MARC code",
    description="Library of Congress MARC project country code",
)

In [20]:
# NOTE(review): placeholder metadata — the title just repeats the column name and
# the description is empty; fill both in once the code's origin is confirmed.
add_variable_metadata(
    table=countries_regions,
    column_name="kansas_code",
    title="kansas_code",
    description="",
)

In [21]:
# Description text is persisted with the dataset, so the spelling matters:
# "Talbe" corrected to "Table".
add_variable_metadata(
    countries_regions,
    "penn_code",
    "PENN world table country code",
    """Country code for the PENN World Table project by the University of Groningen""",
)

In [22]:
# NOTE(review): the previewed values are full entity URIs, although the title says "id".
add_variable_metadata(
    table=countries_regions,
    column_name="wikidata_uri",
    title="Wikidata id",
    description="The Wikidata identifier for this country/region",
)

In [23]:
# Label of the entity as recorded on Wikidata.
add_variable_metadata(
    table=countries_regions,
    column_name="wikidata_label",
    title="Wikidata label",
    description="The Wikidata label (english language)",
)

In [25]:
# Identifier used by the legacy country table.
add_variable_metadata(
    table=countries_regions,
    column_name="legacy_country_id",
    title="Legacy OWID country table id",
    description="Used for matching legacy data that contained ids from the Our World In Data mysql country_name_tool_countrydata table",
)

In [26]:
# Identifier used by the legacy entity table.
add_variable_metadata(
    table=countries_regions,
    column_name="legacy_entity_id",
    title="Legacy OWID entity table id",
    description="Used for matching legacy data that contained ids from the Our World In Data mysql entity table",
)

In [27]:
# `members` links a containing row (e.g. a continent) to the rows it contains.
add_variable_metadata(
    table=countries_regions,
    column_name="members",
    title="List of members of each region/country",
    description="An optional json encoded array of strings referencing other rows in this table by 'code'. Rows that have entries here are understood to 'contain' other entries, e.g. continents list the contained countries or a country could list the contained subnational regions. ",
)

In [28]:
# Make `code` the index of the table, modifying it in place.
# NOTE(review): because of inplace=True this cell is presumably not re-runnable —
# after the first run `code` is no longer a regular column. Restart the kernel
# and run all cells instead of re-executing this one.
countries_regions.set_index("code", inplace=True)

In [29]:
# Table-level metadata (as opposed to the per-column metadata set above).
countries_regions.metadata.short_name = "countries_regions"
countries_regions.metadata.title = "Countries and regions"
# The closing phrase previously read "like continents like world"; reworded so
# the persisted description is grammatical.
countries_regions.metadata.description = (
    "Our World In Data countries and geo-regions harmonisation dataset. "
    "Contains various identifiers for all current sovereign nations, overseas territories, "
    "some historic countries as well as common groupings like continents or the world"
)

In [32]:
# Create a new, empty dataset at the target path; the table and the dataset
# metadata are added in the cells below.
ds = Dataset.create_empty(target_dataset_path)

In [33]:
# Add the annotated table to the dataset, requesting CSV as the storage format.
ds.add(countries_regions, format="csv")

In [34]:
# Spot-check that the per-column metadata set earlier is attached to the table.
countries_regions.penn_code.metadata

VariableMeta(title='PENN world table country code', description='Country code for the PENN World Talbe project by the University of Groningen', sources=[], licenses=[])

In [35]:
# Dataset-level metadata: it describes the whole reference dataset, of which
# the countries/regions table is one member.
ds.metadata.short_name = "owid_reference"
# Grammar fix in the persisted text: "one dimensions" -> "one dimension".
ds.metadata.description = """Our World In Data reference datasets. These datasets define the vocabulary for important dimensions that we harmonized across different datasets. 
Every table represents the vocabulary for one dimension and can include additional columns like third party identifiers, hierarchical grouping etc."""

In [36]:
# Save the dataset now that its short name and description have been filled in.
ds.save()

In [50]:
# Markdown text describing the countries/regions dataset (presumably for a README).
# NOTE(review): the text currently stops mid-sentence ("The main ") — finish it.
readme_text = """# Countries and regions

This dataset is used for harmonizing countries and world regions across datasets. The columns are mostly 
identifiers from various sources that can be helpful in linking data. The main 
"""