In [8]:
from etl.datasets import Dataset
from etl.meta import *
from etl.variables import Variable
from etl.tables import Table

In [1]:
# Read the countries-with-continents entities table from its feather file.
df = pd.read_feather(
    "entities/04-countries-with-continents.feather"
)

In [2]:
# Display the column labels of the loaded table.
df.columns

Index(['code', 'name', 'iso_alpha2', 'iso_alpha3', 'imf_code', 'cow_letter',
       'cow_code', 'unctad_code', 'marc_code', 'ncd_code', 'kansas_code',
       'penn_code', 'wikidata_uri', 'wikidata_label', 'legacy_entity_id',
       'legacy_country_id', 'members'],
      dtype='object')

In [3]:
# Per-column metadata records, keyed by column name; starts out empty.
variable_metadata = {}

In [4]:
def add_variable_metadata(metadata_dict: dict, column_name: str, title: str, description: str) -> None:
    """Store the metadata record for one column in ``metadata_dict``.

    Args:
        metadata_dict: Mapping updated in place; the record is stored under
            the key ``column_name`` (an existing entry is overwritten).
        column_name: Name of the column being described; also used as the
            record's ``slug``.
        title: Short human-readable title for the column.
        description: Longer free-text description of the column.

    Returns:
        None. The only effect is the mutation of ``metadata_dict``.
    """
    # Write to the mapping that was passed in rather than to the notebook-level
    # `variable_metadata` global, so the parameter is honoured for any target dict.
    metadata_dict[column_name] = {
        "slug": column_name,
        "title": title,
        "description": description,
    }

In [5]:
# Register the metadata record for the "code" column.
add_variable_metadata(
    variable_metadata,
    column_name="code",
    title="OWID geo code",
    description=(
        "The primary identifier for geographic entities at Our World In Data. "
        "Based on ISO alpha 3 country codes but extended with custom identifiers where necessary."
    ),
)

In [7]:
# Register the metadata record for the "name" column.
add_variable_metadata(
    variable_metadata,
    column_name="name",
    title="Name",
    description="The preferred name for a country/region",
)

In [8]:
# Register the metadata record for the "iso_alpha2" column.
add_variable_metadata(
    variable_metadata,
    column_name="iso_alpha2",
    title="ISO 3166-1 alpha-2 code",
    description="International 2 letter ISO country code",
)

In [9]:
# Register the metadata record for the "iso_alpha3" column.
add_variable_metadata(
    variable_metadata,
    column_name="iso_alpha3",
    title="ISO 3166-1 alpha-3 code",
    description="International 3 letter ISO country code",
)

In [10]:
# Register the metadata record for the "imf_code" column.
add_variable_metadata(
    variable_metadata,
    column_name="imf_code",
    title="IMF code",
    description="IMF country code",
)

In [11]:
# Register the metadata record for the "cow_letter" column.
add_variable_metadata(
    variable_metadata,
    column_name="cow_letter",
    title="Correlates of War country code",
    description="Correlates of War project country code (StateAbb)",
)

In [12]:
# Register the metadata record for the "cow_code" column.
add_variable_metadata(
    variable_metadata,
    column_name="cow_code",
    title="Correlates of War country id",
    description="Correlates of War project country identifier (CCode)",
)

In [13]:
# Register the metadata record for the "unctad_code" column.
# Title corrected: the organisation's acronym is "UNCTAD" (the original read "UN CTAD").
add_variable_metadata(
    variable_metadata,
    column_name="unctad_code",
    title="UNCTAD code",
    description="United Nations Conference on Trade and Development country code",
)

In [14]:
# Register the metadata record for the "marc_code" column.
add_variable_metadata(
    variable_metadata,
    column_name="marc_code",
    title="MARC code",
    description="Library of Congress MARC project country code",
)

In [15]:
# Register the metadata record for the "kansas_code" column.
# TODO: the title just repeats the column name and the description is empty;
# both look like placeholders that still need real text.
add_variable_metadata(
    variable_metadata,
    column_name="kansas_code",
    title="kansas_code",
    description="",
)

In [16]:
# Register the metadata record for the "penn_code" column.
# Description corrected: the original misspelled "Table" as "Talbe".
add_variable_metadata(
    variable_metadata,
    column_name="penn_code",
    title="PENN world table country code",
    description="Country code for the PENN World Table project by the University of Groningen",
)

In [17]:
# Register the metadata record for the "wikidata_uri" column.
add_variable_metadata(
    variable_metadata,
    column_name="wikidata_uri",
    title="Wikidata id",
    description="The Wikidata identifier for this country/region",
)

In [18]:
# Register the metadata record for the "wikidata_label" column.
add_variable_metadata(
    variable_metadata,
    column_name="wikidata_label",
    title="Wikidata label",
    description="The Wikidata label (english language)",
)

In [19]:
# Register the metadata record for the "legacy_country_id" column.
add_variable_metadata(
    variable_metadata,
    column_name="legacy_country_id",
    title="Legacy OWID country table id",
    description=(
        "Used for matching legacy data that contained ids from the "
        "Our World In Data mysql country_name_tool_countrydata table"
    ),
)

In [20]:
# Register the metadata record for the "legacy_entity_id" column.
add_variable_metadata(
    variable_metadata,
    column_name="legacy_entity_id",
    title="Legacy OWID entity table id",
    description=(
        "Used for matching legacy data that contained ids from the "
        "Our World In Data mysql entity table"
    ),
)

In [23]:
# Register the metadata record for the "members" column.
# The description's trailing space is kept exactly as it was.
add_variable_metadata(
    variable_metadata,
    column_name="members",
    title="List of members of each region/country",
    description=(
        "An optional json encoded array of strings referencing other rows in this table by 'code'. "
        "Rows that have entries here are understood to 'contain' other entries, "
        "e.g. continents list the contained countries or a country could list "
        "the contained subnational regions. "
    ),
)

In [24]:
# Dataset-level metadata: slug, title, description and license of the table.
datatable_metadata = {
    "slug": "countries-regions",
    "title": "Countries and regions",
    # Wording fixed: the original sentence ended with the garbled phrase
    # "like continents like world".
    "description": (
        "Our World In Data countries and geo-regions harmonisation dataset. "
        "Contains various identifiers for all current sovereign nations, overseas territories, "
        "some historic countries as well as common groupings like continents and the world"
    ),
    "license": "CC-BY-4.0",
}

Metadata todo:
* Identify primary key column
* Add data types?

In [None]:
# Markdown text held in `readme_text` (starts with a "# Countries and regions" heading).
# NOTE(review): the text is unfinished — it stops mid-sentence at "The main "; TODO complete it.
readme_text = """# Countries and regions

This dataset is used for harmonizing countries and world regions across datasets. The columns are mostly 
identifiers from various sources that can be helpful in linking data. The main 
"""