# FAO metadata

Several FAO datasets need identifiers that come from the FAO API. Here we reconstruct them from a snapshot.

## Libraries

In [1]:
import json

import pandas as pd
from owid.catalog import Dataset, Table
from owid.catalog import utils
from owid.walden import Catalog

from etl.steps.data.converters import convert_walden_metadata

## Parameters

In [2]:
# Directory where the output dataset is created (passed to Dataset.create_empty below).
# NOTE(review): hard-coded /tmp default — presumably overridden when the notebook is
# executed as a pipeline step; confirm.
dest_dir = "/tmp/faostat_metadata"

## Fetch the data from Walden

In [3]:
# Look up the FAO metadata snapshot in the Walden catalog, pinned to one exact version.
walden_ds = Catalog().find_one(short_name="faostat_metadata", version="2022-02-10")

In [4]:
# Display the catalog record to confirm which snapshot was resolved.
walden_ds

Dataset(namespace='faostat', short_name='faostat_metadata', name='Metadata and identifiers - FAO (2022)', description='Metadata and identifiers used in FAO datasets', source_name='Food and Agriculture Organization of the United Nations', url='http://www.fao.org/faostat/en/#data', date_accessed='2022-02-10', file_extension='json', license_url='http://www.fao.org/contact-us/terms/db-terms-of-use/en', source_data_url='https://gist.githubusercontent.com/larsyencken/b872ac59e9f36800c270af0be48b909b/raw/cfa9204bdc1f42b6d0e099cd5a2a86a38ce29634/fao_metadata.json', md5='3d8882f43ed9f3854381979a40c43f52', publication_year=2022, publication_date='2022-02-10', owid_data_url='https://walden.nyc3.digitaloceanspaces.com/faostat/2022-02-10/fao_metadata.json', license_name='CC BY-NC-SA 3.0 IGO', access_notes=None, is_public=True)

In [5]:
# Make sure the snapshot is available locally and keep the path of the local copy.
local_file = walden_ds.ensure_downloaded()

In [6]:
# Display where the snapshot is stored on this machine.
local_file

'/Users/mojmir/.owid/walden/faostat/2022-02-10/faostat_metadata.json'

## Load and restructure

In [7]:
# Parse the snapshot into a mapping keyed by FAO domain code (iterated in the loop below).
# The encoding is given explicitly: JSON text is UTF-8, whereas a bare open() decodes with
# the platform's locale encoding and can garble or reject non-ASCII names on some systems.
with open(local_file, encoding="utf-8") as istream:
    all_metadata = json.load(istream)

In [8]:
# One entry per kind of metadata record extracted from every FAO domain:
#   category   - key under which the records are stored in a domain's metadata
#   index      - column(s) that become the index of the resulting table
#   short_name - suffix used when naming the resulting table
data_paths = [
    dict(category="itemgroup", index=["Item Group Code", "Item Code"], short_name="item"),
    dict(category="area", index=["Country Code"], short_name="area"),
    dict(category="element", index=["Element Code"], short_name="element"),
    dict(category="unit", index=["Unit Name"], short_name="unit"),
]

In [9]:
# Start a fresh, empty output dataset rooted at dest_dir; tables are added to it below.
ds = Dataset.create_empty(dest_dir)

In [10]:
# Derive the dataset-level metadata from the Walden catalog record.
# NOTE(review): no ds.save() call is visible in this part of the notebook — confirm
# whether an explicit save is needed for this metadata to be persisted.
ds.metadata = convert_walden_metadata(walden_ds)

In [21]:
# Build one metadata table per (FAO domain, record kind) pair and add each to the dataset.
# `utils` comes from the notebook's import cell (`from owid.catalog import utils`).
for domain, domain_meta in all_metadata.items():
    print(domain)
    for record in data_paths:
        print("  ", record["short_name"])

        # QCL stores its item groups under "itemsgroup" instead of the "itemgroup" key
        # used by the other domains, so that single lookup is special-cased.
        if domain == "QCL" and record["short_name"] == "item":
            category = "itemsgroup"
        else:
            category = record["category"]

        json_data = domain_meta[category]["data"]

        # verify_integrity=True makes set_index raise on duplicate index keys, so every
        # table that reaches the dataset has a unique index. Chaining (rather than
        # inplace=True) keeps the step free of in-place mutation.
        df = pd.DataFrame.from_dict(json_data).set_index(
            record["index"], verify_integrity=True
        )

        t = Table(df)
        # Tables are named "meta_<domain>_<kind>", e.g. "meta_qcl_item".
        t.metadata.short_name = f'meta_{domain.lower()}_{record["short_name"]}'

        # underscore_table presumably normalises names to snake_case before the table is
        # added — TODO confirm against owid.catalog.utils.
        ds.add(utils.underscore_table(t))

FBS
   item
   area
   element
   unit
FBSH
   item
   area
   element
   unit
QCL
   item
   area
   element
   unit
