# FAO metadata

Several FAO datasets need identifiers that come from the FAO API. Here we reconstruct them from a snapshot.

## Libraries

In [None]:
from owid.walden import Catalog
from owid.catalog import Dataset, Table
from etl.steps.data.converters import convert_walden_metadata
import json
import pandas as pd

## Parameters

In [None]:
# Directory in which the output dataset is created (passed to
# Dataset.create_empty further down).
dest_dir = "/tmp/faostat_metadata"

## Fetch the data from Walden

In [None]:
# Look up the Walden catalog entry with short name "faostat_metadata",
# pinned to the 2022-02-10 version.
walden_ds = Catalog().find_one(short_name="faostat_metadata", version="2022-02-10")

In [None]:
# Bare expression: shows the catalog entry as this cell's output.
walden_ds

In [None]:
# The returned value is used as a file path by open() further down.
# NOTE(review): presumably this downloads the snapshot only when it is not
# already cached locally -- confirm against owid.walden.
local_file = walden_ds.ensure_downloaded()

In [None]:
# Bare expression: shows the local file path as this cell's output.
local_file

## Load and restructure

In [None]:
# Parse the downloaded snapshot into plain Python objects.
# The file is opened in binary mode so that json.load detects the encoding
# (UTF-8/16/32) itself instead of relying on the platform's locale default,
# which is not UTF-8 everywhere and could corrupt or reject non-ASCII names.
with open(local_file, "rb") as istream:
    all_metadata = json.load(istream)

In [None]:
# One entry per metadata table extracted from every FAO domain:
#   category   -- key under which the records live in the domain's metadata
#   index      -- column(s) used as the table index
#   short_name -- suffix used when naming the resulting table
data_paths = [
    {"category": category, "index": index, "short_name": short_name}
    for category, index, short_name in (
        ("itemgroup", ["Item Group Code", "Item Code"], "item"),
        ("area", ["Country Code"], "area"),
        ("element", ["Element Code"], "element"),
        ("unit", ["Unit Name"], "unit"),
    )
]

In [None]:
# Start a new, empty dataset at dest_dir; tables are added to it further down.
ds = Dataset.create_empty(dest_dir)

In [None]:
# Set the dataset's metadata from the Walden catalog entry, as translated by
# convert_walden_metadata.
ds.metadata = convert_walden_metadata(walden_ds)

In [None]:
from owid.catalog import utils

# Build one table per (domain, data_paths entry) pair and add it to the dataset.
for domain, domain_meta in all_metadata.items():
    print(domain)
    for record in data_paths:
        short_name = record["short_name"]
        print("  ", short_name)

        # QCL is the odd one out: its item-group records are stored under
        # "itemsgroup" rather than "itemgroup".
        category = (
            "itemsgroup"
            if domain == "QCL" and short_name == "item"
            else record["category"]
        )

        json_data = domain_meta[category]["data"]
        # verify_integrity=True makes pandas raise if the chosen index
        # columns contain duplicate entries.
        df = pd.DataFrame.from_dict(json_data).set_index(
            record["index"], verify_integrity=True
        )

        t = Table(df)
        t.metadata.short_name = f"meta_{domain.lower()}_{short_name}"
        ds.add(utils.underscore_table(t))

# Persist the dataset once every table has been added.
ds.save()