# FAOstat: Crops and livestock products
[_Source data_](https://www.fao.org/faostat/en/#data)

## Parameters

In [None]:
# Directory in which the output dataset is created (passed to
# Dataset.create_empty in the "Make a dataset" step).
dest_dir = "/tmp/faostat_qcl"

## Imports

In [None]:
import zipfile
import tempfile
import shutil
from pathlib import Path
import os

import requests
import pandas as pd

from owid.catalog import Dataset, Table, frames
from owid.walden import Catalog
from etl.paths import DATA_DIR
from etl.steps.data.converters import convert_walden_metadata

## Fetch walden dataset

In [None]:
# Look up the single walden catalog entry matching these three identifiers
# ("faostat", "2021-03-18", "faostat_QCL").
walden_catalog = Catalog()
walden_ds = walden_catalog.find_one("faostat", "2021-03-18", "faostat_QCL")

In [None]:
# Bare expression: shown as the notebook cell output to inspect the fetched entry.
walden_ds

## Make a dataset

In [None]:
# Create the empty dataset at dest_dir, then attach metadata converted from
# the walden entry, overriding its short name before saving.
ds = Dataset.create_empty(dest_dir)
ds_metadata = convert_walden_metadata(walden_ds)
ds_metadata.short_name = "faostat_qcl"
ds.metadata = ds_metadata
ds.save()

In [None]:
# Bare expression: shown as the notebook cell output to inspect the new dataset.
ds

## Unzip to temp directory

In [None]:
# Scratch directory for the extracted archive. mkdtemp() does not clean up
# after itself; the directory is removed by shutil.rmtree in the final
# "Cleanup" cell, so it is left behind if the notebook stops before that.
tmp_dir = tempfile.mkdtemp()

In [None]:
# Extract the whole archive into the scratch directory. The context manager
# guarantees the zip file handle is closed once extraction finishes, even if
# extractall() raises.
with zipfile.ZipFile(walden_ds.local_path) as archive:
    archive.extractall(tmp_dir)
# print(os.listdir(tmp_dir))

# Pick the extracted entry whose name contains "(Normalized)". The
# single-element unpacking raises ValueError unless exactly one entry matches.
(filename,) = [name for name in os.listdir(tmp_dir) if "(Normalized)" in name]
# Bare expression: shown as the notebook cell output.
filename

## Load dataset
### Data file

In [None]:
# Load the extracted "(Normalized)" file; it is decoded as latin-1.
data_path = Path(tmp_dir) / filename
df = pd.read_csv(data_path, encoding="latin-1")

In [None]:
# Preview the first rows of the freshly loaded frame as the cell output.
df.head()

## Clean dataset

### Sanity checks

In [None]:
# Check nulls
# Check nulls: one boolean per column, True where that column contains at
# least one missing value. This only displays the result; it does not raise.
df.isnull().any()

In [None]:
# def check_unique_pairs(df, name_1, name_2):
#     if not (
#         (df.groupby(name_1)[name_2].nunique() != 1).any()
#         and (df.groupby(name_2)[name_1].nunique() != 1).any()
#     ):
#         raise ValueError(
#             f"Some `{name_1}` may have multiple `{name_2}` values (or opposite)."
#         )


# fields = ["Item", "Element"]
# for field in fields:
#     check_unique_pairs(df, field, f"{field} Code")

In [None]:
# Every (Element, Unit) pair must correspond to a single Element Code;
# abort if any pair maps to more than one.
codes_per_pair = df.groupby(["Element", "Unit"])["Element Code"].nunique()
has_ambiguous_pair = (codes_per_pair > 1).any()
if has_ambiguous_pair:
    raise ValueError("Element-Unit not unique!")

### Drop columns and set Index

In [None]:
# Remove these columns in place. The corresponding "... Code" columns and
# "Year" are kept and become part of the index in the following cell.
redundant_columns = ["Area", "Item", "Element", "Year Code"]
df.drop(columns=redundant_columns, inplace=True)

In [None]:
# Index the frame in place by these key columns. verify_integrity=True makes
# pandas raise if two rows share the same key tuple, so this also acts as a
# duplicate check.
index_columns = ["Area Code", "Item Code", "Element Code", "Year", "Flag"]
df.set_index(index_columns, inplace=True, verify_integrity=True)

In [None]:
# Preview the first rows again, now with the dropped columns gone and the
# multi-column index applied.
df.head()

## Add tables

In [None]:
from owid.catalog import utils

# Wrap the cleaned frame as a Table named "bulk", pass it through
# utils.underscore_table, and add the result to the dataset.
bulk_table = Table(df)
bulk_table.metadata.short_name = "bulk"
underscored_table = utils.underscore_table(bulk_table)
ds.add(underscored_table)

## Cleanup

In [None]:
# Delete the scratch directory created with tempfile.mkdtemp(), including
# everything extracted into it.
shutil.rmtree(tmp_dir)