In [1]:
# Directory in which the new dataset is created by the cells below.
dest_dir: str = "/tmp/faostat_rl"

In [25]:
from owid.catalog import Dataset, Table
from owid import walden
from etl.steps.data.converters import convert_walden_metadata

In [4]:
# Create a new, empty owid.catalog Dataset at dest_dir.
# NOTE(review): presumably this creates/initialises the directory on disk — confirm
# against Dataset.create_empty before re-running over an existing dataset.
ds = Dataset.create_empty(dest_dir)

In [6]:
# Open the Walden catalog; it is queried below for the FAOSTAT land-use entry.
cat = walden.Catalog()

In [17]:
# Look up the single catalog entry whose short_name is "faostat_rl",
# then display it so its metadata (source, dates, URLs, md5) can be inspected.
walden_ds = cat.find_one(short_name="faostat_rl")
walden_ds

Dataset(namespace='faostat', short_name='faostat_rl', name='Land, Inputs and Sustainability: Land Use - FAO (2021)', description='The FAOSTAT Land Use domain contains data on forty-four categories of land use, irrigation and agricultural practices, relevant to monitor agriculture, forestry and fisheries activities at national, regional and global level. Data are available by country and year, with global coverage and annual updates.', source_name='Food and Agriculture Organization of the United Nations', url='http://www.fao.org/faostat/en/#data', date_accessed='2021-11-05', file_extension='zip', license_url='http://www.fao.org/contact-us/terms/db-terms-of-use/en', source_data_url='https://fenixservices.fao.org/faostat/static/bulkdownloads/Inputs_LandUse_E_All_Data_(Normalized).zip', md5='f674d66f36b564c17cc6f96203dceb0d', publication_year=2021, publication_date='2021-06-17', owid_data_url='http://walden.nyc3.digitaloceanspaces.com/faostat/2021-06-17/Inputs_LandUse_E_All_Data_(Normalize

In [None]:
# Set the dataset's metadata to the result of converting the Walden entry's
# metadata, then call save() on the dataset.
# NOTE(review): this cell has no execution count in the saved notebook — confirm it
# is actually run (Restart & Run All) before the table is added to the dataset.
ds.metadata = convert_walden_metadata(walden_ds)
ds.save()

In [26]:
# Show the local path of the Walden entry; this is the zip archive opened further down.
walden_ds.local_path

'/Users/bmacwell/.owid/walden/faostat/2021-06-17/faostat_rl.zip'

In [29]:
import tempfile

In [31]:
# Create a fresh temporary directory to unpack the archive into, and display its path.
# NOTE(review): the name `dir` shadows the built-in dir() for the rest of the session;
# later cells refer to it by this name, so any rename has to be done across all of them.
# NOTE(review): a directory made with mkdtemp() is not removed automatically — nothing
# in this notebook cleans it up.
dir = tempfile.mkdtemp()
dir

'/var/folders/7w/439b5c190j90py3_7k6xw8g00000gn/T/tmp80wo1_p_'

In [32]:
import zipfile

In [36]:
zf = zipfile.ZipFile(walden_ds.local_path)
zf.extractall(dir)
!ls {dir}

Inputs_LandUse_E_All_Data_(Normalized).csv
Inputs_LandUse_E_Flags.csv


In [39]:
import pandas as pd
import os

In [41]:
# Load the normalized land-use CSV that was extracted into the temporary directory.
# The file is read with the latin1 encoding.
df = pd.read_csv(
    filepath_or_buffer=os.path.join(
        dir,
        "Inputs_LandUse_E_All_Data_(Normalized).csv",
    ),
    encoding="latin1",
)

In [42]:
# Preview the first rows of the raw CSV as loaded (codes, labels, unit, value, flag).
df.head()

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,2,Afghanistan,6600,Country area,5110,Area,1961,1961,1000 ha,65286.0,W
1,2,Afghanistan,6600,Country area,5110,Area,1962,1962,1000 ha,65286.0,W
2,2,Afghanistan,6600,Country area,5110,Area,1963,1963,1000 ha,65286.0,W
3,2,Afghanistan,6600,Country area,5110,Area,1964,1964,1000 ha,65286.0,W
4,2,Afghanistan,6600,Country area,5110,Area,1965,1965,1000 ha,65286.0,W


In [43]:
# Index the rows by (Area, Item, Element, Year).
# verify_integrity=True makes pandas raise if that key combination is duplicated.
# `df` is rebound to the returned frame rather than mutated with inplace=True, so the
# frame object previewed earlier is not silently changed underneath the reader.
df = df.set_index(["Area", "Item", "Element", "Year"], verify_integrity=True)

In [44]:
# Preview the frame again to check the new (Area, Item, Element, Year) index.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Area Code,Item Code,Element Code,Year Code,Unit,Value,Flag
Area,Item,Element,Year,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Afghanistan,Country area,Area,1961,2,6600,5110,1961,1000 ha,65286.0,W
Afghanistan,Country area,Area,1962,2,6600,5110,1962,1000 ha,65286.0,W
Afghanistan,Country area,Area,1963,2,6600,5110,1963,1000 ha,65286.0,W
Afghanistan,Country area,Area,1964,2,6600,5110,1964,1000 ha,65286.0,W
Afghanistan,Country area,Area,1965,2,6600,5110,1965,1000 ha,65286.0,W


In [45]:
# Drop the numeric code columns; the human-readable Area/Item/Element/Year labels
# are already carried by the index.
# `df` is rebound to the returned frame rather than mutated with inplace=True.
df = df.drop(columns=["Area Code", "Item Code", "Element Code", "Year Code"])

In [46]:
# Preview the frame after dropping the code columns: Unit, Value and Flag remain.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unit,Value,Flag
Area,Item,Element,Year,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Afghanistan,Country area,Area,1961,1000 ha,65286.0,W
Afghanistan,Country area,Area,1962,1000 ha,65286.0,W
Afghanistan,Country area,Area,1963,1000 ha,65286.0,W
Afghanistan,Country area,Area,1964,1000 ha,65286.0,W
Afghanistan,Country area,Area,1965,1000 ha,65286.0,W


In [47]:
# Wrap the cleaned DataFrame in an owid.catalog Table so it can be added to the dataset.
t = Table(df)

In [48]:
# Preview the Table to check it carries the same index and columns as the DataFrame.
t.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unit,Value,Flag
Area,Item,Element,Year,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Afghanistan,Country area,Area,1961,1000 ha,65286.0,W
Afghanistan,Country area,Area,1962,1000 ha,65286.0,W
Afghanistan,Country area,Area,1963,1000 ha,65286.0,W
Afghanistan,Country area,Area,1964,1000 ha,65286.0,W
Afghanistan,Country area,Area,1965,1000 ha,65286.0,W


In [52]:
# Inspect the Table's metadata before filling any of it in.
t.metadata

TableMeta(short_name=None, title=None, description=None, dataset=None, primary_key=[])

In [61]:
# Give the table a short_name.
# NOTE(review): presumably ds.add() needs a short_name to name the stored table — confirm.
t.metadata.short_name = "land_use"

In [62]:
# Add the land_use table to the dataset.
# NOTE(review): presumably this writes the table into the dataset directory — confirm
# against owid.catalog Dataset.add.
ds.add(t)