# FAO Land Use (2021)

## Parameters

In [1]:
# Directory in which the garden dataset is created (passed to
# catalog.Dataset.create_empty further down).
# NOTE(review): sits under the "Parameters" heading, so presumably meant to be
# overridden when the notebook is executed as a step — confirm.
dest_dir = "/tmp/faostat_rl"

## Imports

In [2]:
from owid import catalog
import json
from pathlib import Path

In [3]:
from etl.paths import BASE_DIR as base_path

## Load meadow data

In [4]:
# Locate the meadow dataset on disk, relative to the repository base path.
meadow_path = base_path.joinpath("data/meadow/faostat/2021-06-17/faostat_rl")

# Open the dataset and pull out the single table this notebook works on.
fao_meadow = catalog.Dataset(meadow_path)
df = fao_meadow["land_use"]

In [5]:
# Display the metadata attached to the table loaded from the meadow dataset.
df.metadata

TableMeta(short_name='land_use', title=None, description=None, dataset=DatasetMeta(namespace='faostat', short_name='faostat_rl', title='Land, Inputs and Sustainability: Land Use - FAO (2021)', description='The FAOSTAT Land Use domain contains data on forty-four categories of land use, irrigation and agricultural practices, relevant to monitor agriculture, forestry and fisheries activities at national, regional and global level. Data are available by country and year, with global coverage and annual updates.', sources=[Source(name='Food and Agriculture Organization of the United Nations', description=None, url='http://www.fao.org/faostat/en/#data', source_data_url='https://fenixservices.fao.org/faostat/static/bulkdownloads/Inputs_LandUse_E_All_Data_(Normalized).zip', owid_data_url='http://walden.nyc3.digitaloceanspaces.com/faostat/2021-06-17/Inputs_LandUse_E_All_Data_(Normalized).zip', date_accessed='2021-11-05', publication_date='2021-06-17', publication_year=2021)], licenses=[License(

In [6]:
# Preview the first rows of the table as loaded from meadow.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unit,Value,Flag
Area,Item,Element,Year,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Afghanistan,Country area,Area,1961,1000 ha,65286.0,W
Afghanistan,Country area,Area,1962,1000 ha,65286.0,W
Afghanistan,Country area,Area,1963,1000 ha,65286.0,W
Afghanistan,Country area,Area,1964,1000 ha,65286.0,W
Afghanistan,Country area,Area,1965,1000 ha,65286.0,W


## Fix country names

In [7]:
# Path of the JSON file holding the country-name mapping for this dataset.
filename = (
    base_path / "etl/steps/data/garden/faostat/2021-06-17/faostat_rl.countries.json"
)

# JSON is UTF-8 by specification; pass the encoding explicitly so that names
# with non-ASCII characters decode the same way regardless of the platform's
# default locale encoding.
with open(filename, encoding="utf-8") as f:
    # cm: dict of {name as it appears in the source data: name to use instead}
    cm = json.load(f)

In [8]:
# Display the loaded country-name mapping for a visual check.
cm

{'Afghanistan': 'Afghanistan',
 'Africa': 'Africa',
 'Albania': 'Albania',
 'Algeria': 'Algeria',
 'American Samoa': 'American Samoa',
 'Andorra': 'Andorra',
 'Angola': 'Angola',
 'Anguilla': 'Anguilla',
 'Antigua and Barbuda': 'Antigua and Barbuda',
 'Argentina': 'Argentina',
 'Armenia': 'Armenia',
 'Aruba': 'Aruba',
 'Asia': 'Asia',
 'Australia': 'Australia',
 'Austria': 'Austria',
 'Azerbaijan': 'Azerbaijan',
 'Bahamas': 'Bahamas',
 'Bahrain': 'Bahrain',
 'Bangladesh': 'Bangladesh',
 'Barbados': 'Barbados',
 'Belarus': 'Belarus',
 'Belgium': 'Belgium',
 'Belize': 'Belize',
 'Benin': 'Benin',
 'Bermuda': 'Bermuda',
 'Bhutan': 'Bhutan',
 'Bosnia and Herzegovina': 'Bosnia and Herzegovina',
 'Botswana': 'Botswana',
 'Brazil': 'Brazil',
 'British Virgin Islands': 'British Virgin Islands',
 'Bulgaria': 'Bulgaria',
 'Burkina Faso': 'Burkina Faso',
 'Burundi': 'Burundi',
 'Cambodia': 'Cambodia',
 'Cameroon': 'Cameroon',
 'Canada': 'Canada',
 'Cayman Islands': 'Cayman Islands',
 'Central Afr

In [9]:
# Preview the table again before touching its index.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unit,Value,Flag
Area,Item,Element,Year,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Afghanistan,Country area,Area,1961,1000 ha,65286.0,W
Afghanistan,Country area,Area,1962,1000 ha,65286.0,W
Afghanistan,Country area,Area,1963,1000 ha,65286.0,W
Afghanistan,Country area,Area,1964,1000 ha,65286.0,W
Afghanistan,Country area,Area,1965,1000 ha,65286.0,W


In [10]:
# we need to move Area out of the index to change it
# NOTE: this mutates df in place, so the cell is not idempotent — reload df
# before running it a second time.
df.reset_index(inplace=True)

In [11]:
# Confirm that the former index levels are now ordinary columns.
df.head()

Unnamed: 0,Area,Item,Element,Year,Unit,Value,Flag
0,Afghanistan,Country area,Area,1961,1000 ha,65286.0,W
1,Afghanistan,Country area,Area,1962,1000 ha,65286.0,W
2,Afghanistan,Country area,Area,1963,1000 ha,65286.0,W
3,Afghanistan,Country area,Area,1964,1000 ha,65286.0,W
4,Afghanistan,Country area,Area,1965,1000 ha,65286.0,W


In [12]:
# if we have something in the mapping, use that
# otherwise leave the name the same
area = df["Area"].apply(lambda a: cm.get(a, a))

# Write the mapped names back into the table. Without this assignment the
# mapping is computed but never applied, and the table keeps the unmapped
# names when it is later re-indexed and saved.
df["Area"] = area

In [13]:
# now we need to put the index back; luckily it's kept in the metadata
# (displayed here to check which columns will become the index)
df.metadata.primary_key

['Area', 'Item', 'Element', 'Year']

In [14]:
# Restore the index from the primary-key columns recorded in the table
# metadata. Mutates df in place.
df.set_index(df.metadata.primary_key, inplace=True)

## Make the garden dataset

In [15]:
# Create the output (garden) dataset at dest_dir.
fao_garden = catalog.Dataset.create_empty(dest_dir)

In [16]:
# Reuse the meadow dataset's metadata for the garden dataset.
# NOTE(review): this assigns the same metadata object rather than a copy, so
# both datasets now reference it — confirm that sharing is intended.
fao_garden.metadata = fao_meadow.metadata
# presumably persists the dataset metadata under dest_dir — confirm against
# the catalog.Dataset API
fao_garden.save()

In [17]:
# Add the re-indexed land-use table to the garden dataset.
fao_garden.add(df)