## Parameters

In [1]:
# Directory in which the garden dataset is created (passed to
# `catalog.Dataset.create_empty` further down).
# NOTE(review): the default path mentions "faostat" although this notebook
# processes the Living Planet data — looks like a copy-paste leftover;
# presumably the value is overridden when the notebook is run as a step — confirm.
dest_dir = "/tmp/faostat_rl"

## Imports

In [2]:
from owid import catalog
import json
import re
import wikipedia
from pathlib import Path

In [3]:
from etl.paths import BASE_DIR as base_path

## Load meadow data

In [4]:
# Resolve the meadow dataset directory relative to the ETL base directory
# and open it as a catalog dataset.
meadow_path = base_path.joinpath("data/meadow/living_planet/2020-09-10/lpd")
lpd_meadow = catalog.Dataset(meadow_path)

#### Load the table by name (the name of the feather file in the above directory)

In [7]:
# Look up the "living_planet_database" table in the meadow dataset.
df = lpd_meadow["living_planet_database"]

In [8]:
# Show the dataset's repr (its path and metadata) as the cell output.
lpd_meadow

Dataset(path='/Users/mojmir/projects/etl/data/meadow/living_planet/2020-09-10/lpd', metadata=DatasetMeta(namespace='living_planet', short_name='lpd', title='Living Planet Database - (2020-09)', description='The Living Planet Database contains tens of thousands of vertebrate population time-series from around the world. It is the largest collection of its kind, and is publicly available, making it an invaluable tool for both research and conservation. This dataset contains time-series of population abundance data for vertebrate species spanning years between 1970 and 2016. These data were used in the Living Planet Report 2020. Confidential records that cannot be shared have been removed from this data set.', sources=[Source(name='The Zoological Society of London', description=None, url='http://stats.livingplanetindex.org/', source_data_url=None, owid_data_url='http://nyc3.digitaloceanspaces.com/walden/living_planet/2020-09-10/lpd.zip', date_accessed='2021-11-22', publication_date='2020-

Run the country harmonizer tool to produce the country-name mapping file:

`.venv/bin/harmonize <path/to/input.feather> <country-field> <path/to/output.mapping.json>`
   

In [9]:
# Location of the country mapping JSON inside the ETL repository.
# It is used below to translate the values of the "country" column.
filename = (
    base_path
    / "etl/steps/data/garden/living_planet/2020-09-10/living_planet.mapping.json"
)

# Read the file explicitly as UTF-8: without `encoding`, `open` falls back to
# the platform's locale encoding, which can mis-decode non-ASCII country names.
with open(filename, encoding="utf-8") as f:
    cm = json.load(f)

In [10]:
# Preview the first rows of the meadow table.
df.head()

Unnamed: 0,id,binomial,citation,class,order,family,genus,species,subspecies,common_name,location,country,latitude,longitude,specific_location,units,year,value
0,4,Copsychus_sechellarum,"Komdeur, J. (1996). Breeding of the Seychelles...",Aves,Passeriformes,Muscicapidae,Copsychus,sechellarum,,Seychelles magpie-robin,"Fregate Island, Seychelles",Seychelles,-4.58333,55.933331,Yes,Number of individuals,1950,
1,5,Falco_punctatus,"Groombridge, J. J., Bruford, M.W., Jones, C.G,...",Aves,Falconiformes,Falconidae,Falco,punctatus,,Mauritius kestrel,"Western Population, Mauritius",Mauritius,-20.299999,57.583328,No,Number of breeding pairs,1950,
2,6,Pternistis_ochropectus,"WPA/Birdlife/ SSC Partridge, Q. a. F. S. G. (2...",Aves,Galliformes,Phasianidae,Pternistis,ochropectus,,Djibouti francolin,"Foret du Day, Djibouti",Djibouti,11.76667,42.658058,Yes,Number of individuals,1950,
3,7,Gyps_coprotheres,"WWF-SA (2000). Cape griffon.""""",Aves,Accipitriformes,Accipitridae,Gyps,coprotheres,,Cape vulture,Botswana,Botswana,-22.0,24.0,No,Individuals,1950,
4,8,Gyps_coprotheres,"WWF-SA (2000). Cape griffon.""""",Aves,Accipitriformes,Accipitridae,Gyps,coprotheres,,Cape vulture,Lesotho,Lesotho,-29.5,28.25,No,Number of individuals,1950,


In [12]:
# Translate each country through the mapping `cm`; a name with no entry in
# the mapping is kept as it is.
harmonized_country = df["country"].apply(lambda name: cm.get(name, name))
df["country"] = harmonized_country

In [13]:
# Remove the record id, the citation and the taxonomy columns, which are
# not carried over into the garden table.
unused_columns = [
    "id",
    "class",
    "citation",
    "order",
    "family",
    "genus",
    "species",
    "subspecies",
]
df = df.drop(columns=unused_columns)

## Wikipedia name

In [15]:
# Unique binomial names with underscores turned into spaces
# (e.g. "Falco_punctatus" -> "Falco punctatus").
# `regex=False` states the literal replacement explicitly, so the result does
# not depend on the default of `regex`, which differs between pandas versions.
binom = df["binomial"].drop_duplicates().str.replace("_", " ", regex=False)
binom

0            Copsychus sechellarum
1                  Falco punctatus
2           Pternistis ochropectus
3                 Gyps coprotheres
17                  Gyps africanus
                   ...            
15336             Smilisca phaeota
15337    Strabomantis bufoniformis
15338           Teratohyla spinosa
15339                Cottus specus
15341            Anguilla japonica
Name: binomial, Length: 4182, dtype: object

In [16]:
# Columns used as the table's index: species, location and measurement unit.
# NOTE(review): "year" stays a regular column, so this index is presumably
# not unique per row — confirm that is intended.
index_columns = [
    "binomial",
    "common_name",
    "location",
    "country",
    "latitude",
    "longitude",
    "specific_location",
    "units",
]
# Done in place so `df` remains the same object.
df.set_index(index_columns, inplace=True)

## Make the garden dataset

In [17]:
# Create a new, empty dataset at `dest_dir` to hold the garden data.
lpd_garden = catalog.Dataset.create_empty(dest_dir)

In [18]:
# Reuse the meadow dataset's metadata for the garden dataset, then save it.
# NOTE(review): this assigns the same metadata object rather than a copy —
# presumably fine because it is not modified afterwards in this notebook; confirm.
lpd_garden.metadata = lpd_meadow.metadata
lpd_garden.save()

In [19]:
# Add the harmonized, indexed table to the garden dataset.
lpd_garden.add(df)