In [1]:
import numpy as np
import xarray as xr
import pandas as pd

# Matching Poore & Nemecek data to FAO item base

The item list used in the Poore & Nemecek (2018) LCA data does not match the item base of the FAOSTAT food balance sheets. In this notebook we match the two item bases under a series of assumptions, some of which are specific to particular item groups or individual food items.

This way, analysis requiring LCA values from the FAOSTAT food items can be done without going through the matching process every time.

The matching is done in two steps:

- 1: Matching values are read from the matching matrix file and assigned via a matrix multiplication.
- 2: Items with an "alternative" value defined in the matching matrix are overridden by the alternative value.

After this, the resulting dataset is saved.

The matching matrix is stored in this repository and can be modified if a different set of assumptions is desired.

In [2]:
# Matching matrix between the two item bases; the file is colon-separated
matching = pd.read_csv("PN18 and FAOSTAT list matching matrix.csv", sep=":")
# Impact dataset on the original (Poore & Nemecek 2018) item base
PN18 = xr.open_dataset("data/PN18.nc")

In [3]:
def match(impact, matching_matrix):
    """Map an impact dataset onto a new item base using a matching matrix.

    Every data variable of ``impact`` is multiplied by the matching matrix,
    so each output item receives a weighted sum of the input items.

    Parameters
    ----------
    impact : xarray.Dataset
        Dataset with an ``Item`` coordinate listing the original items.
        Each data variable is expected to be one-dimensional along ``Item``.
    matching_matrix : pandas.DataFrame
        Defines how items are matched from the input to the output item
        base. The ``"Item Code"`` column holds the new item list (one row per
        output item); every other column is named after an original item and
        holds the scaling applied to that item's impact. The original-item
        columns must appear in the same order as ``impact.Item``. Missing
        entries are treated as 0.

    Returns
    -------
    dataset_out : xarray.Dataset
        Dataset indexed by ``Item`` (the values of the ``"Item Code"``
        column) containing one variable per data variable of ``impact``.

    Raises
    ------
    AssertionError
        If the original-item columns of ``matching_matrix`` do not equal
        ``impact.Item`` in number, value or order.
    """

    out_items = matching_matrix["Item Code"]

    # Everything except the item code column is part of the weight matrix;
    # dropping by name keeps this consistent with the lookup above.
    weights = matching_matrix.drop(columns="Item Code")

    in_items = impact.Item.values

    # array_equal returns False when the lengths differ, so a size mismatch
    # is reported with the message below rather than as a broadcasting error.
    assert np.array_equal(in_items, weights.columns), "Input items do not match assignment matrix"

    mat = weights.fillna(0).to_numpy()

    dataset_out = xr.Dataset(
        coords = dict(
            Item=("Item", out_items),
        )
    )

    # (n_out, n_in) @ (n_in,) -> (n_out,) for each impact variable
    matched = {
        var: ("Item", np.matmul(mat, impact[var].values))
        for var in impact.data_vars
    }

    return dataset_out.assign(matched)

In [4]:
# Per-item override values; NaN marks items without an alternative value
alternative = matching["Alternative"]
alt_mask = alternative.notna()

# Build the numeric matching matrix as a new object instead of mutating
# `matching` in place, so this cell can be re-run without a KeyError on an
# already-dropped "Alternative" column.
matching_matrix = matching.drop(columns="Alternative").fillna(0)

data = match(PN18, matching_matrix)


In [5]:
# Display the dataset returned by match()
data

In [6]:

# Items that have an "Alternative" entry take that value in place of the
# matrix-derived GHG figure. The write relies on `.values` exposing the
# variable's underlying array, so `data` itself is updated.
ghg_values = data["GHG Emissions (IPCC 2013)"].values
ghg_values[alt_mask] = alternative[alt_mask]
data

In [7]:
# Bring in the additional item metadata carried by the FAOSTAT dataset
FAOSTAT = xr.open_dataset("../food/data/FAOSTAT.nc").load()

# Use a single (Region, Year) slice of 'stock' as a template: fill it with
# ones and discard the scalar coordinates left behind by the selection
ones = xr.ones_like(
    FAOSTAT["stock"].sel(Region=229, Year=2020)
).drop_vars(["Year", "Region", "Region_name"])

# Multiplying by the all-ones array adds the template's label coords to `data`
data = data * ones

data

In [8]:
# Write the dataset on the new item base to a netCDF file
data.to_netcdf("data/PN18_FAOSTAT.nc")