# Create the regionalized version of EF 3.1 in brightway

This notebook should be run twice: 
- a first time to create the updated spatialized biosphere database with `name_spatialized_biosphere_db = 'biosphere3_spatialized_flows'`, `write_method_bw = False` and `write_new_spatialized_biosphere = True`. This will create the spatialized biosphere database in the brightway project, as well as the associated pickle file.
- and a second time to create the regionalized methods with `name_spatialized_biosphere_db = 'biosphere3_spatialized_flows_EF'`, `write_method_bw = True` and `write_new_spatialized_biosphere = False`. This will create the EF 3.1 regionalized method in the brightway project, as well as the associated bw2package file. This second iteration requires the adjusted spatialized biosphere database to be present in the brightway project.

In [1]:
import bw2data as bd
import bw2io as bi
import numpy as np
import pandas as pd
from mescal import *
import os
import collections

## User inputs

In [2]:
# Select the brightway project in which the databases and methods used below are read and written
bd.projects.set_current('ecoinvent3.9.1A')

In [3]:
# Name of the spatialized biosphere database loaded below (see the two-run instructions at the top of the notebook)
name_spatialized_biosphere_db = 'biosphere3_spatialized_flows_EF'

In [4]:
# write_method_bw gates the registration of the regionalized methods, the bw2package export and the geography mapping export
write_method_bw = True
# write_new_spatialized_biosphere gates the writing of the filtered spatialized biosphere database to brightway
write_new_spatialized_biosphere = False

## Set up

In [5]:
# Current working directory, used to build the export folder of the bw2package file
cwd = os.getcwd()

In [6]:
# EF 3.1 characterization factors. With keep_default_na=False and na_values='', only empty cells are read as NaN,
# so literal strings such as 'NA' are kept as text.
ef_cfs = pd.read_excel('data/EF-LCIAMethod_CF(EF-v3.1).xlsx', sheet_name='lciamethods_CF', keep_default_na=False, na_values='')
# Mapping between EF flow names (column 'ef') and biosphere flow names (column 'biosphere')
ef_biosphere_mapping = pd.read_csv('data/EF31_biosphere_mapping.csv')

In [7]:
# One entry per EF flow name, holding the list of biosphere flow names it maps to (in file order)
ef_biosphere_mapping_dict = {
    ef_name: list(biosphere_names)
    for ef_name, biosphere_names in ef_biosphere_mapping.groupby('ef')['biosphere']
}

In [8]:
# Load the biosphere and spatialized biosphere databases 
# NOTE(review): Database is not imported by name in this notebook; presumably it comes from `from mescal import *`
biosphere_db_spatialized = Database(name_spatialized_biosphere_db)
biosphere_db = Database('biosphere3')

Getting activity data


100%|██████████| 30207/30207 [00:00<00:00, 128058.35it/s]


Adding exchange data to activities


0it [00:00, ?it/s]


Filling out exchange data


100%|██████████| 30207/30207 [00:00<?, ?it/s]


Loaded biosphere3_spatialized_flows_EF from brightway!
Getting activity data


100%|██████████| 4718/4718 [00:00<00:00, 302150.25it/s]


Adding exchange data to activities


0it [00:00, ?it/s]


Filling out exchange data


100%|██████████| 4718/4718 [00:00<?, ?it/s]

Loaded biosphere3 from brightway!





In [9]:
# Lookups of biosphere flows by name; they are queried below with (name, categories, database) keys
biosphere_db_dict_name = biosphere_db.list_to_dict('name', 'biosphere')
biosphere_db_spatialized_dict_name = biosphere_db_spatialized.list_to_dict('name', 'biosphere')

In [10]:
# Lookups of biosphere flows by code; they are queried below with (database, code) keys
biosphere_db_dict_code = biosphere_db.list_to_dict('code', 'biosphere')
biosphere_db_spatialized_dict_code = biosphere_db_spatialized.list_to_dict('code', 'biosphere')

## Correct typing mistake in the EF excel file

In [11]:
# Display the water use CFs that have no location (i.e., the global values)
ef_cfs[(ef_cfs.LCIAMethod_name == 'Water use') & (ef_cfs.LCIAMethod_location.isna())]

Unnamed: 0,FLOW_uuid,FLOW_name,LCIAMethod_uuid EF3.1,LCIAMethod_name,CF EF3.1,LCIAMethod_location,FLOW_class0,FLOW_class1,FLOW_class2,LCIAMethod_derivation,LCIAMethod_direction
156733,5fdac403-9f2c-4a10-b8d6-5367cc9d2d9b,freshwater,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,42.95,,Resources,Resources from water,Renewable material resources from water,Unknown derivation,Input
156930,4f462198-40cd-4184-8733-86648a20dc3f,ground water,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,42.95,,Resources,Resources from water,Renewable material resources from water,Unknown derivation,Input
157213,c506b970-7b92-452f-8d6f-05d4f203d958,lake water,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,42.95,,Resources,Resources from water,Renewable material resources from water,Unknown derivation,Input
157379,805a7346-1664-4483-afe3-4b224be5e361,river water,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,42.95,,Resources,Resources from water,Renewable material resources from water,Unknown derivation,Input
157608,5e50fc01-19c6-4377-a1cc-bc65a12498ea,Water,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,-42.95,,Emissions,Emissions to water,Emissions to fresh water,Unknown derivation,Output
157838,a3876d9b-a3e8-4680-861a-4e08642f1392,Water,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,-42.955,,Emissions,Emissions to water,"Emissions to water, unspecified",Unknown derivation,Output
158149,419682fe-60fb-4b43-be89-bf2824b51104,water,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,42.95,,Resources,Resources from water,Renewable material resources from water,Unknown derivation,Input
158332,e2f7117d-06a4-49d8-9f5c-ae1a1f18ffb2,Water from cooling,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,-42.95,,Emissions,Emissions to water,Emissions to fresh water,Unknown derivation,Output
158447,73fb4dab-b979-4dcc-8616-2193fc154255,Water from turbine,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,-42.95,,Emissions,Emissions to water,Emissions to fresh water,Unknown derivation,Output
158617,21868f36-62ab-4e8e-98ed-7106228c17be,Water to Cooling,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,42.95,,Resources,Resources from water,Renewable material resources from water,Unknown derivation,Input


Flow a3876d9b-a3e8-4680-861a-4e08642f1392 should have a CF of -42.95 for water use (global average), but it has a CF of -42.955.

In [12]:
# Index label of the first row of flow a3876d9b-... that has no location (raises IndexError if there is none)
index_water_flow_to_change = ef_cfs[(ef_cfs.FLOW_uuid == 'a3876d9b-a3e8-4680-861a-4e08642f1392') & (ef_cfs.LCIAMethod_location.isna())].index[0]

In [13]:
# Change the global CF value of the a3876d9b-a3e8-4680-861a-4e08642f1392 water flow
# from -42.955 to -42.95, the value used by the other global water emission flows
ef_cfs.at[index_water_flow_to_change, 'CF EF3.1'] = -42.95

In [14]:
# Display the corrected row to check the new CF value
ef_cfs[(ef_cfs.FLOW_uuid == 'a3876d9b-a3e8-4680-861a-4e08642f1392') & (ef_cfs.LCIAMethod_location.isna())]

Unnamed: 0,FLOW_uuid,FLOW_name,LCIAMethod_uuid EF3.1,LCIAMethod_name,CF EF3.1,LCIAMethod_location,FLOW_class0,FLOW_class1,FLOW_class2,LCIAMethod_derivation,LCIAMethod_direction
157838,a3876d9b-a3e8-4680-861a-4e08642f1392,Water,b2ad66ce-c78d-11e6-9d9d-cec0c932ce01,Water use,-42.95,,Emissions,Emissions to water,"Emissions to water, unspecified",Unknown derivation,Output


## Adapt the EF excel file to be compatible with ecoinvent

In [15]:
def ecoinvent_compatible_categories(name, cat0, cat1, cat2):
    """
    Translate an EF flow name and its three class levels into ecoinvent-style name and categories.

    Relies on the notebook-level `ef_biosphere_mapping_dict` (EF flow name -> list of biosphere flow names);
    when a name is mapped, the first biosphere name of the list is used.

    :param name: EF flow name (FLOW_name column)
    :param cat0: EF top-level class, one of 'Emissions', 'Land use' or 'Resources' (FLOW_class0 column)
    :param cat1: EF second-level class (FLOW_class1 column)
    :param cat2: EF third-level class (FLOW_class2 column)
    :return: tuple (name, cat0, cat1, cat2) where cat0 is returned unchanged and cat2 may be None
    :raises ValueError: if cat0 is not one of the three recognized classes
    """

    if name.lower() in ef_biosphere_mapping_dict:
        name = ef_biosphere_mapping_dict[name.lower()][0]

    if cat0 == 'Emissions':
        cat1 = cat1.replace('Emissions to ', '')
        cat2 = cat2.replace('Emissions to ', '')
        cat2 = cat2.replace('and', '+')

        # Water sub-compartments
        if cat2 == 'water, unspecified':
            cat2 = None
        elif cat2 == 'water, unspecified (long-term)':
            cat2 = 'ground-, long-term'
        elif cat2 == 'sea water':
            cat2 = 'ocean'
        elif cat2 == 'fresh water':
            cat2 = 'surface water'

        # Air and soil sub-compartments
        if cat2 == 'air, unspecified':
            cat2 = None
        elif cat2 == 'air, unspecified (long-term)':
            cat2 = 'low population density, long-term'
        elif cat2 == 'non-agricultural soil':
            cat2 = 'industrial'

        # Carbon origin suffixes
        if '(biogenic)' in name:
            name = name.replace(' (biogenic)', ', non-fossil')
        elif '(fossil)' in name:
            name = name.replace(' (fossil)', ', fossil')
        elif '(land use change)' in name:
            name = name.replace(' (land use change)', ', to soil or biomass stock')

        if 'Particles' in name:
            # str.replace returns a new string: the result must be assigned, otherwise the renaming is lost.
            # The second replace avoids a double space when 'Particles' was already followed by a space.
            name = name.replace('Particles', 'Particulate Matter, ').replace(',  ', ', ')

            if '(PM10)' in name:
                name = name.replace('(PM10)', '> 2.5 um and < 10um')
            elif '(PM0.2 - PM2.5)' in name:
                name = name.replace('(PM0.2 - PM2.5)', '< 2.5 um')

    elif cat0 == 'Land use':

        # Land flows are mapped on their land type, i.e., the name without its 'from '/'to ' prefix
        if ('from ' in name) and (name.replace('from ', '') in ef_biosphere_mapping_dict):
            name = f"from {ef_biosphere_mapping_dict[name.replace('from ', '')][0]}"

        elif ('to ' in name) and (name.replace('to ', '') in ef_biosphere_mapping_dict):
            name = f"to {ef_biosphere_mapping_dict[name.replace('to ', '')][0]}"

        # The kind of land flow (cat1 without its 'Land ' prefix) becomes the prefix of the flow name
        cat1 = cat1.replace('Land ', '')
        name = f'{cat1}, {name}'
        cat1 = 'natural resource'
        cat2 = 'land'

    elif cat0 == 'Resources':
        cat2 = f"in {cat1.replace('Resources from ', '')}"
        cat1 = 'natural resource'

    else:
        raise ValueError(f"Category0 {cat0} not recognized")

    return name, cat0, cat1, cat2

In [16]:
def ecoinvent_compatible_water_flows(name, cat0, cat1, cat2, lcia_cat):
    """
    Rename the flows of the 'water use' category and adapt their categories to ecoinvent.

    The flow name is always returned in lower case, whatever the impact category.

    :param name: flow name
    :param cat0: top-level class of the flow ('Resources', 'Emissions', ...)
    :param cat1: first category of the flow
    :param cat2: second category of the flow
    :param lcia_cat: name of the impact category (compared case-insensitively with 'water use')
    :return: tuple (name, cat1, cat2)
    """
    resource_names = {
        'ground water': 'water, well, in ground',
        'lake water': 'water, lake',
        'river water': 'water, river',
        'freshwater': 'water, unspecified natural origin',
        'water to cooling': 'water, cooling, unspecified natural origin',
        'water to turbine': 'water, turbine use, unspecified natural origin',
    }

    lowered = name.lower()

    # Flows of the other impact categories only get their name lower-cased
    if lcia_cat.lower() != 'water use':
        return lowered, cat1, cat2

    # Every water resource ends up in 'natural resource' without second category
    if cat0 == 'Resources':
        return resource_names.get(lowered, lowered), 'natural resource', None

    # Unspecified water emission: compartment 'water' without second category
    if cat0 == 'Emissions' and lowered == 'water' and cat2 == 'water, unspecified':
        return 'water', 'water', None

    return lowered, cat1, cat2

In [17]:
# Change the name and categories columns of the EF file to be ecoinvent-compliant 
# (row-wise call of ecoinvent_compatible_categories; the returned 4-tuple is expanded into the four columns)
ef_cfs[['FLOW_name', 'FLOW_class0', 'FLOW_class1', 'FLOW_class2']] = ef_cfs.apply(lambda x: ecoinvent_compatible_categories(x['FLOW_name'], x['FLOW_class0'], x['FLOW_class1'], x['FLOW_class2']), axis=1, result_type='expand')

In [18]:
# Rename the water use flows and adapt their categories (row-wise call of ecoinvent_compatible_water_flows).
# Note that this step also lower-cases the name of every flow, whatever its impact category.
ef_cfs[['FLOW_name', 'FLOW_class1', 'FLOW_class2']] = ef_cfs.apply(lambda x: ecoinvent_compatible_water_flows(x['FLOW_name'], x['FLOW_class0'], x['FLOW_class1'], x['FLOW_class2'], x['LCIAMethod_name']), axis=1, result_type='expand')

In [19]:
# When one EF land flow maps to several biosphere flows, only the first biosphere name has rows so far.
# The rows of that first name are copied for each of the other names.
ef_cfs = ef_cfs.drop_duplicates()
for mapped_names in ef_biosphere_mapping_dict.values():
    if len(mapped_names) < 2:
        continue

    first_name = mapped_names[0]
    df_to_add = ef_cfs[
        ef_cfs.FLOW_name.str.lower().isin([
            f'occupation, {first_name}',
            f'transformation, from {first_name}',
            f'transformation, to {first_name}',
        ])
    ].copy(deep=True)

    # Rename step by step (1st -> 2nd, 2nd -> 3rd, ...) and append the rows after each renaming
    for previous_name, next_name in zip(mapped_names, mapped_names[1:]):
        df_to_add['FLOW_name'] = df_to_add['FLOW_name'].apply(lambda x: x.replace(previous_name, next_name))
        ef_cfs = pd.concat([ef_cfs, df_to_add])

ef_cfs = ef_cfs.reset_index(drop=True)

In [20]:
# Add missing nitrate flows: the rows of nitrate to water (no second category) are copied to the 'ground-' category
is_nitrate_to_water = (
    (ef_cfs.FLOW_name.str.lower() == 'nitrate')
    & (ef_cfs.FLOW_class1 == 'water')
    & (ef_cfs.FLOW_class2.isna())
)
ef_cfs = pd.concat([ef_cfs, ef_cfs[is_nitrate_to_water].assign(FLOW_class2='ground-')])

In [21]:
def _rows_without_subcategory(flow_name, category):
    """Rows of the current ef_cfs for flow `flow_name` in category `category` that have no second category."""
    return ef_cfs[
        (ef_cfs.FLOW_name.str.lower() == flow_name)
        & (ef_cfs.FLOW_class1 == category)
        & (ef_cfs.FLOW_class2.isna())
    ]


# Add unspecified water flow (copy of 'water, unspecified natural origin' named 'water')
ef_cfs = pd.concat([
    ef_cfs,
    _rows_without_subcategory('water, unspecified natural origin', 'natural resource').assign(FLOW_name='water'),
])

# If needed, add missing water flows (copies from water flow without second category)
missing_subcategories_resources = ['fossil well', 'in ground', 'in water']
water_resource_flows = ['water', 'water, lake', 'water, river', 'water, unspecified natural origin', 'water, cooling, unspecified natural origin', 'water, turbine use, unspecified natural origin', 'water, well, in ground']
for water_flow in water_resource_flows:
    resource_rows = _rows_without_subcategory(water_flow, 'natural resource')
    for subcat in missing_subcategories_resources:
        ef_cfs = pd.concat([ef_cfs, resource_rows.assign(FLOW_class2=subcat)])

# Same for the water emission flow
missing_subcategories_emissions = ['fossil well', 'ground-', 'ground-, long-term']
emission_rows = _rows_without_subcategory('water', 'water')
for subcat in missing_subcategories_emissions:
    ef_cfs = pd.concat([ef_cfs, emission_rows.assign(FLOW_class2=subcat)])

ef_cfs = ef_cfs.reset_index(drop=True)

In [22]:
# Add a GLO row in addition to the normal one (with no location): every row without location is copied with 'GLO'
rows_without_location = ef_cfs[ef_cfs.LCIAMethod_location.isna()]
ef_cfs = pd.concat([ef_cfs, rows_without_location.assign(LCIAMethod_location='GLO')])

In [23]:
# Add missing land flows, their CF will be the one of their equivalent unspecified flow.
land_flows_ef = list(ef_cfs[ef_cfs.LCIAMethod_name == 'Land use'].FLOW_name.unique())  # list of land flows in EF
land_flows_ei = list(set([i['name'].lower() for i in biosphere_db.db_as_list if 'Transformation' in i['name'] or 'Occupation' in i['name']]))  # list of land flows in ecoinvent
land_flows_ei_not_in_ef = list(set(land_flows_ei) - set(land_flows_ef))  # list of land flows in ecoinvent but not in EF
land_flows_ei_not_in_ef = [i for i in land_flows_ei_not_in_ef if '(non-use)' not in i]  # remove the non-use flows

# (marker searched in the flow name, name of the unspecified flow whose rows are copied), checked in this order
land_flow_templates = (
    ('transformation, to ', 'transformation, to unspecified'),
    ('transformation, from ', 'transformation, from unspecified'),
    ('occupation, ', 'occupation, unspecified'),
)

# The rows of each unspecified flow are extracted only once instead of filtering the whole table for every land flow
unspecified_rows = {}
land_rows_to_add = []

for land_flow in land_flows_ei_not_in_ef:
    for marker, unspecified_name in land_flow_templates:
        if marker in land_flow:
            break
    else:
        raise ValueError(f'Land flow {land_flow} not recognized')

    if unspecified_name not in unspecified_rows:
        unspecified_rows[unspecified_name] = ef_cfs[
            (ef_cfs.FLOW_name.str.lower() == unspecified_name)
            & (ef_cfs.FLOW_class1 == 'natural resource')
            & (ef_cfs.FLOW_class2 == 'land')
        ]

    land_rows_to_add.append(unspecified_rows[unspecified_name].assign(FLOW_name=land_flow))

# Single concatenation (same row order as appending one flow at a time, without the repeated copies of ef_cfs)
ef_cfs = pd.concat([ef_cfs, *land_rows_to_add])

## Create the new regionalized methods

In [24]:
def create_cf_list(df):
    """
    Convert rows of EF characterization factors into brightway-style CF entries.

    Rows without location are looked up in the 'biosphere3' database (`biosphere_db_dict_name`). Rows with a
    location are looked up in the spatialized biosphere database (`biosphere_db_spatialized_dict_name`), under the
    name '<flow name>, <location>'. Both lookups and `name_spatialized_biosphere_db` are notebook-level variables.

    Missing values (None or any NaN) in the location and second category columns are detected with pd.isna, and
    rows are read by position, so that the index of `df` does not matter.

    :param df: dataframe with columns 'FLOW_name', 'LCIAMethod_location', 'CF EF3.1', 'FLOW_class1' and
        'FLOW_class2'
    :return: tuple (reg_cfs, unmapped) where reg_cfs is a list of ((database, code), cf) and unmapped is a list of
        (database, flow name, categories) for the rows that have no corresponding biosphere flow
    """

    reg_cfs = []
    unmapped = []

    rows = zip(
        df['FLOW_name'].to_numpy(),
        df['LCIAMethod_location'].to_numpy(),
        df['CF EF3.1'].to_numpy(),
        df['FLOW_class1'].to_numpy(),
        df['FLOW_class2'].to_numpy(),
    )

    for raw_name, flow_location, flow_cf, category1, category2 in rows:

        flow_name = raw_name.lower()

        if pd.isna(flow_location):
            database_name = 'biosphere3'
            flows_by_name = biosphere_db_dict_name
        else:
            flow_name = f'{flow_name}, {flow_location}'
            database_name = name_spatialized_biosphere_db
            flows_by_name = biosphere_db_spatialized_dict_name

        # Names are stored with their first letter capitalized in the lookups
        flow_name = flow_name[0].upper() + flow_name[1:]

        # Flows without second category have a one-element categories tuple
        categories = (category1,) if pd.isna(category2) else (category1, category2)

        key = (flow_name, categories, database_name)
        if key in flows_by_name:
            reg_cfs.append(((database_name, flows_by_name[key]['code']), flow_cf))
        else:
            unmapped.append((database_name, flow_name, categories))

    return reg_cfs, unmapped

In [25]:
# Impact category names in brightway (lower case, used as lookup keys below)
# mapped to the LCIAMethod_name values of the EF file
method_names_bw_to_ef = {
    'acidification': 'Acidification',
    'climate change': 'Climate change',
    'climate change: biogenic': 'Climate change-Biogenic',
    'climate change: fossil': 'Climate change-Fossil',
    'climate change: land use and land use change': 'Climate change-Land use and land use change',
    'particulate matter formation': 'EF-particulate Matter',
    'ecotoxicity: freshwater': 'Ecotoxicity, freshwater',
    'ecotoxicity: freshwater, inorganics': 'Ecotoxicity, freshwater_inorganics',
    'ecotoxicity: freshwater, organics': 'Ecotoxicity, freshwater_organics',
    'eutrophication: marine': 'Eutrophication marine',
    'eutrophication: freshwater': 'Eutrophication, freshwater',
    'eutrophication: terrestrial': 'Eutrophication, terrestrial',
    'human toxicity: carcinogenic': 'Human toxicity, cancer',
    'human toxicity: carcinogenic, inorganics': 'Human toxicity, cancer_inorganics',
    'human toxicity: carcinogenic, organics': 'Human toxicity, cancer_organics',
    'human toxicity: non-carcinogenic': 'Human toxicity, non-cancer',
    'human toxicity: non-carcinogenic, inorganics': 'Human toxicity, non-cancer_inorganics',
    'human toxicity: non-carcinogenic, organics': 'Human toxicity, non-cancer_organics',
    'ionising radiation: human health': 'Ionising radiation, human health',
    'land use': 'Land use',
    'ozone depletion': 'Ozone depletion',
    'photochemical oxidant formation: human health': 'Photochemical ozone formation - human health',
    'energy resources: non-renewable': 'Resource use, fossils',
    'material resources: metals/minerals': 'Resource use, minerals and metals',
    'water scarcity': 'Water use'
}

In [26]:
# Create the new set of regionalized impact categories
ef_methods_bw = [i for i in bd.methods if i[0] == 'EF v3.1']

# Replace the water use method of EF by the one of IW+. Both are based on AWARE, but the one of IW+ is more complete.
ef_methods_bw.remove(('EF v3.1', 'water use', 'user deprivation potential (deprivation-weighted water consumption)'))
ef_methods_bw.append(('IMPACT World+ Midpoint 2.0.1', 'Midpoint', 'Water scarcity'))

# regionalized_methods: brightway impact category name -> list of CFs of the regionalized method
# unlinked: (database, flow name, categories) of the flows that could not be matched
regionalized_methods = {}
unlinked = []

# Names of the flows that have at least one regional (non-GLO) CF in the EF file, plus nitric oxide and nitrate
list_spatialized_flows = list(ef_cfs[
    (~ef_cfs.LCIAMethod_location.isna()) 
    & (ef_cfs.LCIAMethod_location != 'GLO') 
    & (~ef_cfs.LCIAMethod_name.isin(['Water use', 'Land use'])) # except water and land flows
].FLOW_name.unique()) + ['nitric oxide', 'nitrate']

# Acidification, terrestrial eutrophication, land use, and water use are the four categories to be fully regionalized (i.e., for every elementary flow). For the other categories, only a subset of flows is regionalized (the ones in common with the regionalized methods).
methods_to_regionalize = [
    'acidification',
    'eutrophication: terrestrial',
    'land use',
    'water scarcity',
] # categories for which regional CF are added, other categories are kept as they are in brightway

methods_not_regionalized_but_adjusted = [
    'eutrophication: marine', # ammonia, nitrogen dioxide, and nitrogen oxides flows only (with global value)
    'ecotoxicity: freshwater', # ammonia flows only (with global value)
    'ecotoxicity: freshwater, inorganics', # ammonia flows only (with global value)
    'human toxicity: non-carcinogenic', # ammonia flows only (with global value)
    'human toxicity: non-carcinogenic, inorganics', # ammonia flows only (with global value)
    'particulate matter formation', # ammonia, nitrogen ... (comment truncated in the original notebook - TODO complete the list of flows)
    'photochemical oxidant formation: human health', # nitrogen dioxide, nitrogen oxides, sulfur dioxide, and sulfur oxides flows only (with global value)
] # categories for which fake regional CF are added (all equal the global value)

for i in range(len(ef_methods_bw)):
    method = bd.Method(ef_methods_bw[i])
    if ef_methods_bw[i][0] == 'EF v3.1':
        method_name_bw = ef_methods_bw[i][1]
    else:
        method_name_bw = ef_methods_bw[i][2] # for the IW+ water scarcity method
    method_name = method_names_bw_to_ef[method_name_bw.lower()]  # name of the category in the EF file
    cfs = method.load()
    regionalized_cfs = []
    
    # Add missing land occupation and transformation elementary flows to the method
    # (the CF of 0 is a placeholder: for land use, the CFs written below are read from ef_cfs)
    if method_name_bw.lower() == 'land use':
        for land_flow in land_flows_ei_not_in_ef:
            land_flow = land_flow[0].upper() + land_flow[1:]
            cfs.append((('biosphere3', biosphere_db_dict_name[(land_flow, ('natural resource', 'land'), 'biosphere3')]['code']), 0))
    
    for j in cfs:
        # j[0] is the (database, code) key of the characterized flow, j[1] its CF
        flow = biosphere_db_dict_code[(j[0][0], j[0][1])]
        flow_name = flow['name'].lower()
        
        if method_name_bw.lower() not in methods_to_regionalize+methods_not_regionalized_but_adjusted:
            regionalized_cfs.append(j) # non-regionalized flows are kept as they are
            # if some of the regionalized flows appear in the other categories, we must add the new GLO elementary flow to that method
            if flow_name in list_spatialized_flows:
                flow_name = flow_name[0].upper() + flow_name[1:]
                flow_categories = flow['categories']
                # raises KeyError if the GLO flow does not exist in the spatialized biosphere database
                global_flow_code = biosphere_db_spatialized_dict_name[(f'{flow_name}, GLO', flow_categories, name_spatialized_biosphere_db)]['code']
                regionalized_cfs.append(((name_spatialized_biosphere_db, global_flow_code), j[1]))
                # print(method_name_bw, flow_name, flow_categories)
        else:
            if method_name_bw.lower() in methods_not_regionalized_but_adjusted:
                # for the "fake" regionalized methods, only some flows are regionalized
                if flow_name not in list_spatialized_flows:
                    regionalized_cfs.append(j)
                    continue
            
            # for the fully regionalized methods, all flows are regionalized
            # (the original CF j is not kept: all the CFs of the flow are taken from the rows of ef_cfs)
            flow_category_1 = flow['categories'][0]
            
            if len(flow['categories']) == 1:
                # flow without second category
                df_regionalized_flows = ef_cfs[
                    (ef_cfs['FLOW_name'].str.lower() == flow_name) 
                    & (ef_cfs['FLOW_class1'] == flow_category_1)
                    & (ef_cfs['FLOW_class2'].isna())
                    & (ef_cfs['LCIAMethod_name'] == method_name)
                ].reset_index()
                
                if len(df_regionalized_flows) == 0:
                    unlinked.append(('biosphere3', flow_name, (flow_category_1,)))
                
            else:
                flow_category_2 = flow['categories'][1]
            
                df_regionalized_flows = ef_cfs[
                    (ef_cfs['FLOW_name'].str.lower() == flow_name) 
                    & (ef_cfs['FLOW_class1'] == flow_category_1)
                    & (ef_cfs['FLOW_class2'] == flow_category_2)
                    & (ef_cfs['LCIAMethod_name'] == method_name)
                ].reset_index()
                
                if len(df_regionalized_flows) == 0:
                    unlinked.append(('biosphere3', flow_name, (flow_category_1, flow_category_2)))
            
            # reset_index() above gives the 0..n-1 index that create_cf_list iterates on
            new_regionalized_cfs, new_unlinked = create_cf_list(df_regionalized_flows)
            regionalized_cfs += new_regionalized_cfs
            unlinked += new_unlinked
    
    regionalized_methods[method_name_bw] = regionalized_cfs

In [27]:
# Check if some global flows are still unlinked.
# Only the entries of the 'biosphere3' database are kept, displayed as unique (name, categories) pairs
list(set([i[1:] for i in unlinked if 'biosphere3' == i[0]]))

[('water', ('water', 'ocean'))]

In [28]:
# Check if there are duplicates in the regionalized methods: every (flow, CF) entry must appear only once per method
for key, method_cfs in regionalized_methods.items():
    occurrences = collections.Counter(method_cfs)
    duplicates = [item for item in occurrences if occurrences[item] > 1]
    if duplicates:
        raise ValueError(f'Duplicates in {key} method: {duplicates}')

The water (water, ocean) flow is missing, but this is not a problem as its CF is 0. 

In [29]:
if write_method_bw:
    ef_water_use = ('water use', 'user deprivation potential (deprivation-weighted water consumption)')

    for method in ef_methods_bw:
        if method[2] == 'Water scarcity':
            # The CFs built from the IW+ method are written under the EF water use name,
            # with the unit of the original EF water use method
            regionalized_key = ('EF v3.1 regionalized',) + ef_water_use
            unit_source_key = ('EF v3.1',) + ef_water_use
            method_name = method[2]
        else:
            regionalized_key = ('EF v3.1 regionalized', method[1], method[2])
            unit_source_key = method
            method_name = method[1]

        ef_regionalized_method = bd.Method(regionalized_key)
        ef_regionalized_method.register(unit=bd.Method(unit_source_key).metadata['unit'])
        ef_regionalized_method.write(regionalized_methods[method_name])

In [30]:
# Method objects of all regionalized categories; the water use method is added at the end of the list
list_ef_regionalized_methods = [
    bd.Method(('EF v3.1 regionalized', method[1], method[2]))
    for method in ef_methods_bw
    if method[2] != 'Water scarcity'
] + [
    bd.Method(('EF v3.1 regionalized', 'water use', 'user deprivation potential (deprivation-weighted water consumption)'))
]

In [31]:
if write_method_bw:
    # Export the regionalized methods as a bw2package file in the 'results' folder of the working directory.
    # os.path.join (with a trailing empty component to keep the final separator) gives the same path as
    # cwd+'\\results\\' on Windows, and a valid path on the other operating systems.
    bi.package.BW2Package.export_objs(list_ef_regionalized_methods, filename='EF3.1_regionalized', folder=os.path.join(cwd, 'results', ''))

## Spatialized flows from regioinvent that are not used by EF regionalized methods

The spatialized biosphere database from regioinvent may contain spatialized elementary flows that are not used by any EF regionalized method. Those flows should be removed: otherwise, they would remain uncharacterized once introduced in the ecoinvent database, possibly leading to an underestimation of impacts, since the corresponding global (non-spatialized) flow may be characterized in EF. 

In [32]:
# Codes of the spatialized biosphere flows that are characterized by at least one regionalized method
all_regionalized_flows_in_ef = list(set(
    cf[0][1]
    for method_cfs in regionalized_methods.values()
    for cf in method_cfs
    if cf[0][0] == name_spatialized_biosphere_db
))

In [33]:
# Codes of all the flows of the loaded spatialized biosphere database
all_regionalized_flows_in_regioinvent = [i['code'] for i in biosphere_db_spatialized.db_as_list]

In [34]:
# Codes that are both in the spatialized biosphere database and in at least one regionalized method
used_spatialized_flows = list(set(all_regionalized_flows_in_regioinvent).intersection(all_regionalized_flows_in_ef))

In [35]:
# Keep only the spatialized flows that are characterized by at least one regionalized method.
# Codes are put in a set so that each membership test is a hash lookup instead of a scan of the whole list.
used_spatialized_flow_codes = set(used_spatialized_flows)
new_spatialized_biosphere_db = Database(db_as_list=[i for i in biosphere_db_spatialized.db_as_list if i['code'] in used_spatialized_flow_codes])

## Missing locations and flows 

In [36]:
# ecoinvent database, used to retrieve the activity locations and the biosphere flows actually used
ei_database = Database('ecoinvent-3.9.1-cutoff')

Loaded ecoinvent-3.9.1-cutoff from pickle!


In [37]:
# Unique locations of the ecoinvent activities
ei_locations = list(set([i['location'] for i in ei_database.db_as_list]))

In [38]:
# Sorted unique names of the unlinked flows, without their last ', '-separated part (the location for spatialized flows)
missing_biosphere_flows = sorted(list(set([i[1].rsplit(', ', 1)[0] for i in unlinked])))

In [39]:
# For each unlinked flow, list the ecoinvent locations for which the spatialized flow is missing
unlinked_name_and_loc = [i[1].rsplit(', ', 1) for i in unlinked]  # [name, location] pairs

missing_flows_dict = {}
for flow in missing_biosphere_flows:
    missing_loc = list(set([
        parts[-1] for parts in unlinked_name_and_loc
        if parts[0] == flow and parts[-1] in ei_locations
    ]))
    if not missing_loc:
        continue
    print(f'##### {flow} #####')
    print(f'{sorted(missing_loc)}\n')
    missing_flows_dict[flow] = missing_loc

##### Occupation, annual crop, flooded crop #####
['AE', 'BH', 'CW', 'CY', 'EG', 'GI', 'IL', 'IS', 'JO', 'KW', 'LB', 'LY', 'MT', 'OM', 'QA', 'SA', 'SY', 'TJ', 'TM', 'UZ', 'YE']

##### Occupation, annual crop, greenhouse #####
['AE', 'BH', 'CW', 'CY', 'EG', 'GI', 'IL', 'IS', 'JO', 'KW', 'LB', 'LY', 'MT', 'OM', 'QA', 'SA', 'SY', 'TJ', 'TM', 'UZ', 'YE']

##### Occupation, annual crop, irrigated #####
['AE', 'BH', 'CW', 'CY', 'EG', 'GI', 'IL', 'IS', 'JO', 'KW', 'LB', 'LY', 'MT', 'OM', 'QA', 'SA', 'SY', 'TJ', 'TM', 'UZ', 'YE']

##### Occupation, annual crop, irrigated, extensive #####
['AE', 'BH', 'CW', 'CY', 'EG', 'GI', 'IL', 'IS', 'JO', 'KW', 'LB', 'LY', 'MT', 'OM', 'QA', 'SA', 'SY', 'TJ', 'TM', 'UZ', 'YE']

##### Occupation, annual crop, irrigated, intensive #####
['AE', 'BH', 'CW', 'CY', 'EG', 'GI', 'IL', 'IS', 'JO', 'KW', 'LB', 'LY', 'MT', 'OM', 'QA', 'SA', 'SY', 'TJ', 'TM', 'UZ', 'YE']

##### Occupation, annual crop, non-irrigated #####
['AE', 'BH', 'CW', 'CY', 'EG', 'GI', 'IL', 'IS',

However, not all of these flows are necessarily used in ecoinvent. 

### Add missing biosphere flows to regioinvent's biosphere database

In [40]:
# Check if those flows would be used in ecoinvent database
missing_flows_used_in_ei = []
unlinked_name_and_loc = [i[1].rsplit(', ', 1) for i in unlinked]  # [name, location] pairs of the unlinked flows
all_missing_loc = list(set([parts[-1] for parts in unlinked_name_and_loc]))  # all missing locations retrieved from the unlinked list

# Index the ecoinvent activities by location once, instead of scanning the whole database for every location
activities_by_loc = {}
for act in ei_database.db_as_list:
    activities_by_loc.setdefault(act['location'], []).append(act)

for loc in all_missing_loc:
    missing_flows_for_loc = {parts[0] for parts in unlinked_name_and_loc if parts[-1] == loc}  # all types of unlinked flows with this location
    for act in activities_by_loc.get(loc, []):  # ecoinvent activities for this location
        biosphere_flows = Dataset(act).get_biosphere_flows()  # list of biosphere flows in the activity exchanges
        for flow in biosphere_flows:
            if flow['name'] in missing_flows_for_loc:
                missing_flows_used_in_ei.append([act["name"], act["location"], flow["name"], flow["categories"], flow["unit"]])

In [41]:
# create a dictionary with the useful missing flows as key (name, categories, unit) and the list of missing locations as values
# counter is the total number of (flow, location) pairs stored in the dictionary
missing_flows_used_in_ei_dict = {}
counter = 0
for entry in missing_flows_used_in_ei:
    act_location = entry[1]
    locations = missing_flows_used_in_ei_dict.setdefault((entry[2], entry[3], entry[4]), [])
    if act_location not in locations:
        locations.append(act_location)
        counter += 1

In [42]:
# for each missing flow and associated list of locations, create a new flow in the spatialized biosphere database
# (the locations are the ones listed in missing_flows_dict for the flow name)
for flow_key in missing_flows_used_in_ei_dict:
    flow_name, flow_categories, flow_unit = flow_key[0], flow_key[1], flow_key[2]
    new_spatialized_biosphere_db.db_as_list.extend(
        {
            'name': f'{flow_name}, {loc}',
            'categories': flow_categories,
            'unit': flow_unit,
            'code': random_code(),
            'database': 'biosphere3_spatialized_flows_EF',
        }
        for loc in missing_flows_dict[flow_name]
    )

In [43]:
# Write the filtered and completed spatialized biosphere database to brightway under its new name
if write_new_spatialized_biosphere:
    new_spatialized_biosphere_db.write_to_brightway(new_db_name='biosphere3_spatialized_flows_EF', database_type='biosphere')

In [44]:
# Reload the newly written database with create_pickle=True
# (presumably this saves the pickle file mentioned at the top of the notebook - TODO confirm in mescal)
if write_new_spatialized_biosphere:
    new_spatialized_biosphere_db = Database('biosphere3_spatialized_flows_EF', create_pickle=True)

## Geographical mapping between ecoinvent (including premise regions) and EF methods to be used in regioinvent

In [45]:
# Region codes added by premise (keys) mapped to the ecoinvent location used in their place (values).
# These codes are appended to the list of ecoinvent locations and replaced by their value in the mapping built below.
ei_premise_iam_locations_geo_mapping = {
    'CAZ': 'CA',
    'CHA': 'CN',
    'EUR': 'RER',
    'JPN': 'JP',
    'IND': 'IN',
    'LAM': 'RLA',
    'MEA': 'RME',
    'NEU': 'RER',
    'OAS': 'RAS',
    'REF': 'RU',
    'SSA': 'RAF',
    'USA': 'US',
    'APAC': 'RAS',
    'World': 'GLO',
    'RSAM': 'RLA',
    'RCAM': 'RLA',
    'INDO': 'ID',
    'RSAF': 'RAF',
    'CEU': 'RER',
    'SAF': 'ZA',
    'INDIA': 'IN',
    'BRA': 'BR',
    'STAN': 'RAS',
    'WAF': 'RAF',
    'CHN': 'CN',
    'NAF': 'RAF',
    'UKR': 'UA',
    'RSAS': 'RAS',
    'RUS': 'RU',
    'SEAS': 'RAS',
    'KOR': 'KR',
    'JAP': 'JP',
    'EAF': 'RAF',
    'TUR': 'TR',
    'CAN': 'CA',
    'MEX': 'MX',
    'AFR': 'RAF',
    'AUS': 'AU',
    'CSA': 'RLA',
    'CHI': 'CN',
    'EEU': 'RER',
    'FSU': 'RAS',
    'ODA': 'RAS',
    'SKO': 'KR',
    'UK': 'GB',
    'OCE': 'AU',
}

In [46]:
# Add the premise region codes to the ecoinvent locations (in place: running this cell again adds them again)
ei_locations.extend(ei_premise_iam_locations_geo_mapping.keys())

In [47]:
# Unique last ', '-separated part of the names of the spatialized biosphere flows, i.e., their location
# (for names that do not end with a location, this is a piece of the flow name; those are excluded in the next cell)
locations_spat_biosphere = list(set([i['name'].split(', ')[-1] for i in biosphere_db_spatialized.db_as_list]))

In [48]:
# Locations of the spatialized biosphere flows that are not ecoinvent locations.
# The excluded values are name endings that are explicitly left out of the list of locations.
excluded_name_endings = ['Deep', 'agricultural', 'in ground', 'mosaic (agroforestry)', 'ocean', 'sole']
missing_locations_spatialized_biosphere = [
    location
    for location in locations_spat_biosphere
    if location not in ei_locations and location not in excluded_name_endings
]

In [49]:
# Add the locations that only exist in the spatialized biosphere database to the list of locations to map
ei_locations.extend(missing_locations_spatialized_biosphere)

In [50]:
# For each ecoinvent location, find the location whose CF should be used in each regionalized category:
# the location itself if it has CFs in the EF file for that category, 'GLO' otherwise.
water_loc_list = []
land_loc_list = []
acid_loc_list = []
eutro_loc_list = []
ei_locations = sorted(ei_locations)

ef_cfs_water = ef_cfs[ef_cfs.LCIAMethod_name == 'Water use']
ef_cfs_land = ef_cfs[ef_cfs.LCIAMethod_name == 'Land use']
ef_cfs_acid = ef_cfs[ef_cfs.LCIAMethod_name == 'Acidification']
ef_cfs_eutro = ef_cfs[ef_cfs.LCIAMethod_name == 'Eutrophication, terrestrial']

# The locations available in each category do not depend on the ecoinvent location:
# they are computed once, as sets, instead of calling .unique() four times per location
loc_lists_and_ef_locations = [
    (water_loc_list, set(ef_cfs_water.LCIAMethod_location.unique())),
    (land_loc_list, set(ef_cfs_land.LCIAMethod_location.unique())),
    (acid_loc_list, set(ef_cfs_acid.LCIAMethod_location.unique())),
    (eutro_loc_list, set(ef_cfs_eutro.LCIAMethod_location.unique())),
]

for loc in ei_locations:

    # Sub-national or composite codes are reduced to the part before the first hyphen
    if '-' in loc:
        loc = loc.split('-')[0]
    if 'Canada' in loc:
        loc = 'CA'

    # premise regions are replaced by their corresponding ecoinvent location
    loc = ei_premise_iam_locations_geo_mapping.get(loc, loc)

    for loc_list, ef_locations in loc_lists_and_ef_locations:
        loc_list.append(loc if loc in ef_locations else 'GLO')

In [51]:
# One row per ecoinvent location, with the location to use in each regionalized category
df_mapping_ef_ei_geographies = pd.DataFrame({
    'ecoinvent': ei_locations,
    'water': water_loc_list,
    'land': land_loc_list,
    'acid': acid_loc_list,
    'eutro': eutro_loc_list,
})

In [52]:
# Export the geographical mapping together with the regionalized methods (same write_method_bw switch)
if write_method_bw:
    df_mapping_ef_ei_geographies.to_excel('results/ei_ef_geo_mapping.xlsx', index=False)