# Create the regionalized version of EF 3.1 in brightway

In [1]:
import bw2data as bd
import numpy as np
import pandas as pd
from mescal import *

In [2]:
bd.projects.set_current(f'ecoinvent3.9.1')

In [3]:
ef_cfs = pd.read_excel('lcia/EF-LCIAMethod_CF(EF-v3.1).xlsx', sheet_name='lciamethods_CF', keep_default_na=False, na_values='')
ef_biosphere_mapping = pd.read_csv('lcia/EF31_biosphere_mapping.csv')

In [4]:
ef_biosphere_mapping_dict = ef_biosphere_mapping.groupby('ef')['biosphere'].apply(list).to_dict()

## Adapt the EF excel file to be compatible with ecoinvent

In [5]:
def ecoinvent_compatible_categories(name, cat0, cat1, cat2):
    
    if name.lower() in ef_biosphere_mapping_dict.keys():
        name = ef_biosphere_mapping_dict[name.lower()][0]
    
    if cat0 == 'Emissions':
        cat1 = cat1.replace('Emissions to ', '')
        cat2 = cat2.replace('Emissions to ', '')
        cat2 = cat2.replace('and', '+')
        
        if cat2 == 'air, unspecified':
            cat2 = None
        elif cat2 == 'air, unspecified (long-term)':
            cat2 = 'low population density, long-term'
            
        if '(biogenic)' in name:
            name = name.replace(' (biogenic)', ', non-fossil')
        elif '(fossil)' in name:
            name = name.replace(' (fossil)', ', fossil')
        elif '(land use change)' in name:
            name = name.replace(' (land use change)', ', to soil or biomass stock')
        
        if 'Particles' in name:
            name.replace('Particles', 'Particulate Matter, ')
            
            if '(PM10)' in name:
                name = name.replace('(PM10)', '> 2.5 um and < 10um')
            elif '(PM0.2 - PM2.5)' in name:
                name = name.replace('(PM0.2 - PM2.5)', '< 2.5 um')
    
    elif cat0 == 'Land use':
        
        if ('from ' in name) & (name.replace('from ', '') in ef_biosphere_mapping_dict.keys()):
            name = f"from {ef_biosphere_mapping_dict[name.replace('from ', '')][0]}"
        
        elif ('to ' in name) & (name.replace('to ', '') in ef_biosphere_mapping_dict.keys()):
            name = f"to {ef_biosphere_mapping_dict[name.replace('to ', '')][0]}"
        
        cat1 = cat1.replace('Land ', '')
        name = f'{cat1}, {name}'
        cat1 = 'natural resource'
        cat2 = 'land'
    
    elif cat0 == 'Resources':
        cat2 = f"in {cat1.replace('Resources from ', '')}"
        cat1 = 'natural resource'
    
    else:
        raise ValueError(f"Category0 {cat0} not recognized")
    
    return name, cat0, cat1, cat2

In [6]:
def ecoinvent_compatible_water_flows(name, cat0, cat1, cat2, lcia_cat):
    
    name = name.lower()
    
    if lcia_cat.lower() == 'water use':
        
        if cat0 == 'Resources':
            if name == 'ground water':
                name = 'water, unspecified natural origin'
                cat1 = 'natural resource'
                cat2 = 'in ground'
            elif name == 'lake water':
                name = 'water, lake'
                cat1 = 'natural resource'
                cat2 = 'in water'
            elif name == 'river water':
                name = 'water, river'
                cat1 = 'natural resource'
                cat2 = 'in water'
            elif name == 'freshwater':
                name = 'water, unspecified natural origin'
                cat1 = 'natural resource'
                cat2 = 'in water'
            elif name == 'water to cooling':
                name = 'water, cooling, unspecified natural origin'
                cat1 = 'natural resource'
                cat2 = 'in water'
            elif name == 'water to turbine':
                name = 'water, turbine use, unspecified natural origin'
                cat1 = 'natural resource'
                cat2 = 'in water'
        
        elif cat0 == 'Emissions':
            if (name == 'water') & (cat2 == 'water, unspecified'):
                name = 'water'
                cat1 = 'water'
                cat2 = None
        
    return name, cat1, cat2

In [7]:
# Change the name and categories columns of the EF file to be ecoinvent-compliant 
ef_cfs[['FLOW_name', 'FLOW_class0', 'FLOW_class1', 'FLOW_class2']] = ef_cfs.apply(lambda x: ecoinvent_compatible_categories(x['FLOW_name'], x['FLOW_class0'], x['FLOW_class1'], x['FLOW_class2']), axis=1, result_type='expand')

In [8]:
ef_cfs[['FLOW_name', 'FLOW_class1', 'FLOW_class2']] = ef_cfs.apply(lambda x: ecoinvent_compatible_water_flows(x['FLOW_name'], x['FLOW_class0'], x['FLOW_class1'], x['FLOW_class2'], x['LCIAMethod_name']), axis=1, result_type='expand')

In [9]:
# Add some missing land biosphere flows, as they come from the same EF flow
ef_cfs.drop_duplicates(inplace=True)
for key, value in ef_biosphere_mapping_dict.items():
    if len(value) > 1:
        df_to_add = ef_cfs[
            (ef_cfs.FLOW_name.str.lower() == f'occupation, {value[0]}')
            | (ef_cfs.FLOW_name.str.lower() == f'transformation, from {value[0]}')
            | (ef_cfs.FLOW_name.str.lower() == f'transformation, to {value[0]}')
        ].copy(deep=True)
        for i in range(1, len(value)):
            df_to_add['FLOW_name'] = df_to_add['FLOW_name'].apply(lambda x: x.replace(value[i-1], value[i]))
            ef_cfs = pd.concat([ef_cfs, df_to_add])

ef_cfs.reset_index(drop=True, inplace=True)

In [10]:
# Add missing water flows
# First, the missing 'water, well, in ground' and 'fossil well' subcategory water flows
missing_water_flows = [
    ('water, well, in ground', 'natural resource', 'in water'),
    ('water, unspecified natural origin', 'natural resource', 'fossil well'),
]
df_to_add = ef_cfs[
    (ef_cfs.FLOW_name.str.lower() == 'water, unspecified natural origin')
    & (ef_cfs.FLOW_class1 == 'natural resource')
    & (ef_cfs.FLOW_class2 == 'in ground')
].copy(deep=True)
for flow_name, cat1, cat2 in missing_water_flows:
    df_to_add['FLOW_name'] = flow_name
    df_to_add['FLOW_class2'] = cat2
    ef_cfs = pd.concat([ef_cfs, df_to_add])

# Second, several water emissions flows 
missing_subcategories = ['fossil well', 'ground-', 'ground-, long-term', 'surface water']
df_to_add = ef_cfs[
    (ef_cfs.FLOW_name.str.lower() == 'water')
    & (ef_cfs.FLOW_class1 == 'water')
    & (ef_cfs.FLOW_class2.isna())
].copy(deep=True)
for subcat in missing_subcategories:
    df_to_add['FLOW_class2'] = subcat
    ef_cfs = pd.concat([ef_cfs, df_to_add])

ef_cfs.reset_index(drop=True, inplace=True)

In [11]:
# Load the biosphere and spatialized biosphere databases 
biosphere_db_spatialized = Database('biosphere3_spatialized_flows')
biosphere_db = Database('biosphere3')

Getting activity data


100%|██████████| 62222/62222 [00:00<00:00, 101598.73it/s]


Adding exchange data to activities


0it [00:00, ?it/s]


Filling out exchange data


100%|██████████| 62222/62222 [00:00<00:00, 3974990.23it/s]


Loaded biosphere3_spatialized_flows from brightway!
Getting activity data


100%|██████████| 4718/4718 [00:00<00:00, 301482.77it/s]


Adding exchange data to activities


0it [00:00, ?it/s]


Filling out exchange data


100%|██████████| 4718/4718 [00:00<?, ?it/s]

Loaded biosphere3 from brightway!





In [12]:
biosphere_db_dict_name = biosphere_db.list_to_dict('name', 'biosphere')
biosphere_db_spatialized_dict_name = biosphere_db_spatialized.list_to_dict('name', 'biosphere')

In [13]:
biosphere_db_dict_code = biosphere_db.list_to_dict('code', 'biosphere')
biosphere_db_spatialized_dict_code = biosphere_db_spatialized.list_to_dict('code', 'biosphere')

## Create the new regionalized methods

In [14]:
def create_cf_list(df):
    
    reg_cfs = []
    unmapped = []
    
    for i in range(len(df)):
        
        flow_name = df.loc[i, 'FLOW_name']
        flow_name = flow_name.lower()
        
        flow_location = df.loc[i, 'LCIAMethod_location']
        
        flow_cf = df.loc[i, 'CF EF3.1']
        
        if flow_location is np.nan:
            spatialized_db = False
        else:
            flow_name = f'{flow_name}, {flow_location}'
            spatialized_db = True
        
        category1 = df.loc[i, 'FLOW_class1']
        category2 = df.loc[i, 'FLOW_class2']
        
        flow_name = flow_name[0].upper() + flow_name[1:]
        
        if category2 is not None:
            
            if spatialized_db:
                if (flow_name, (category1, category2), 'biosphere3_spatialized_flows') in biosphere_db_spatialized_dict_name.keys():
                    flow_code = biosphere_db_spatialized_dict_name[(flow_name, (category1, category2), 'biosphere3_spatialized_flows')]['code']
                    reg_cfs.append((('biosphere3_spatialized_flows', flow_code), flow_cf))
                else:
                    unmapped.append(('biosphere3_spatialized_flows', flow_name, (category1, category2)))
            
            else:
                if (flow_name, (category1, category2), 'biosphere3') in biosphere_db_dict_name.keys():
                    flow_code = biosphere_db_dict_name[(flow_name, (category1, category2), 'biosphere3')]['code']
                    reg_cfs.append((('biosphere3', flow_code), flow_cf))
                else:
                    unmapped.append(('biosphere3', flow_name, (category1, category2)))
        
        else:
            
            if spatialized_db:
                if (flow_name, (category1,), 'biosphere3_spatialized_flows') in biosphere_db_spatialized_dict_name.keys():
                    flow_code = biosphere_db_spatialized_dict_name[(flow_name, (category1,), 'biosphere3_spatialized_flows')]['code']
                    reg_cfs.append((('biosphere3_spatialized_flows', flow_code), flow_cf))
                else:
                    unmapped.append(('biosphere3_spatialized_flows', flow_name, (category1,)))
            
            else:
                if (flow_name, (category1,), 'biosphere3') in biosphere_db_dict_name.keys():
                    flow_code = biosphere_db_dict_name[(flow_name, (category1,), 'biosphere3')]['code']
                    reg_cfs.append((('biosphere3', flow_code), flow_cf))
                else:
                    unmapped.append(('biosphere3', flow_name, (category1,)))
    
    return reg_cfs, unmapped

In [15]:
def method_names_bw(cat):
    # Changes in categories names 
    cat_bw = cat.replace('EF-', '')
    cat_bw = cat_bw.replace('Resource use, fossils', 'Energy resources: non-renewable')
    cat_bw = cat_bw.replace('Resource use, minerals and metals', 'Material resources: metals/minerals')
    cat_bw = cat_bw.replace('Eutrophication marine', 'eutrophication: marine')
    cat_bw = cat_bw.replace('Matter', 'matter formation')
    cat_bw = cat_bw.replace('cancer', 'carcinogenic')
    cat_bw = cat_bw.replace('ozone', 'oxidant')
    cat_bw = cat_bw.replace('-', ': ')
    cat_bw = cat_bw.replace(' : ', ':')
    cat_bw = cat_bw.replace(',', ':')
    cat_bw = cat_bw.replace('_', ', ')
    cat_bw = cat_bw.replace('non: ', 'non-')
    
    return cat_bw

In [16]:
# Create the new set of regionalized impact categories 
ef_methods_bw = [i for i in bd.methods if i[0] == 'EF v3.1']

# Replace the water use method of EF by the one of IW+. Both are based on AWARE, but the one of IW+ is more complete. 
ef_methods_bw.remove(('EF v3.1', 'water use', 'user deprivation potential (deprivation-weighted water consumption)'))
ef_methods_bw.append(('IMPACT World+ Midpoint 2.0.1', 'Midpoint', 'Water scarcity'))

regionalized_methods = {}
unlinked = []
methods_to_regionalize = [
    'acidification',
    # 'ecotoxicity: freshwater',
    # 'ecotoxicity: freshwater, inorganics',
    # 'ecotoxicity: freshwater, organics',
    # 'eutrophication, marine',
    'eutrophication, terrestrial',
    'land use',
    'water use',
] # categories for which regional CF are added, other categories are kept as they are in brightway

for i in range(len(ef_methods_bw)):
    method = bd.Method(ef_methods_bw[i])
    if ef_methods_bw[i][0] == 'EF v3.1':
        method_name = ef_methods_bw[i][1]
    else:
        method_name = ef_methods_bw[i][2] # for the IW+ water scarcity method
    method_name = method_names_bw(method_name)
    cfs = method.load()
    regionalized_cfs = []
    
    for j in cfs:
        
        if method_name not in methods_to_regionalize:
            regionalized_cfs.append(j)
        
        else:
            flow = biosphere_db_dict_code[(j[0][0], j[0][1])]
            flow_name = flow['name'].lower()
            flow_category_1 = flow['categories'][0]
            if len(flow['categories']) == 1:
                df_regionalized_flows = ef_cfs[
                    (ef_cfs['FLOW_name'].str.lower() == flow_name) 
                    & (ef_cfs['FLOW_class1'] == flow_category_1)
                    & (ef_cfs['FLOW_class2'].isna())
                    & (ef_cfs['LCIAMethod_name'].str.lower() == method_name)
                ].reset_index()
                
                if len(df_regionalized_flows) == 0:
                    unlinked.append(('biosphere3', flow_name, (flow_category_1,)))
                
            else:
                flow_category_2 = flow['categories'][1]
            
                df_regionalized_flows = ef_cfs[
                    (ef_cfs['FLOW_name'].str.lower() == flow_name) 
                    & (ef_cfs['FLOW_class1'] == flow_category_1)
                    & (ef_cfs['FLOW_class2'] == flow_category_2)
                    & (ef_cfs['LCIAMethod_name'].str.lower() == method_name)
                ].reset_index()
                
                if len(df_regionalized_flows) == 0:
                    unlinked.append(('biosphere3', flow_name, (flow_category_1, flow_category_2)))
            
            new_regionalized_cfs, new_unlinked = create_cf_list(df_regionalized_flows)
            regionalized_cfs += new_regionalized_cfs
            unlinked += new_unlinked
    
    regionalized_methods[method_name] = regionalized_cfs

In [17]:
list(set([i[1] for i in unlinked if 'biosphere3' == i[0]]))

[]

In [18]:
for method in ef_methods_bw:
    if method[2] == 'Water scarcity':
        ef_regionalized_method = bd.Method(('EF v3.1 regionalized', 'water use', 'user deprivation potential (deprivation-weighted water consumption)'))
        unit = bd.Method(('EF v3.1', 'water use', 'user deprivation potential (deprivation-weighted water consumption)')).metadata['unit']
        method_name = 'Water scarcity'
    else:
        ef_regionalized_method = bd.Method(('EF v3.1 regionalized', method[1], method[2]))
        unit = bd.Method(method).metadata['unit']
        method_name = method[1]
    ef_regionalized_method_metadata = {'unit': unit}
    ef_regionalized_method.register(**ef_regionalized_method_metadata)
    ef_regionalized_method.write(regionalized_methods[method_name])

KeyError: 'ecotoxicity: freshwater, inorganics'

In [None]:
# TODO: print missing locations and biosphere flows

In [19]:
regionalized_methods.keys()

dict_keys(['acidification', 'climate change', 'climate change: biogenic', 'climate change: fossil', 'climate change: land use and land use change', 'ecotoxicity: freshwater', 'ecotoxicity: freshwater: inorganics', 'ecotoxicity: freshwater: organics', 'energy resources: non-renewable', 'eutrophication: freshwater', 'eutrophication: marine', 'eutrophication: terrestrial', 'human toxicity: carcinogenic', 'human toxicity: carcinogenic: inorganics', 'human toxicity: carcinogenic: organics', 'human toxicity: non-carcinogenic', 'human toxicity: non-carcinogenic: inorganics', 'human toxicity: non-carcinogenic: organics', 'ionising radiation: human health', 'land use', 'material resources: metals/minerals', 'oxidant depletion', 'particulate matter formation', 'photochemical oxidant formation: human health', 'Water scarcity'])