# Notebook for importing Agribalyse 3.1 from a SimaPro .csv export

In [1]:
import brightway2 as bw

In [2]:
# Start from a clean slate so that the import is reproducible.
# ``delete_dir=True`` also removes the project directory from disk; with the
# default (``delete_dir=False``) only the registry entry is dropped, and the
# stale directory would be picked up again when the project is re-created
# by ``set_current`` in the next cell.
if 'Importing AGB3.1' in bw.projects:
    bw.projects.delete_project('Importing AGB3.1', delete_dir=True)

In [3]:
# Activate the project used for the import, then install the Brightway
# defaults (the recorded output shows the default biosphere database
# "biosphere3" and the default LCIA methods being created).
# NOTE(review): the recorded run ended with "ValueError: Method ... already
# exists" while creating the LCIA methods -- confirm the setup completes on a
# fresh project before relying on the cells below.
bw.projects.set_current('Importing AGB3.1')
bw.bw2setup()

Creating default biosphere

Applying strategy: normalize_units
Applying strategy: drop_unspecified_subcategories
Applying strategy: ensure_categories_are_tuples
Applied 3 strategies in 0.00 seconds


Writing activities to SQLite3 database:
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


Title: Writing activities to SQLite3 database:
  Started: 09/23/2022 14:00:55
  Finished: 09/23/2022 14:00:56
  Total time elapsed: 00:00:00
  CPU %: 41.00
  Memory %: 1.07
Created database: biosphere3
Creating default LCIA methods

Applying strategy: normalize_units
Applying strategy: set_biosphere_type
Applying strategy: fix_ecoinvent_38_lcia_implementation
Applying strategy: drop_unspecified_subcategories
Applying strategy: link_iterable_by_fields
Applied 5 strategies in 2.42 seconds


ValueError: Method ('CML 2001 (superseded)', 'terrestrial ecotoxicity', 'TAETP infinite') already exists. Use ``overwrite=True`` to overwrite existing methods

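Some custom migrations are necessary: the technosphere exchanges listed below (matched on name and unit) must be converted to another unit, with the amount rescaled by the given multiplier.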

In [4]:
# Path to the SimaPro CSV export of Agribalyse 3.1.
agb_csv_filepath = r"data/AGB3.1.CSV"

# Unit conversion factors used as migration multipliers.
# Beware of float notation: ``10e-3`` is 0.01 and ``10e-4`` is 0.001, which is
# ten times too large for these conversions.
LITRE_TO_CUBIC_METER = 1e-3        # 1 litre = 0.001 cubic meter
KILOWATT_HOUR_TO_MEGAJOULE = 3.6   # 1 kWh = 3.6 MJ
SQUARE_METER_TO_HECTARE = 1e-4     # 1 square meter = 0.0001 hectare

# Migration data: each entry maps a (name, unit) pair to the replacement unit
# and the multiplier associated with that unit change.
agb_technosphere_migration_data = {
    'fields': ['name', 'unit'],
    'data': [
        (
            (
                'Wastewater, average {Europe without Switzerland}| market for wastewater, average | Cut-off, S - Copied from Ecoinvent',
                'litre'),
            {
                'unit': 'cubic meter',
                'multiplier': LITRE_TO_CUBIC_METER
            }
        ),
        (
            (
                'Wastewater, from residence {RoW}| market for wastewater, from residence | Cut-off, S - Copied from Ecoinvent',
                'litre'),
            {
                'unit': 'cubic meter',
                'multiplier': LITRE_TO_CUBIC_METER
            }
        ),
        (
            (
                'Heat, central or small-scale, natural gas {Europe without Switzerland}| market for heat, central or small-scale, natural gas | Cut-off, S - Copied from Ecoinvent',
                'kilowatt hour'),
            {
                'unit': 'megajoule',
                'multiplier': KILOWATT_HOUR_TO_MEGAJOULE
            }
        ),
        (
            (
                'Heat, district or industrial, natural gas {Europe without Switzerland}| heat production, natural gas, at industrial furnace >100kW | Cut-off, S - Copied from Ecoinvent',
                'kilowatt hour'),
            {
                'unit': 'megajoule',
                'multiplier': KILOWATT_HOUR_TO_MEGAJOULE
            }
        ),
        (
            ('Heat, district or industrial, natural gas {RER}| market group for | Cut-off, S - Copied from Ecoinvent',
             'kilowatt hour'),
            {
                'unit': 'megajoule',
                'multiplier': KILOWATT_HOUR_TO_MEGAJOULE
            }
        ),
        (
            (
                'Heat, district or industrial, natural gas {RoW}| market for heat, district or industrial, natural gas | Cut-off, S - Copied from Ecoinvent',
                'kilowatt hour'),
            {
                'unit': 'megajoule',
                'multiplier': KILOWATT_HOUR_TO_MEGAJOULE
            }
        ),
        (
            (
                'Land use change, perennial crop {BR}| market group for land use change, perennial crop | Cut-off, S - Copied from Ecoinvent',
                'square meter'),
            {
                'unit': 'hectare',
                'multiplier': SQUARE_METER_TO_HECTARE
            }
        ),
    ]
}

## Importing and linking the data

In [5]:
# Extract the datasets from the SimaPro CSV export into an importer whose
# target database is named "agribalyse3.1".
agb_importer = bw.SimaProCSVImporter(agb_csv_filepath, "agribalyse3.1")

# Register the custom technosphere migration so it can be applied by name.
agb_technosphere_migration = bw.Migration("agb-technosphere")
agb_technosphere_migration.write(
    agb_technosphere_migration_data,
    description="Specific technosphere fixes for Agribalyse 3"
)

# Run the importer's strategies once. The original cell called
# ``apply_strategies()`` twice in a row, repeating a full pass over every
# exchange; the strategies are applied again after the migration in the
# next cell.
agb_importer.apply_strategies()
agb_importer.migrate("agb-technosphere")
agb_importer.statistics()

Extracted 17557 unallocated datasets in 101.79 seconds
Applying strategy: normalize_units
Applying strategy: update_ecoinvent_locations
Applying strategy: assign_only_product_as_production
Applying strategy: drop_unspecified_subcategories
Applying strategy: sp_allocate_products
Applying strategy: fix_zero_allocation_products
Applying strategy: split_simapro_name_geo
Applying strategy: strip_biosphere_exc_locations
Applying strategy: migrate_datasets
Applying strategy: migrate_exchanges
Applying strategy: set_code_by_activity_hash
Applying strategy: link_technosphere_based_on_name_unit_location
Applying strategy: change_electricity_unit_mj_to_kwh
Applying strategy: set_lognormal_loc_value_uncertainty_safe
Applying strategy: normalize_biosphere_categories
Applying strategy: normalize_simapro_biosphere_categories
Applying strategy: normalize_biosphere_names
Applying strategy: normalize_simapro_biosphere_names
Applying strategy: migrate_exchanges
Applying strategy: fix_localized_water_flow

(18560, 5068215, 294477)

In [6]:
# Re-run the strategies now that the "agb-technosphere" migration has been
# applied in the previous cell, then report the statistics again (in the
# recorded run the last value of the tuple went from 294477 to 294240).
agb_importer.apply_strategies()
agb_importer.statistics()

Applying strategy: normalize_units
Applying strategy: update_ecoinvent_locations
Applying strategy: assign_only_product_as_production
Applying strategy: drop_unspecified_subcategories
Applying strategy: sp_allocate_products
Applying strategy: fix_zero_allocation_products
Applying strategy: split_simapro_name_geo
Applying strategy: strip_biosphere_exc_locations
Applying strategy: migrate_datasets
Applying strategy: migrate_exchanges
Applying strategy: set_code_by_activity_hash
Applying strategy: link_technosphere_based_on_name_unit_location
Applying strategy: change_electricity_unit_mj_to_kwh
Applying strategy: set_lognormal_loc_value_uncertainty_safe
Applying strategy: normalize_biosphere_categories
Applying strategy: normalize_simapro_biosphere_categories
Applying strategy: normalize_biosphere_names
Applying strategy: normalize_simapro_biosphere_names
Applying strategy: migrate_exchanges
Applying strategy: fix_localized_water_flows
Applying strategy: link_iterable_by_fields
Applying s

(18560, 5068215, 294240)

## Writing unlinked data

In [7]:
# Export the unlinked exchanges only to an Excel file for manual inspection
# (the output path is printed by the call).
agb_importer.write_excel(only_unlinked=True)

Wrote matching file to:
C:\Users\GustaveCoste\AppData\Local\pylca\Brightway3\Importing-AGB31.1200211839fb49bd0efc0b8ce016e951\output\db-matching-agribalyse31-unlinked.xlsx


In [8]:
# Biosphere flows that could not be linked are stored in a dedicated database,
# which has to be registered before the importer can write to it.
agb_biosphere_db_name = 'Agribalyse3.1 biosphere flows'
bw.Database(agb_biosphere_db_name).register()
agb_importer.add_unlinked_flows_to_biosphere_database(agb_biosphere_db_name)

# Remaining unlinked technosphere exchanges become new activities; the
# statistics are then expected to report no unlinked exchanges.
agb_importer.add_unlinked_activities()
agb_importer.statistics()

Writing activities to SQLite3 database:
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


Title: Writing activities to SQLite3 database:
  Started: 09/23/2022 14:17:20
  Finished: 09/23/2022 14:17:20
  Total time elapsed: 00:00:00
  CPU %: 47.00
  Memory %: 34.04
Applying strategy: link_iterable_by_fields
Applying strategy: link_iterable_by_fields
18609 datasets
5068215 exchanges
0 unlinked exchanges
  


(18609, 5068215, 0)

## Processing metadata

In [10]:
import re

from tqdm import tqdm

# Patterns used to pull structured metadata out of free-text fields:
# - the Data Quality Rating sentence found in the SimaPro "Comment" field,
# - the CIQUAL code written between square brackets in the activity name,
# - the location written between curly brackets (followed by "|") in the name.
dqr_pattern = r"The overall DQR of this product is: (?P<overall>[\d.]+) {P: (?P<P>[\d.]+), TiR: (?P<TiR>[\d.]+), GR: (?P<GR>[\d.]+), TeR: (?P<TeR>[\d.]+)}"
ciqual_pattern = r"\[Ciqual code: (?P<ciqual>[\d_]+)\]"
location_pattern = r" \{(?P<location>[\w-]+)\}\|"

for activity in tqdm(agb_importer):

    # Getting activities locations: only fill in the location when it is
    # missing, first from a "/FR" marker, otherwise from the "{XX}|" pattern.
    if activity.get('location') is None:
        if '/FR' in activity['name']:
            activity['location'] = 'FR'
        else:
            match = re.search(pattern=location_pattern, string=activity['name'])
            if match is not None:
                activity['location'] = match['location']

    # Getting products CIQUAL code when relevant
    if 'ciqual' in activity['name'].lower():
        match = re.search(pattern=ciqual_pattern, string=activity['name'])
        # The name may mention "ciqual" without following the expected
        # "[Ciqual code: ...]" format; skip it instead of failing on None.
        if match is not None:
            activity['ciqual_code'] = match['ciqual']

    # Putting SimaPro metadata in the activity fields directly and removing references to SimaPro
    if 'simapro metadata' in activity:
        for sp_field, value in activity['simapro metadata'].items():
            if value != 'Unspecified':
                activity[sp_field] = value

        # Getting the Data Quality Rating of the data when relevant
        # (the ratings are stored as the strings captured by the pattern)
        if 'Comment' in activity['simapro metadata']:
            match = re.search(pattern=dqr_pattern, string=activity['simapro metadata']['Comment'])

            if match:
                activity['DQR'] = {'overall': match['overall'],
                                   'P': match['P'],
                                   'TiR': match['TiR'],
                                   'GR': match['GR'],
                                   'TeR': match['TeR']}

        del activity['simapro metadata']

    if 'simapro name' in activity:
        del activity['simapro name']

18609it [00:00, 718358.47it/s]


In [13]:
# Write the imported datasets to the project database "agribalyse3.1".
# This is the slow step: the recorded run took about 22 minutes.
agb_importer.write_database()

Writing activities to SQLite3 database:
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:21:48


Title: Writing activities to SQLite3 database:
  Started: 09/23/2022 14:24:51
  Finished: 09/23/2022 14:46:40
  Total time elapsed: 00:21:48
  CPU %: 70.80
  Memory %: 33.35
Created database: agribalyse3.1


Brightway2 SQLiteBackend: agribalyse3.1

In [20]:
# Bundle the Agribalyse database and its companion biosphere database into a
# single .bw2package file under the "data" folder.
databases_to_export = [
    bw.Database('agribalyse3.1'),
    bw.Database('Agribalyse3.1 biosphere flows'),
]
bw.BW2Package.export_objs(
    databases_to_export,
    filename='AGB3.1.bw2package',
    folder='data',
)

KeyboardInterrupt: 