In [1]:
# Basic imports
import pandas as pd
import numpy as np
import sys
import gzip
import pickle

In [2]:
# Add the directory that holds the ecospold2matrix module to the import search
# path; this has to happen before the import statement below is executed.
sys.path.append('/home/bill/software/ecospold2matrix/ecospold2matrix/')
import ecospold2matrix as e2m

In [3]:
# Input and output locations, all derived from a single ecoinvent 3.7.1 data root
ecoinvent_root = '/mnt/collection/data/ecoinvent/3.7.1'

# Directory handed to the parser as sys_dir (note the trailing slash)
ecospold_dir = ecoinvent_root + '/ecoinvent_3.7.1_cutoff_ecoSpold02/'

# Name of this run; it shows up as the logger name and as the prefix of the
# exported file names in the parser's log output
project_name = 'ecoinvent_3-7_cutoff_symm'

# Directory the parser reports saving its data in
out_dir = ecoinvent_root

# Excel workbook handed to the parser as characterisation_file
characterisation_file = (
    ecoinvent_root
    + '/ecoinvent_3.7.1_LCIA_implementation'
    + '/LCIA_implementation_3.7.1.xlsx'
)

In [4]:
# Gather the parsing options in one place, then build the parser from them
parser_options = {
    'sys_dir': ecospold_dir,
    'project_name': project_name,
    'out_dir': out_dir,
    'characterisation_file': characterisation_file,
    'float32': True,
    # the parser logs that NaN instances are replaced with 0.0 in all matrices
    'nan2null': True,
    # Properties requested here show up as extra columns of parser.PRO.
    # NOTE(review): 'carbon content' is requested but is not among the columns
    # printed by parser.PRO.columns further down, while the property list
    # contains e.g. 'carbon content, non-fossil' - confirm the intended name.
    'PRO_properties': ('dry mass', 'wet mass', 'iron content', 'carbon content'),
}
parser = e2m.Ecospold2Matrix(**parser_options)

2021-03-21 02:53:59,299 - ecoinvent_3-7_cutoff_symm - INFO - Ecospold2Matrix Processing
2021-03-21 02:53:59,309 - ecoinvent_3-7_cutoff_symm - INFO - Project name: ecoinvent_3-7_cutoff_symm
2021-03-21 02:53:59,310 - ecoinvent_3-7_cutoff_symm - INFO - Unit process and Master data directory: /mnt/collection/data/ecoinvent/3.7.1/ecoinvent_3.7.1_cutoff_ecoSpold02/
2021-03-21 02:53:59,311 - ecoinvent_3-7_cutoff_symm - INFO - Data saved in: /mnt/collection/data/ecoinvent/3.7.1
2021-03-21 02:53:59,311 - ecoinvent_3-7_cutoff_symm - INFO - Replace Not-a-Number instances with 0.0 in all matrices
2021-03-21 02:53:59,312 - ecoinvent_3-7_cutoff_symm - INFO - Pickle intermediate results to files
2021-03-21 02:53:59,313 - ecoinvent_3-7_cutoff_symm - INFO - Order processes based on: ISIC, activityName
2021-03-21 02:53:59,314 - ecoinvent_3-7_cutoff_symm - INFO - Order elementary exchanges based on: comp, name, subcomp


In [5]:
# Optional: list every property name the parser has access to. Handy when
# choosing entries for PRO_properties (presumably the names must match one of
# these strings exactly - TODO confirm).
parser.get_all_properties() 

['amount 1 of flow in kg',
 'EcoSpold01Allocation_other_78',
 'mass concentration, titanium',
 'EcoSpold01Allocation_undefined_28',
 'molding efficiency',
 'ash content',
 'EcoSpold01Allocation_undefined_153',
 'silver content',
 'concentration, potassium',
 'UVEK_absolute_amount_1',
 'carbon content, non-fossil',
 'thallium content',
 'EcoSpold01Allocation_undefined_72',
 'concentration of amount 5',
 'EcoSpold01Allocation_other_101',
 'EcoSpold01Allocation_undefined_169',
 'EcoSpold01Allocation_other_51',
 'EcoSpold01Allocation_undefined_32',
 'EcoSpold01Allocation_undefined_105',
 'mass concentration, dissolved nitrite NO2 as N',
 'EcoSpold01Allocation_physical_1',
 'alloy additives',
 'EcoSpold01Allocation_undefined_21',
 'EcoSpold01Allocation_other_16',
 'nitrogen oxides emissions tier T2 for hp < 175',
 'mass concentration, nickel',
 'EcoSpold01Allocation_other_124',
 'radium content',
 'EcoSpold01Allocation_other_86',
 'carbon monoxide emissions tier T0 for hp < 100',
 'nitrogen

In [6]:
# Run the conversion and export the result in both file formats; per the log
# below, each format is written to its own *_symmNorm.gz.pickle file in out_dir
export_formats = ['Pandas', 'SparseMatrix']
parser.ecospold_to_Leontief(fileformats=export_formats)

2021-03-21 02:54:11,659 - ecoinvent_3-7_cutoff_symm - INFO - Products extracted from IntermediateExchanges.xml with SHA-1 of 8d0d68e3af6eb45197d5bc5503ef1a0cc3163256
2021-03-21 02:54:52,855 - ecoinvent_3-7_cutoff_symm - INFO - Activities extracted from ActivityIndex.xml with SHA-1 of fadda81d3ec183f025fddb56f9189cde64fc7e82
2021-03-21 02:54:52,968 - ecoinvent_3-7_cutoff_symm - INFO - Processing 19128 files in /mnt/collection/data/ecoinvent/3.7.1/ecoinvent_3.7.1_cutoff_ecoSpold02/datasets
2021-03-21 02:56:41,381 - ecoinvent_3-7_cutoff_symm - INFO - Flows saved in /mnt/collection/data/ecoinvent/3.7.1/ecoinvent_3.7.1_cutoff_ecoSpold02/flows.pickle with SHA-1 of 4dbf714f7e2fecb3dc6ae06fb5a9b8869d6244b5
2021-03-21 02:56:41,503 - ecoinvent_3-7_cutoff_symm - INFO - Processing 19128 files - this may take a while ...
2021-03-21 02:58:15,550 - ecoinvent_3-7_cutoff_symm - INFO - Elementary flows extracted from ElementaryExchanges.xml with SHA-1 of 8eea5670114fff516cafce908d4899803a33502c
2021-03-

starting characterisation


2021-03-21 02:59:21,051 - ecoinvent_3-7_cutoff_symm - INFO - Starting characterisation matching
2021-03-21 02:59:29,435 - ecoinvent_3-7_cutoff_symm - INFO - Characterisation matching done. C matrix created
2021-03-21 02:59:29,489 - ecoinvent_3-7_cutoff_symm - INFO - Starting to export to file
2021-03-21 02:59:29,490 - ecoinvent_3-7_cutoff_symm - INFO - about to write to file
2021-03-21 02:59:35,533 - ecoinvent_3-7_cutoff_symm - INFO - Final, symmetric, normalized matrices saved in /mnt/collection/data/ecoinvent/3.7.1/ecoinvent_3-7_cutoff_symmPandas_symmNorm.gz.pickle with SHA-1 of eacae8caf429611f80652ab265a6b468a0d19fe7
2021-03-21 02:59:37,805 - ecoinvent_3-7_cutoff_symm - INFO - about to write to file
2021-03-21 02:59:38,850 - ecoinvent_3-7_cutoff_symm - INFO - Final, symmetric, normalized matrices saved in /mnt/collection/data/ecoinvent/3.7.1/ecoinvent_3-7_cutoff_symmSparseMatrix_symmNorm.gz.pickle with SHA-1 of 849fada1a37e485002084f5b05295530ce20471f
2021-03-21 03:00:04,273 - ecoi

In [7]:
# Key tables stay available as attributes of the parser after the run;
# here we list the columns of the PRO table
parser.PRO.columns

Index(['activityId', 'productId', 'activityName', 'ISIC', 'price', 'priceUnit',
       'EcoSpoldCategory', 'geography', 'technologyLevel',
       'macroEconomicScenario', 'productionVolume', 'productName', 'unitName',
       'cpc', 'wet mass [kg]', 'dry mass [kg]', 'iron content [dimensionless]',
       'activityNameId', 'activityType', 'startDate', 'endDate',
       'activityName_duplicate'],
      dtype='object')

In [8]:
# Drop the parser to release the memory it holds; from here on we work with
# the exported system file produced above
del parser


In [9]:
# Load the pandas-format export written by ecospold_to_Leontief. The log above
# shows it was saved inside out_dir with project_name as the file-name prefix,
# so build the path from those variables instead of relying on the kernel's
# current working directory.
pandas_export_path = out_dir + '/' + project_name + 'Pandas_symmNorm.gz.pickle'

# NOTE: pickle.load can execute arbitrary code - only unpickle files that were
# produced locally (as this one was) or that come from a trusted source.
with gzip.open(pandas_export_path, 'rb') as f:
    data = pickle.load(f)

In [10]:
# The pickle holds a dict; list the entries it provides
data.keys()

dict_keys(['PRO', 'STR', 'IMP', 'A', 'F', 'C', 'PRO_header', 'STR_header', 'IMP_header', 'processingdata'])

In [11]:
# Display the A matrix (pandas truncates the view of a frame this large)
data['A']

index,072a4a12-de8a-4f34-86b3-bf66f975036c_aeaf5266-3f9c-4074-bd34-eba76a61760c,d5330133-59e3-4ea5-897e-35b0a4df5644_aeaf5266-3f9c-4074-bd34-eba76a61760c,930e30a8-4966-43d3-9ef5-a64de5307ea6_281fc4f0-c05d-410a-a784-06e3508e78e6,cc49b2d8-a33f-4f37-8eee-c86038937532_0d860eb4-1a25-41b4-a821-81f5726d86e5,da62e6a5-761c-4007-91d6-2ed3d4a576a2_0d860eb4-1a25-41b4-a821-81f5726d86e5,0a1dcbf0-0a92-46b5-9e22-b51d9b03c447_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,47b6a962-632c-440c-bc45-1d54f5aed13b_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,0fe86f1c-890e-404a-b961-b8a7d97d1f57_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,d28ada1c-8273-4836-bebc-7c005b776d0d_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,ecf86adc-f654-4148-851b-468be3812fa9_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,...,473f3e06-8433-4587-8d5a-43be4a7fcab6_fd27af60-7b68-4549-bf35-022eacd74f93,fcdf3ab2-d7b1-4a40-8c02-c411deb0a292_ad38bbd2-e41c-4509-9dc3-c5c708a36606,2ef6af80-ca0f-456b-82a6-65658cbffa0d_2cb0d6c6-349c-46fc-b31f-a036c847a267,35f65070-7e7d-4276-8686-773f7e952d07_ce165995-6d94-44a8-ac3b-d5860c2c7fdb,33ed0fed-c323-433b-9604-80c53a2bcc61_b3184435-d00c-5713-823d-5ff6741bfbbc,5b5a2d1e-8a5d-4a7f-b893-bfcb5f8884fe_d13ae29b-7d6a-55d2-8380-86343a80fed8,da6aec53-093a-4a2f-ae30-f73517f0aeaa_ed25f589-c6c8-56a7-a518-b01dd3e8a453,9a19469e-945d-4f72-9588-dacb02ddbb7b_101e55f2-2da0-528b-96fd-21687e242c1c,5318a73f-5103-430d-9e5c-37403bbc192e_451550a8-d9e6-4396-9f9a-97e752111228,723bad05-ccbe-451e-b63e-7972d4f7069a_5d15df3e-fa54-45d5-85c6-5e3f5b89b20e
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
072a4a12-de8a-4f34-86b3-bf66f975036c_aeaf5266-3f9c-4074-bd34-eba76a61760c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
d5330133-59e3-4ea5-897e-35b0a4df5644_aeaf5266-3f9c-4074-bd34-eba76a61760c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
930e30a8-4966-43d3-9ef5-a64de5307ea6_281fc4f0-c05d-410a-a784-06e3508e78e6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
cc49b2d8-a33f-4f37-8eee-c86038937532_0d860eb4-1a25-41b4-a821-81f5726d86e5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
da62e6a5-761c-4007-91d6-2ed3d4a576a2_0d860eb4-1a25-41b4-a821-81f5726d86e5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5b5a2d1e-8a5d-4a7f-b893-bfcb5f8884fe_d13ae29b-7d6a-55d2-8380-86343a80fed8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
da6aec53-093a-4a2f-ae30-f73517f0aeaa_ed25f589-c6c8-56a7-a518-b01dd3e8a453,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9a19469e-945d-4f72-9588-dacb02ddbb7b_101e55f2-2da0-528b-96fd-21687e242c1c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5318a73f-5103-430d-9e5c-37403bbc192e_451550a8-d9e6-4396-9f9a-97e752111228,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# The columns of the A matrix use pandas' SparseDtype
# (float32 values with a fill value of 0.0, as shown below)
data['A'].dtypes

index
072a4a12-de8a-4f34-86b3-bf66f975036c_aeaf5266-3f9c-4074-bd34-eba76a61760c    Sparse[float32, 0.0]
d5330133-59e3-4ea5-897e-35b0a4df5644_aeaf5266-3f9c-4074-bd34-eba76a61760c    Sparse[float32, 0.0]
930e30a8-4966-43d3-9ef5-a64de5307ea6_281fc4f0-c05d-410a-a784-06e3508e78e6    Sparse[float32, 0.0]
cc49b2d8-a33f-4f37-8eee-c86038937532_0d860eb4-1a25-41b4-a821-81f5726d86e5    Sparse[float32, 0.0]
da62e6a5-761c-4007-91d6-2ed3d4a576a2_0d860eb4-1a25-41b4-a821-81f5726d86e5    Sparse[float32, 0.0]
                                                                                     ...         
5b5a2d1e-8a5d-4a7f-b893-bfcb5f8884fe_d13ae29b-7d6a-55d2-8380-86343a80fed8    Sparse[float32, 0.0]
da6aec53-093a-4a2f-ae30-f73517f0aeaa_ed25f589-c6c8-56a7-a518-b01dd3e8a453    Sparse[float32, 0.0]
9a19469e-945d-4f72-9588-dacb02ddbb7b_101e55f2-2da0-528b-96fd-21687e242c1c    Sparse[float32, 0.0]
5318a73f-5103-430d-9e5c-37403bbc192e_451550a8-d9e6-4396-9f9a-97e752111228    Sparse[float32, 0.0]
723bad05-ccbe-

In [13]:
# Sparse storage pays off because the matrices are mostly zeros:
# .sparse.density reports the share of F's entries that are actually stored
# (i.e. differ from the fill value)
data['F'].sparse.density

0.004233579202211573