In [9]:
# Basic imports
import gzip
import os
import pickle
import sys

import numpy as np
import pandas as pd

In [2]:
# Import of the ecospold2matrix module from a local source directory.
# NOTE: this path is specific to the machine the notebook was run on; adjust it
# to wherever ecospold2matrix lives on yours.
e2m_src_dir = '/home/bill/software/ecospold2matrix/ecospold2matrix/'
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if e2m_src_dir not in sys.path:
    sys.path.append(e2m_src_dir)
import ecospold2matrix as e2m

In [3]:
# paths
# All inputs and outputs for this release sit under one root directory, so the
# individual paths are derived from it instead of repeating the root in full.
ecoinvent_root = '/mnt/collection/data/ecoinvent/3.6/'
raw_data_dir = ecoinvent_root + 'raw_data/'

# Directory with the ecoSpold02 files to parse
ecospold_dir = raw_data_dir + 'ecoinvent_3.6_cut-off_ecoSpold02'
# Appears in the log messages and as the prefix of the output file names
project_name = 'ecoinvent_3-6_cutoff_symm'
# Where the generated files are saved
out_dir = ecoinvent_root
# Spreadsheet providing the characterisation factors
characterisation_file = raw_data_dir + 'ecoinvent_3.6_LCIA_implementation/LCIA_implementation_3.6.xlsx'

In [4]:
# Names of the product properties handed to the parser via PRO_properties
product_properties = ('dry mass', 'wet mass', 'iron content', 'carbon content')

# Create the parser object; most parsing parameters are fixed here
parser = e2m.Ecospold2Matrix(
    sys_dir=ecospold_dir,            # unit process and master data directory
    project_name=project_name,
    out_dir=out_dir,                 # where the results are saved
    characterisation_file=characterisation_file,
    float32=False,
    nan2null=True,                   # replace NaN with 0.0 in all matrices
    PRO_properties=product_properties,
)

2020-05-21 13:29:15,593 - ecoinvent_3-6_cutoff_symm - INFO - Ecospold2Matrix Processing
2020-05-21 13:29:15,600 - ecoinvent_3-6_cutoff_symm - INFO - Project name: ecoinvent_3-6_cutoff_symm
2020-05-21 13:29:15,601 - ecoinvent_3-6_cutoff_symm - INFO - Unit process and Master data directory: /mnt/collection/data/ecoinvent/3.6/raw_data/ecoinvent_3.6_cut-off_ecoSpold02
2020-05-21 13:29:15,602 - ecoinvent_3-6_cutoff_symm - INFO - Data saved in: /mnt/collection/data/ecoinvent/3.6
2020-05-21 13:29:15,603 - ecoinvent_3-6_cutoff_symm - INFO - Replace Not-a-Number instances with 0.0 in all matrices
2020-05-21 13:29:15,603 - ecoinvent_3-6_cutoff_symm - INFO - Pickle intermediate results to files
2020-05-21 13:29:15,604 - ecoinvent_3-6_cutoff_symm - INFO - Order processes based on: ISIC, activityName
2020-05-21 13:29:15,604 - ecoinvent_3-6_cutoff_symm - INFO - Order elementary exchanges based on: comp, name, subcomp


In [5]:
# Optional: list the names of all properties we have access to
# (presumably the valid choices for PRO_properties above — TODO confirm)
parser.get_all_properties()

['amount 1 of flow in kg',
 'EcoSpold01Allocation_other_78',
 'mass concentration, titanium',
 'EcoSpold01Allocation_undefined_28',
 'molding efficiency',
 'ash content',
 'EcoSpold01Allocation_undefined_153',
 'silver content',
 'concentration, potassium',
 'UVEK_absolute_amount_1',
 'carbon content, non-fossil',
 'thallium content',
 'EcoSpold01Allocation_undefined_72',
 'concentration of amount 5',
 'EcoSpold01Allocation_other_101',
 'EcoSpold01Allocation_undefined_169',
 'EcoSpold01Allocation_other_51',
 'EcoSpold01Allocation_undefined_32',
 'EcoSpold01Allocation_undefined_105',
 'mass concentration, dissolved nitrite NO2 as N',
 'EcoSpold01Allocation_physical_1',
 'alloy additives',
 'EcoSpold01Allocation_undefined_21',
 'EcoSpold01Allocation_other_16',
 'nitrogen oxides emissions tier T2 for hp < 175',
 'mass concentration, nickel',
 'EcoSpold01Allocation_other_124',
 'radium content',
 'EcoSpold01Allocation_other_86',
 'carbon monoxide emissions tier T0 for hp < 100',
 'nitrogen

In [6]:
# Process the ecoSpold files and export the resulting matrices in the
# requested file formats (progress is reported in the log below)
parser.ecospold_to_Leontief(
    fileformats=['Pandas', 'SparseMatrix'],
    with_absolute_flows=True,
)

2020-05-21 13:29:40,002 - ecoinvent_3-6_cutoff_symm - INFO - Products extracted from IntermediateExchanges.xml with SHA-1 of f462df6b314c131986d790d4503f806e46126845
2020-05-21 13:30:14,981 - ecoinvent_3-6_cutoff_symm - INFO - Activities extracted from ActivityIndex.xml with SHA-1 of 1026b1b395d62da475f6eea059907932ee542bc4
2020-05-21 13:30:15,082 - ecoinvent_3-6_cutoff_symm - INFO - Processing 18121 files in /mnt/collection/data/ecoinvent/3.6/raw_data/ecoinvent_3.6_cut-off_ecoSpold02/datasets
2020-05-21 13:31:47,525 - ecoinvent_3-6_cutoff_symm - INFO - Flows saved in /mnt/collection/data/ecoinvent/3.6/raw_data/ecoinvent_3.6_cut-off_ecoSpold02/flows.pickle with SHA-1 of 5fd4f8e230ae45ded280c4ec0d59f1e25ce5ac80
2020-05-21 13:31:47,625 - ecoinvent_3-6_cutoff_symm - INFO - Processing 18121 files - this may take a while ...
2020-05-21 13:33:11,152 - ecoinvent_3-6_cutoff_symm - INFO - Elementary flows extracted from ElementaryExchanges.xml with SHA-1 of 3abfcfbaacb3cc91011ea4721c4bbeaa154f4

starting characterisation


2020-05-21 13:34:44,353 - ecoinvent_3-6_cutoff_symm - INFO - Will use column 7, named CF_36, for characterisation factors
2020-05-21 13:34:44,354 - ecoinvent_3-6_cutoff_symm - INFO - Starting characterisation matching
2020-05-21 13:34:53,018 - ecoinvent_3-6_cutoff_symm - INFO - Characterisation matching done. C matrix created
2020-05-21 13:34:53,075 - ecoinvent_3-6_cutoff_symm - INFO - Starting to export to file
2020-05-21 13:34:53,075 - ecoinvent_3-6_cutoff_symm - INFO - about to write to file
2020-05-21 13:34:58,897 - ecoinvent_3-6_cutoff_symm - INFO - Final, symmetric, normalized matrices saved in /mnt/collection/data/ecoinvent/3.6/ecoinvent_3-6_cutoff_symmPandas_symmNorm.gz.pickle with SHA-1 of fab62b5469a1958f7076e388573796db48b88a73
2020-05-21 13:35:04,169 - ecoinvent_3-6_cutoff_symm - INFO - Final, symmetric, scaled-up flow matrices saved in /mnt/collection/data/ecoinvent/3.6/ecoinvent_3-6_cutoff_symmPandas_symmScale.gz.pickle with SHA-1 of 22ac4e0d1ecd6c1d410e9fa4f0a021df75abee

In [7]:
# Key variables can be accessed directly on the parser object, e.g. PRO, whose
# rows hold per-process metadata (activity, product, geography, price, ...).
# Here: the row at position 256.
parser.PRO.iloc[256]

activityId                                   8a3d1a66-89fc-479f-857e-3852175bf0ee
productId                                    98813ba1-c1a0-469d-99e0-6f2d8453c47d
activityName                         bell pepper production, in heated greenhouse
ISIC                            0113:Growing of vegetables and melons, roots a...
price                                                                    0.687623
priceUnit                                                                 EUR2005
EcoSpoldCategory                                                              NaN
geography                                                                     GLO
technologyLevel                                                           Current
macroEconomicScenario                                           Business-as-Usual
productionVolume                                                      3.44975e+10
productName                                                           bell pepper
unitName        

In [None]:
# Drop the parser to free the RAM it holds; next we load the system file it just produced
del parser


In [11]:
# Load the normalized system from the location the parser saved it to
# (out_dir, file name prefixed with project_name) instead of relying on the
# notebook's working directory happening to be that folder.
system_file = os.path.join(out_dir, project_name + 'Pandas_symmNorm.gz.pickle')
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with gzip.open(system_file, 'rb') as f:
    data = pickle.load(f)

In [12]:
# Names of the objects stored in the loaded file
data.keys()

dict_keys(['PRO', 'STR', 'IMP', 'A', 'F', 'C', 'PRO_header', 'STR_header', 'IMP_header', 'processingdata'])

In [17]:
# Display the A matrix (pandas abbreviates the view). Rows and columns are
# labelled with two UUIDs joined by '_' — presumably activityId_productId, TODO confirm
data['A']

index,c8b440f1-9fc2-4ca8-9be9-557f35379138_aeaf5266-3f9c-4074-bd34-eba76a61760c,4e3ebe0d-fbf1-4f6f-864b-6751b69c9757_aeaf5266-3f9c-4074-bd34-eba76a61760c,c2d31fc1-1199-43b7-89b5-166f451af9b5_281fc4f0-c05d-410a-a784-06e3508e78e6,c5897e33-3ed2-445a-9f68-7274dbb508df_0d860eb4-1a25-41b4-a821-81f5726d86e5,7248ba47-11dd-47e5-b5c6-02b942bc3f16_0d860eb4-1a25-41b4-a821-81f5726d86e5,fad1fdbf-677f-48f3-8ad3-58e4834dec9a_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,d8ffe66f-0406-42ab-a327-33d510452291_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,f0cca009-1b74-400d-a011-cd10f2239468_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,69964911-3ae6-4dad-b1c2-238a9c360cc0_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,c8ab41c1-0274-45a1-883f-09b19d8d5991_3f6dada9-2497-4e1c-9e1b-eabafa6920f8,...,be9a71fe-fb3d-4aad-8f42-226042b9b89d_fd27af60-7b68-4549-bf35-022eacd74f93,9fdd1edb-b742-4cc4-9c1b-21997ad46b61_ad38bbd2-e41c-4509-9dc3-c5c708a36606,b4c75eda-be8b-4797-80e3-d4ae473a4390_2cb0d6c6-349c-46fc-b31f-a036c847a267,cedddfe5-3a4e-496c-80d7-7671e44d2c1e_ce165995-6d94-44a8-ac3b-d5860c2c7fdb,c7e32113-5fef-428e-bff1-dd620c55772f_b3184435-d00c-5713-823d-5ff6741bfbbc,e705fefd-3afb-4889-a59f-ddfeb6b5779e_d13ae29b-7d6a-55d2-8380-86343a80fed8,8905d83f-7d46-4f0f-88bf-0aa7a8e9ea83_ed25f589-c6c8-56a7-a518-b01dd3e8a453,e4964245-a8dc-4daa-bcd0-5e73099bab33_101e55f2-2da0-528b-96fd-21687e242c1c,1d009030-88b9-4784-be04-ed520ad97f0b_451550a8-d9e6-4396-9f9a-97e752111228,fa8f7c8e-277f-4e71-9a5e-28b13ca93bca_5d15df3e-fa54-45d5-85c6-5e3f5b89b20e
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
c8b440f1-9fc2-4ca8-9be9-557f35379138_aeaf5266-3f9c-4074-bd34-eba76a61760c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4e3ebe0d-fbf1-4f6f-864b-6751b69c9757_aeaf5266-3f9c-4074-bd34-eba76a61760c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
c2d31fc1-1199-43b7-89b5-166f451af9b5_281fc4f0-c05d-410a-a784-06e3508e78e6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
c5897e33-3ed2-445a-9f68-7274dbb508df_0d860eb4-1a25-41b4-a821-81f5726d86e5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7248ba47-11dd-47e5-b5c6-02b942bc3f16_0d860eb4-1a25-41b4-a821-81f5726d86e5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
e705fefd-3afb-4889-a59f-ddfeb6b5779e_d13ae29b-7d6a-55d2-8380-86343a80fed8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8905d83f-7d46-4f0f-88bf-0aa7a8e9ea83_ed25f589-c6c8-56a7-a518-b01dd3e8a453,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e4964245-a8dc-4daa-bcd0-5e73099bab33_101e55f2-2da0-528b-96fd-21687e242c1c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1d009030-88b9-4784-be04-ed520ad97f0b_451550a8-d9e6-4396-9f9a-97e752111228,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# The columns of the A matrix use the new pandas SparseDtype format
# (here: float64 values with a fill value of 0.0)
data['A'].dtypes

index
c8b440f1-9fc2-4ca8-9be9-557f35379138_aeaf5266-3f9c-4074-bd34-eba76a61760c    Sparse[float64, 0.0]
4e3ebe0d-fbf1-4f6f-864b-6751b69c9757_aeaf5266-3f9c-4074-bd34-eba76a61760c    Sparse[float64, 0.0]
c2d31fc1-1199-43b7-89b5-166f451af9b5_281fc4f0-c05d-410a-a784-06e3508e78e6    Sparse[float64, 0.0]
c5897e33-3ed2-445a-9f68-7274dbb508df_0d860eb4-1a25-41b4-a821-81f5726d86e5    Sparse[float64, 0.0]
7248ba47-11dd-47e5-b5c6-02b942bc3f16_0d860eb4-1a25-41b4-a821-81f5726d86e5    Sparse[float64, 0.0]
                                                                                     ...         
e705fefd-3afb-4889-a59f-ddfeb6b5779e_d13ae29b-7d6a-55d2-8380-86343a80fed8    Sparse[float64, 0.0]
8905d83f-7d46-4f0f-88bf-0aa7a8e9ea83_ed25f589-c6c8-56a7-a518-b01dd3e8a453    Sparse[float64, 0.0]
e4964245-a8dc-4daa-bcd0-5e73099bab33_101e55f2-2da0-528b-96fd-21687e242c1c    Sparse[float64, 0.0]
1d009030-88b9-4784-be04-ed520ad97f0b_451550a8-d9e6-4396-9f9a-97e752111228    Sparse[float64, 0.0]
fa8f7c8e-277f-

In [16]:
# The sparse format is used because all matrices are quite sparse; for example,
# the density of F (share of entries that differ from the fill value):
data['F'].sparse.density

0.004792547978964625