In [1]:
import gzip
import os
import pickle
import sys

import numpy as np
import pandas as pd

In [2]:
# Make the locally checked-out ecospold2matrix sources (imported in the next
# cell) and a personal modules directory importable. Both locations are
# machine-specific absolute paths.
sys.path.extend([
    '/home/bill/software/ecospold2matrix/ecospold2matrix/',
    '/home/bill/software/Python/Modules/',
])

In [3]:
import ecospold2matrix as e2m

In [4]:
# Name under which the parser logs and labels this run
project_name = 'ecoinvent_3-6_sut'

# Inputs: the unallocated ecoinvent 3.6 datasets and the LCIA implementation workbook
ecospold_dir = '/mnt/collection/data/ecoinvent/3.6/raw_data/3.6_undefined_datasets_public_unallocated'
characterisation_file = '/mnt/collection/data/ecoinvent/3.6/raw_data/ecoinvent_3.6_LCIA_implementation/LCIA_implementation_3.6.xlsx'

# Output: directory in which the parser saves its results
out_dir = '/mnt/collection/data/ecoinvent/3.6/'

In [5]:
# Configure the ecospold2 -> matrix converter for this project.
parser = e2m.Ecospold2Matrix(
    sys_dir=ecospold_dir,  # logged as "Unit process and Master data directory"
    project_name=project_name,
    out_dir=out_dir,  # logged as "Data saved in"
    characterisation_file=characterisation_file,
    float32=False,
    nan2null=True,  # logged as "Replace Not-a-Number instances with 0.0 in all matrices"
    # Product properties to extract; presumably these become columns of
    # `parser.products` (e.g. 'iron content [dimensionless]') — TODO confirm
    PRO_properties=('dry mass', 'wet mass', 'iron content', 'carbon content'),
)

2020-05-21 14:38:25,370 - ecoinvent_3-6_sut - INFO - Ecospold2Matrix Processing
2020-05-21 14:38:25,375 - ecoinvent_3-6_sut - INFO - Project name: ecoinvent_3-6_sut
2020-05-21 14:38:25,376 - ecoinvent_3-6_sut - INFO - Unit process and Master data directory: /mnt/collection/data/ecoinvent/3.6/raw_data/3.6_undefined_datasets_public_unallocated
2020-05-21 14:38:25,376 - ecoinvent_3-6_sut - INFO - Data saved in: /mnt/collection/data/ecoinvent/3.6
2020-05-21 14:38:25,376 - ecoinvent_3-6_sut - INFO - Replace Not-a-Number instances with 0.0 in all matrices
2020-05-21 14:38:25,377 - ecoinvent_3-6_sut - INFO - Pickle intermediate results to files
2020-05-21 14:38:25,377 - ecoinvent_3-6_sut - INFO - Order processes based on: ISIC, activityName
2020-05-21 14:38:25,378 - ecoinvent_3-6_sut - INFO - Order elementary exchanges based on: comp, name, subcomp


In [6]:
# List the property names known to the parser; presumably these are the values
# that can be requested through `PRO_properties` — TODO confirm.
parser.get_all_properties()

['amount 1 of flow in kg',
 'EcoSpold01Allocation_other_78',
 'mass concentration, titanium',
 'EcoSpold01Allocation_undefined_28',
 'molding efficiency',
 'ash content',
 'EcoSpold01Allocation_undefined_153',
 'silver content',
 'concentration, potassium',
 'UVEK_absolute_amount_1',
 'carbon content, non-fossil',
 'thallium content',
 'EcoSpold01Allocation_undefined_72',
 'concentration of amount 5',
 'EcoSpold01Allocation_other_101',
 'EcoSpold01Allocation_undefined_169',
 'EcoSpold01Allocation_other_51',
 'EcoSpold01Allocation_undefined_32',
 'EcoSpold01Allocation_undefined_105',
 'mass concentration, dissolved nitrite NO2 as N',
 'EcoSpold01Allocation_physical_1',
 'alloy additives',
 'EcoSpold01Allocation_undefined_21',
 'EcoSpold01Allocation_other_16',
 'nitrogen oxides emissions tier T2 for hp < 175',
 'mass concentration, nickel',
 'EcoSpold01Allocation_other_124',
 'radium content',
 'EcoSpold01Allocation_other_86',
 'carbon monoxide emissions tier T0 for hp < 100',
 'nitrogen

In [7]:
# Generate a supply-and-use table (SUT) of the unallocated ecoinvent processes.
# With `make_untraceable` (optional), aggregate the rows of the use table
# such that we represent the total use of a product (e.g., aluminium),
# regardless of the technology or country of production.

parser.ecospold_to_sut(fileformats=['Pandas'], make_untraceable=True)

2020-05-21 14:38:25,835 - ecoinvent_3-6_sut - INFO - Products extracted from IntermediateExchanges.xml with SHA-1 of f462df6b314c131986d790d4503f806e46126845
2020-05-21 14:38:56,939 - ecoinvent_3-6_sut - INFO - Activities extracted from ActivityIndex.xml with SHA-1 of 1026b1b395d62da475f6eea059907932ee542bc4
2020-05-21 14:38:56,987 - ecoinvent_3-6_sut - INFO - Processing 16640 files in /mnt/collection/data/ecoinvent/3.6/raw_data/3.6_undefined_datasets_public_unallocated/datasets
2020-05-21 14:40:04,334 - ecoinvent_3-6_sut - INFO - Flows saved in /mnt/collection/data/ecoinvent/3.6/raw_data/3.6_undefined_datasets_public_unallocated/flows.pickle with SHA-1 of 7f90224eebd26e487ce9fde448017d9521b37e99
2020-05-21 14:40:04,429 - ecoinvent_3-6_sut - INFO - Processing 16640 files - this may take a while ...
2020-05-21 14:40:58,996 - ecoinvent_3-6_sut - INFO - Elementary flows extracted from ElementaryExchanges.xml with SHA-1 of 3abfcfbaacb3cc91011ea4721c4bbeaa154f45aa
2020-05-21 14:40:59,119 - 

In [8]:
# The parser exposes its tables as attributes for inspection; here, the
# products that report an iron content (rows without a value are dropped).
parser.products['iron content [dimensionless]'].dropna()

764ceaef-674e-43d0-8003-11e0134031ec    0.0608500189744765
acb2a59b-ce48-43b9-8d27-31fbd5030e70                0.3635
c388d470-e83c-4f6f-93ec-3861dd53db5b                  0.19
e9d62932-4f38-41b4-95ea-4bf77569fcc8                  0.63
e72f7aa8-2369-4498-992e-9db51852afc6                     0
63bf19bc-8cbc-4d14-906e-abfa110dbb5f              0.014628
e6ba6c24-83b4-4261-aab6-abfa1114116c              0.015374
e63aa910-8938-49f1-ac0e-abfa111b3039              0.015374
7b132b21-631d-49d8-8720-abfa11264288              0.015374
d218fa09-3958-455e-8e4b-abfa1132af93              0.013963
633abfc6-0553-458f-b3cb-abfa114505ae               0.12219
7c3dbbb9-cc2d-43e8-acf9-abfa115d2ae5              0.014771
833ce149-4770-47bc-bff0-abfa11620608              0.030113
74e6ad5d-3831-4496-a046-abfa116962fb                 0.379
16a8785b-286a-4084-811e-abfa119e7ceb             0.0011732
d7a15d6e-8959-4841-8404-b2094db330a7                0.0044
2840620c-e40b-430e-b7b6-b46e9faf410b                 0.1

In [9]:
# Drop the notebook's reference to the parser so that its memory can be reclaimed
del parser

In [10]:
# Load the SUT written by `parser.ecospold_to_sut`. The file is looked up in
# `out_dir` (where the parser logs that it saves its data) rather than in the
# current working directory, so the cell does not depend on where the kernel
# was started.
# NOTE(review): the file name is assumed to be `project_name` followed by
# 'Pandas_SUT.gz.pickle', which reproduces the name previously hard-coded here.
# NOTE: unpickling can execute arbitrary code; only load files that this
# notebook (or another trusted source) produced.
sut_path = os.path.join(out_dir, project_name + 'Pandas_SUT.gz.pickle')
with gzip.open(sut_path, 'rb') as f:
    sut = pickle.load(f)

In [11]:
# Names of the tables bundled in the loaded SUT dictionary
sut.keys()

dict_keys(['products', 'activities', 'STR', 'U', 'V', 'V_prodVol', 'G_act', 'processingdata'])

In [12]:
# Display U (presumably the use table — TODO confirm); pandas abbreviates the
# rendering of a frame this large
sut['U']

Unnamed: 0,f7e93a25-56e4-4268-a603-3bfd57c79eff,bd6cb90b-84dc-47ef-bda6-275ca88d94cd,2fd2b47f-d760-4559-aa11-37b71991deb3,7156adef-a44d-463a-933f-ae67df1cedd8,19a6cf49-7787-4a6f-b04e-4604fcd65bec,df3186af-bf58-4a1e-b27f-854ede231863,4a91ab17-d38d-407d-a6e6-3a5be8f61c01,9d6c55a5-b300-4fe9-b6f0-24172dde221a,ad25bc39-5418-472b-b30b-a118813da3e5,d2f46d76-75b3-4209-9ee6-7576f26ee0ac,...,27018094-4ec2-4b31-bec6-2a01e9984c89,7e539797-3f3e-4c03-a4b8-e072951fea63,7d039e42-6b4e-4a58-9bfc-e98376841746,81bdc6fb-bb1d-4929-9631-45c0a493a9f5,306fe090-1b65-43aa-b5dd-984d476c11a5,8490ebd8-5ff3-4349-a7cb-e9d409b47276,ce25df0d-8e40-41f0-8617-02fadd6ba2e4,0331b4f1-64ee-4b7d-af6b-fd7f4f621822,6667a850-9ae6-4be5-8aaa-b51ecd9af096,c99f8079-f484-439b-9128-86ea1ca46ecf
42761d87-05d9-4877-b21e-001ecf0c747d,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
c4f00122-eccc-4c3c-8068-0022aa7bf13b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
c7bd9b85-63d3-481e-b51b-00435cbd54e8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
86247d21-f010-4ce3-a76d-0050837ddfc0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
915500af-3702-4039-8ea7-005a5f84cd2d,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2677ef49-b019-4da3-bc8e-ffa396bad75f,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b397787f-c69d-45a6-876c-ffc8a6be4fc6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1e27a902-25f3-449e-bf0f-ffe5192c0c81,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
a7f92ffa-a459-4c1c-ae8b-ffefe3804099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Dimensions of U as (rows, columns)
sut['U'].shape

(3205, 35708)

In [14]:
# Share of the entries of U that are explicitly stored, i.e. that differ from
# the sparse fill value
sut['U'].sparse.density

0.0007859205373031768

In [15]:
# Per-column dtypes of U
sut['U'].dtypes

f7e93a25-56e4-4268-a603-3bfd57c79eff    Sparse[float64, 0.0]
bd6cb90b-84dc-47ef-bda6-275ca88d94cd    Sparse[float64, 0.0]
2fd2b47f-d760-4559-aa11-37b71991deb3    Sparse[float64, 0.0]
7156adef-a44d-463a-933f-ae67df1cedd8    Sparse[float64, 0.0]
19a6cf49-7787-4a6f-b04e-4604fcd65bec    Sparse[float64, 0.0]
                                                ...         
8490ebd8-5ff3-4349-a7cb-e9d409b47276    Sparse[float64, 0.0]
ce25df0d-8e40-41f0-8617-02fadd6ba2e4    Sparse[float64, 0.0]
0331b4f1-64ee-4b7d-af6b-fd7f4f621822    Sparse[float64, 0.0]
6667a850-9ae6-4be5-8aaa-b51ecd9af096    Sparse[float64, 0.0]
c99f8079-f484-439b-9128-86ea1ca46ecf    Sparse[float64, 0.0]
Length: 35708, dtype: object

In [17]:
# Total memory footprint of U in bytes (index plus all columns)
sut['U'].memory_usage().sum()

1104968