In [1]:
%matplotlib inline

In [2]:
import brightway2 as bw
import numpy as np
import pyprind
from matplotlib import pyplot as plt
import seaborn as sb

In [3]:
# Activate the Brightway project that holds the databases used below.
bw.projects.set_current('Dimension reduction')

In [4]:
bw.databases

Databases dictionary with 2 object(s):
	biosphere3
	ecoinvent 3.6 cutoff

In [5]:
# Handle to the ecoinvent database; every later cell iterates over this object.
eidb = bw.Database("ecoinvent 3.6 cutoff") 
# Displayed as the cell result: the number of entries in the database.
len(eidb)

18121

In [6]:
# Collect every 'ReCiPe Midpoint (E) V1.13' impact category except
# 'natural land transformation'. Method identifiers are tuples, compared
# here on their first two elements.
methods = [
    method_key
    for method_key in bw.methods
    if method_key[0] == 'ReCiPe Midpoint (E) V1.13'
    if method_key[1] != 'natural land transformation'
]
methods

[('ReCiPe Midpoint (E) V1.13', 'freshwater ecotoxicity', 'FETPinf'),
 ('ReCiPe Midpoint (E) V1.13', 'human toxicity', 'HTPinf'),
 ('ReCiPe Midpoint (E) V1.13', 'marine ecotoxicity', 'METPinf'),
 ('ReCiPe Midpoint (E) V1.13', 'terrestrial ecotoxicity', 'TETPinf'),
 ('ReCiPe Midpoint (E) V1.13', 'metal depletion', 'MDP'),
 ('ReCiPe Midpoint (E) V1.13', 'agricultural land occupation', 'ALOP'),
 ('ReCiPe Midpoint (E) V1.13', 'climate change', 'GWP500'),
 ('ReCiPe Midpoint (E) V1.13', 'fossil depletion', 'FDP'),
 ('ReCiPe Midpoint (E) V1.13', 'freshwater eutrophication', 'FEP'),
 ('ReCiPe Midpoint (E) V1.13', 'ionising radiation', 'IRP_HE'),
 ('ReCiPe Midpoint (E) V1.13', 'marine eutrophication', 'MEP'),
 ('ReCiPe Midpoint (E) V1.13', 'ozone depletion', 'ODPinf'),
 ('ReCiPe Midpoint (E) V1.13', 'particulate matter formation', 'PMFP'),
 ('ReCiPe Midpoint (E) V1.13', 'photochemical oxidant formation', 'POFP'),
 ('ReCiPe Midpoint (E) V1.13', 'terrestrial acidification', 'TAP500'),
 ('ReCiPe Mi

In [8]:
# Pick an arbitrary dataset; it is used as the initial demand when the LCA
# object is created in the next cell.
random_act = eidb.random()
random_act

'petrol, low-sulfur, import from Europe' (kilogram, CH, None)

In [9]:
# Build a single LCA object (one unit of random_act, first method) and run
# the inventory and impact assessment once. Later cells reuse this object
# through switch_method() and redo_lci() instead of creating new ones.
lca = bw.LCA({random_act: 1}, methods[0])
lca.lci()
lca.lcia()

In [10]:
lca.biosphere_matrix

<2077x18121 sparse matrix of type '<class 'numpy.float64'>'
	with 407437 stored elements in Compressed Sparse Row format>

In [11]:
n = lca.biosphere_matrix.shape[0] # number of bioflows

In [13]:
# Result cube with axes (dataset, method, bioflow), filled in by the loop below.
# It is a dense float64 array: at the (18121, 17, 2077) shape reported further
# down this is about 5.1 GB of memory.
results_array = np.zeros((len(eidb), len(methods), n))

In [14]:
# Cache one characterization matrix per method, in the same order as
# `methods`. switch_method() replaces lca.characterization_matrix, so a copy
# is stored before moving on to the next method.
method_matrices = []

for method_key in methods:
    lca.switch_method(method_key)
    cf_matrix = lca.characterization_matrix.copy()
    method_matrices.append(cf_matrix)

In [15]:
# Fill results_array[i, j, :] with the share of each bioflow in the score of
# dataset i under method j.
#
# Row i is simply the i-th dataset yielded by this pass over eidb, so the
# dataset keys are recorded in results_row_keys to keep an explicit
# row -> activity mapping.
# NOTE(review): bw2data may yield datasets in random order when no order_by
# is set on the database - confirm before pairing these rows by position with
# a second pass over eidb.
results_row_keys = []

for i, ds in enumerate(pyprind.prog_bar(eidb)):
    results_row_keys.append(ds.key)
    lca.redo_lci({ds: 1})
    for j, cf_matrix in enumerate(method_matrices):
        # Characterized inventory summed over axis 1: one value per bioflow.
        vector = np.ravel((cf_matrix * lca.inventory).sum(axis=1))
        total = vector.sum()
        # A zero total cannot be normalized; the row is left as all zeros.
        if not total:
            continue
        # Normalize to fractions of total
        vector /= total
        results_array[i, j, :] = vector

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:08:36


Save the result to disk so that the roughly nine-minute calculation above does not have to be repeated.

In [15]:
# Persist the full result cube. np.save appends the ".npy" extension, so the
# file written is "lcia-results.npy" in the current working directory.
np.save("lcia-results", results_array)

In [16]:
results_array.shape

(18121, 17, 2077)

In [17]:
# Flag every entry above 1 % of its (dataset, method) total, then keep a
# bioflow if it is flagged for at least one dataset under at least one method.
one_percent = results_array > 0.01
mask = np.any(one_percent, axis=(0, 1))
# Number of bioflows that survive the filter.
mask.sum()

656

In [19]:
# Keep only method index 0 (the first entry of `methods`, ReCiPe
# 'freshwater ecotoxicity', 'FETPinf') and only the bioflows selected by
# `mask`. The values are the normalized per-bioflow shares stored in
# results_array, not absolute LCA scores.
compressed = results_array[:, 0, mask]

In [20]:
compressed.shape

(18121, 656)

In [21]:
# Second name for the same array object (no copy is made).
score_array = compressed
score_array.shape

(18121, 656)

In [23]:
# form a dataframe including activity id, name, isic, cpc and scores for each elementary flow
def _classification_code(classifications, system):
    """Return the code of the first `system` entry in `classifications`.

    `classifications` is a sequence of tuples such as
    ('ISIC rev.4 ecoinvent', '3830:...'); the code is the part of the second
    element before the first ':'. Returns 0 when `classifications` is empty
    or has no entry for `system`, which is the placeholder this notebook uses
    for "not classified".
    """
    for entry in classifications or ():
        if system in entry:
            return entry[1].split(':')[0]
    return 0


new_matrix = {}    # activity matrix index -> row of score_array
act_id = []        # activity matrix index (lca.activity_dict value)
act_name = []
act_location = []
isic_num = []
cpc_num = []

# enumerate() supplies the row counter, so the global `n` (number of
# bioflows) is no longer overwritten by this cell.
for row, obj in enumerate(pyprind.prog_bar(eidb)):
    matrix_index = lca.activity_dict[obj.key]
    act_id.append(matrix_index)
    act_name.append(obj['name'])
    act_location.append(obj['location'])
    # NOTE(review): score_array[row] is paired with `obj` purely by position,
    # which assumes eidb yields datasets in the same order as when
    # results_array was filled - confirm that the iteration order is stable.
    new_matrix[matrix_index] = score_array[row]

    # Exactly one ISIC and one CPC value is appended per activity, so the
    # lists stay aligned even if a dataset carries several entries for the
    # same classification system.
    classifications = obj['classifications']
    isic_num.append(_classification_code(classifications, 'ISIC rev.4 ecoinvent'))
    cpc_num.append(_classification_code(classifications, 'CPC'))

# All six lengths must be equal for the DataFrames built below to line up.
len(act_id),len(act_name),len(cpc_num),len(isic_num),len(new_matrix),len(act_location)
    

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:01:35


(18121, 18121, 18121, 18121, 18121, 18121)

In [24]:
import pandas as pd

# A dict of arrays becomes one column per activity id; transposing gives one
# row per activity id and one column per retained bioflow.
df = pd.DataFrame(new_matrix)
df_new = df.transpose()
df_new.head(n=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,646,647,648,649,650,651,652,653,654,655
5372,1.465624e-10,5.3276e-16,0.0,4.000396e-13,0.0,9.442185e-15,4.332517e-11,2.294195e-10,1.107797e-10,5.377985e-09,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5681,5.643609e-10,1.851118e-15,0.0,2.029782e-12,0.0,2.818631e-15,9.648675e-11,9.126056e-10,5.673006e-10,6.103602e-08,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14731,6.756535e-08,4.253612e-14,0.0,9.655813e-11,0.0,1.630238e-14,4.196932e-10,1.106172e-07,1.133069e-09,4.416901e-08,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6051,2.688926e-11,8.128458000000001e-17,-0.0,1.806148e-13,-0.0,2.104872e-16,4.350754e-12,4.070606e-11,9.45686e-11,1.459729e-09,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
15980,4.879869e-10,2.849344e-15,0.0,4.854394e-12,0.0,8.302221e-15,1.631697e-10,5.904087e-10,8.523312e-10,6.135968e-08,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5495,2.252089e-10,3.452383e-15,0.0,1.477599e-12,0.0,4.415384e-15,6.399725e-11,3.152194e-10,5.91342e-10,4.317955e-08,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11234,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1662,3.906341e-10,1.713121e-15,0.0,6.606208e-12,0.0,1.282085e-14,1.788162e-10,5.741968e-10,7.337264e-10,7.131222e-08,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13065,7.348301e-10,8.88042e-16,0.0,1.531545e-12,0.0,9.731165e-15,2.145952e-10,1.099838e-09,9.540607e-08,9.733253e-06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17079,1.849384e-09,2.901609e-15,0.0,5.596258e-12,0.0,7.753624e-15,4.451321e-11,2.521503e-09,7.237423e-10,6.555753e-08,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
# Write the per-activity bioflow shares to CSV; the activity id index is
# written as the first column.
# NOTE(review): the "_0" suffix presumably refers to method index 0 - confirm.
df_new.to_csv('all_act_bioflow_features_0.csv')

In [26]:
# One metadata row per activity, built from the parallel lists collected
# above. Column order: activity, name, location, ISIC, CPC.
df_act_cpc_isic = pd.DataFrame(
    dict(
        activity=act_id,
        name=act_name,
        location=act_location,
        ISIC=isic_num,
        CPC=cpc_num,
    )
)
df_act_cpc_isic.head()

Unnamed: 0,activity,name,location,ISIC,CPC
0,5372,"treatment of waste plaster-cardboard sandwich,...",CH,3830,39990
1,5681,"electricity, high voltage, import from NO",SE,3510,17100
2,14731,ethanol production from sweet sorghum,RoW,19a,35491
3,6051,"electricity production, photovoltaic, 570kWp o...",CA-AB,3510b,17100
4,15980,glass etching factory construction,DK,4100,53269


In [27]:
# Index the metadata by activity id.
# Guarded so that re-running this cell is a no-op instead of a KeyError:
# after the first run 'activity' is the index and no longer a column.
if 'activity' in df_act_cpc_isic.columns:
    df_act_cpc_isic = df_act_cpc_isic.set_index('activity')
df_act_cpc_isic.head()

Unnamed: 0_level_0,name,location,ISIC,CPC
activity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5372,"treatment of waste plaster-cardboard sandwich,...",CH,3830,39990
5681,"electricity, high voltage, import from NO",SE,3510,17100
14731,ethanol production from sweet sorghum,RoW,19a,35491
6051,"electricity production, photovoltaic, 570kWp o...",CA-AB,3510b,17100
15980,glass etching factory construction,DK,4100,53269


In [28]:
# Inner-join the activity metadata with the per-bioflow shares on the
# activity id index of both frames.
# validate='one_to_one' makes pandas raise a MergeError if an activity id is
# duplicated on either side, instead of silently multiplying rows.
df_act_features = df_act_cpc_isic.merge(
    df_new,
    how='inner',
    left_index=True,
    right_index=True,
    validate='one_to_one',
)
df_act_features.head()

Unnamed: 0_level_0,name,location,ISIC,CPC,0,1,2,3,4,5,...,646,647,648,649,650,651,652,653,654,655
activity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5372,"treatment of waste plaster-cardboard sandwich,...",CH,3830,39990,1.465624e-10,5.3276e-16,0.0,4.000396e-13,0.0,9.442185e-15,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5681,"electricity, high voltage, import from NO",SE,3510,17100,5.643609e-10,1.851118e-15,0.0,2.029782e-12,0.0,2.818631e-15,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14731,ethanol production from sweet sorghum,RoW,19a,35491,6.756535e-08,4.253612e-14,0.0,9.655813e-11,0.0,1.630238e-14,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6051,"electricity production, photovoltaic, 570kWp o...",CA-AB,3510b,17100,2.688926e-11,8.128458000000001e-17,-0.0,1.806148e-13,-0.0,2.104872e-16,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
15980,glass etching factory construction,DK,4100,53269,4.879869e-10,2.849344e-15,0.0,4.854394e-12,0.0,8.302221e-15,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
# Write the combined table (metadata columns followed by bioflow shares) to
# CSV; the activity id index is written as the first column.
df_act_features.to_csv('all_act_name_location_cpc_isic_method_0.csv')