In [1]:
import pickle
import pandas as pd
import os           
import numpy as np
import re
import itertools
import plotly.express as px
#import brightway2 as bw 
import random 
import string

In [2]:
import sys
# Make the project-local helper modules importable. The relative path is resolved
# against the notebook's current working directory.
sys.path.append('../../Module')  #two levels up & then down to Module folder
# NOTE(review): star imports hide where names come from. The helpers called later in
# this notebook (unpickle_data, uniq_catg, res_compare_included_EF, final_EF_combined)
# are presumably defined in one of these two modules -- confirm and import by name.
from common_mod import *
from mod_resIC import *

In [3]:
# ---- notebook configuration -------------------------------------------------
# Labels of the four LCIA sources that are compared.
LCIAname = [
    "CML",
    "SP",
    "openLCA",
    "BW2",
]  #source LCIA name

# Impact-category label; used as file-name prefix when saving data/plots.
lcia_name = "CML_Abiotic depletion (element)"

# Worksheet of the SP workbook that holds this impact category.
SP_name = "Abiotic depletion"

# Column of the cleaned CML table that holds the CF values of this impact category.
cml_col_name = "ADPelement_kgSb"

# Positional indices of the descriptive CML columns kept for every IC:
# 2 = unit, 14 = cas_number, 15 = flow, 16 = subcategory, 17 = category.
# The index of cml_col_name is appended to this list later.
cml_col_keep = [
    2,
    14,
    15,
    16,
    17,
]

# Pickled raw CF tables of openLCA and BW2.
olca_filepathname = "../../Data_rawCFs/olca_CML_ADP_Sb_saved.dat"
bw2_filepathname =  "../../Data_rawCFs/BW2_CML2001_depletion of abiotic resources.dat"

# One entry per pairwise comparison; used in the final calculation to build the
# names of the result dicts ("dict_<pair>").
pairwise_name = [
    "SPvsCML",
    "SPvsBW2",
    "SPvsopenLCA",
    "openLCAvsBW2",
    "openLCAvsCML",
    "BW2vsCML",
]

<h3> read in raw cleaned-up CML and select ADP (element) cols </h3>

In [4]:
# Load the cleaned CML characterisation-factor workbook (first sheet, default header
# row). The path is relative to the notebook's working directory; the multi-row
# header option in the trailing comment is left disabled.
cml = pd.read_excel("CML_cleaned_11IC.xlsx" )  #header=[1,2,3]
# Show the column labels so the positional indices in cml_col_keep can be checked.
cml.columns

Index(['Unnamed: 0', 'Substance', 'unit', 'ADPelement_kgSb', 'ADPfossil_MJ',
       'GWP_kgCO2', 'ODP_kgCFC-11', 'HTP_kg1,4DCB', 'FAETP_kg1,4DCB',
       'MAETP_kg1,4DCB', 'TETP_kg1,4DCB', 'POFP_kgEthylene', 'AP_kgSO2',
       'EP_kgPO4', 'cas_number', 'flow', 'subcategory', 'category'],
      dtype='object')

In [5]:
# Add the positional index of the impact-category column to the columns to keep.
# Guarded so that re-running this cell does not append the same index a second time;
# a duplicate index would make the following .iloc selection return that column twice.
ic_col_idx = cml.columns.get_loc(cml_col_name)
if ic_col_idx not in cml_col_keep:
    cml_col_keep.append(ic_col_idx)
cml_col_keep

[2, 14, 15, 16, 17, 3]

In [6]:
# Keep only the descriptive columns plus the CF column of this impact category.
cml_raw = cml.iloc[:, cml_col_keep]
print(len(cml_raw))

# Rows without a CF value for this impact category are not characterised: drop them.
cml_raw = cml_raw.dropna(subset=[cml_col_name])
print(len(cml_raw))

# Harmonise the schema: CF column -> "value", then every column label in lower case.
cml_raw = (
    cml_raw
    .rename(columns={cml_col_name: "value"})
    .rename(columns=str.lower)
)

1961
49


In [7]:
# Lower-case the flow names so that name matching against other sources is case-insensitive.
cml_raw = cml_raw.assign(flow=cml_raw['flow'].str.lower())
cml_raw.head()

Unnamed: 0,unit,cas_number,flow,subcategory,category,value
1,kg,007429-90-5,aluminium,res_landscape_resources,resourcesIC,1.094332e-09
2,kg,007440-36-0,antimony,res_landscape_resources,resourcesIC,1.0
4,kg,007440-38-2,arsenic (as),res_landscape_resources,resourcesIC,0.002974487
5,kg,007440-39-3,barium,res_landscape_resources,resourcesIC,6.042037e-06
6,kg,007440-41-7,beryllium,res_landscape_resources,resourcesIC,1.259024e-05


<h3> read in raw SP </h3>

In [8]:
# Read the sheet of this impact category from the SP workbook.
# CAS numbers are read as strings so that leading zeros are preserved.
SP_cml = pd.read_excel(
    "../../Data_rawCFs/SP_CML-IA.xlsx",
    sheet_name=SP_name,
    header=[0],
    converters={'CAS': str},
)

print(len(SP_cml))

# Map the SP column labels onto the common schema, then lower-case the remaining labels.
SP_cml = (
    SP_cml
    .rename(columns={
        "Compartment": "category",
        "Subcompartment": "subcategory",
        "Substance": "flow",
        "Factor": "value",
        "CAS": "cas_number",
    })
    .rename(columns=str.lower)
)

108


In [9]:
# Lower-case the flow names so that name matching against other sources is case-insensitive.
SP_cml = SP_cml.assign(flow=SP_cml['flow'].str.lower())
SP_cml.head()

Unnamed: 0,category,subcategory,flow,cas_number,value,unit
0,Raw,(unspecified),aluminium,007429-90-5,1.09e-09,kg Sb eq / kg
1,Raw,(unspecified),antimony,007440-36-0,1.0,kg Sb eq / kg
2,Raw,(unspecified),arsenic,007440-38-2,0.00297,kg Sb eq / kg
3,Raw,(unspecified),barium,007440-39-3,6.04e-06,kg Sb eq / kg
4,Raw,(unspecified),beryllium,007440-41-7,1.26e-05,kg Sb eq / kg


<h3> read in raw openLCA dat </h3>

In [10]:
# Load the saved openLCA CF table, lower-case its column labels and its flow names.
# NOTE(review): unpickle_data presumably wraps pickle.load -- only use it on trusted files.
olca_cml = (
    unpickle_data(olca_filepathname)
    .rename(columns=str.lower)
    .assign(flow=lambda frame: frame['flow'].str.lower())
)
olca_cml.head()

Unnamed: 0,impact_method,impact_category_uuid,impact_category,reference_unit,flow_uuid,flow,cas_number,subcategory,category,unit,value
0,CML-IA baseline,d098dc03-4caa-3c76-84df-bdd28659d40f,Abiotic depletion,kg Sb eq,a55ba777-c1ec-338a-9482-fc6e006a56de,"platinum, pt 2.5e-4%, pd 7.3e-4%, rh 2.0e-5%, ...",007440-06-4,in ground,Resource,kg,2.22
1,CML-IA baseline,d098dc03-4caa-3c76-84df-bdd28659d40f,Abiotic depletion,kg Sb eq,e16fd15c-0ebc-55ba-8d3b-9704f13663cb,"arsenic, in ground",007440-38-2,in ground,Resource,kg,0.00297
2,CML-IA baseline,d098dc03-4caa-3c76-84df-bdd28659d40f,Abiotic depletion,kg Sb eq,2d65a3f7-2a10-4a10-ac9e-a0cc7cd57979,"gold, au 2.1e-4%, ag 2.1e-4%, in ore",007440-57-5,in ground,Resource,kg,52.0
3,CML-IA baseline,d098dc03-4caa-3c76-84df-bdd28659d40f,Abiotic depletion,kg Sb eq,4b8ac2cb-3fa6-4047-a9ab-183d9e63ccac,"pd, pd 2.0e-4%, pt 4.8e-4%, rh 2.4e-5%, ni 3.7...",,in ground,Resource,kg,0.571
4,CML-IA baseline,d098dc03-4caa-3c76-84df-bdd28659d40f,Abiotic depletion,kg Sb eq,d13b2665-505d-49e2-8edd-dc966b0342af,"platinum, in ground",,in ground,Resource,kg,2.22


<h3> read in BW2 </h3>

In [11]:
# Load the saved BW2 CF table and report how many rows it has.
# NOTE(review): unpickle_data presumably wraps pickle.load -- only use it on trusted files.
bw_cml = unpickle_data(bw2_filepathname)
print(len(bw_cml))

# Lower-case the flow names so that name matching against other sources is case-insensitive.
bw_cml = bw_cml.assign(flow=bw_cml['flow'].str.lower())
bw_cml.head()

158


Unnamed: 0,cas_number,flow,flow_uuid,type,category,subcategory,unit,value
0,007440-36-0,"antimony, in ground",47262180-8308-5d4c-9332-c77617e032ef,natural resource,natural resource,in ground,kilogram,0.77908
1,7440-39-3,"barium, in ground",240177d8-6f3b-43f5-8d1e-0c18114dfa02,natural resource,natural resource,in ground,kilogram,4.9088e-05
2,007726-95-6,"bromine, in water",45d6f26b-596b-5182-8c08-d6d975ff4efe,natural resource,natural resource,in water,kilogram,0.0066732
3,7440-43-9,"cadmium, in ground",bf377e4f-3a95-4ce2-a9ba-66ee31f00f60,natural resource,natural resource,in ground,kilogram,0.33008
4,007440-45-1,"cerium, in ground",4057f8b4-f20a-59c9-9bb7-fdeaf5ad106d,natural resource,natural resource,in ground,kilogram,5.32e-09


### 1. After preparing the data, inspect the emission compartments used by each source; they differ between sources and need to be converted to a common category (`common_catg`)

In [12]:
# Raw CF tables of the four sources, in the same order as LCIAname (CML, SP, openLCA, BW2).
all_raw_LCIAdf = [cml_raw, SP_cml, olca_cml, bw_cml]

In [13]:
# One uniq_catg() result per source, in the order of all_raw_LCIAdf.
uniq_catg_all_raw_LCIAdf = [uniq_catg(source_df) for source_df in all_raw_LCIAdf]

In [14]:
# Show the first element of each uniq_catg() result, one entry per source
# (presumably the number of distinct categories -- confirm against uniq_catg).
print([catg_summary[0] for catg_summary in uniq_catg_all_raw_LCIAdf])

[1, 1, 3, 3]


<h3> 2. unlike emission-type IC, for resources, no "common_category" will be added </h3>

<h4> <font color = 'red'> A given flow has the same CF value regardless of its compartment (except for fresh water/seawater) </font> </h4>

Using `res_check_EF_value(df, df_EF_name_col, df_EF_value_col, EF_list)`, where `df_EF_name_col` is `"flow"`.

### 3. final calculation, pair-wise comparison, corr. matrix

<h4> <font color = 'red'> Raw CML uses different flow-naming rules from all the other sources, so no identical flow names are found between CML and any other LCIA source </font> </h4>


In [15]:
# Create one module-level result dict per entry of pairwise_name:
# dict_SPvsCML, dict_SPvsBW2, dict_SPvsopenLCA, dict_openLCAvsBW2, dict_openLCAvsCML, dict_BW2vsCML.
# Each dict has four keys, in this order, whose values start as None and are filled in later:
#   <pair>_commonEF_sumtable, <pair>_commonEF_list, <pair>_result_diffEFs, <pair>_result_corr
for name in pairwise_name:
    keys = [
        name + suffix
        for suffix in ("_commonEF_sumtable", "_commonEF_list", "_result_diffEFs", "_result_corr")
    ]
    globals()['dict_' + name] = {key: None for key in keys}

In [16]:
# SP vs. raw CML, matched on the lower-cased flow name.
# k_list holds the dict's keys in creation order:
# [0] summary table, [1] list of common EFs, [2] combined EF values, [3] correlation.
k_list = list(dict_SPvsCML)

# NOTE(review): the displayed heads of SP_cml and cml_raw both contain e.g. "aluminium",
# yet this comparison reports 0 common EFs -- check for stray whitespace in the flow
# names or for the matching rule inside res_compare_included_EF.
dict_SPvsCML[k_list[0]], dict_SPvsCML[k_list[1]] = res_compare_included_EF(
    SP_cml, "flow", cml_raw, "flow", to_print="no")

common_EFs = dict_SPvsCML[k_list[1]]
if len(common_EFs) == 0:
    # No common flow: store a single all-NaN placeholder row so later steps still get a frame.
    # np.nan is used because the np.NaN alias no longer exists in NumPy >= 2.0.
    dict_SPvsCML[k_list[2]] = pd.DataFrame({'SP_value': np.nan, 'CML_value': np.nan}, index=[0])
else:
    # Call the helper once; element 0 holds the values of the first source, element 1 of the second.
    combined = final_EF_combined(SP_cml, "flow", "value", cml_raw, "flow", "value", common_EFs)
    dict_SPvsCML[k_list[2]] = pd.DataFrame(
        {'SP_value': combined[0], 'CML_value': combined[1]},
        index=common_EFs,
    )

Detailed differences won't be printed out, to print, add input argument: to_print = 'yes'.  
N of common EFs to be used is: 0


In [17]:
# SP vs. BW2, matched on the lower-cased flow name.
# k_list holds the dict's keys in creation order:
# [0] summary table, [1] list of common EFs, [2] combined EF values, [3] correlation.
k_list = list(dict_SPvsBW2)

dict_SPvsBW2[k_list[0]], dict_SPvsBW2[k_list[1]] = res_compare_included_EF(
    SP_cml, "flow", bw_cml, "flow", to_print="no")

common_EFs = dict_SPvsBW2[k_list[1]]
if len(common_EFs) == 0:
    # No common flow: store a single all-NaN placeholder row so later steps still get a frame.
    # np.nan is used because the np.NaN alias no longer exists in NumPy >= 2.0.
    dict_SPvsBW2[k_list[2]] = pd.DataFrame({'SP_value': np.nan, 'BW2_value': np.nan}, index=[0])
else:
    # Call the helper once; element 0 holds the values of the first source, element 1 of the second.
    combined = final_EF_combined(SP_cml, "flow", "value", bw_cml, "flow", "value", common_EFs)
    dict_SPvsBW2[k_list[2]] = pd.DataFrame(
        {'SP_value': combined[0], 'BW2_value': combined[1]},
        index=common_EFs,
    )

Detailed differences won't be printed out, to print, add input argument: to_print = 'yes'.  
N of common EFs to be used is: 0


In [18]:
# SP vs. openLCA, matched on the lower-cased flow name.
# k_list holds the dict's keys in creation order:
# [0] summary table, [1] list of common EFs, [2] combined EF values, [3] correlation.
k_list = list(dict_SPvsopenLCA)

dict_SPvsopenLCA[k_list[0]], dict_SPvsopenLCA[k_list[1]] = res_compare_included_EF(
    SP_cml, "flow", olca_cml, "flow", to_print="no")

common_EFs = dict_SPvsopenLCA[k_list[1]]
if len(common_EFs) == 0:
    # No common flow: store a single all-NaN placeholder row so later steps still get a frame.
    # np.nan is used because the np.NaN alias no longer exists in NumPy >= 2.0.
    dict_SPvsopenLCA[k_list[2]] = pd.DataFrame({'SP_value': np.nan, 'olca_value': np.nan}, index=[0])
else:
    # Call the helper once; element 0 holds the values of the first source, element 1 of the second.
    combined = final_EF_combined(SP_cml, "flow", "value", olca_cml, "flow", "value", common_EFs)
    dict_SPvsopenLCA[k_list[2]] = pd.DataFrame(
        {'SP_value': combined[0], 'olca_value': combined[1]},
        index=common_EFs,
    )

Detailed differences won't be printed out, to print, add input argument: to_print = 'yes'.  
N of common EFs to be used is: 108


In [19]:
# openLCA vs. BW2. Unlike the other pairs, these two sources are matched on "flow_uuid"
# rather than on the flow name.
# k_list holds the dict's keys in creation order:
# [0] summary table, [1] list of common EFs, [2] combined EF values, [3] correlation.
k_list = list(dict_openLCAvsBW2)

dict_openLCAvsBW2[k_list[0]], dict_openLCAvsBW2[k_list[1]] = res_compare_included_EF(
    olca_cml, "flow_uuid", bw_cml, "flow_uuid", to_print="no")

common_EFs = dict_openLCAvsBW2[k_list[1]]
if len(common_EFs) == 0:
    # No common flow: store a single all-NaN placeholder row so later steps still get a frame.
    # np.nan is used because the np.NaN alias no longer exists in NumPy >= 2.0.
    dict_openLCAvsBW2[k_list[2]] = pd.DataFrame({'olca_value': np.nan, 'BW2_value': np.nan}, index=[0])
else:
    # Call the helper once; element 0 holds the values of the first source, element 1 of the second.
    combined = final_EF_combined(olca_cml, "flow_uuid", "value", bw_cml, "flow_uuid", "value",
                                 common_EFs)
    dict_openLCAvsBW2[k_list[2]] = pd.DataFrame(
        {'olca_value': combined[0], 'BW2_value': combined[1]},
        index=common_EFs,
    )

Detailed differences won't be printed out, to print, add input argument: to_print = 'yes'.  
N of common EFs to be used is: 94


In [20]:
# openLCA vs. raw CML, matched on the lower-cased flow name.
# k_list holds the dict's keys in creation order:
# [0] summary table, [1] list of common EFs, [2] combined EF values, [3] correlation.
k_list = list(dict_openLCAvsCML)

dict_openLCAvsCML[k_list[0]], dict_openLCAvsCML[k_list[1]] = res_compare_included_EF(
    olca_cml, "flow", cml_raw, "flow", to_print="no")

common_EFs = dict_openLCAvsCML[k_list[1]]
if len(common_EFs) == 0:
    # No common flow: store a single all-NaN placeholder row so later steps still get a frame.
    # np.nan is used because the np.NaN alias no longer exists in NumPy >= 2.0.
    dict_openLCAvsCML[k_list[2]] = pd.DataFrame({'olca_value': np.nan, 'CML_value': np.nan}, index=[0])
else:
    # Call the helper once; element 0 holds the values of the first source, element 1 of the second.
    combined = final_EF_combined(olca_cml, "flow", "value", cml_raw, "flow", "value",
                                 common_EFs)
    dict_openLCAvsCML[k_list[2]] = pd.DataFrame(
        {'olca_value': combined[0], 'CML_value': combined[1]},
        index=common_EFs,
    )

Detailed differences won't be printed out, to print, add input argument: to_print = 'yes'.  
N of common EFs to be used is: 0


In [21]:
# BW2 vs. raw CML, matched on the lower-cased flow name.
# k_list holds the dict's keys in creation order:
# [0] summary table, [1] list of common EFs, [2] combined EF values, [3] correlation.
k_list = list(dict_BW2vsCML)

dict_BW2vsCML[k_list[0]], dict_BW2vsCML[k_list[1]] = res_compare_included_EF(
    bw_cml, "flow", cml_raw, "flow", to_print="no")

common_EFs = dict_BW2vsCML[k_list[1]]
if len(common_EFs) == 0:
    # No common flow: store a single all-NaN placeholder row so later steps still get a frame.
    # np.nan is used because the np.NaN alias no longer exists in NumPy >= 2.0.
    dict_BW2vsCML[k_list[2]] = pd.DataFrame({'BW2_value': np.nan, 'CML_value': np.nan}, index=[0])
else:
    # Call the helper once; element 0 holds the values of the first source, element 1 of the second.
    combined = final_EF_combined(bw_cml, "flow", "value", cml_raw, "flow", "value",
                                 common_EFs)
    # The result belongs in dict_BW2vsCML; no dict named dict_BW2vsRIVM is created in this notebook.
    dict_BW2vsCML[k_list[2]] = pd.DataFrame(
        {'BW2_value': combined[0], 'CML_value': combined[1]},
        index=common_EFs,
    )

Detailed differences won't be printed out, to print, add input argument: to_print = 'yes'.  
N of common EFs to be used is: 0


<h4>  Assign values to final key for each dict: result_corr_coeff </h4>

Note: the 3rd key of each dict, `_result_diffEFs`, differs from the emission-type ICs: it does not hold only the EFs with different CF values, but all the common EFs combined for the pair-wise comparison.

In [22]:
# Fill the last key of every pairwise dict with the correlation matrix of its
# combined common-EF table.
for name in pairwise_name:
    new_df = globals()['dict_%s' % name][name + "_result_diffEFs"]
    if new_df is None:
        # The comparison cell of this pair has not stored a table yet.
        raise ValueError(
            "dict_%s has no '%s_result_diffEFs' table; run its comparison cell first" % (name, name)
        )
    # Deliberately no try/except: a failure must surface here instead of silently
    # reusing the correlation matrix computed for the previous pair.
    new_df_corr = new_df.corr()
    globals()['dict_%s' % name][name + "_result_corr"] = new_df_corr

In [23]:
# Save the results of every pairwise comparison to one workbook in "results/Resources_IC/":
# one worksheet per pair, with the three tables stacked at different start rows.
out_dir = "results/Resources_IC"
os.makedirs(out_dir, exist_ok=True)   # no error if the folder already exists

filename = lcia_name + "_pairwise_comp_result.xlsx"

# The context manager writes and closes the workbook on exit, also when an error occurs
# part-way through. ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter(os.path.join(out_dir, filename), engine='xlsxwriter') as writer:
    for name in pairwise_name:
        sum_table = globals()['dict_%s' % name][name + "_commonEF_sumtable"]
        new_df = globals()['dict_%s' % name][name + "_result_diffEFs"]
        new_df_corr = globals()['dict_%s' % name][name + "_result_corr"]

        sum_table.to_excel(writer, sheet_name=name, startrow=0, startcol=0)     # summary table first
        new_df_corr.to_excel(writer, sheet_name=name, startrow=5, startcol=0)   # then the correlation matrix
        new_df.to_excel(writer, sheet_name=name, startrow=10, startcol=0)       # finally the combined EFs

In [24]:
# Print the combined common-EF table of every pairwise comparison.
for name in pairwise_name:
    # Labels of the two sources on either side of "vs" (e.g. "SP" and "CML").
    name1, name2 = name.split("vs", 1)
    diff_EF = globals()['dict_' + name][name + "_result_diffEFs"]
    print(diff_EF)

   SP_value  CML_value
0       NaN        NaN
   SP_value  BW2_value
0       NaN        NaN
                                                        SP_value  olca_value
molybdenum, 0.11% in sulfide, mo 0.41% and cu 0...  1.780000e-02     0.01780
barium                                              6.040000e-06     0.00001
copper                                              1.370000e-03     0.00137
manganese                                           2.540000e-06     0.00000
tantalum                                            4.060000e-05     0.00004
...                                                          ...         ...
aluminium                                           1.090000e-09     0.00000
copper, 0.52% in sulfide, cu 0.27% and mo 8.2e-...  1.370000e-03     0.00137
vanadium                                            7.700000e-07     0.00000
yttrium                                             5.690000e-07     0.00000
uranium, 2291 gj per kg                             1.400000e