In [2]:
import pandas as pd

# Mappings

In [5]:
# Load the MetalliCan substances table from its CSV export.
substances_table = pd.read_csv(filepath_or_buffer='data/MetalliCan/substances_table.csv')

In [6]:
# Preview the substances table that was just loaded.
substances_table

Unnamed: 0,substance_id,substance_name
0,SUB07971a0766,"1,2,4-Trimethylbenzene"
1,SUBbdfdffeb95,1-Nitropyrene
2,SUB0f01e7c1d0,2-Butoxyethanol
3,SUB324959500a,"7,12-Dimethylbenz[a]anthracene"
4,SUBfee6b26acd,"7H-Dibenzo[c,g]carbazole"
...,...,...
169,SUB787e5ac1a2,Vanadium (and its compounds)
170,SUBe849a2ff09,Xylene (all isomers)
171,SUBae95939c3f,Zinc (and its compounds)
172,SUB090a1237c1,i-Butyl alcohol


In [7]:
# Create the NPRI -> ecoinvent (EI) mapping by going through the IMPACT World+ (IW+) mapping.
# Both existing concordances are sheets of the same workbook, so the file is
# read once with a list of sheet names (pandas then returns a dict keyed by
# sheet name) instead of being opened and parsed twice.
concordance_sheets = pd.read_excel(
    r'data/Mappings/openIO_IW_EI_concordance.xlsx',
    sheet_name=['NPRI_to_IW21', 'EI_to_IW+'],
)
mapping_npri_iw = concordance_sheets['NPRI_to_IW21']
mapping_ei_iw = concordance_sheets['EI_to_IW+']

In [8]:
# Give the IMPACT World+ flow-name column the same label ('iw_name') in both
# concordances so the two tables can be matched on it.
mapping_npri_iw = mapping_npri_iw.rename(columns={'IMPACT World+ flows': 'iw_name'})
mapping_ei_iw = mapping_ei_iw.rename(columns={'iw name': 'iw_name'})

In [9]:
# Add the 'EI' column to mapping_npri_iw by looking up each iw_name in mapping_ei_iw.
# The iw_name -> ecoinvent name lookup is built once instead of re-filtering
# mapping_ei_iw for every row:
#   * rows with a missing iw_name are excluded, so a missing key can never
#     match and falls through to 'No match found';
#   * when an iw_name appears several times, the first occurrence wins.
iw_to_ei = (
    mapping_ei_iw
    .dropna(subset=['iw_name'])
    .drop_duplicates(subset='iw_name', keep='first')
    .set_index('iw_name')['ecoinvent name']
    .to_dict()
)
mapping_npri_iw['EI'] = mapping_npri_iw['iw_name'].map(
    lambda iw: iw_to_ei.get(iw, 'No match found')
)


In [10]:
# Inspect the NPRI concordance, which now carries the 'EI' column.
mapping_npri_iw

Unnamed: 0,OpenIO flows,iw_name,EI
0,Carbon dioxide,"Carbon dioxide, fossil","Carbon dioxide, fossil"
1,Methane,"Methane, fossil",Methane
2,Dinitrogen monoxide,Dinitrogen monoxide,Dinitrogen monoxide
3,CF4,"Methane, tetrafluoro-, CFC-14","Methane, tetrafluoro-, R-14"
4,C2F6,"Ethane, hexafluoro-, HFC-116","Ethane, hexafluoro-, HFC-116"
...,...,...,...
348,Propylene oxide,Propylene oxide,Propylene oxide
349,Thiourea,Thiourea,No match found
350,2-Ethoxyethyl acetate,2-Ethoxyethyl acetate,No match found
351,Azo disperse dyes,,No match found


In [11]:
# Relabel the 'OpenIO flows' column as 'substance_name' so that it can serve
# as the key when merging with substances_table.
mapping_npri_iw = mapping_npri_iw.rename(columns={'OpenIO flows': 'substance_name'})

In [12]:
# Display substances_table again before merging the EI names onto it.
substances_table

Unnamed: 0,substance_id,substance_name
0,SUB07971a0766,"1,2,4-Trimethylbenzene"
1,SUBbdfdffeb95,1-Nitropyrene
2,SUB0f01e7c1d0,2-Butoxyethanol
3,SUB324959500a,"7,12-Dimethylbenz[a]anthracene"
4,SUBfee6b26acd,"7H-Dibenzo[c,g]carbazole"
...,...,...
169,SUB787e5ac1a2,Vanadium (and its compounds)
170,SUBe849a2ff09,Xylene (all isomers)
171,SUBae95939c3f,Zinc (and its compounds)
172,SUB090a1237c1,i-Butyl alcohol


In [13]:
# Left-join the 'EI' names onto the substances table using 'substance_name'
# as the key; all rows of substances_table are retained by the left join.
substances_table_df = pd.merge(
    substances_table,
    mapping_npri_iw[['substance_name', 'EI']],
    how='left',
    on='substance_name',
)

In [14]:
# Inspect the merged table (substances plus their 'EI' name).
substances_table_df

Unnamed: 0,substance_id,substance_name,EI
0,SUB07971a0766,"1,2,4-Trimethylbenzene",No match found
1,SUBbdfdffeb95,1-Nitropyrene,No match found
2,SUB0f01e7c1d0,2-Butoxyethanol,No match found
3,SUB324959500a,"7,12-Dimethylbenz[a]anthracene",No match found
4,SUBfee6b26acd,"7H-Dibenzo[c,g]carbazole",No match found
...,...,...,...
169,SUB787e5ac1a2,Vanadium (and its compounds),Vanadium
170,SUBe849a2ff09,Xylene (all isomers),Xylene
171,SUBae95939c3f,Zinc (and its compounds),Zinc
172,SUB090a1237c1,i-Butyl alcohol,2-Methyl-1-propanol


# Data coverage statistics

In [17]:
# Load the per-table data coverage file from the MetalliCan data folder.
data_coverage = pd.read_csv(filepath_or_buffer='data/MetalliCan/data_coverage_per_table.csv')

In [20]:
# Preview the coverage table that was just loaded.
data_coverage

Unnamed: 0,main_id,facility_type,facility_name,commodities,Archetype table,By-product ratios table,Climate category table,Conflict table,Energy table,Environmental intensity table,...,Land occupation table,Peatland table,Population table,Prioritize conservation areas table,Production table,Protected & Indigenous land table,Reserves table,Tailings table,Water risk table,Weather table
0,QC-MAIN-5801b453,advanced project,Abcourt-Barvue,"Zinc, silver",0,0,9,0,0,0,...,2,1,4,1,0,30,0,0,32,128
1,QC-MAIN-d29e0839,mining,Sleeping Giant,"Gold, silver",1,0,9,0,0,0,...,6,1,4,1,0,44,2,0,32,128
2,QC-MAIN-f1ff4920,advanced project,Akasaba West,"Gold, copper",0,0,9,0,0,0,...,10,1,4,1,0,46,0,0,32,128
3,NU-MAIN-4ea8ac9d,mining,Amaruq,Gold,1,0,9,0,8,2,...,0,1,4,1,5,1,4,0,32,128
4,QC-MAIN-e7e6a960,mining,Canadian Malartic,"Gold, silver",1,0,9,0,8,2,...,6,1,4,1,1,41,5,2,32,128
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,ON-MAIN-c5fefb01,mining,Mishi,Gold,1,0,9,0,8,6,...,4,1,4,1,5,11,5,0,32,128
266,BC-MAIN-feb13c30,advanced project,Record Ridge,Magnesium,0,0,9,0,0,0,...,0,1,4,1,0,24,0,0,32,128
267,YT-MAIN-5e0d8b48,advanced project,Casino,"Copper, gold, molybdenum, silver",0,0,9,0,0,0,...,0,1,4,1,0,2,0,0,32,128
268,ON-MAIN-12c68d49,advanced project,Eagle's Nest,"Nickel, copper, gold, platinum, palladium",0,0,9,0,0,0,...,0,1,4,1,0,1,0,0,32,128


In [24]:
# Columns of the coverage table to summarise.
col_of_interest = [
    'Energy table',
    'Environmental intensity table',
    'Production table',
    'Environmental flows table',
    'Tailings table',
]
# Descriptive statistics (count, mean, std, min, quartiles, max) for those columns.
stat_data_coverage = data_coverage.loc[:, col_of_interest].describe()

In [25]:
# Show the descriptive statistics computed for the selected coverage columns.
stat_data_coverage

Unnamed: 0,Energy table,Environmental intensity table,Production table,Environmental flows table,Tailings table
count,270.0,270.0,270.0,270.0,270.0
mean,0.655556,0.218519,1.177778,52.762963,0.318519
std,2.035892,0.990864,2.724982,102.823067,1.268151
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,1.0,48.75,0.0
max,9.0,8.0,21.0,758.0,15.0


In [None]:
# Display the full coverage table again (this cell has not been executed yet).
data_coverage