In [1]:
import pandas as pd

# Import cleaned MetalliCan data

In [2]:
from core.data_manipulations import get_info_for_ids, merge_main_and_group

In [3]:
# Raw MetalliCan tables
main_table = pd.read_csv(r'C:\Users\mp_ma\OneDrive - polymtl\POST_DOC\CODE\metallican_db\database\CSV\main_table.csv')
substances_table = pd.read_csv(r'C:\Users\mp_ma\OneDrive - polymtl\POST_DOC\CODE\metallican_db\database\CSV\substances_table.csv')

In [4]:
# Pre-processed production table
production_table = pd.read_excel(r'data/MetalliCan/sites_for_lci.xlsx', sheet_name='prod_data')

In [5]:
def build_activity_name(row):
    # ---- 1️⃣ Déterminer le type d’opération ----
    mpt = str(row.get("mining_processing_type", "")).lower()
    parts = []

    if "open-pit" in mpt and "underground" in mpt:
        parts.append("OP and UG mining")
    elif "open-pit" in mpt:
        parts.append("OP mining")
    elif "underground" in mpt:
        parts.append("UG mining")

    if "concentrator" in mpt:
        parts.append("and beneficiation")

    op = " ".join(parts)

    # ---- 2️⃣ Déterminer les commodities ----
    commodities = []
    for col in production_table.columns:
        if col.endswith("_t") and col != "ore_processed_t":
            if pd.notna(row[col]) and row[col] != 0:
                commodities.append(col.replace("_t", ""))
    commodities_str = " and ".join(commodities)

    # ---- 3️⃣ Choisir le nom de site ----
    facility = row.get("facility_name") if pd.notna(row.get("facility_name")) else row.get("facility_group_name", "")

    # ---- 4️⃣ Construire la chaîne finale ----
    if commodities_str and op and facility:
        return f"{commodities_str}, {op}, {facility}"
    elif commodities_str and op:
        return f"{commodities_str}, {op}"
    elif commodities_str and facility:
        return f"{commodities_str}, {facility}"
    else:
        return None

In [6]:
production_table["activity_name_lci"] = production_table.apply(build_activity_name, axis=1)

In [7]:
# Normalized MetalliCan tables
energy_table = pd.read_csv(r'data/MetalliCan/cleaned_data/energy_df.csv')
material_table = pd.read_csv(r'data/MetalliCan/cleaned_data/material_df.csv')
biosphere_table = pd.read_csv(r'data/MetalliCan/cleaned_data/biosphere_df.csv')
#land_table = pd.read_csv(r'data/MetalliCan/cleaned_data/land_table_mining.xlsx')

In [8]:
biosphere_table = biosphere_table.merge(substances_table[['substance_id', 'substance_name']], how='left', on='substance_id')

In [9]:
# Removing rows with value_normalized is NaN and divide it by 10e6 to have values per kg
energy_table = energy_table[~energy_table['value_normalized'].isna()]
energy_table['value_normalized'] = energy_table['value_normalized'] / 1e6

In [10]:
material_table = material_table[~material_table['value_normalized'].isna()]
material_table['value_normalized'] = material_table['value_normalized'] / 1e6

In [11]:
# Removing rows with value_normalized is NaN and divide it by 10e6 to have values per kg
biosphere_table = biosphere_table[~biosphere_table['value_normalized'].isna()]
biosphere_table['value_normalized'] = biosphere_table['value_normalized'] / 1e6

# Keeping only relevant columns

In [12]:
energy_df = energy_table[['main_id', 'facility_group_id', 'subflow_type', 'value_normalized']]
energy_df['unit'] = 'MJ'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  energy_df['unit'] = 'MJ'


In [13]:
material_df = material_table[['main_id', 'facility_group_id', 'subflow_type', 'value_normalized']]
material_df['unit'] = 't'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  material_df['unit'] = 't'


In [14]:
# Put energy_df and material_df together in a single dataframe and add the province from the main_df to specify electricity location later
technosphere_df = pd.concat([energy_df, material_df], ignore_index=True)
technosphere_df = technosphere_df.merge(production_table[['main_id', 'facility_group_id', 'province']], on=['main_id', 'facility_group_id'], how='left')
technosphere_df

Unnamed: 0,main_id,facility_group_id,subflow_type,value_normalized,unit,province
0,BC-MAIN-857b7b89,,Acetylene,1.112991e-07,MJ,British Columbia
1,BC-MAIN-857b7b89,,Aviation fuel,4.378079e-04,MJ,British Columbia
2,BC-MAIN-857b7b89,,Diesel,1.729171e-03,MJ,British Columbia
3,BC-MAIN-857b7b89,,Gasoline,8.173765e-05,MJ,British Columbia
4,BC-MAIN-857b7b89,,Propane,2.534400e-04,MJ,British Columbia
...,...,...,...,...,...,...
210,QC-MAIN-02884fb5,,Lime,4.902611e-09,t,Quebec
211,QC-MAIN-02884fb5,,Motor/drill oil,1.827853e-11,t,Quebec
212,QC-MAIN-02884fb5,,Sulfur dioxide,5.212766e-10,t,Quebec
213,QC-MAIN-02884fb5,,Transmission oil,7.833656e-12,t,Quebec


In [15]:
biosphere_df = biosphere_table[['main_id', 'facility_group_id', 'substance_name', 'unit', 'value_normalized']]
biosphere_df = biosphere_df.merge(production_table[['main_id', 'facility_group_id', 'province']], on=['main_id', 'facility_group_id'], how='left')
biosphere_df

Unnamed: 0,main_id,facility_group_id,substance_name,unit,value_normalized,province
0,ON-MAIN-1f126a43,,Ammonia (total),tonnes,1.440257e-12,Ontario
1,ON-MAIN-1f126a43,,Arsenic (and its compounds),kg,2.626883e-13,Ontario
2,ON-MAIN-1f126a43,,Carbon monoxide,tonnes,1.725772e-10,Ontario
3,ON-MAIN-1f126a43,,Chromium (and its compounds),tonnes,2.377782e-15,Ontario
4,ON-MAIN-1f126a43,,Cobalt (and its compounds),kg,4.076198e-13,Ontario
...,...,...,...,...,...,...
1284,,GRP-a13779f8,Manganese (and its compounds),tonnes,0.000000e+00,Manitoba
1285,,GRP-a13779f8,Zinc (and its compounds),tonnes,9.152123e-14,Manitoba
1286,,GRP-a13779f8,Zinc (and its compounds),tonnes,0.000000e+00,Manitoba
1287,,GRP-a13779f8,Ammonia (total),tonnes,9.866373e-11,Manitoba


# Map MetalliCan flows to EI and RI flows

In [16]:
from core.conversion_functions import map_technosphere_to_ecoinvent, map_biosphere_to_ecoinvent
from core.constants import CA_provinces

## Technosphere flows

In [17]:
mapping_technosphere = pd.read_excel(r'data/Mappings/MAPPINGS_RI.xlsx', sheet_name='technosphere')

In [18]:
mapping_technosphere

Unnamed: 0,Type,MetalliCan,MetalliCan_unit,DB_to_map,Reference product,Flow name,Location,Unit,Comment
0,Energy,Acetylene,MJ,Regioinvent,acetylene,consumption market for acetylene,CA,kilogram,
1,Energy,Ammonium nitrate,MJ,Regioinvent,ammonium nitrate,consumption market for ammonium nitrate,CA,kilogram,"Precursor to ANFO; not burned on its own, usua..."
2,Energy,ANFO,MJ,Regioinvent,"explosive, tovex","consumption market for explosive, tovex",CA,kilogram,Or consumption market for ammonium nitrate?
3,Energy,Aviation fuel,MJ,Regioinvent,kerosene,consumption market for kerosene,CA,kilogram,
4,Energy,Biodiesel,MJ,Regioinvent,biogas,consumption market for biogas,CA,cubic meter,No biodiesel inventory found
...,...,...,...,...,...,...,...,...,...
70,Material,Sulfuric acid (H2SO4),t,Regioinvent,sulfuric acid,consumption market for sulfuric acid,CA,kilogram,
71,Material,Tires,t,Regioinvent,synthetic rubber,consumption market for synthetic rubber,CA,kilogram,
72,Material,Total blasting agents used e.g. ANFO,t,Regioinvent,"explosive, tovex","consumption market for explosive, tovex",CA,kilogram,Or consumption market for ammonium nitrate?
73,Material,Total sodium cyanide used,t,Regioinvent,sodium cyanide,consumption market for sodium cyanide,CA,kilogram,Same as Lai et al (2025)


In [19]:
# Apply the function
mapped_technosphere_df = map_technosphere_to_ecoinvent(technosphere_df, mapping_technosphere, CA_provinces)

Index(['main_id', 'facility_group_id', 'subflow_type', 'value_normalized',
       'unit', 'province', 'Type', 'MetalliCan', 'MetalliCan_unit',
       'DB_to_map', 'Reference product', 'Flow name', 'Location', 'Unit',
       'Comment'],
      dtype='object')
⚠️ Les flux suivants n'ont pas trouvé de correspondance dans Ecoinvent:
 - Energy use
 - Other
⚠️ Pas de conversion définie pour MJ → kilogram (flux: Acetylene)
⚠️ Pas de conversion définie pour MJ → kilogram (flux: Used oil)
⚠️ Pas de conversion définie pour MJ → kg (flux: Electricity consumption|Generated on-site)
ℹ️ Conversion explosive par défaut utilisée (3.0 MJ/kg) pour: Explosives
ℹ️ Conversion explosive par défaut utilisée (3.0 MJ/kg) pour: Explosives
ℹ️ Conversion explosive par défaut utilisée (3.0 MJ/kg) pour: Explosives
ℹ️ Conversion explosive par défaut utilisée (3.0 MJ/kg) pour: Explosives
⚠️ Pas de conversion définie pour MJ → kg (flux: Electricity consumption|Generated on-site)
⚠️ Pas de conversion définie pour MJ → k

In [20]:
# Drop rows where ecoinvent_flow_name is "No mapping" and Amount is NaN for now
mapped_technosphere_df = mapped_technosphere_df[
    (mapped_technosphere_df["Activity"] != "No mapping") &
    (~mapped_technosphere_df["Amount"].isna())
]

In [21]:
mapped_technosphere_df = mapped_technosphere_df[['main_id', 'facility_group_id', 'Amount', 'Activity', 'Product', 'Unit', 'Location', 'Database']]

In [22]:
mapped_technosphere_df

Unnamed: 0,main_id,facility_group_id,Amount,Activity,Product,Unit,Location,Database
0,BC-MAIN-857b7b89,,1.112991e-07,consumption market for acetylene,acetylene,kilogram,CA,Regioinvent
1,BC-MAIN-857b7b89,,1.015796e-05,consumption market for kerosene,kerosene,kilogram,CA,Regioinvent
2,BC-MAIN-857b7b89,,4.021329e-05,consumption market for diesel,diesel,kg,CA,Regioinvent
3,BC-MAIN-857b7b89,,1.840938e-06,consumption market for diesel,diesel,kg,CA,Regioinvent
4,BC-MAIN-857b7b89,,5.462069e-06,consumption market for propane,propane,kilogram,CA,Regioinvent
...,...,...,...,...,...,...,...,...
229,QC-MAIN-02884fb5,,4.902611e-06,consumption market for lime,lime,kilogram,CA,Regioinvent
230,QC-MAIN-02884fb5,,1.827853e-08,consumption market for lubricating oil,lubricating oil,kilogram,CA,Regioinvent
231,QC-MAIN-02884fb5,,5.212766e-07,"consumption market for sulfur dioxide, liquid","sulfur dioxide, liquid",kilogram,CA,Regioinvent
232,QC-MAIN-02884fb5,,7.833656e-09,consumption market for lubricating oil,lubricating oil,kilogram,CA,Regioinvent


## Biosphere flows mapping

In [23]:
mapping_biosphere = pd.read_excel(r'data/Mappings/MAPPINGS_RI.xlsx', sheet_name='biosphere')
mapping_biosphere

Unnamed: 0,Type,substance_id,substance_name,compartment_name,release_pathway,flow_direction,MetalliCan_unit,DB_to_map,Flow name,Compartments,Unit,Comment
0,Environmental_flows,95-63-6,"1,2,4-Trimethylbenzene",Air,Other,Emission,tonnes,biosphere3,"2,2,4-Trimethylpentane","('air',)",kilogram,
1,Environmental_flows,57-97-6,"7,12-Dimethylbenz[a]anthracene",Air,Fugitive Emissions,Emission,,biosphere3,"Dibenz(a,h)anthracene","('air',)",kilogram,
2,Environmental_flows,194-59-2,"7H-Dibenzo[c,g]carbazole",Air,Stack Emissions,Emission,kg,biosphere3,"PAH, polycyclic aromatic hydrocarbons","('air',)",kilogram,Polycyclic Aromatic Hydrocarbon (PAH)
3,Environmental_flows,83-32-9,Acenaphthene,Air,Fugitive Emissions,Emission,kg,biosphere3,Acenaphthene,"('air',)",kilogram,
4,Environmental_flows,83-32-9,Acenaphthene,Air,Stack Emissions,Emission,kg,biosphere3,Acenaphthene,"('air',)",kilogram,
...,...,...,...,...,...,...,...,...,...,...,...,...
413,Environmental_flows,NA - 14,Zinc (and its compounds),Air,Storage / Handling,Emission,tonnes,biosphere3,Zinc II,"('air',)",kilogram,
414,Environmental_flows,NA - 14,Zinc (and its compounds),Land,Other,Emission,tonnes,biosphere3,Zinc II,"('soil', 'industrial')",kilogram,
415,Environmental_flows,NA - 14,Zinc (and its compounds),Land,Spills,Emission,tonnes,biosphere3,Zinc II,"('soil', 'industrial')",kilogram,
416,Environmental_flows,NA - 14,Zinc (and its compounds),Water,Direct Discharge,Emission,tonnes,biosphere3,Zinc II,"('water',)",kilogram,


In [24]:
biosphere_df

Unnamed: 0,main_id,facility_group_id,substance_name,unit,value_normalized,province
0,ON-MAIN-1f126a43,,Ammonia (total),tonnes,1.440257e-12,Ontario
1,ON-MAIN-1f126a43,,Arsenic (and its compounds),kg,2.626883e-13,Ontario
2,ON-MAIN-1f126a43,,Carbon monoxide,tonnes,1.725772e-10,Ontario
3,ON-MAIN-1f126a43,,Chromium (and its compounds),tonnes,2.377782e-15,Ontario
4,ON-MAIN-1f126a43,,Cobalt (and its compounds),kg,4.076198e-13,Ontario
...,...,...,...,...,...,...
1284,,GRP-a13779f8,Manganese (and its compounds),tonnes,0.000000e+00,Manitoba
1285,,GRP-a13779f8,Zinc (and its compounds),tonnes,9.152123e-14,Manitoba
1286,,GRP-a13779f8,Zinc (and its compounds),tonnes,0.000000e+00,Manitoba
1287,,GRP-a13779f8,Ammonia (total),tonnes,9.866373e-11,Manitoba


In [25]:
mapped_biosphere_df = map_biosphere_to_ecoinvent(biosphere_df, mapping_biosphere, CA_provinces)

Index(['main_id', 'facility_group_id', 'substance_name', 'unit',
       'value_normalized', 'province', 'Type', 'substance_id',
       'compartment_name', 'release_pathway', 'flow_direction',
       'MetalliCan_unit', 'DB_to_map', 'Flow name', 'Compartments', 'Unit',
       'Comment'],
      dtype='object')
⚠️ 6 biosphere flows could not be mapped to Ecoinvent:
   - 1-Nitropyrene
   - Quinoline
   - Ethylene glycol
   - Water
   - nan
   - PFCs
⚠️ No conversion defined for tonnes → kilo becquerel (flow: Manganese (and its compounds))
⚠️ No conversion defined for tonnes → kilo becquerel (flow: Manganese (and its compounds))
⚠️ No conversion defined for tonnes → kilo becquerel (flow: Manganese (and its compounds))
⚠️ No conversion defined for tonnes → kilo becquerel (flow: Manganese (and its compounds))
⚠️ No conversion defined for tonnes → kilo becquerel (flow: Manganese (and its compounds))
⚠️ No conversion defined for tonnes → kilo becquerel (flow: Manganese (and its compounds))
⚠️ No

In [26]:
mapped_biosphere_df

Unnamed: 0,main_id,facility_group_id,substance_name,province,Flow Name,Compartments,Amount,unit,Unit,Database
0,ON-MAIN-1f126a43,,Ammonia (total),Ontario,"Ammonia, CA-ON","('air', 'low population density long term')",1.440257e-09,tonnes,kilogram,biosphere3_spatialized_flows
1,ON-MAIN-1f126a43,,Ammonia (total),Ontario,"Ammonia, CA-ON","('air', 'low population density long term')",1.440257e-09,tonnes,kilogram,biosphere3_spatialized_flows
2,ON-MAIN-1f126a43,,Ammonia (total),Ontario,"Ammonia, CA-ON","('air', 'low population density long term')",1.440257e-09,tonnes,kilogram,biosphere3_spatialized_flows
3,ON-MAIN-1f126a43,,Ammonia (total),Ontario,Ammonia,"('soil', 'industrial')",1.440257e-09,tonnes,kilogram,biosphere3
4,ON-MAIN-1f126a43,,Ammonia (total),Ontario,Ammonium,"('water',)",1.440257e-09,tonnes,kilogram,biosphere3
...,...,...,...,...,...,...,...,...,...,...
7046,,GRP-a13779f8,Ammonia (total),Manitoba,"Ammonia, CA-MB","('air', 'low population density long term')",0.000000e+00,tonnes,kilogram,biosphere3_spatialized_flows
7047,,GRP-a13779f8,Ammonia (total),Manitoba,"Ammonia, CA-MB","('air', 'low population density long term')",0.000000e+00,tonnes,kilogram,biosphere3_spatialized_flows
7048,,GRP-a13779f8,Ammonia (total),Manitoba,"Ammonia, CA-MB","('air', 'low population density long term')",0.000000e+00,tonnes,kilogram,biosphere3_spatialized_flows
7049,,GRP-a13779f8,Ammonia (total),Manitoba,Ammonia,"('soil', 'industrial')",0.000000e+00,tonnes,kilogram,biosphere3


In [27]:
# Drop rows where ecoinvent_flow_name is "No mapping" and Amount is NaN for now
mapped_biosphere_df = mapped_biosphere_df[
    (mapped_biosphere_df["Flow Name"] != "No mapping") &
    (~mapped_biosphere_df["Amount"].isna())
]

In [28]:
mapped_biosphere_df

Unnamed: 0,main_id,facility_group_id,substance_name,province,Flow Name,Compartments,Amount,unit,Unit,Database
0,ON-MAIN-1f126a43,,Ammonia (total),Ontario,"Ammonia, CA-ON","('air', 'low population density long term')",1.440257e-09,tonnes,kilogram,biosphere3_spatialized_flows
1,ON-MAIN-1f126a43,,Ammonia (total),Ontario,"Ammonia, CA-ON","('air', 'low population density long term')",1.440257e-09,tonnes,kilogram,biosphere3_spatialized_flows
2,ON-MAIN-1f126a43,,Ammonia (total),Ontario,"Ammonia, CA-ON","('air', 'low population density long term')",1.440257e-09,tonnes,kilogram,biosphere3_spatialized_flows
3,ON-MAIN-1f126a43,,Ammonia (total),Ontario,Ammonia,"('soil', 'industrial')",1.440257e-09,tonnes,kilogram,biosphere3
4,ON-MAIN-1f126a43,,Ammonia (total),Ontario,Ammonium,"('water',)",1.440257e-09,tonnes,kilogram,biosphere3
...,...,...,...,...,...,...,...,...,...,...
7046,,GRP-a13779f8,Ammonia (total),Manitoba,"Ammonia, CA-MB","('air', 'low population density long term')",0.000000e+00,tonnes,kilogram,biosphere3_spatialized_flows
7047,,GRP-a13779f8,Ammonia (total),Manitoba,"Ammonia, CA-MB","('air', 'low population density long term')",0.000000e+00,tonnes,kilogram,biosphere3_spatialized_flows
7048,,GRP-a13779f8,Ammonia (total),Manitoba,"Ammonia, CA-MB","('air', 'low population density long term')",0.000000e+00,tonnes,kilogram,biosphere3_spatialized_flows
7049,,GRP-a13779f8,Ammonia (total),Manitoba,Ammonia,"('soil', 'industrial')",0.000000e+00,tonnes,kilogram,biosphere3


In [29]:
mapped_biosphere_df = mapped_biosphere_df[['main_id', 'facility_group_id', 'Amount', 'Unit', 'Flow Name', 'Compartments', 'Database']]

In [30]:
production_table

Unnamed: 0,main_id,facility_group_id,facility_name,facility_group_name,province,facility_type,mining_processing_type,commodities,ore_processed_t,Au_t,Ag_t,Cu_t,Ni_t,Mo_t,Zn_t,Pb_t,Fe_t,Pt_t,activity_name_lci
0,BC-MAIN-857b7b89,,Brucejack,,British Columbia,mining,"Underground, concentrator","Gold, silver",166000.0,0.902002,,,,,,,,,"Au, UG mining and beneficiation, Brucejack"
1,QC-MAIN-e7e6a960,,Canadian Malartic,,Quebec,mining,"Open-pit, concentrator","Gold, silver",19594930.0,21.2947,9.642085,,,,,,,,"Au and Ag, OP mining and beneficiation, Canadi..."
2,NL-MAIN-dd723db4,,Carol Lake,,Newfoundland and Labrador,mining,"Open-pit, concentrator",Iron,17880000.0,,,,,,,,4497892.8,,"Fe, OP mining and beneficiation, Carol Lake"
3,QC-MAIN-b86f7d07,,Casa Berardi,,Quebec,mining,"Open-pit, underground, concentrator","Gold, silver",1361450.0,2.693563,0.752705,,,,,,,,"Au and Ag, OP and UG mining and beneficiation,..."
4,BC-MAIN-599152a0,,Copper Mountain,,British Columbia,mining,"Open-pit, concentrator","Copper, gold, silver",6862152.0,0.275204,6.789334,19050.0,,,,,,,"Au and Ag and Cu, OP mining and beneficiation,..."
5,ON-MAIN-aeafbb59,,Detour Lake,,Ontario,mining,"Open-pit, concentrator",Gold,25434850.0,21.070942,2.457177,,,,,,,,"Au and Ag, OP mining and beneficiation, Detour..."
6,ON-MAIN-cb85213a,,Eagle River,,Ontario,mining,"Underground, concentrator",Gold,222627.0,2.72053,,,,,,,,,"Au, UG mining and beneficiation, Eagle River"
7,QC-MAIN-6dc537e6,,Éléonore,,Quebec,mining,"Underground, concentrator",Gold,1661000.0,7.216012,,,,,,,,,"Au, UG mining and beneficiation, Éléonore"
8,BC-MAIN-6b4800fe,,Gibraltar,,British Columbia,mining,"Open-pit, concentrator","Copper, molybdenum, silver",30000000.0,,,55610.3792,,545.217584,,,,,"Cu and Mo, OP mining and beneficiation, Gibraltar"
9,QC-MAIN-c0660aec,,Goldex,,Quebec,mining,"Underground, concentrator","Gold, silver",2886927.0,4.385051,0.062207,,,,39.036758,,,,"Au and Ag and Zn, UG mining and beneficiation,..."


# LCI creation

In [31]:
mapped_technosphere_df.to_csv(r'data/MetalliCan/data_for_lci_initialization/mapped_technosphere_df.csv', index=False)
mapped_biosphere_df.to_csv(r'data/MetalliCan/data_for_lci_initialization/mapped_biosphere_df.csv', index=False)
production_table.to_csv(r'data/MetalliCan/data_for_lci_initialization/production_table.csv', index=False)

In [32]:
def add_site_id(
    df: pd.DataFrame,
    main_col: str = "main_id",
    group_col: str = "facility_group_id",
    out_col: str = "site_id",
) -> pd.DataFrame:
    """
    Create a single canonical site_id column:
    - Prefer `main_id` when present, otherwise `facility_group_id`
    - Normalize to uppercase, strip whitespace
    """
    # copy to avoid mutating caller
    df = df.copy()

    # make sure both columns exist even if missing
    if main_col not in df.columns:
        df[main_col] = pd.NA
    if group_col not in df.columns:
        df[group_col] = pd.NA

    # unify true missing values
    df[main_col] = df[main_col].replace({None: pd.NA, "": pd.NA, "nan": pd.NA})
    df[group_col] = df[group_col].replace({None: pd.NA, "": pd.NA, "nan": pd.NA})

    # prefer main_id, fallback to facility_group_id
    site = df[main_col].fillna(df[group_col])

    # normalize: string, strip, uppercase
    site = site.astype(str).str.strip()
    site = site.mask(site.eq("nan"), pd.NA)  # undo string "nan"
    site = site.fillna(pd.NA)
    #site = site.str.upper()

    df[out_col] = site
    return df

In [33]:
mapped_technosphere_df = add_site_id(mapped_technosphere_df)
mapped_biosphere_df = add_site_id(mapped_biosphere_df)
production_table = add_site_id(production_table)

In [34]:
from core.lci_database_builder import LCIDatabaseBuilder

In [35]:
# Step 1 — initialize the builder
builder = LCIDatabaseBuilder(
    db_name='metallican_lci_',
    project_name='metallican'
)

📂 Active Brightway project: metallican
✅ Using existing database 'metallican_lci_'.


In [36]:
# Step 2 — create the activity shells from the main dataframe
builder.build_lci_entries(df=production_table)
print(len(builder.lcis))

✅ Created 33 base LCI activities with production exchanges.
33


In [37]:
# Step 3a — Populate with the technosphere exchanges
builder.populate_technosphere_exchanges(technosphere_df=mapped_technosphere_df)

⚙️ Populating technosphere exchanges
   ✅ Cached 218246 activities from Regioinvent
   ✅ Cached 20769 activities from ecoinvent-3.10-cutoff regionalized
✅ Added 232 technosphere exchanges.


In [38]:
# Step 3b — Populate with the biosphere exchanges
builder.populate_biosphere_exchanges(biosphere_df=mapped_biosphere_df)

🌱 Populating biosphere exchanges
   ✅ Cached 110559 biosphere flows from biosphere3_spatialized_flows
   ✅ Cached 4362 biosphere flows from biosphere3
✅ Added 6891 biosphere exchanges.
⚠️ 37 biosphere flows could not be matched:
   - ('soil',) (emission, biosphere3)
   - ('soil',) (emission, biosphere3)
   - ('soil',) (emission, biosphere3)
   - ('soil',) (emission, biosphere3)
   - ('soil',) (emission, biosphere3)
   - ('soil',) (emission, biosphere3)
   - ('soil',) (emission, biosphere3)
   - ('soil',) (emission, biosphere3)
   - ('soil',) (emission, biosphere3)
   - ('soil',) (emission, biosphere3)
   ... and 27 more.


In [39]:
# Step 4 - Consolidate duplicate flows
builder.consolidate_exchanges()

🧮 Consolidation: 7156 → 768 exchanges (summed duplicates).


In [40]:
builder.write_to_database()

🧱 Writing 33 activities to database 'metallican_lci_'...
♻️ Overwriting existing activity: Au, UG mining and beneficiation, Brucejack


            Please use `del databases['metallican_lci_']` instead.
            Otherwise, the metadata and database get out of sync.
            Call `.delete(warn=False)` to skip this message in the future.
            


✅ Database 'metallican_lci_' processed successfully with 33 activities.
