In [None]:
import os

import pandas as pd

# Import cleaned MetalliCan data

In [None]:
from core.data_manipulations import get_info_for_ids

In [None]:
# Folder containing the MetalliCan CSV exports. The default is the original
# local OneDrive path; set the METALLICAN_CSV_DIR environment variable to point
# elsewhere when running the notebook on another machine.
METALLICAN_CSV_DIR = os.environ.get(
    'METALLICAN_CSV_DIR',
    r'C:\Users\mp_ma\OneDrive - polymtl\POST_DOC\CODE\metallican_db\database\CSV',
)

# Table holding the substance_id / substance_name pairs.
substances_table = pd.read_csv(os.path.join(METALLICAN_CSV_DIR, 'substances_table.csv'))

In [None]:
# Folder containing the MetalliCan CSV exports. The default is the original
# local OneDrive path; set the METALLICAN_CSV_DIR environment variable to point
# elsewhere when running the notebook on another machine.
METALLICAN_CSV_DIR = os.environ.get(
    'METALLICAN_CSV_DIR',
    r'C:\Users\mp_ma\OneDrive - polymtl\POST_DOC\CODE\metallican_db\database\CSV',
)

# Database tables exported as CSV.
main_table = pd.read_csv(os.path.join(METALLICAN_CSV_DIR, 'main_table.csv'))
ta_table = pd.read_csv(os.path.join(METALLICAN_CSV_DIR, 'tech_attributes_table.csv'))

# Cleaned flow tables, read relative to the notebook's working directory.
energy_table = pd.read_excel(r'data/MetalliCan/cleaned_data/energy_df_sd_norm.xlsx')
material_table = pd.read_excel(r'data/MetalliCan/cleaned_data/material_df_sd_norm.xlsx')
biosphere_table = pd.read_excel(r'data/MetalliCan/cleaned_data/biosphere_df_norm.xlsx')
land_table = pd.read_excel(r'data/MetalliCan/cleaned_data/land_table_mining.xlsx')

In [None]:
# Attach substance_name to every biosphere row. The left join keeps rows whose
# substance_id has no match in substances_table.
substance_lookup = substances_table[['substance_id', 'substance_name']]
biosphere_table = biosphere_table.merge(substance_lookup, how='left', on='substance_id')

In [None]:
# Remove the '/t' at the end of the unit_normalized column.
# The pattern is anchored with '$' so that only a trailing '/t' is stripped;
# a '/t' occurring elsewhere in a unit string (e.g. 'kg/tonne') is left intact.
biosphere_table['unit_normalized'] = biosphere_table['unit_normalized'].str.replace(r'/t$', '', regex=True)

# Separate facilities with only biosphere flows from those with both technosphere and biosphere flows

In [None]:
def _id_pairs(table):
    """Return the set of (main_id, facility_group_id) tuples found in ``table``."""
    return set(table[['main_id', 'facility_group_id']].apply(tuple, axis=1))


# One set of identifier pairs per flow table.
ids_energy = _id_pairs(energy_table)
ids_material = _id_pairs(material_table)
ids_biosphere = _id_pairs(biosphere_table)
ids_land = _id_pairs(land_table)

In [None]:
# Identifier pairs that appear in the energy, material and biosphere tables
# alike, i.e. facilities with both technosphere and biosphere flows.
best_ids = set.intersection(ids_energy, ids_material, ids_biosphere)
best_ids

In [None]:
# Biosphere identifier pairs that are not part of best_ids
# (facilities treated as having only biosphere flows).
other_ids = ids_biosphere.difference(best_ids)
other_ids

## Facilities with both technosphere and biosphere flows

In [None]:
# Rows of main_table selected for best_ids, reduced to the descriptive columns.
main_columns = ['main_id', 'facility_name', 'province', 'mining_processing_type', 'commodities']
main_df = get_info_for_ids(main_table, best_ids)[main_columns]
main_df

In [None]:
# Energy rows selected for best_ids, reduced to the columns needed downstream
# and labelled with the unit 'MJ'.
# .assign() builds a new frame instead of writing a column into a slice, which
# avoids pandas' SettingWithCopyWarning and keeps the cell safe to re-run.
energy_columns = ['main_id', 'facility_name', 'commodities', 'subflow_type', 'value_normalized']
energy_df = get_info_for_ids(energy_table, best_ids)[energy_columns].assign(unit='MJ')

In [None]:
# Material rows selected for best_ids, reduced to the columns needed downstream
# and labelled with the unit 't'.
# .assign() builds a new frame instead of writing a column into a slice, which
# avoids pandas' SettingWithCopyWarning and keeps the cell safe to re-run.
material_columns = ['main_id', 'facility_name', 'commodities', 'subflow_type', 'value_normalized']
material_df = get_info_for_ids(material_table, best_ids)[material_columns].assign(unit='t')

In [None]:
# Stack the energy and material rows into one technosphere frame, then bring in
# the province from main_df so the electricity location can be specified later.
province_by_site = main_df[['main_id', 'province']]
stacked_flows = pd.concat([energy_df, material_df], ignore_index=True)
technosphere_df = stacked_flows.merge(province_by_site, on=['main_id'], how='left')
technosphere_df

In [None]:
# Biosphere rows selected by get_info_for_ids for the best_ids identifier pairs
biosphere_df = get_info_for_ids(biosphere_table, best_ids)

In [None]:
# Keep the columns needed for the mapping step and add the province of each
# site from main_df (left join on main_id).
biosphere_columns = ['main_id', 'facility_name', 'commodities', 'substance_name', 'unit_normalized', 'value_normalized']
site_provinces = main_df[['main_id', 'province']]
biosphere_df = biosphere_df[biosphere_columns].merge(site_provinces, on=['main_id'], how='left')
biosphere_df

## Facilities with only biosphere flows

In [None]:
# Rows of main_table selected for other_ids, reduced to the descriptive columns
# (facility_type is kept here in addition to the columns used for main_df).
other_main_columns = ['main_id', 'facility_name', 'province', 'facility_type', 'mining_processing_type', 'commodities']
main_df_other = get_info_for_ids(main_table, other_ids)[other_main_columns]
main_df_other

In [None]:
# Biosphere rows for the facilities in other_ids (biosphere flows only).
# The selection uses other_ids, the column subset is taken from
# biosphere_df_other itself, and the province comes from main_df_other, so this
# frame describes the "other" facilities rather than repeating the best_ids ones.
biosphere_df_other = get_info_for_ids(biosphere_table, other_ids)
biosphere_df_other = biosphere_df_other[
    ['main_id', 'facility_name', 'commodities', 'substance_name', 'unit_normalized', 'value_normalized']]
biosphere_df_other = biosphere_df_other.merge(main_df_other[['main_id', 'province']], on=['main_id'], how='left')
biosphere_df_other

# Map MetalliCan flows to EI and RI flows

## Technosphere flows

In [None]:
# Load the 'technosphere' sheet of the mapping workbook.
mapping_workbook = r'data/Mappings/MAPPINGS_RI.xlsx'
mapping_technosphere = pd.read_excel(mapping_workbook, sheet_name='technosphere')

In [None]:
from core.conversion_functions import map_technosphere_to_ecoinvent, map_biosphere_to_ecoinvent
from core.constants import CA_provinces

In [None]:
# Display the technosphere mapping table for inspection
mapping_technosphere

In [None]:
# Display the technosphere flows that are about to be mapped
technosphere_df

In [None]:
from core.constants import CA_provinces

In [None]:
# Apply map_technosphere_to_ecoinvent to the technosphere flows, passing the
# mapping sheet and the CA_provinces constant.
# NOTE(review): the filtering cells below expect the result to carry the columns
# Activity, Product, Amount, Unit, Location and Database — confirm in core.conversion_functions.
mapped_technosphere_df = map_technosphere_to_ecoinvent(technosphere_df, mapping_technosphere, CA_provinces)

In [None]:
# For now, keep only rows whose Activity is not "No mapping" and whose Amount
# is not missing; rows failing either condition are dropped.
has_activity = mapped_technosphere_df["Activity"] != "No mapping"
has_amount = mapped_technosphere_df["Amount"].notna()
mapped_technosphere_df = mapped_technosphere_df[has_activity & has_amount]

In [None]:
# Reduce the mapped technosphere frame to the columns kept for the export and LCI steps.
technosphere_export_columns = [
    'main_id',
    'facility_name',
    'Amount',
    'Activity',
    'Product',
    'Unit',
    'Location',
    'Database',
]
mapped_technosphere_df = mapped_technosphere_df[technosphere_export_columns]

In [None]:
# Display the filtered, column-reduced technosphere mapping result
mapped_technosphere_df

In [None]:
# Save the mapped technosphere flows to a CSV file in the current working directory (row index omitted)
mapped_technosphere_df.to_csv(r'mapped_technosphere_df.csv', index=False)

## Biosphere flows mapping

In [None]:
# Load the 'biosphere' sheet of the mapping workbook and display it.
mapping_workbook = r'data/Mappings/MAPPINGS_RI.xlsx'
mapping_biosphere = pd.read_excel(mapping_workbook, sheet_name='biosphere')
mapping_biosphere

In [None]:
# Display the biosphere flows that are about to be mapped
biosphere_df

In [None]:
# Apply map_biosphere_to_ecoinvent to the biosphere flows, passing the mapping
# sheet and the CA_provinces constant.
# NOTE(review): the filtering cells below expect the result to carry the columns
# Flow Name, Amount, Unit, Compartments and Database — confirm in core.conversion_functions.
mapped_biosphere_df = map_biosphere_to_ecoinvent(biosphere_df, mapping_biosphere, CA_provinces)

In [None]:
# Display the biosphere mapping result before filtering
mapped_biosphere_df

In [None]:
# For now, keep only rows whose Flow Name is not "No mapping" and whose Amount
# is not missing; rows failing either condition are dropped.
has_flow_name = mapped_biosphere_df["Flow Name"] != "No mapping"
has_amount = mapped_biosphere_df["Amount"].notna()
mapped_biosphere_df = mapped_biosphere_df[has_flow_name & has_amount]

In [None]:
# Reduce the mapped biosphere frame to the columns kept for the export and LCI steps.
biosphere_export_columns = [
    'main_id',
    'facility_name',
    'Amount',
    'Unit',
    'Flow Name',
    'Compartments',
    'Database',
]
mapped_biosphere_df = mapped_biosphere_df[biosphere_export_columns]

In [None]:
# Display the filtered, column-reduced biosphere mapping result
mapped_biosphere_df

In [None]:
# Save the mapped biosphere flows to a CSV file in the current working directory (row index omitted)
mapped_biosphere_df.to_csv(r'mapped_biosphere_df.csv', index=False)

# LCI creation

In [None]:
from core.lci_database_builder import LCIDatabaseBuilder

In [None]:
# Step 1 — create the LCI builder for database 'metallican_lci' in project 'metallican'
builder = LCIDatabaseBuilder(db_name='metallican_lci', project_name='metallican')

In [None]:
# Step 2 — create the activity shells from the main dataframe
# (facility_name is passed as the facility column, main_id as the site-id column)
builder.build_lci_entries(
    df=main_df,
    facility_col="facility_name",
    site_id_col="main_id"
)

# You can check what was created:
# the number of entries held in builder.lcis, followed by its first five keys
print(len(builder.lcis))
list(builder.lcis.keys())[:5]

In [None]:
# Step 3a — add the technosphere exchanges from the mapped technosphere frame,
# using main_id as the site identifier column
builder.populate_technosphere_exchanges(technosphere_df=mapped_technosphere_df, site_id_column="main_id")

In [None]:
# Step 3b — add the biosphere exchanges from the mapped biosphere frame,
# using main_id as the site identifier column
builder.populate_biosphere_exchanges(biosphere_df=mapped_biosphere_df, site_id_column="main_id")

In [None]:
# Final step — ask the builder to write what it has assembled.
# NOTE(review): presumably this persists database 'metallican_lci' in project
# 'metallican' as configured on the builder — confirm in LCIDatabaseBuilder.
builder.write_to_database()