In [1]:
import pandas as pd

# Create mapping between NPRI and EI

In [2]:
df_npri_iw = pd.read_excel(r'../data/Concordances/openIO_IW_EI_concordance.xlsx', sheet_name='NPRI_to_IW21')
df_ei_iw = pd.read_excel(r'../data/Concordances/openIO_IW_EI_concordance.xlsx', sheet_name='EI_to_IW+')

In [3]:
# Standardize column names for merging
df_npri_iw.rename(columns={'IMPACT World+ flows': 'iw_name'}, inplace=True)
df_ei_iw.rename(columns={'iw name': 'iw_name'}, inplace=True)

In [4]:
# Add the 'EI' column to df_npri_iw based on matching iw_name with df_ei_iw
df_npri_iw['EI'] = df_npri_iw['iw_name'].map(
    lambda iw: df_ei_iw[df_ei_iw['iw_name'] == iw]['ecoinvent name'].iloc[0]
    if iw in df_ei_iw['iw_name'].values else 'No match found'
)


In [5]:
# df_npri_iw.to_excel(r'concordances.xlsx', index=False)

# Import and analyze NPRI data

The National Pollutant Inventory Report (NPRI) can be downloaded from [here](https://www.canada.ca/en/services/environment/pollution-waste-management/national-pollutant-release-inventory.html)


## Get summary data for all substances

In [7]:
npri_df = pd.read_excel(r'../data/Air_emissions/NPRI-INRP_DataDonnées_2023.xlsx', sheet_name='INRP-NPRI 2023', skiprows=3)

In [8]:
npri_df

Unnamed: 0,Year,NPRI ID,Company Name,Facility Name,City,CSD,CA or CMA,Economic Region,Province,Postal Code,...,Organic Compound Recovery,Metal Recovery,Inorganic Compound Recovery,Acid or Base Recovery,Catalyst Recovery,Pollution Abatement Residue Recovery,Used Oil Recovery,Other.2,Total.5,"Total Releases, Disposals and Transfers for Recycling"
0,2023,6154,Valero Energy Inc.,Gaspé Terminal,Gaspé,Gaspé,,Gaspésie--Îles-de-la-Madeleine,QC,G4X 2E3,...,,,,,,,,,,0.009400
1,2023,15,"Dofasco Inc., Stelco Inc.",Baycoat,Hamilton,Hamilton,Hamilton,Hamilton--Niagara Peninsula,ON,L8N 3K7,...,,,,,,,,,,1.740000
2,2023,31,BASF Canada Inc.,Windsor Site,Windsor,Windsor,Windsor,Windsor--Sarnia,ON,N9A 5Y1,...,,,,,,,,,0.56000,0.647000
3,2023,79,Veolia Water Technologies & Solutions Canada,Veolia Water Technologies & Solutions Canada- ...,Edmonton,Edmonton,Edmonton,Edmonton,AB,T5L 2H8,...,,,,,,,,,,0.050225
4,2023,397,Honda Canada Inc.,Honda of Canada Mfg.,Alliston,New Tecumseth,Toronto,Kitchener--Waterloo--Barrie,ON,L9R 1A2,...,,,,,,,,,1.79300,27.095000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63869,2023,33865,Southwest Agromart,Ridgetown,Ridgetown,Chatham-Kent,Chatham-Kent,Windsor--Sarnia,ON,N0P 2C0,...,,,,,,,,,,0.019000
63870,2023,34480,1916354 Alberta Ltd.,J/E Bearing & Machine Ltd.,Tillsonburg,Tillsonburg,Tillsonburg,London,ON,N4G 5V3,...,,0.14300,,,,,,,0.14300,0.143000
63871,2023,34754,Sasco Tubes & Roll Forming Inc,Atkore Unistrut,Toronto,Toronto,Toronto,Toronto,ON,M9L 1C8,...,,3.56465,,,,,,,3.56465,3.626140
63872,2023,34791,Teck Metals Limited,Sullivan Mine,Kimberley,Kimberley,,Kootenay,BC,V1A 1E2,...,,,,,,,,,,51.360000


In [9]:
metal_ore_mining = npri_df[npri_df['NAICS 4 Sector Name (English)'] == 'Metal ore mining']

In [10]:
# Define the category mapping
category_mapping = {
    'Air Emissions': ['Stack Emissions', 'Storage / Handling', 'Fugitive Emissions', 'Spills', 'Other', 'Total'],
    'Water Releases': ['Direct Discharge', 'Spills', 'Leaks', 'Total', 'Receiving Waterbody (English)', 'Receiving Waterbody (French)'],
    'Land Releases': ['Spills', 'Leaks', 'Other', 'Total'],
    'Total Releases': ['Total Releases (Excluding Road Dust)', 'Road Dust Emissions', 'Total Releases Including Road Dust'],
    'On-Site Disposal': ['Land Fill', 'Land Treatment', 'Underground Injection', 'Tailings', 'Waste Rock', 'Total On-Site'],
    'Off-Site Disposal': ['Land Fill', 'Land Treatment', 'Underground Injection', 'Storage', 'Tailings', 'Waste Rock', 'Total Off-Site'],
    'Transfers for Treatment': ['Physical Treatment', 'Chemical Treatment', 'Biological Treatment', 'Incineration', 'Transfer to a Municipal Sewage Treatment Plant', 'Total'],
    'Total On/Off Treatment': ['Total'],
    'Transfers for Recycling': [
        'Energy Recovery', 'Solvent Recovery', 'Organic Compound Recovery', 'Metal Recovery',
        'Inorganic Compound Recovery', 'Acid or Base Recovery', 'Catalyst Recovery',
        'Pollution Abatement Residue Recovery', 'Used Oil Recovery', 'Other', 'Total'
    ],
    'Grand Total': ['Total Releases, Disposals and Transfers for Recycling']
}

def get_summary_emissions_with_all_substances(df, category_mapping):
    """
    Summarize emissions with all substances, even if their emissions are zero, 
    with substance names, units, substance type (e.g., Stack Emissions), and category.
    """
    results = []
    for category, columns in category_mapping.items():
        valid_columns = [col for col in columns if col in df.columns]
        if valid_columns:
            for col in valid_columns:
                # Ensure the values are numeric
                df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
                # Group by substance and unit, summing the specific column
                grouped = df.groupby(['Substance Name (English)', 'Units'])[col].sum()
                # Include all substances, even if total emissions are zero
                for (substance, unit), total in grouped.items():
                    results.append({
                        'Substance': substance,
                        'Unit': unit,
                        'Substance Type': col,
                        'Category': category,
                        'Total Emissions': total
                    })
    # Convert to DataFrame and ensure all substances are included
    summary_df = pd.DataFrame(results)
    return summary_df


In [11]:
# Apply the updated function to the metal ore mining data
summary_df = get_summary_emissions_with_all_substances(metal_ore_mining, category_mapping)
summary_df 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
A value is trying to be s

Unnamed: 0,Substance,Unit,Substance Type,Category,Total Emissions
0,1-Nitropyrene,kg,Stack Emissions,Air Emissions,7.087000
1,Acenaphthene,kg,Stack Emissions,Air Emissions,0.099400
2,Acenaphthylene,kg,Stack Emissions,Air Emissions,7.349500
3,Acetaldehyde,tonnes,Stack Emissions,Air Emissions,7.121000
4,Acrolein,tonnes,Stack Emissions,Air Emissions,0.000000
...,...,...,...,...,...
4279,Vanadium (and its compounds),tonnes,"Total Releases, Disposals and Transfers for Re...",Grand Total,25234.057627
4280,Volatile Organic Compounds (Total),tonnes,"Total Releases, Disposals and Transfers for Re...",Grand Total,1601.837475
4281,Xylene (all isomers),tonnes,"Total Releases, Disposals and Transfers for Re...",Grand Total,0.018100
4282,Zinc (and its compounds),tonnes,"Total Releases, Disposals and Transfers for Re...",Grand Total,40298.834259


In [12]:
def summarize_emissions_by_facility(df, category_mapping):
    """
    Summarize emissions for all big categories as separate columns with company, facility, and city details.
    """
    results = df[['Company Name', 'Facility Name', 'City', 'NAICS 6 Sector Name (English)']].copy()
    for category, columns in category_mapping.items():
        valid_columns = [col for col in columns if col in df.columns]
        if valid_columns:
            # Sum emissions for the current category
            df[category] = df[valid_columns].apply(pd.to_numeric, errors='coerce').fillna(0).sum(axis=1)
            results[category] = df[category]
    # Group by facility details and sum the emissions for all categories
    results = results.groupby(['Company Name', 'Facility Name', 'City', 'NAICS 6 Sector Name (English)'], as_index=False).sum()
    return results

In [13]:
facility_df = summarize_emissions_by_facility(metal_ore_mining, category_mapping)
facility_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[category] = df[valid_columns].apply(pd.to_numeric, errors='coerce').fillna(0).sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[category] = df[valid_columns].apply(pd.to_numeric, errors='coerce').fillna(0).sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[category] = df[v

Unnamed: 0,Company Name,Facility Name,City,NAICS 6 Sector Name (English),Air Emissions,Water Releases,Land Releases,Total Releases,On-Site Disposal,Off-Site Disposal,Transfers for Treatment,Total On/Off Treatment,Transfers for Recycling,Grand Total
0,1911 Gold Corporation,True North Gold Mine,Bissett,Gold and silver ore mining,2.176000,1.088000,1.088000,2.176000,0.000000e+00,0.000000e+00,1.088000,1.088000,1.088000,1.088000e+00
1,Agnico Eagle Mines,Macassa Mine,Kirkland Lake,Gold and silver ore mining,674.604440,529.169340,337.302220,1208.918680,4.710390e+04,2.355195e+04,337.302220,337.302220,337.302220,2.415641e+04
2,Agnico Eagle Mines Limited,Hope Bay Project,Cambridge Bay,Gold and silver ore mining,1239.798076,619.899038,796.539038,2467.678076,2.550000e+02,1.275000e+02,619.899038,619.899038,796.539038,1.361339e+03
3,Agnico Eagle Mines Limited (MAIN),Mine Canadian Malartic,Malartic,Gold and silver ore mining,3628.942200,1940.931800,1984.111100,7944.103600,6.370642e+06,3.185321e+06,1814.471100,1814.471100,1999.771100,3.189309e+06
4,Agnico Eagle Mines Ltd.,Division Meadowbank,Baker Lake,Gold and silver ore mining,8117.675826,4075.681021,4063.458641,14862.651884,4.314285e+07,2.157142e+07,4063.219913,4063.219913,4063.458641,2.157886e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,Victoria Gold Corp.,Eagle Gold Mine,Vancouver,Gold and silver ore mining,1498.472400,749.236200,1157.616000,2289.269000,4.390361e+06,2.195181e+06,749.236200,749.236200,1157.616000,2.196325e+06
97,Wesdome Gold Mines Ltd.,Eagle River Mine & Mill Complex,Wawa,Gold and silver ore mining,6.397006,3.905922,3.198503,9.976774,1.664430e+04,8.322152e+03,3.198503,3.198503,3.198503,8.327140e+03
98,Wesdome Gold Mines Ltd.,Eagle River Mine Site,Wawa,Gold and silver ore mining,503.497284,251.748642,251.748642,593.170526,7.228000e+01,3.614000e+01,251.748642,251.748642,251.748642,3.327253e+02
99,Williams Operating Corporation,Williams Mine,Marathon,Gold and silver ore mining,1895.302436,1232.023515,947.651218,2483.514332,1.781431e+05,8.907154e+04,947.651218,947.651218,947.651218,9.031330e+04


In [14]:
with pd.ExcelWriter('npri_data_sumup.xlsx') as writer:
    facility_df.to_excel(writer, sheet_name='facility', index=False)
    summary_df.to_excel(writer, sheet_name='summary', index=False)

## Mapping to EI

In [15]:
mapping = df_npri_iw
#mapping = pd.read_excel(r'../data/openIO_IW_EI_concordance.xlsx', sheet_name='NPRI_to_EI')

In [16]:
# Need to rename it to merge it to NPRI data
mapping.rename(columns={'OpenIO flows': 'Substance Name (English)'}, inplace=True)

In [18]:
mapping

Unnamed: 0,Substance Name (English),iw_name,EI
0,Carbon dioxide,"Carbon dioxide, fossil","Carbon dioxide, fossil"
1,Methane,"Methane, fossil",Methane
2,Dinitrogen monoxide,Dinitrogen monoxide,Dinitrogen monoxide
3,CF4,"Methane, tetrafluoro-, CFC-14","Methane, tetrafluoro-, R-14"
4,C2F6,"Ethane, hexafluoro-, HFC-116","Ethane, hexafluoro-, HFC-116"
...,...,...,...
348,Propylene oxide,Propylene oxide,Propylene oxide
349,Thiourea,Thiourea,No match found
350,2-Ethoxyethyl acetate,2-Ethoxyethyl acetate,No match found
351,Azo disperse dyes,,No match found


In [19]:
df_npri = pd.read_excel('../data/Air_emissions/NPRI-INRP_DataDonnées_2023.xlsx', sheet_name='INRP-NPRI 2023', skiprows=3)

In [22]:
df_npri

Unnamed: 0,Year,NPRI ID,Company Name,Facility Name,City,CSD,CA or CMA,Economic Region,Province,Postal Code,...,Organic Compound Recovery,Metal Recovery,Inorganic Compound Recovery,Acid or Base Recovery,Catalyst Recovery,Pollution Abatement Residue Recovery,Used Oil Recovery,Other.2,Total.5,"Total Releases, Disposals and Transfers for Recycling"
0,2023,6154,Valero Energy Inc.,Gaspé Terminal,Gaspé,Gaspé,,Gaspésie--Îles-de-la-Madeleine,QC,G4X 2E3,...,,,,,,,,,,0.009400
1,2023,15,"Dofasco Inc., Stelco Inc.",Baycoat,Hamilton,Hamilton,Hamilton,Hamilton--Niagara Peninsula,ON,L8N 3K7,...,,,,,,,,,,1.740000
2,2023,31,BASF Canada Inc.,Windsor Site,Windsor,Windsor,Windsor,Windsor--Sarnia,ON,N9A 5Y1,...,,,,,,,,,0.56000,0.647000
3,2023,79,Veolia Water Technologies & Solutions Canada,Veolia Water Technologies & Solutions Canada- ...,Edmonton,Edmonton,Edmonton,Edmonton,AB,T5L 2H8,...,,,,,,,,,,0.050225
4,2023,397,Honda Canada Inc.,Honda of Canada Mfg.,Alliston,New Tecumseth,Toronto,Kitchener--Waterloo--Barrie,ON,L9R 1A2,...,,,,,,,,,1.79300,27.095000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63869,2023,33865,Southwest Agromart,Ridgetown,Ridgetown,Chatham-Kent,Chatham-Kent,Windsor--Sarnia,ON,N0P 2C0,...,,,,,,,,,,0.019000
63870,2023,34480,1916354 Alberta Ltd.,J/E Bearing & Machine Ltd.,Tillsonburg,Tillsonburg,Tillsonburg,London,ON,N4G 5V3,...,,0.14300,,,,,,,0.14300,0.143000
63871,2023,34754,Sasco Tubes & Roll Forming Inc,Atkore Unistrut,Toronto,Toronto,Toronto,Toronto,ON,M9L 1C8,...,,3.56465,,,,,,,3.56465,3.626140
63872,2023,34791,Teck Metals Limited,Sullivan Mine,Kimberley,Kimberley,,Kootenay,BC,V1A 1E2,...,,,,,,,,,,51.360000


In [23]:
# We add the Ecoinvent name based on the mapping 
df_npri = df_npri.merge(mapping[['Substance Name (English)', 'EI']],
              on='Substance Name (English)',
              how='left')

In [24]:
df_npri

Unnamed: 0,Year,NPRI ID,Company Name,Facility Name,City,CSD,CA or CMA,Economic Region,Province,Postal Code,...,Metal Recovery,Inorganic Compound Recovery,Acid or Base Recovery,Catalyst Recovery,Pollution Abatement Residue Recovery,Used Oil Recovery,Other.2,Total.5,"Total Releases, Disposals and Transfers for Recycling",EI
0,2023,6154,Valero Energy Inc.,Gaspé Terminal,Gaspé,Gaspé,,Gaspésie--Îles-de-la-Madeleine,QC,G4X 2E3,...,,,,,,,,,0.009400,No match found
1,2023,15,"Dofasco Inc., Stelco Inc.",Baycoat,Hamilton,Hamilton,Hamilton,Hamilton--Niagara Peninsula,ON,L8N 3K7,...,,,,,,,,,1.740000,No match found
2,2023,31,BASF Canada Inc.,Windsor Site,Windsor,Windsor,Windsor,Windsor--Sarnia,ON,N9A 5Y1,...,,,,,,,,0.56000,0.647000,No match found
3,2023,79,Veolia Water Technologies & Solutions Canada,Veolia Water Technologies & Solutions Canada- ...,Edmonton,Edmonton,Edmonton,Edmonton,AB,T5L 2H8,...,,,,,,,,,0.050225,No match found
4,2023,397,Honda Canada Inc.,Honda of Canada Mfg.,Alliston,New Tecumseth,Toronto,Kitchener--Waterloo--Barrie,ON,L9R 1A2,...,,,,,,,,1.79300,27.095000,No match found
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63869,2023,33865,Southwest Agromart,Ridgetown,Ridgetown,Chatham-Kent,Chatham-Kent,Windsor--Sarnia,ON,N0P 2C0,...,,,,,,,,,0.019000,Zinc
63870,2023,34480,1916354 Alberta Ltd.,J/E Bearing & Machine Ltd.,Tillsonburg,Tillsonburg,Tillsonburg,London,ON,N4G 5V3,...,0.14300,,,,,,,0.14300,0.143000,Zinc
63871,2023,34754,Sasco Tubes & Roll Forming Inc,Atkore Unistrut,Toronto,Toronto,Toronto,Toronto,ON,M9L 1C8,...,3.56465,,,,,,,3.56465,3.626140,Zinc
63872,2023,34791,Teck Metals Limited,Sullivan Mine,Kimberley,Kimberley,,Kootenay,BC,V1A 1E2,...,,,,,,,,,51.360000,Zinc


In [13]:
#df_npri['NAICS 4 Sector Name (English)'].value_counts()

In [25]:
df_metal = df_npri[df_npri['NAICS 4 Sector Name (English)'] == 'Metal ore mining'].reset_index()

In [26]:
df_metal

Unnamed: 0,index,Year,NPRI ID,Company Name,Facility Name,City,CSD,CA or CMA,Economic Region,Province,...,Metal Recovery,Inorganic Compound Recovery,Acid or Base Recovery,Catalyst Recovery,Pollution Abatement Residue Recovery,Used Oil Recovery,Other.2,Total.5,"Total Releases, Disposals and Transfers for Recycling",EI
0,259,2023,5013,Iron Ore Company of Canada,Carol Project,Labrador City,"Division No. 10, Subd. D",,West Coast--Northern Peninsula--Labrador / Côt...,NL,...,,,,,,,,,7.087000,No match found
1,412,2023,2372,Myra Falls Mine Ltd,Myra Falls Mine Ltd,Campbell River,Strathcona D (Oyster Bay - Buttle Lake),Campbell River,Vancouver Island and Coast / Île de Vancouver ...,BC,...,,,,,,,,,0.099400,Acenaphthene
2,483,2023,2372,Myra Falls Mine Ltd,Myra Falls Mine Ltd,Campbell River,Strathcona D (Oyster Bay - Buttle Lake),Campbell River,Vancouver Island and Coast / Île de Vancouver ...,BC,...,,,,,,,,,0.158500,Acenaphthylene
3,515,2023,5448,Glencore,Mine Raglan,Laval,Rivière-Koksoak,,Nord-du-Québec,QC,...,,,,,,,,,7.191000,Acenaphthylene
4,556,2023,2000,ArcelorMittal Exploitation Minière Canada s.e....,Usine de Bouletage de Port-Cartier,Port-Cartier,Port-Cartier,,Côte-Nord,QC,...,,,,,,,,,7.121000,Acetaldehyde
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1671,63843,2023,30807,Eldorado Gold (Québec) Inc.,Eldorado Gold (Québec) Inc.,Val-d'Or,Val-d'Or,Val-d'Or,Abitibi-Témiscamingue,QC,...,,,,,,,,,74.047800,Zinc
1672,63853,2023,32235,NEWCREST RED CHRIS MINING LIMITED,Red Chris Mine,Dease Lake,Kitimat-Stikine D,,North Coast / Côte-nord,BC,...,,,,,,,,,149.164900,Zinc
1673,63861,2023,33049,Prodigy Gold Inc.,Magino Mine,Dubreuilville,"Algoma, Unorganized, North Part",,Northeast / Nord-est,ON,...,,,,,,,,,474.501309,Zinc
1674,63868,2023,33819,Red Lake Madsen Mine Ltd.,Red Lake Madsen Mine,Madsen,Red Lake,,Northwest / Nord-ouest,ON,...,,,,,,,,,0.000000,Zinc


In [27]:
# To see how much NPRI substances do not found a match in Ecoinvent 
percentage_matching = (df_metal['EI'].eq('No match found').mean() * 100)
percentage_matching

np.float64(16.16945107398568)

In [17]:
#df_metal['NAICS 6 Sector Name (English)'].value_counts()

In [28]:
df_nickel = df_metal[df_metal['NAICS 6 Sector Name (English)'] == 'Nickel-copper ore mining'].reset_index()

In [29]:
df_nickel

Unnamed: 0,level_0,index,Year,NPRI ID,Company Name,Facility Name,City,CSD,CA or CMA,Economic Region,...,Metal Recovery,Inorganic Compound Recovery,Acid or Base Recovery,Catalyst Recovery,Pollution Abatement Residue Recovery,Used Oil Recovery,Other.2,Total.5,"Total Releases, Disposals and Transfers for Recycling",EI
0,3,515,2023,5448,Glencore,Mine Raglan,Laval,Rivière-Koksoak,,Nord-du-Québec,...,,,,,,,,,7.191000,Acenaphthylene
1,12,867,2023,1233,Glencore Canada Corporation,Sudbury Operations Mines Mill - Onaping Area,Onaping,Greater Sudbury / Grand Sudbury,Greater Sudbury / Grand Sudbury,Northeast / Nord-est,...,,,,,,,,,28.338478,Ammonia
2,13,877,2023,1465,Vale Canada Limited,Clarabelle Mill,Copper Cliff,Greater Sudbury / Grand Sudbury,Greater Sudbury / Grand Sudbury,Northeast / Nord-est,...,,,,,,,,,86.670000,Ammonia
3,14,878,2023,1473,Vale Canada Ltd,Thompson Operations,Thompson,Mystery Lake,,North / Nord,...,,,,,,,,,11.350270,Ammonia
4,32,1070,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,384.221000,Ammonia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,1616,63376,2023,1473,Vale Canada Ltd,Thompson Operations,Thompson,Mystery Lake,,North / Nord,...,,,,,,,,,139.220038,Zinc
272,1636,63557,2023,5448,Glencore,Mine Raglan,Laval,Rivière-Koksoak,,Nord-du-Québec,...,,,,,,,,,190.097400,Zinc
273,1638,63596,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,204.440000,Zinc
274,1649,63689,2023,11154,FNX Mining Company Inc.,McCreedy West Mine,Levack,Greater Sudbury / Grand Sudbury,Greater Sudbury / Grand Sudbury,Northeast / Nord-est,...,,,,,,,,,0.009750,Zinc


## Analysis per company and facility

In [30]:
# Create a dictionary to store individual DataFrames for each Company-Facility
nickel_groups = df_nickel.groupby(['Company Name', 'Facility Name'])

dfs_by_company_facility = {
    f"{company}_{facility}".replace(" ", "_").replace("/", "-"): group
    for (company, facility), group in nickel_groups
}

dfs_by_company_facility.keys()

dict_keys(['Canadian_Royalties_inc._Projet_Nunavik_Nickel', 'FNX_Mining_Company_Inc._Levack_Mine', 'FNX_Mining_Company_Inc._McCreedy_West_Mine', 'FNX_Mining_Company_Inc._Victoria_Advanced_Exploration_Project', 'Glencore_Mine_Raglan', 'Glencore_Canada_Corporation_Nickel_Rim_South_Mine', 'Glencore_Canada_Corporation_Sudbury_Operations_Mines_Mill_-_Onaping_Area', 'Northern_Sun_Mining_Corp._Redstone_Mine_and_Mill', 'Vale_Canada_Limited_Clarabelle_Mill', 'Vale_Canada_Limited_Coleman_Mine', 'Vale_Canada_Limited_Copper_Cliff_Mine_(North)', 'Vale_Canada_Limited_Copper_Cliff_Mine_(South)', 'Vale_Canada_Limited_Creighton_Mine', 'Vale_Canada_Limited_Ellen_Pit', 'Vale_Canada_Limited_Frood-Stobie_Mine', 'Vale_Canada_Limited_Garson_Mine', 'Vale_Canada_Limited_Gertrude_Pit', 'Vale_Canada_Limited_Totten_Mine', 'Vale_Canada_Ltd_Thompson_Operations', "Vale_Newfoundland_and_Labrador_Limited_Voisey's_Bay_Mine_Site"])

In [31]:
# To access a specific one
dfs_by_company_facility["Vale_Newfoundland_and_Labrador_Limited_Voisey's_Bay_Mine_Site"]

Unnamed: 0,level_0,index,Year,NPRI ID,Company Name,Facility Name,City,CSD,CA or CMA,Economic Region,...,Metal Recovery,Inorganic Compound Recovery,Acid or Base Recovery,Catalyst Recovery,Pollution Abatement Residue Recovery,Used Oil Recovery,Other.2,Total.5,"Total Releases, Disposals and Transfers for Recycling",EI
4,32,1070,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,384.221,Ammonia
11,100,1714,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,1421.578,Arsenic
25,191,3009,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,1604.145,Cadmium
39,258,3672,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,335.124,"Carbon monoxide, fossil"
54,329,7468,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,563.493,Chromium
61,391,7957,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,241743.578,Cobalt
74,464,8340,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,1237.842,Copper
87,541,9147,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,0.0,No match found
89,553,9702,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,56.975,56.975,56.975,No match found
91,571,10468,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,4.808,"Benzene, hexachloro-"


# Import production data per facility

In [26]:
#df_production = pd.read_excel(r'../data/canada_site_specific_data.xlsx')

PermissionError: [Errno 13] Permission denied: '../data/canada_site_specific_data.xlsx'

## Example 

In [32]:
npri_voisey = dfs_by_company_facility["Vale_Newfoundland_and_Labrador_Limited_Voisey's_Bay_Mine_Site"]
npri_voisey

Unnamed: 0,level_0,index,Year,NPRI ID,Company Name,Facility Name,City,CSD,CA or CMA,Economic Region,...,Metal Recovery,Inorganic Compound Recovery,Acid or Base Recovery,Catalyst Recovery,Pollution Abatement Residue Recovery,Used Oil Recovery,Other.2,Total.5,"Total Releases, Disposals and Transfers for Recycling",EI
4,32,1070,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,384.221,Ammonia
11,100,1714,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,1421.578,Arsenic
25,191,3009,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,1604.145,Cadmium
39,258,3672,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,335.124,"Carbon monoxide, fossil"
54,329,7468,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,563.493,Chromium
61,391,7957,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,241743.578,Cobalt
74,464,8340,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,1237.842,Copper
87,541,9147,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,0.0,No match found
89,553,9702,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,56.975,56.975,56.975,No match found
91,571,10468,2023,6093,Vale Newfoundland and Labrador Limited,Voisey's Bay Mine Site,Happy Valley-Goose Bay,"Division No. 10, Subd. E",,West Coast--Northern Peninsula--Labrador / Côt...,...,,,,,,,,,4.808,"Benzene, hexachloro-"


In [33]:
def allocate_emissions(
    df, production_data, metals, economic_values=None, method='mass',
    unit_column='Units', 
):
    """
    Updated function to exclude specific columns like 'EI' from conversion,
    while performing unit conversion, normalization, and allocation.
    """
    # Step 1: Trim the DataFrame to include only relevant columns
    start_col = 'Substance Name (English)'
    df = df.loc[:, start_col:].copy()

    # Step 2: Identify emission columns (exclude non-numeric columns like 'EI')
    start_col = 'Stack Emissions'
    end_col = 'Total Releases, Disposals and Transfers for Recycling'
    emission_columns = df.loc[:, start_col:end_col].select_dtypes(include='number').columns.tolist()

    # Step 3: Convert units to kg for emissions columns only
    conversion_factors = {'tonnes': 1000, 'kg': 1, 'grams': 0.001, 'g TEQ': 0.001}
    if unit_column in df.columns:
        for unit, factor in conversion_factors.items():
            mask = df[unit_column] == unit
            df.loc[mask, emission_columns] = df.loc[mask, emission_columns] * factor
        df[unit_column] = 'kg'

    # Step 4: Normalize emissions per kg of total production
    total_production = sum(production_data.values())
    for col in emission_columns:
        df[f'{col}_per_kg_Total'] = df[col] / total_production

    # Step 5: Allocate emissions (mass or economic)
    allocation_factors = {}
    for metal, production in production_data.items():
        if method == 'mass':
            allocation_factors[metal] = production / total_production
        elif method == 'economic':
            total_economic_value = sum(production_data[m] * economic_values[m] for m in metals)
            allocation_factors[metal] = (production * economic_values[metal]) / total_economic_value
        else:
            raise ValueError("Invalid method. Use 'mass' or 'economic'.")

    for col in emission_columns:
        for metal in metals:
            df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]

    return df

In [34]:
production_data = {'Nickel': 50000000, 'Copper': 32000000}  # kg of nickel and copper metal in concentrate
economic_values = {'Nickel': 13800, 'Copper': 6160}  # unit value in 2019 $/t given by USGS
metals = ['Nickel', 'Copper']

In [35]:
mass_df = allocate_emissions(
    npri_voisey, production_data, metals, economic_values=economic_values, method='mass', 
    unit_column='Units'
)

economic_df = allocate_emissions(
    npri_voisey, production_data, metals, economic_values=economic_values, method='economic', 
    unit_column='Units'
)

  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Total'] * allocation_factors[metal]
  df[f'{col}_per_kg_{metal}'] = df[f'{col}_per_kg_Tota

In [36]:
def organize_emissions_data(df, metals):
    """
    Organize the emissions data into a clean format with desired columns.
    """
    # Define category mapping
    category_mapping = {
        'Air Emissions': ['Stack Emissions', 'Storage / Handling', 'Fugitive Emissions', 'Spills', 'Other', 'Total'],
        'Water Releases': ['Direct Discharge', 'Spills', 'Leaks', 'Total'],
        'Land Releases': ['Spills', 'Leaks', 'Other', 'Total'],
        'Total Releases': ['Total Releases (Excluding Road Dust)', 'Road Dust Emissions', 'Total Releases Including Road Dust'],
        'On-Site Disposal': ['Land Fill', 'Land Treatment', 'Underground Injection', 'Tailings', 'Waste Rock', 'Total On-Site'],
        'Off-Site Disposal': ['Land Fill', 'Land Treatment', 'Underground Injection', 'Storage', 'Tailings', 'Waste Rock', 'Total Off-Site'],
        'Transfers for Recycling': ['Energy Recovery', 'Metal Recovery', 'Other', 'Total']
    }

    # Prepare rows for clean output
    rows = []
    for category, columns in category_mapping.items():
        for col in columns:
            total_col = f"{col}_per_kg_Total"
            metal_cols = [f"{col}_per_kg_{metal}" for metal in metals]

            if total_col in df.columns:
                for _, row in df.iterrows():
                    row_data = {
                        'Type': category,
                        'Category': col,
                        'Substance Name (English)': row['Substance Name (English)'],
                        'Ecoinvent Name': row['EI'] if 'EI' in df.columns else 'Unknown',
                        'Unit': row['Units'],
                        'Total Emissions (per kg)': row[total_col],
                    }
                    for metal, metal_col in zip(metals, metal_cols):
                        row_data[f'{metal} Emissions (per kg)'] = row[metal_col] if metal_col in df.columns else 0
                    rows.append(row_data)

    # Convert to DataFrame
    result_df = pd.DataFrame(rows)

    return result_df

In [37]:
mass_df = organize_emissions_data(mass_df, metals)
economic_df = organize_emissions_data(economic_df, metals)

In [38]:
with pd.ExcelWriter('allocation.xlsx', engine='xlsxwriter') as writer:
    mass_df.to_excel(writer, sheet_name='mass', index=False)
    economic_df.to_excel(writer, sheet_name='eco', index=False) 