In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd

# Imports

In [2]:
# From datasets see dataset_processing.ipynb
main_gdf = gpd.read_file(r'data/Tables/Raw_datasets/main_gdf.geojson')
tailings_gdf = gpd.read_file(r'data/Tables/Raw_datasets/tailings_gdf.geojson')
min_can_gdf = gpd.read_file(r'data/Tables/Raw_datasets/min_can_gdf.geojson')
ctrace_gdf = gpd.read_file(r'data/Tables/Raw_datasets/ctrace_gdf.geojson')
conflict_gdf = gpd.read_file(r'data/Tables/Raw_datasets/conflict_gdf.geojson')
ghg_gdf = gpd.read_file(r'data/Tables/Raw_datasets/ghg_gdf.geojson')
pollutant_gdf = gpd.read_file(r'data/Tables/Raw_datasets/pollutant_gdf.geojson')
climate_category_gdf = gpd.read_file(r'data/Tables/Raw_datasets/climate_category_gdf.geojson')
weather_gdf = gpd.read_file(r'data/Tables/Raw_datasets/weather_gdf.geojson')
peatland_gdf = gpd.read_file(r'data/Tables/Raw_datasets/peatland_gdf.geojson')
prioritization_area_gdf = gpd.read_file(r'data/Tables/Raw_datasets/prioritization_area_gdf.geojson')
population_gdf = gpd.read_file(r'data/Tables/Raw_datasets/population_gdf.geojson')
water_risk_gdf = gpd.read_file(r'data/Tables/Raw_datasets/water_risk_gdf.geojson')
land_cover_gdf = gpd.read_file(r'data/Tables/Raw_datasets/land_cover_gdf.geojson')
npv_gdf = gpd.read_file(r'data/Tables/Raw_datasets/npv_gdf.geojson')
indigenous_can_gdf = gpd.read_file(r'data/Tables/Raw_datasets/indigenous_can_gdf.geojson')
wpda_gdf_polygons = gpd.read_file(r'data/Tables/Raw_datasets/wpda_gdf_polygons.geojson')
tang_canada_gdf = gpd.read_file(r'data/Tables/Raw_datasets/tang_canada_gdf.geojson')

In [3]:
# From manually collected data
production_df = pd.read_excel(r'data/Tables/extracted_data.xlsx', sheet_name='Production')
reserves_df = pd.read_excel(r'data/Tables/extracted_data.xlsx', sheet_name='Reserves')
energy_df = pd.read_excel(r'data/Tables/extracted_data.xlsx', sheet_name='Energy')
env_df = pd.read_excel(r'data/Tables/extracted_data.xlsx', sheet_name='Env')
env_int_df = pd.read_excel(r'data/Tables/extracted_data.xlsx', sheet_name='Env_int')
archetypes_df = pd.read_excel(r'data/Tables/extracted_data.xlsx', sheet_name='Archetype')

In [4]:
from db_creation_function import assign_row_id_to_collected_data

In [5]:
# We need to assign row_ids to the manually collected data
production_df = assign_row_id_to_collected_data(production_df, 
                  facility_id_col='main_id',
                  prefix='PROD',
                  year_col='year'           
                  )

reserves_df = assign_row_id_to_collected_data(reserves_df, 
                  facility_id_col='main_id',
                  prefix='RES',
                  year_col='year'           
                  )

energy_df = assign_row_id_to_collected_data(energy_df, 
                  facility_id_col='main_id',
                  prefix='NRJ',
                  year_col='year'           
                  )

env_df = assign_row_id_to_collected_data(env_df, 
                  facility_id_col='main_id',
                  prefix='ENV',
                  year_col='year'           
                  )

env_int_df = assign_row_id_to_collected_data(env_int_df, 
                  facility_id_col='main_id',
                  prefix='ENV_INT',
                  year_col='year'           
                  )

archetypes_df = assign_row_id_to_collected_data(archetypes_df, 
                  facility_id_col='main_id',
                  prefix='ARCH',
                  )

# Create auxiliary tables

## Source table

In [6]:
# For the main table
source_map = {
    "project": "https://open.canada.ca/data/en/dataset/22b2db8a-dc12-47f2-9737-99d3da921751",
    "mining": "https://open.canada.ca/data/en/dataset/000183ed-8864-42f0-ae43-c4313a860720",
    "manufacturing": "https://open.canada.ca/data/en/dataset/000183ed-8864-42f0-ae43-c4313a860720"
}
main_gdf["source_id"] = main_gdf["facility_type"].map(source_map)
main_gdf

Unnamed: 0,main_id,facility_name,facility_group_name,facility_group_id,reported_company,company_name_folder,company_id,longitude,latitude,city,...,status,activity_status,development_stage,facility_type,mining_processing_type,commodity_group,primary_commodity,commodities,source_id,geometry
0,QC-MAIN-5801b453,Abcourt-Barvue,,,Abcourt Mines Inc.,Abcourt Mines Inc,CMP-0a6a3062,-77.679010,48.521540,,...,project_facility,Active,Advanced project,project,,,Zinc,"Zinc, silver",https://open.canada.ca/data/en/dataset/22b2db8...,POINT (-77.67901 48.52154)
1,QC-MAIN-d29e0839,Sleeping Giant,,,Abcourt Mines Inc.,Abcourt Mines Inc,CMP-0a6a3062,-77.974340,49.132590,north of Amos,...,active_facility,,,mining,Concentrator,Precious metals,Gold,"Gold, silver",https://open.canada.ca/data/en/dataset/000183e...,POINT (-77.97434 49.13259)
2,QC-MAIN-f1ff4920,Akasaba West,,,Agnico Eagle Mines Limited,Agnico Eagle Mines Limited,CMP-6265c407,-77.580000,48.040000,,...,project_facility,Active,Advanced project,project,,,Gold,"Gold, copper",https://open.canada.ca/data/en/dataset/22b2db8...,POINT (-77.58000 48.04000)
3,NU-MAIN-4ea8ac9d,Amaruq,Meadowbank complex,GRP-0a2c0d69,Agnico Eagle Mines Limited,Agnico Eagle Mines Limited,CMP-6265c407,-96.697000,65.415000,Baker Lake,...,active_facility,,,mining,"Open-pit, underground",Precious metals,Gold,Gold,https://open.canada.ca/data/en/dataset/000183e...,POINT (-96.69700 65.41500)
4,QC-MAIN-e7e6a960,Canadian Malartic,,,Agnico Eagle Mines Limited,Agnico Eagle Mines Limited,CMP-6265c407,-78.130824,48.122223,Malartic,...,active_facility,,,mining,"Open-pit, concentrator",Precious metals,Gold,"Gold, silver",https://open.canada.ca/data/en/dataset/000183e...,POINT (-78.13082 48.12222)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,ON-MAIN-c5fefb01,Mishi,,,Wesdome Gold Mines Ltd.,Wesdome Gold Mines Ltd,CMP-3d2c4955,-85.452500,48.110280,Wawa,...,active_facility,,,mining,Open-pit,Precious metals,Gold,Gold,https://open.canada.ca/data/en/dataset/000183e...,POINT (-85.45250 48.11028)
266,BC-MAIN-feb13c30,Record Ridge,,,West High Yield Resources Ltd.,West High Yield Resources Ltd,CMP-1023f9c0,-117.884000,49.082000,,...,project_facility,Active,Advanced project,project,,,Magnesium,Magnesium,https://open.canada.ca/data/en/dataset/22b2db8...,POINT (-117.88400 49.08200)
267,YT-MAIN-5e0d8b48,Casino,,,Western Copper and Gold Corporation,Western Copper and Gold Corporation,CMP-2dac9896,-138.833330,62.733330,,...,project_facility,Active,Advanced project,project,,,Copper,"Copper, gold, molybdenum, silver",https://open.canada.ca/data/en/dataset/22b2db8...,POINT (-138.83333 62.73333)
268,ON-MAIN-12c68d49,Eagle's Nest,,,Wyloo,Wyloo,CMP-65b36728,-86.303800,52.742020,,...,project_facility,Active,Advanced project,project,,,Nickel,"Nickel, copper, gold, platinum, palladium",https://open.canada.ca/data/en/dataset/22b2db8...,POINT (-86.30380 52.74202)


In [7]:
# For the datasets
tailings_gdf["source_id"] = 'https://tailing.grida.no/'
min_can_gdf["source_id"] = '10.1038/s41597-024-03116-3'
ctrace_gdf["source_id"] = 'Jolleys, M. et al (2024). Mineral Extraction sector: Mining and Quarrying Emissions from Copper, Iron, Bauxite, Rock and Sand, Hypervine, UK, Climate TRACE Emissions Inventory'
conflict_gdf["source_id"] = 'http://journals.librarypublishing.arizona.edu/jpe/article/id/1932/'
ghg_gdf["source_id"] = 'https://www.canada.ca/en/environment-climate-change/services/environmental-indicators/greenhouse-gas-emissions/large-facilities.html'
pollutant_gdf["source_id"] = 'https://www.canada.ca/en/environment-climate-change/services/national-pollutant-release-inventory/tools-resources-data/exploredata.html' 
prioritization_area_gdf['source_id'] = 'https://doi.org/10.5281/zenodo.5006332'
climate_category_gdf['source_id'] = 'https://doi.org/10.1038/s41597-023-02549-6'
weather_gdf['source_id'] = 'https://climate-scenarios.canada.ca/?page=CanDCS6-indices'
peatland_gdf['source_id'] = 'https://greifswaldmoor.de/global-peatland-database-en.html'
population_gdf['source_id'] = 'https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F40-C41DA8332CFE' 
water_risk_gdf['source_id'] = 'https://www.wri.org/data/aqueduct-global-maps-40-data'
land_cover_gdf['source_id'] = 'https://doi.org/10.5067/MODIS/MCD12Q1.061 + https://doi.org/10.5281/zenodo.7254221'
npv_gdf['source_id'] = 'https://doi.org/10.7717/peerj.5457'
indigenous_can_gdf['source_id'] = 'http://www.landmarkmap.org'
wpda_gdf_polygons['source_id'] = 'https://www.protectedplanet.net'
tang_canada_gdf['source_id'] = 'https://doi.org/10.1038/s43247-023-00805-6'
#maus_canada_gdf['source_id'] = 'https://doi.org/10.1038/s41597-022-01547-4'

In [8]:
from db_creation_function import add_source_id_to_collected_data

In [9]:
production_df = add_source_id_to_collected_data(production_df, 
                                    company_col="reported_company_nrcan", 
                                    facility_col="name", 
                                    source_col="source")

reserves_df = add_source_id_to_collected_data(reserves_df, 
                                    company_col="reported_company_nrcan", 
                                    facility_col="name", 
                                    source_col="source")

energy_df = add_source_id_to_collected_data(energy_df, 
                                    company_col="reported_company_nrcan", 
                                    facility_col="name", 
                                    source_col="source")

env_df = add_source_id_to_collected_data(env_df, 
                                    company_col="reported_company_nrcan", 
                                    facility_col="name", 
                                    source_col="source")

env_int_df = add_source_id_to_collected_data(env_int_df, 
                                    company_col="reported_company_nrcan", 
                                    facility_col="name", 
                                    source_col="source")

archetypes_df = add_source_id_to_collected_data(archetypes_df, 
                                    company_col="reported_company_nrcan", 
                                    facility_col="name", 
                                    source_col="source")

In [10]:
from db_creation_function import create_source_table_from_datasets

In [11]:
dataset_dict = {
    "Global Tailings Portal": tailings_gdf,
    "A Comprehensive Historical and Geolocalized Database of Mining Activities in Canada": min_can_gdf,
    "ClimateTRACE mineral extraction sector": ctrace_gdf,
    "Environmental Justice Atlas": conflict_gdf,
    "GHG from large facilities": ghg_gdf,
    "National Pollutant Inventory Report (NPRI)": pollutant_gdf,
    "Areas of global importance for conserving terrestrial biodiversity, carbon and water": prioritization_area_gdf,
    "High-Resolution (1 km) Koppen-Geiger Maps for 1901–2099 Based on Constrained CMIP6 Projections": climate_category_gdf,
    "CMIP6 statistically downscaled climate indices (CanDCS-M6)": weather_gdf,
    "Global Peatland Database": peatland_gdf,
    "Global Human Settlement Layer (GHSL)": population_gdf,
    "Aqueduct 4.0": water_risk_gdf,
    "Modis Land Cover and ESA World Cover": land_cover_gdf,
    "Potential Natural Vegetation (NPV)": npv_gdf,
    "Indigenous Peoples’ and Local Community Lands and Territories": indigenous_can_gdf,
    "World Database on Protected Areas (WDPA)": wpda_gdf_polygons,
    "Global Mining Footprint Mapped from High-Resolution Satellite Imagery": tang_canada_gdf,
}

manual_dfs = [production_df, reserves_df, energy_df, env_df, env_int_df, archetypes_df] 

source_table = create_source_table_from_datasets(dataset_dict, manual_dfs)

In [12]:
source_table

Unnamed: 0,source_id,source_provenance,source_name
0,https://tailing.grida.no/,dataset,Global Tailings Portal
16,https://doi.org/10.1038/s43247-023-00805-6,dataset,Global Mining Footprint Mapped from High-Resol...
15,https://www.protectedplanet.net,dataset,World Database on Protected Areas (WDPA)
14,http://www.landmarkmap.org,dataset,Indigenous Peoples’ and Local Community Lands ...
13,https://doi.org/10.7717/peerj.5457,dataset,Potential Natural Vegetation (NPV)
...,...,...,...
70,SRC_SilverLakeResourcesLtd_MDI42C14SE00005,report,MDI42C14SE00005
71,SRC_SilverLakeResourcesLtd_MDI42C14SE00006,report,MDI42C14SE00006
72,SRC_SSRMiningInc_FY2024,report,FY2024
65,SRC_RioTinto_rt-fact-book-addendum,report,rt-fact-book-addendum.xlsx


In [13]:
source_table.to_csv(r'data/Tables/Auxiliary_tables/source_table.csv', index=False)

## Substance table

In [14]:
from db_creation_function import create_substance_table

In [15]:
substance_table = create_substance_table(pollutant_gdf, env_df)

In [16]:
substance_table

Unnamed: 0,substance_id,harmonized_name,original_name,source
0,SUB07971a0766,"1,2,4-Trimethylbenzene","1,2,4-Trimethylbenzene",NPRI
1,SUBbdfdffeb95,1-Nitropyrene,1-Nitropyrene,NPRI
2,SUB0f01e7c1d0,2-Butoxyethanol,2-Butoxyethanol,NPRI
3,SUB324959500a,"7,12-Dimethylbenz[a]anthracene","7,12-Dimethylbenz[a]anthracene",NPRI
4,SUBfee6b26acd,"7H-Dibenzo[c,g]carbazole","7H-Dibenzo[c,g]carbazole",NPRI
...,...,...,...,...
169,SUB787e5ac1a2,Vanadium (and its compounds),Vanadium (and its compounds),NPRI
170,SUBe849a2ff09,Xylene (all isomers),Xylene (all isomers),NPRI
171,SUBae95939c3f,Zinc (and its compounds),Zinc (and its compounds),NPRI
172,SUB090a1237c1,i-Butyl alcohol,i-Butyl alcohol,NPRI


In [17]:
pollutant_gdf = pollutant_gdf.merge(
    substance_table[['substance_id', 'original_name']],
    how='left',
    left_on='substance_name_npri',
    right_on='original_name'
).drop(columns='original_name')

In [18]:
pollutant_gdf

Unnamed: 0,row_id,year,npri_id,facility_name_npri,company_name_npri,facility_type,longitude,latitude,terrestrial_ecozone,watershed,substance_name_npri,substance_name_ecoinvent,substance_unit,emission_type,emission_subtype,value,source_id,geometry,substance_id
0,npri-1568-2023-1,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Ammonia (total),Ammonia,tonnes,Air Emissions / Émissions à l'air,Stack Emissions,0.636,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB1c6e015419
1,npri-1568-2023-2,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Arsenic (and its compounds),Arsenic,kg,Air Emissions / Émissions à l'air,Stack Emissions,0.116,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB0cbc90a094
2,npri-1568-2023-3,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Carbon monoxide,"Carbon monoxide, fossil",tonnes,Air Emissions / Émissions à l'air,Stack Emissions,76.208,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB642eab0862
3,npri-1568-2023-4,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Chromium (and its compounds),Chromium,tonnes,Air Emissions / Émissions à l'air,Stack Emissions,0.00105,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB7e3f017a30
4,npri-1568-2023-5,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Cobalt (and its compounds),Cobalt,kg,Air Emissions / Émissions à l'air,Stack Emissions,0.18,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUBd94d47b1d8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24125,npri-5697-2023-6,2023,5697,Rayette Manufacturing,,manufacturing,-79.49600,43.80800,Mixedwood Plain,St. Lawrence Drainage Area,Speciated VOC - Hydrotreated heavy naphtha,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",25.1008,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49600 43.80800),SUBb215244183
24126,npri-5698-2023-6,2023,5698,Bowes Manufacturing,,manufacturing,-79.49480,43.80360,Mixedwood Plain,St. Lawrence Drainage Area,Speciated VOC - Hydrotreated heavy naphtha,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",23.418,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49480 43.80360),SUBb215244183
24127,npri-34697-2023-2,2023,34697,541 Bowes Warehouse,,manufacturing,-79.49707,43.81726,Mixedwood Plain,St. Lawrence Drainage Area,Speciated VOC - Hydrotreated heavy naphtha,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",16.543746,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49707 43.81726),SUBb215244183
24128,npri-5697-2023-7,2023,5697,Rayette Manufacturing,,manufacturing,-79.49600,43.80800,Mixedwood Plain,St. Lawrence Drainage Area,Volatile Organic Compounds (Total),,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",12.5823,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49600 43.80800),SUB8319f60a2f


In [19]:
env_df = env_df.merge(
    substance_table[['substance_id', 'original_name']],
    how='left',
    left_on='substance_name',
    right_on='original_name'
).drop(columns='original_name')

In [20]:
# For CTrace and GHG datasets, we create a substance_id column = NaN because we do not have substance names, only flow_tyoe
ctrace_gdf['substance_id'] = np.nan
ghg_gdf['substance_id'] = np.nan

In [21]:
substance_table.to_csv(r'data/Tables/Auxiliary_tables/substance_table.csv', index=False)

## Compartment table

In [22]:
pollutant_gdf[pollutant_gdf['emission_type'] == 'Water Releases / Rejets à l\'eau']['emission_subtype'].unique()

array(['Direct Discharge', 'Total', 'Receiving Waterbody (English)',
       'Receiving Waterbody (French)', 'Spills'], dtype=object)

In [23]:
# Change the value to 'None' for emission_subtype when emission_type = 'Water Releases / Rejets à l'eau' and 'emission_subtype' is 'Receiving Waterbody (English)' or 'Receiving Waterbody (French)'
pollutant_gdf.loc[
    (pollutant_gdf['emission_type'] == 'Water Releases / Rejets à l\'eau') & 
    (pollutant_gdf['emission_subtype'].isin(['Receiving Waterbody (English)', 'Receiving Waterbody (French)'])),
    'emission_subtype'
] = None

In [24]:
from db_creation_function import create_compartment_table

In [25]:
compartment_table = create_compartment_table(pollutant_gdf, env_df)

In [26]:
pollutant_gdf = pollutant_gdf.merge(
    compartment_table[[
        'compartment_id', 'raw_compartment_label', 'raw_pathway_label'
    ]],
    how='left',
    left_on=['emission_type', 'emission_subtype'],
    right_on=['raw_compartment_label', 'raw_pathway_label']
).drop(columns=['raw_compartment_label', 'raw_pathway_label'])


In [27]:
env_df['compartment_pathway'] = 'Unspecified'
env_df = env_df.merge(
    compartment_table[[
        'compartment_id', 'raw_compartment_label', 'raw_pathway_label'
    ]],
    how='left',
    left_on=['compartment', 'compartment_pathway'],
    right_on=['raw_compartment_label', 'raw_pathway_label']
).drop(columns=['raw_compartment_label', 'raw_pathway_label'])


In [28]:
# For CTrace and GHG datasets, we assign the compartment_id corresponding to 'Air' and 'Unspecified' pathway
ctrace_gdf['compartment_id'] = 'CMPf3265e9fca'
ghg_gdf['compartment_id'] = 'CMPf3265e9fca'

In [29]:
compartment_table.to_csv(r'data/Tables/Auxiliary_tables/compartment_table.csv', index=False)

# Matching

## Point to point matching

In [30]:
from matching_functions import match_facilities, one_to_many_relationships

### MinCan

Some mines in MinCan dataset have very slightly different name (e.g. LaRonde - Zone 5 and LaRonde), or several companies for the same mines.

In [31]:
lt_main_mincan = match_facilities(main_gdf, min_can_gdf,
                     id_col1='main_id', id_col2='mincan_id',
                     name_col1='facility_name', name_col2='namemine'
                     )

In [32]:
matches_main_mincan = one_to_many_relationships(
    lt_main_mincan,
    id_main_col='main_id',
    id_sat_col='mincan_id',
    distance_threshold_m=2000,
    similarity_threshold=80,
    similarity_metric="token_set"  
)

In [33]:
min_can_gdf = min_can_gdf.merge(
    matches_main_mincan,
    on="mincan_id", 
    how="left"
)

In [34]:
# For SQL
min_can_gdf = min_can_gdf[min_can_gdf["main_id"].notna()].copy()

In [35]:
col_to_keep = ["mincan_id", "main_id", "mine_status", "operation_periods"]
min_can_gdf = min_can_gdf[col_to_keep]
min_can_gdf

Unnamed: 0,mincan_id,main_id,mine_status,operation_periods
4,QC-MINCAN-d1e8b6b1,QC-MAIN-30c1828c,Inactive,1927–1976
53,NL-MINCAN-01316355,NL-MAIN-842ba1b4,Inactive,1997–2010
55,QC-MINCAN-494c7715,QC-MAIN-9de9bb0d,Active,1936–1996; 2005–2013; 2021–open
117,BC-MINCAN-8cc41b53,BC-MAIN-6b4800fe,Active,1972–1998; 2004–open
136,QC-MINCAN-8321e85e,QC-MAIN-e51eda66,Active,1988–open
...,...,...,...,...
871,NL-MINCAN-a34e9244,NL-MAIN-c139de6d,Inactive,1971–2019
872,BC-MINCAN-3e33555f,BC-MAIN-b1fe389a,Inactive,1971–2008
897,SK-MINCAN-b20ab2db,SK-MAIN-bb89158f,Inactive,1983–2018
906,ON-MINCAN-7b29d7b3,ON-MAIN-bfbcd72e,Active,1987–1994; 2012–open


In [36]:
min_can_gdf['mine_status'].value_counts()

mine_status
Active      47
Inactive    22
Name: count, dtype: int64

In [37]:
# # Integrate the operation_periods from min_can_gdf into the main_gdf
# main_gdf_a = main_gdf.merge(
#     min_can_gdf[['main_id', 'operation_periods']],
#     on='main_id',
#     how='left'
# )
# 
# # Add the source_id for MinCan only where operation_periods are not null
# main_gdf['source_id'] = main_gdf.apply(
#     lambda row: row['source_id'] + ';' + 'https://doi.org/10.1038/s41597-024-03116-3' if pd.notna(row['operation_periods']) else row['source_id'],
#     axis=1
# )

### CTrace 

In [38]:
lt_main_ctrace = match_facilities(main_gdf, ctrace_gdf,
                     id_col1='main_id', id_col2='ctrace_id',
                     name_col1='facility_name', name_col2='source_name'
                     )

In [39]:
matches_main_ctrace = one_to_many_relationships(
    lt_main_ctrace,
    id_main_col='main_id',
    id_sat_col='ctrace_id',
    distance_threshold_m=2000,
    similarity_threshold=80,
    similarity_metric="token_set"  
)

In [40]:
ctrace_gdf = ctrace_gdf.merge(
    matches_main_ctrace,
    on="ctrace_id", 
    how="left"
)

In [41]:
# For SQL
ctrace_gdf = ctrace_gdf[ctrace_gdf["main_id"].notna()].copy()

In [42]:
ctrace_gdf.columns

Index(['ctrace_id', 'year', 'source_name', 'emissions_quantity', 'gas',
       'activity', 'activity_units', 'capacity', 'capacity_units', 'latitude',
       'longitude', 'source_id', 'geometry', 'substance_id', 'compartment_id',
       'main_id'],
      dtype='object')

In [43]:
col_to_keep = [
'ctrace_id', 
'year',
'emissions_quantity', 
'gas',
'activity', 'activity_units', 
'capacity', 'capacity_units', 
'source_id',  'substance_id', 'compartment_id',
'main_id'           
]
ctrace_gdf = ctrace_gdf[col_to_keep]
ctrace_gdf

Unnamed: 0,ctrace_id,year,emissions_quantity,gas,activity,activity_units,capacity,capacity_units,source_id,substance_id,compartment_id,main_id
6,1754055,2023,5610.0,co2e_100yr,863000.0,t of copper ore,4624000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-6e9b2449
7,1754056,2023,6403.0,co2e_100yr,985000.0,t of copper ore,35547000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-63b394c3
8,1754057,2023,229916.0,co2e_100yr,14575000.0,t of copper ore,367000000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-599152a0
9,1754058,2023,2639.0,co2e_100yr,406000.0,t of copper ore,11041000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-52224e1e
11,1754060,2023,4225.0,co2e_100yr,650000.0,t of copper ore,4822000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-48fe2205
12,1754061,2023,131638.0,co2e_100yr,88100000.0,t of copper ore,645000000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-6b4800fe
16,1754066,2023,2412130.0,co2e_100yr,72886000.0,t of copper ore,263100000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-bf503b6b
17,1754067,2023,0.0,co2e_100yr,0.0,t of copper ore,87879590.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,QC-MAIN-30c1828c
25,1754077,2023,67116.0,co2e_100yr,1501481.0,t of copper ore,19882000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,QC-MAIN-e51eda66
29,1754081,2023,7782.0,co2e_100yr,527971.0,t of copper ore,87879590.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-7001a391


In [44]:
# We separate emissions and production data
ctrace_ghg_gdf = ctrace_gdf.drop(columns=['activity', 'activity_units', 'capacity', 'capacity_units'])
ctrace_prod_gdf = ctrace_gdf.drop(columns=['emissions_quantity', 'gas'])

### Conflict

In [45]:
lt_main_conflict = match_facilities(main_gdf, conflict_gdf,
                     id_col1='main_id', id_col2='ej_atlas_id',
                     name_col1='facility_name', name_col2='facility_name_ejatlas'
                     )

In [46]:
matches_main_conflict = one_to_many_relationships(
    lt_main_conflict,
    id_main_col='main_id',
    id_sat_col='ej_atlas_id',
    distance_threshold_m=20000,
    similarity_threshold=80,
    similarity_metric="token_set"  # "partial" or "token_set"
)

In [47]:
conflict_gdf = conflict_gdf.merge(
    matches_main_conflict,
    on="ej_atlas_id",  
    how="left"
)

In [48]:
# For SQL
conflict_gdf = conflict_gdf[conflict_gdf["main_id"].notna()].copy()

In [49]:
col_to_keep = ["ej_atlas_id",
  "main_id",
  "case_name",
  "start_date",
  "end_date",
  "conflict_description",
  "conflict_details",
  "population_affected",
  "conflict_intensity",
  "project_status",
  "source_id"             
               ]
conflict_gdf = conflict_gdf[col_to_keep]
conflict_gdf

Unnamed: 0,ej_atlas_id,main_id,case_name,start_date,end_date,conflict_description,conflict_details,population_affected,conflict_intensity,project_status,source_id
9,4135,NL-MAIN-2d8801d6,"Vale Mining in Voisey's Bay, Labrador, Canada",11/1994,,<p>Voiseyâ€™s Bay nickel mine in Labrador was ...,"Voiseyâ€™s Bay is a â€œ6,000 tonnes-per-day fa...",Rural,"MEDIUM (street protests, visible mobilization)",In operation,http://journals.librarypublishing.arizona.edu/...
10,4145,ON-MAIN-2e13aaa0,Port Colborne Class Action Lawsuit Against Val...,06/2000,04/2012,"<p>In Pearson (later Smith) v. Inco, the large...",Raw materials from Valeâ€™s Sudbury mining ope...,Urban,LOW (some local organising),In operation,http://journals.librarypublishing.arizona.edu/...
15,5599,QC-MAIN-d6748d40,Nouveau Mondeâ€™s Matawinie graphite mine in Q...,01/01/2017,,<p>Five kilometers southwest of the municipali...,The project includes 319 mining concessions (c...,Rural,"MEDIUM (street protests, visible mobilization)",Under construction,http://journals.librarypublishing.arizona.edu/...


### Tailings

In [50]:
lt_main_tailings = match_facilities(main_gdf, tailings_gdf,
                     id_col1='main_id', id_col2='tailing_id',
                     name_col1='facility_name', name_col2='related_mine'
                     )

In [51]:
matches_main_tailings = one_to_many_relationships(
    lt_main_tailings,
    id_main_col='main_id',
    id_sat_col='tailing_id',
    distance_threshold_m=20000,
    similarity_threshold=80,
    similarity_metric="token_set"  # "partial" or "token_set"
)

In [52]:
tailings_gdf = tailings_gdf.merge(
    matches_main_tailings,
    on="tailing_id",  
    how="left"
)

In [53]:
# For SQL
tailings_gdf = tailings_gdf[tailings_gdf["main_id"].notna()].copy()

In [54]:
col_to_keep = [
  "row_id",
  "main_id",
  "year",
  "tailing_id",
  "tsf_name",
  "status",
  "construction_year",
  "raise_type",
  "current_maximum_height",
  "current_tailings_storage",
  "planned_storage_5_years",
  "hazard_categorization",
  "classification_system",
  "geometry", 
  "source_id"             
               ]
tailings_gdf = tailings_gdf[col_to_keep]
tailings_gdf

Unnamed: 0,row_id,main_id,year,tailing_id,tsf_name,status,construction_year,raise_type,current_maximum_height,current_tailings_storage,planned_storage_5_years,hazard_categorization,classification_system,geometry,source_id
0,TAIL-12.0-2019-1,NU-MAIN-730aefe3,2019,12.0,North Cell,Active,2009.0,"Downstream, Upstream",31.0,14400000,14400000,Medium,Canadian Dam Association,POINT (-96.04333 65.02156),https://tailing.grida.no/
1,TAIL-13.0-2019-1,NU-MAIN-730aefe3,2019,13.0,South Cell,Active,2012.0,Downstream,49.0,10420000,10800000,Medium,Canadian Dam Association,POINT (-96.03339 65.01378),https://tailing.grida.no/
2,TAIL-14.0-2019-1,NU-MAIN-730aefe3,2019,14.0,In pit,Active,2009.0,,,850000,12500000,Low,,POINT (-96.03254 65.00190),https://tailing.grida.no/
3,TAIL-15.0-2019-1,NU-MAIN-8b0264c9,2019,15.0,Meliadine TSF,Active,2019.0,,5.0,89000,4354000,Medium,Canadian Dam Association,POINT (-92.14034 63.02083),https://tailing.grida.no/
4,TAIL-16.0-2019-1,QC-MAIN-c0660aec,2019,16.0,South TSF,Active,2007.0,Not Raised,5.0,1664000,2500000,Medium,Canadian Dam Association,POINT (-77.84198 48.05434),https://tailing.grida.no/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215,TAIL-1126.0-2019-1,ON-MAIN-f080c409,2019,1126.0,R1 Tailings,Active,1985.0,Compacted\nOuter Shell,14.0,2900000,4000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/
216,TAIL-1127.0-2019-1,ON-MAIN-f080c409,2019,1127.0,R2 Tailings,Active,1985.0,Compacted\nOuter Shell,13.0,18000000,18000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/
217,TAIL-1128.0-2019-1,ON-MAIN-f080c409,2019,1128.0,R3 Tailings,Active,1985.0,Compacted\nOuter Shell,27.0,39000000,42000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/
218,TAIL-1129.0-2019-1,ON-MAIN-f080c409,2019,1129.0,R4 Tailings,Active,1990.0,Compacted\nOuter Shell,22.0,59000000,63000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/


### GHG

In [55]:
lt_main_ghg = match_facilities(main_gdf, ghg_gdf,
                     id_col1='main_id', id_col2='ghg_id',
                     name_col1='facility_name', name_col2='facility_name_ghg'
                     )

In [56]:
matches_main_ghg = one_to_many_relationships(
    lt_main_ghg,
    id_main_col='main_id',
    id_sat_col='ghg_id',
    distance_threshold_m=10000,
    similarity_threshold=80,
    similarity_metric="token_set"  # "partial" or "token_set"
)

In [57]:
ghg_gdf = ghg_gdf.merge(
    matches_main_ghg,
    on="ghg_id",  
    how="left"
)

In [58]:
# For SQL
ghg_gdf = ghg_gdf[ghg_gdf["main_id"].notna()].copy()

In [59]:
col_to_keep = [
  "row_id",
  "main_id",
  "year",
  "value",
  "unit",
  "compartment_id",
  "substance_id",
  "source_id"             
               ]
ghg_gdf = ghg_gdf[col_to_keep]
ghg_gdf

Unnamed: 0,row_id,main_id,year,value,unit,compartment_id,substance_id,source_id
1,GHG-10101-2022-1,NT-MAIN-6c1d6ee6,2022,164.88,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
4,GHG-10188-2022-1,QC-MAIN-084bd95c,2022,293.10,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
7,GHG-10240-2022-1,QC-MAIN-a97821c0,2022,129.26,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
8,GHG-10299-2022-1,QC-MAIN-e25eed27,2022,909.89,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
9,GHG-10350-2022-1,NL-MAIN-b64bae7a,2022,110.93,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
...,...,...,...,...,...,...,...,...
153,GHG-10791-2022-1,QC-MAIN-c1c7eb99,2022,96.48,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
154,GHG-10810-2022-1,NL-MAIN-d9036091,2022,50.09,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
169,GHG-11723-2022-1,ON-MAIN-2e13aaa0,2022,14.17,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
170,GHG-11730-2022-1,QC-MAIN-92628f16,2022,380.82,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...


### Pollution

In [60]:
pollutant_gdf

Unnamed: 0,row_id,year,npri_id,facility_name_npri,company_name_npri,facility_type,longitude,latitude,terrestrial_ecozone,watershed,substance_name_npri,substance_name_ecoinvent,substance_unit,emission_type,emission_subtype,value,source_id,geometry,substance_id,compartment_id
0,npri-1568-2023-1,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Ammonia (total),Ammonia,tonnes,Air Emissions / Émissions à l'air,Stack Emissions,0.636,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB1c6e015419,CMP1718f4466d
1,npri-1568-2023-2,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Arsenic (and its compounds),Arsenic,kg,Air Emissions / Émissions à l'air,Stack Emissions,0.116,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB0cbc90a094,CMP1718f4466d
2,npri-1568-2023-3,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Carbon monoxide,"Carbon monoxide, fossil",tonnes,Air Emissions / Émissions à l'air,Stack Emissions,76.208,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB642eab0862,CMP1718f4466d
3,npri-1568-2023-4,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Chromium (and its compounds),Chromium,tonnes,Air Emissions / Émissions à l'air,Stack Emissions,0.00105,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB7e3f017a30,CMP1718f4466d
4,npri-1568-2023-5,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,Cobalt (and its compounds),Cobalt,kg,Air Emissions / Émissions à l'air,Stack Emissions,0.18,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUBd94d47b1d8,CMP1718f4466d
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24125,npri-5697-2023-6,2023,5697,Rayette Manufacturing,,manufacturing,-79.49600,43.80800,Mixedwood Plain,St. Lawrence Drainage Area,Speciated VOC - Hydrotreated heavy naphtha,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",25.1008,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49600 43.80800),SUBb215244183,CMP281acc6045
24126,npri-5698-2023-6,2023,5698,Bowes Manufacturing,,manufacturing,-79.49480,43.80360,Mixedwood Plain,St. Lawrence Drainage Area,Speciated VOC - Hydrotreated heavy naphtha,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",23.418,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49480 43.80360),SUBb215244183,CMP281acc6045
24127,npri-34697-2023-2,2023,34697,541 Bowes Warehouse,,manufacturing,-79.49707,43.81726,Mixedwood Plain,St. Lawrence Drainage Area,Speciated VOC - Hydrotreated heavy naphtha,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",16.543746,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49707 43.81726),SUBb215244183,CMP281acc6045
24128,npri-5697-2023-7,2023,5697,Rayette Manufacturing,,manufacturing,-79.49600,43.80800,Mixedwood Plain,St. Lawrence Drainage Area,Volatile Organic Compounds (Total),,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",12.5823,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49600 43.80800),SUB8319f60a2f,CMP281acc6045


In [61]:
lt_main_pollution = match_facilities(main_gdf, pollutant_gdf,
                     id_col1='main_id', id_col2='npri_id',
                     name_col1='facility_name', name_col2='facility_name_npri'
                     )

In [62]:
matches_main_pollution = one_to_many_relationships(
    lt_main_pollution,
    id_main_col='main_id',
    id_sat_col='npri_id',
    distance_threshold_m=10000,
    similarity_threshold=80,
    similarity_metric="token_set"  # "partial" or "token_set"
)

In [63]:
pollutant_gdf = pollutant_gdf.merge(
    matches_main_pollution,
    on="npri_id",  
    how="left"
)

In [64]:
pollutant_gdf

Unnamed: 0,row_id,year,npri_id,facility_name_npri,company_name_npri,facility_type,longitude,latitude,terrestrial_ecozone,watershed,...,substance_name_ecoinvent,substance_unit,emission_type,emission_subtype,value,source_id,geometry,substance_id,compartment_id,main_id
0,npri-1568-2023-1,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,...,Ammonia,tonnes,Air Emissions / Émissions à l'air,Stack Emissions,0.636,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB1c6e015419,CMP1718f4466d,ON-MAIN-1f126a43
1,npri-1568-2023-2,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,...,Arsenic,kg,Air Emissions / Émissions à l'air,Stack Emissions,0.116,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB0cbc90a094,CMP1718f4466d,ON-MAIN-1f126a43
2,npri-1568-2023-3,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,...,"Carbon monoxide, fossil",tonnes,Air Emissions / Émissions à l'air,Stack Emissions,76.208,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB642eab0862,CMP1718f4466d,ON-MAIN-1f126a43
3,npri-1568-2023-4,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,...,Chromium,tonnes,Air Emissions / Émissions à l'air,Stack Emissions,0.00105,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUB7e3f017a30,CMP1718f4466d,ON-MAIN-1f126a43
4,npri-1568-2023-5,2023,1568,Macassa Mine,Agnico Eagle Mines,mining,-80.08734,48.13032,Boreal Shield,St. Lawrence Drainage Area,...,Cobalt,kg,Air Emissions / Émissions à l'air,Stack Emissions,0.18,https://www.canada.ca/en/environment-climate-c...,POINT (-80.08734 48.13032),SUBd94d47b1d8,CMP1718f4466d,ON-MAIN-1f126a43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24125,npri-5697-2023-6,2023,5697,Rayette Manufacturing,,manufacturing,-79.49600,43.80800,Mixedwood Plain,St. Lawrence Drainage Area,...,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",25.1008,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49600 43.80800),SUBb215244183,CMP281acc6045,
24126,npri-5698-2023-6,2023,5698,Bowes Manufacturing,,manufacturing,-79.49480,43.80360,Mixedwood Plain,St. Lawrence Drainage Area,...,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",23.418,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49480 43.80360),SUBb215244183,CMP281acc6045,
24127,npri-34697-2023-2,2023,34697,541 Bowes Warehouse,,manufacturing,-79.49707,43.81726,Mixedwood Plain,St. Lawrence Drainage Area,...,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",16.543746,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49707 43.81726),SUBb215244183,CMP281acc6045,
24128,npri-5697-2023-7,2023,5697,Rayette Manufacturing,,manufacturing,-79.49600,43.80800,Mixedwood Plain,St. Lawrence Drainage Area,...,,tonnes,Grand Total,"Total Releases, Disposals and Transfers for Re...",12.5823,https://www.canada.ca/en/environment-climate-c...,POINT (-79.49600 43.80800),SUB8319f60a2f,CMP281acc6045,


In [65]:
# For SQL
pollutant_gdf = pollutant_gdf[pollutant_gdf["main_id"].notna()].copy()

In [66]:
# # Adding terrestrial ecozone and watershed to the main_gdf
# main_gdf_b = main_gdf.merge(
#     pollutant_gdf[['main_id', 'terrestrial_ecozone', 'watershed']],
#     on='main_id',
#     how='left'
# )
# 
# # Adding NPRI source for the main_gdf where watershed and terrestrial ecozone are not null
# main_gdf['source_id'] = main_gdf.apply(
#     lambda row: row['source_id'] + ';' + 'https://www.canada.ca/en/environment-climate-change/services/national-pollutant-release-inventory/tools-resources-data/exploredata.html' if pd.notna(row['watershed']) or pd.notna(row['terrestrial_ecozone']) else row['source_id'],
#     axis=1
# )

In [67]:
col_to_keep = [
  "row_id",
  #"npri_id",
  "main_id",
  "year",
  "value",
  "substance_unit",
  "compartment_id",
  "substance_id",
  "source_id"
 ]
pollutant_gdf = pollutant_gdf[col_to_keep]
pollutant_gdf.rename(columns={'substance_unit': 'unit'}, inplace=True)
pollutant_gdf

Unnamed: 0,row_id,main_id,year,value,unit,compartment_id,substance_id,source_id
0,npri-1568-2023-1,ON-MAIN-1f126a43,2023,0.636,tonnes,CMP1718f4466d,SUB1c6e015419,https://www.canada.ca/en/environment-climate-c...
1,npri-1568-2023-2,ON-MAIN-1f126a43,2023,0.116,kg,CMP1718f4466d,SUB0cbc90a094,https://www.canada.ca/en/environment-climate-c...
2,npri-1568-2023-3,ON-MAIN-1f126a43,2023,76.208,tonnes,CMP1718f4466d,SUB642eab0862,https://www.canada.ca/en/environment-climate-c...
3,npri-1568-2023-4,ON-MAIN-1f126a43,2023,0.00105,tonnes,CMP1718f4466d,SUB7e3f017a30,https://www.canada.ca/en/environment-climate-c...
4,npri-1568-2023-5,ON-MAIN-1f126a43,2023,0.18,kg,CMP1718f4466d,SUBd94d47b1d8,https://www.canada.ca/en/environment-climate-c...
...,...,...,...,...,...,...,...,...
24116,npri-28761-2023-117,NL-MAIN-d9036091,2023,4.405,tonnes,CMP281acc6045,SUB4cedd6b899,https://www.canada.ca/en/environment-climate-c...
24117,npri-28761-2023-118,NL-MAIN-d9036091,2023,0.2475,tonnes,CMP281acc6045,SUBda6a6d231d,https://www.canada.ca/en/environment-climate-c...
24118,npri-28761-2023-119,NL-MAIN-d9036091,2023,29.277299999999997,tonnes,CMP281acc6045,SUB28f44d0e11,https://www.canada.ca/en/environment-climate-c...
24119,npri-28761-2023-120,NL-MAIN-d9036091,2023,0.485,tonnes,CMP281acc6045,SUBba3b5d11c7,https://www.canada.ca/en/environment-climate-c...


In [68]:
pollutant_gdf

Unnamed: 0,row_id,main_id,year,value,unit,compartment_id,substance_id,source_id
0,npri-1568-2023-1,ON-MAIN-1f126a43,2023,0.636,tonnes,CMP1718f4466d,SUB1c6e015419,https://www.canada.ca/en/environment-climate-c...
1,npri-1568-2023-2,ON-MAIN-1f126a43,2023,0.116,kg,CMP1718f4466d,SUB0cbc90a094,https://www.canada.ca/en/environment-climate-c...
2,npri-1568-2023-3,ON-MAIN-1f126a43,2023,76.208,tonnes,CMP1718f4466d,SUB642eab0862,https://www.canada.ca/en/environment-climate-c...
3,npri-1568-2023-4,ON-MAIN-1f126a43,2023,0.00105,tonnes,CMP1718f4466d,SUB7e3f017a30,https://www.canada.ca/en/environment-climate-c...
4,npri-1568-2023-5,ON-MAIN-1f126a43,2023,0.18,kg,CMP1718f4466d,SUBd94d47b1d8,https://www.canada.ca/en/environment-climate-c...
...,...,...,...,...,...,...,...,...
24116,npri-28761-2023-117,NL-MAIN-d9036091,2023,4.405,tonnes,CMP281acc6045,SUB4cedd6b899,https://www.canada.ca/en/environment-climate-c...
24117,npri-28761-2023-118,NL-MAIN-d9036091,2023,0.2475,tonnes,CMP281acc6045,SUBda6a6d231d,https://www.canada.ca/en/environment-climate-c...
24118,npri-28761-2023-119,NL-MAIN-d9036091,2023,29.277299999999997,tonnes,CMP281acc6045,SUB28f44d0e11,https://www.canada.ca/en/environment-climate-c...
24119,npri-28761-2023-120,NL-MAIN-d9036091,2023,0.485,tonnes,CMP281acc6045,SUBba3b5d11c7,https://www.canada.ca/en/environment-climate-c...


## Point to polygon matching

### Protected lands datasets 

In [69]:
from matching_functions import associate_facilities_near_polygons

In [70]:
wpda_gdf_polygons

Unnamed: 0,WDPA_PID,NAME,DESIG,OWN_TYPE,MANG_AUTH,STATUS_YR,source_id,geometry
0,611_A,Wood Buffalo National Park Of Canada,National Park,State,Parks Canada Agency,1922,https://www.protectedplanet.net,"POLYGON ((-112.87253 59.99877, -111.53006 59.7..."
1,611_B,Wood Buffalo National Park Of Canada,National Park,State,Parks Canada Agency,1922,https://www.protectedplanet.net,"POLYGON ((-112.87253 59.99877, -115.58450 59.9..."
2,612,Kluane National Park Reserve Of Canada,National Park,State,Parks Canada Agency,1972,https://www.protectedplanet.net,"POLYGON ((-141.00187 60.30634, -137.43466 60.7..."
3,613_B,Auyuittuq National Park Of Canada,National Park,State,Parks Canada Agency,1972,https://www.protectedplanet.net,"MULTIPOLYGON (((-64.62292 67.13706, -63.92605 ..."
4,613_A,Auyuittuq National Park Of Canada,National Park,State,Parks Canada Agency,1972,https://www.protectedplanet.net,"MULTIPOLYGON (((-68.23194 67.76712, -67.56846 ..."
...,...,...,...,...,...,...,...,...
13463,555785733,Grosse Île and the Irish Memorial National His...,Lieu historique national,State,Parks Canada Agency,2023,https://www.protectedplanet.net,"MULTIPOLYGON (((-70.67843 47.01639, -70.67803 ..."
13464,555785734,Navy Island National Historic Site,National Historic Site,State,Parks Canada Agency,2023,https://www.protectedplanet.net,"POLYGON ((-79.01037 43.04900, -79.00871 43.060..."
13465,555785735,Fort George National Historic Site (Paradise G...,National Historic Site,State,Parks Canada Agency,2023,https://www.protectedplanet.net,"POLYGON ((-79.06782 43.24368, -79.05901 43.248..."
13466,555785736,Rideau Canal National Historic Site,National Historic Site,State,Parks Canada Agency,2023,https://www.protectedplanet.net,"MULTIPOLYGON (((-76.24341 44.90190, -76.19385 ..."


In [71]:
lt_wpda = associate_facilities_near_polygons(
    main_gdf,
    wpda_gdf_polygons,
    facility_id_col="main_id",
    polygon_id_col="WDPA_PID",
    buffer_km=50,
    crs="EPSG:3978"
)

In [72]:
wpda_gdf_polygons = wpda_gdf_polygons.merge(lt_wpda, on="WDPA_PID", how="left")

In [73]:
# For SQL
wpda_gdf_polygons = wpda_gdf_polygons[wpda_gdf_polygons["main_id"].notna()].copy()

In [74]:
lt_indigenous = associate_facilities_near_polygons(
    main_gdf,
    indigenous_can_gdf,
    facility_id_col="main_id",
    polygon_id_col="indigenous_land_id",
    buffer_km=50,
    crs="EPSG:3978"
)

In [75]:
indigenous_can_gdf = indigenous_can_gdf.merge(lt_indigenous, on="indigenous_land_id", how="left")

In [76]:
# For SQL
indigenous_can_gdf = indigenous_can_gdf[indigenous_can_gdf["main_id"].notna()].copy()

### Land occupation footprint datasets

In [77]:
from matching_functions import assign_polygons_to_points

In [78]:
tailings_gdf

Unnamed: 0,row_id,main_id,year,tailing_id,tsf_name,status,construction_year,raise_type,current_maximum_height,current_tailings_storage,planned_storage_5_years,hazard_categorization,classification_system,geometry,source_id
0,TAIL-12.0-2019-1,NU-MAIN-730aefe3,2019,12.0,North Cell,Active,2009.0,"Downstream, Upstream",31.0,14400000,14400000,Medium,Canadian Dam Association,POINT (-96.04333 65.02156),https://tailing.grida.no/
1,TAIL-13.0-2019-1,NU-MAIN-730aefe3,2019,13.0,South Cell,Active,2012.0,Downstream,49.0,10420000,10800000,Medium,Canadian Dam Association,POINT (-96.03339 65.01378),https://tailing.grida.no/
2,TAIL-14.0-2019-1,NU-MAIN-730aefe3,2019,14.0,In pit,Active,2009.0,,,850000,12500000,Low,,POINT (-96.03254 65.00190),https://tailing.grida.no/
3,TAIL-15.0-2019-1,NU-MAIN-8b0264c9,2019,15.0,Meliadine TSF,Active,2019.0,,5.0,89000,4354000,Medium,Canadian Dam Association,POINT (-92.14034 63.02083),https://tailing.grida.no/
4,TAIL-16.0-2019-1,QC-MAIN-c0660aec,2019,16.0,South TSF,Active,2007.0,Not Raised,5.0,1664000,2500000,Medium,Canadian Dam Association,POINT (-77.84198 48.05434),https://tailing.grida.no/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215,TAIL-1126.0-2019-1,ON-MAIN-f080c409,2019,1126.0,R1 Tailings,Active,1985.0,Compacted\nOuter Shell,14.0,2900000,4000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/
216,TAIL-1127.0-2019-1,ON-MAIN-f080c409,2019,1127.0,R2 Tailings,Active,1985.0,Compacted\nOuter Shell,13.0,18000000,18000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/
217,TAIL-1128.0-2019-1,ON-MAIN-f080c409,2019,1128.0,R3 Tailings,Active,1985.0,Compacted\nOuter Shell,27.0,39000000,42000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/
218,TAIL-1129.0-2019-1,ON-MAIN-f080c409,2019,1129.0,R4 Tailings,Active,1990.0,Compacted\nOuter Shell,22.0,59000000,63000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/


In [79]:
assigned_df = assign_polygons_to_points(
    main_gdf,
    tailings_gdf,
    tang_canada_gdf,
    max_dist_km=10
)


In [80]:
assigned_df

Unnamed: 0,main_id,tailing_id,tang_id,distance_km,relation_type
0,NS-MAIN-c9c77a59,,45954,0.000000,one-to-many
1,NS-MAIN-c9c77a59,,45955,1.186584,one-to-many
2,NL-MAIN-d9036091,,7631,0.811781,one-to-many
3,NL-MAIN-d9036091,,7641,4.647047,one-to-many
4,NL-MAIN-d9036091,,7633,0.000000,many-to-one
...,...,...,...,...,...
789,NT-MAIN-6c1d6ee6,,26078,2.857250,one-to-many
790,NT-MAIN-6c1d6ee6,,43740,0.833606,one-to-many
791,NT-MAIN-6c1d6ee6,,3479,0.000000,one-to-many
792,NT-MAIN-6c1d6ee6,,50385,2.150861,one-to-many


In [81]:
assigned_df['relation_type'].value_counts()

relation_type
one-to-many    707
many-to-one     56
one-to-one      31
Name: count, dtype: int64

In [82]:
polygon_gdf = tang_canada_gdf.merge(assigned_df, on="tang_id", how="left")

In [83]:
polygon_gdf

Unnamed: 0,tang_id,area_km2,source_id,geometry,main_id,tailing_id,distance_km,relation_type
0,6383,0.026130,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((1795479.979 -83564.531 0.000, 1795...",,,,
1,6388,0.671399,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((1796123.448 -80625.526 0.000, 1796...",,,,
2,6390,0.050909,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((1796791.634 -80270.697 0.000, 1796...",,,,
3,45954,2.622800,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((2466485.124 185586.547 0.000, 2466...",NS-MAIN-c9c77a59,,0.000000,one-to-many
4,45955,0.441869,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((2465809.682 185862.507 0.000, 2465...",NS-MAIN-c9c77a59,,1.186584,one-to-many
...,...,...,...,...,...,...,...,...
1915,3399,0.541024,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((-717210.659 1915423.857 0.000, -71...",,,,
1916,3401,1.053173,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((-715648.337 1916385.463 0.000, -71...",,,,
1917,26364,0.079894,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((-717332.782 1916999.286 0.000, -71...",,,,
1918,43739,1.519488,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((-713032.430 1919630.086 0.000, -71...",,,,


In [84]:
from visualization_functions import plot_polygons_assginment

In [85]:
plot_polygons_assginment(
    main_gdf,
    tailings_gdf,
    polygon_gdf, 
    output_html="results/data_coverage/facilities_tailings_maps.html"
)

✅ Map saved to results/data_coverage/facilities_tailings_maps.html


# Create final tables

## Environment table

### CTrace dataset

In [86]:
ctrace_ghg_gdf

Unnamed: 0,ctrace_id,year,emissions_quantity,gas,source_id,substance_id,compartment_id,main_id
6,1754055,2023,5610.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-6e9b2449
7,1754056,2023,6403.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-63b394c3
8,1754057,2023,229916.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-599152a0
9,1754058,2023,2639.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-52224e1e
11,1754060,2023,4225.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-48fe2205
12,1754061,2023,131638.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-6b4800fe
16,1754066,2023,2412130.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-bf503b6b
17,1754067,2023,0.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,QC-MAIN-30c1828c
25,1754077,2023,67116.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,QC-MAIN-e51eda66
29,1754081,2023,7782.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-7001a391


In [87]:
ctrace_ghg_gdf.rename(columns={
    'ctrace_id': 'env_id',
    'emissions_quantity': 'value',
    'gas': 'unit'}, inplace=True)

In [88]:
ctrace_ghg_gdf

Unnamed: 0,env_id,year,value,unit,source_id,substance_id,compartment_id,main_id
6,1754055,2023,5610.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-6e9b2449
7,1754056,2023,6403.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-63b394c3
8,1754057,2023,229916.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-599152a0
9,1754058,2023,2639.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-52224e1e
11,1754060,2023,4225.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-48fe2205
12,1754061,2023,131638.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-6b4800fe
16,1754066,2023,2412130.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-bf503b6b
17,1754067,2023,0.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,QC-MAIN-30c1828c
25,1754077,2023,67116.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,QC-MAIN-e51eda66
29,1754081,2023,7782.0,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-7001a391


In [89]:
## We miss flow_type
ctrace_ghg_gdf['flow_type'] = 'ghg'

### NPRI dataset

In [90]:
pollutant_gdf

Unnamed: 0,row_id,main_id,year,value,unit,compartment_id,substance_id,source_id
0,npri-1568-2023-1,ON-MAIN-1f126a43,2023,0.636,tonnes,CMP1718f4466d,SUB1c6e015419,https://www.canada.ca/en/environment-climate-c...
1,npri-1568-2023-2,ON-MAIN-1f126a43,2023,0.116,kg,CMP1718f4466d,SUB0cbc90a094,https://www.canada.ca/en/environment-climate-c...
2,npri-1568-2023-3,ON-MAIN-1f126a43,2023,76.208,tonnes,CMP1718f4466d,SUB642eab0862,https://www.canada.ca/en/environment-climate-c...
3,npri-1568-2023-4,ON-MAIN-1f126a43,2023,0.00105,tonnes,CMP1718f4466d,SUB7e3f017a30,https://www.canada.ca/en/environment-climate-c...
4,npri-1568-2023-5,ON-MAIN-1f126a43,2023,0.18,kg,CMP1718f4466d,SUBd94d47b1d8,https://www.canada.ca/en/environment-climate-c...
...,...,...,...,...,...,...,...,...
24116,npri-28761-2023-117,NL-MAIN-d9036091,2023,4.405,tonnes,CMP281acc6045,SUB4cedd6b899,https://www.canada.ca/en/environment-climate-c...
24117,npri-28761-2023-118,NL-MAIN-d9036091,2023,0.2475,tonnes,CMP281acc6045,SUBda6a6d231d,https://www.canada.ca/en/environment-climate-c...
24118,npri-28761-2023-119,NL-MAIN-d9036091,2023,29.277299999999997,tonnes,CMP281acc6045,SUB28f44d0e11,https://www.canada.ca/en/environment-climate-c...
24119,npri-28761-2023-120,NL-MAIN-d9036091,2023,0.485,tonnes,CMP281acc6045,SUBba3b5d11c7,https://www.canada.ca/en/environment-climate-c...


In [91]:
pollutant_gdf.rename(columns={
    'row_id': 'env_id'}, inplace=True)

In [None]:
## We miss flow_type
#pollutant_gdf['flow_type'] = ''

### GHG dataset

In [93]:
ghg_gdf

Unnamed: 0,row_id,main_id,year,value,unit,compartment_id,substance_id,source_id
1,GHG-10101-2022-1,NT-MAIN-6c1d6ee6,2022,164.88,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
4,GHG-10188-2022-1,QC-MAIN-084bd95c,2022,293.10,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
7,GHG-10240-2022-1,QC-MAIN-a97821c0,2022,129.26,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
8,GHG-10299-2022-1,QC-MAIN-e25eed27,2022,909.89,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
9,GHG-10350-2022-1,NL-MAIN-b64bae7a,2022,110.93,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
...,...,...,...,...,...,...,...,...
153,GHG-10791-2022-1,QC-MAIN-c1c7eb99,2022,96.48,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
154,GHG-10810-2022-1,NL-MAIN-d9036091,2022,50.09,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
169,GHG-11723-2022-1,ON-MAIN-2e13aaa0,2022,14.17,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...
170,GHG-11730-2022-1,QC-MAIN-92628f16,2022,380.82,kilotonnes of carbon dioxide equivalents (kt C...,CMPf3265e9fca,,https://www.canada.ca/en/environment-climate-c...


In [94]:
ghg_gdf.rename(columns={'row_id': 'env_id'}, inplace=True)

In [95]:
## We miss flow_type
ghg_gdf['flow_type'] = 'ghg'

### Manually collected data 

In [96]:
env_df

Unnamed: 0,row_id,year,reporting_level,reported_company_nrcan,name,main_id,facility_group_id,company_id,facility_type,mining_processing_type,...,subflow_type,substance_name,unit,value,comment,source,source_id,substance_id,compartment_pathway,compartment_id
0,ENV-1f126a43-2023-1,2023,Site-specific,Agnico Eagle Mines Limited,Canadian Malartic,ON-MAIN-1f126a43,-,,mining,"Underground, concentrator",...,-,SOx,t,315,"""In absence of site-level monitoring emission ...",2023-Sustainability-Performance_data.xlsx,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB2c1715b8f1,Unspecified,CMPf3265e9fca
1,ENV-1f126a43-2023-2,2023,Site-specific,Agnico Eagle Mines Limited,Canadian Malartic,ON-MAIN-1f126a43,-,,mining,"Underground, concentrator",...,-,NOx,t,4802,"""In absence of site-level monitoring emission ...",2023-Sustainability-Performance_data.xlsx,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB515573c582,Unspecified,CMPf3265e9fca
2,ENV-e7e6a960-2023-1,2023,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,,mining,"Open-pit, concentrator",...,-,SOx,t,486,"""In absence of site-level monitoring emission ...",2023-Sustainability-Performance_data.xlsx,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB2c1715b8f1,Unspecified,CMPf3265e9fca
3,ENV-e7e6a960-2023-2,2023,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,,mining,"Open-pit, concentrator",...,-,NOx,t,7414,"""In absence of site-level monitoring emission ...",2023-Sustainability-Performance_data.xlsx,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB515573c582,Unspecified,CMPf3265e9fca
4,ENV-8b0264c9-2023-1,2023,Site-specific,Agnico Eagle Mines Limited,Goldex,NU-MAIN-8b0264c9,-,,mining,"Underground, concentrator",...,-,SOx,t,38,"""In absence of site-level monitoring emission ...",2023-Sustainability-Performance_data.xlsx,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB2c1715b8f1,Unspecified,CMPf3265e9fca
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,ENV-9de9bb0d-2023-19,2023,Site-specific,Wesdome Gold Mines Ltd,Eagle River,QC-MAIN-9de9bb0d,-,,mining,"Underground, concentrator",...,Total water discharged,-,m3,324347,,2023-ESG-Data-Tables.xlsx,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,Unspecified,CMP68bd334352
746,ENV-c5fefb01-2023-16,2023,Site-specific,Wesdome Gold Mines Ltd,Kiena,ON-MAIN-c5fefb01,-,,mining,"Open-pit, concentrator",...,Water withdrawal,-,m3,168115,,2023-ESG-Data-Tables.xlsx,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,Unspecified,CMP68bd334352
747,ENV-c5fefb01-2023-17,2023,Site-specific,Wesdome Gold Mines Ltd,Kiena,ON-MAIN-c5fefb01,-,,mining,"Open-pit, concentrator",...,Water withdrawal|Freshwater withdrawn,-,m3,168115,,2023-ESG-Data-Tables.xlsx,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,Unspecified,CMP68bd334352
748,ENV-c5fefb01-2023-18,2023,Site-specific,Wesdome Gold Mines Ltd,Kiena,ON-MAIN-c5fefb01,-,,mining,"Open-pit, concentrator",...,Total water consumption,-,m3,82144,,2023-ESG-Data-Tables.xlsx,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,Unspecified,CMP68bd334352


In [97]:
env_df.columns

Index(['row_id', 'year', 'reporting_level', 'reported_company_nrcan', 'name',
       'main_id', 'facility_group_id', 'company_id', 'facility_type',
       'mining_processing_type', 'commodity', 'compartment', 'flow_type',
       'subflow_type', 'substance_name', 'unit', 'value', 'comment', 'source',
       'source_id', 'substance_id', 'compartment_pathway', 'compartment_id'],
      dtype='object')

In [98]:
env_df.rename(columns={'row_id': 'env_id'}, inplace=True)
env_df.drop(columns=['reporting_level', 'reported_company_nrcan', 'name', 'facility_type', 'mining_processing_type', 'commodity','compartment', 'flow_type','subflow_type', 'substance_name', 'source', 'compartment_pathway'], inplace=True)

In [99]:
env_df

Unnamed: 0,env_id,year,main_id,facility_group_id,company_id,unit,value,comment,source_id,substance_id,compartment_id
0,ENV-1f126a43-2023-1,2023,ON-MAIN-1f126a43,-,,t,315,"""In absence of site-level monitoring emission ...",SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB2c1715b8f1,CMPf3265e9fca
1,ENV-1f126a43-2023-2,2023,ON-MAIN-1f126a43,-,,t,4802,"""In absence of site-level monitoring emission ...",SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB515573c582,CMPf3265e9fca
2,ENV-e7e6a960-2023-1,2023,QC-MAIN-e7e6a960,-,,t,486,"""In absence of site-level monitoring emission ...",SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB2c1715b8f1,CMPf3265e9fca
3,ENV-e7e6a960-2023-2,2023,QC-MAIN-e7e6a960,-,,t,7414,"""In absence of site-level monitoring emission ...",SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB515573c582,CMPf3265e9fca
4,ENV-8b0264c9-2023-1,2023,NU-MAIN-8b0264c9,-,,t,38,"""In absence of site-level monitoring emission ...",SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...,SUB2c1715b8f1,CMPf3265e9fca
...,...,...,...,...,...,...,...,...,...,...,...
745,ENV-9de9bb0d-2023-19,2023,QC-MAIN-9de9bb0d,-,,m3,324347,,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,CMP68bd334352
746,ENV-c5fefb01-2023-16,2023,ON-MAIN-c5fefb01,-,,m3,168115,,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,CMP68bd334352
747,ENV-c5fefb01-2023-17,2023,ON-MAIN-c5fefb01,-,,m3,168115,,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,CMP68bd334352
748,ENV-c5fefb01-2023-18,2023,ON-MAIN-c5fefb01,-,,m3,82144,,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,CMP68bd334352


### Concatenate all environment data into a single table

In [100]:
env_table = pd.concat(
    [ctrace_ghg_gdf, pollutant_gdf, ghg_gdf, env_df],
    ignore_index=True
)

In [101]:
env_table

Unnamed: 0,env_id,year,value,unit,source_id,substance_id,compartment_id,main_id,flow_type,facility_group_id,company_id,comment
0,1754055,2023,5610.000024,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-6e9b2449,ghg,,,
1,1754056,2023,6403.000027,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-63b394c3,ghg,,,
2,1754057,2023,229916.000984,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-599152a0,ghg,,,
3,1754058,2023,2639.000011,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-52224e1e,ghg,,,
4,1754060,2023,4225.000018,co2e_100yr,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-48fe2205,ghg,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
14520,ENV-9de9bb0d-2023-19,2023,324347,m3,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,CMP68bd334352,QC-MAIN-9de9bb0d,,-,,
14521,ENV-c5fefb01-2023-16,2023,168115,m3,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,CMP68bd334352,ON-MAIN-c5fefb01,,-,,
14522,ENV-c5fefb01-2023-17,2023,168115,m3,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,CMP68bd334352,ON-MAIN-c5fefb01,,-,,
14523,ENV-c5fefb01-2023-18,2023,82144,m3,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables,,CMP68bd334352,ON-MAIN-c5fefb01,,-,,


In [102]:
env_table.columns

Index(['env_id', 'year', 'value', 'unit', 'source_id', 'substance_id',
       'compartment_id', 'main_id', 'flow_type', 'facility_group_id',
       'company_id', 'comment'],
      dtype='object')

In [103]:
column_order = ['env_id', 'year', 'compartment_id', 'flow_type', 'substance_id', 'unit', 'value', 'comment', 'main_id','facility_group_id', 'company_id', 'source_id']

In [104]:
env_table = env_table[column_order]

In [105]:
env_table

Unnamed: 0,env_id,year,compartment_id,flow_type,substance_id,unit,value,comment,main_id,facility_group_id,company_id,source_id
0,1754055,2023,CMPf3265e9fca,ghg,,co2e_100yr,5610.000024,,ON-MAIN-6e9b2449,,,"Jolleys, M. et al (2024). Mineral Extraction s..."
1,1754056,2023,CMPf3265e9fca,ghg,,co2e_100yr,6403.000027,,ON-MAIN-63b394c3,,,"Jolleys, M. et al (2024). Mineral Extraction s..."
2,1754057,2023,CMPf3265e9fca,ghg,,co2e_100yr,229916.000984,,BC-MAIN-599152a0,,,"Jolleys, M. et al (2024). Mineral Extraction s..."
3,1754058,2023,CMPf3265e9fca,ghg,,co2e_100yr,2639.000011,,ON-MAIN-52224e1e,,,"Jolleys, M. et al (2024). Mineral Extraction s..."
4,1754060,2023,CMPf3265e9fca,ghg,,co2e_100yr,4225.000018,,ON-MAIN-48fe2205,,,"Jolleys, M. et al (2024). Mineral Extraction s..."
...,...,...,...,...,...,...,...,...,...,...,...,...
14520,ENV-9de9bb0d-2023-19,2023,CMP68bd334352,,,m3,324347,,QC-MAIN-9de9bb0d,-,,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables
14521,ENV-c5fefb01-2023-16,2023,CMP68bd334352,,,m3,168115,,ON-MAIN-c5fefb01,-,,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables
14522,ENV-c5fefb01-2023-17,2023,CMP68bd334352,,,m3,168115,,ON-MAIN-c5fefb01,-,,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables
14523,ENV-c5fefb01-2023-18,2023,CMP68bd334352,,,m3,82144,,ON-MAIN-c5fefb01,-,,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables


## Production table

### Manually collected data

In [106]:
production_df

Unnamed: 0,row_id,year,level,geography,reported_company_nrcan,name,main_id,facility_group_id,company_id,facility_type,...,commodities_nrcan,commodity,reference_point,material_type,data_type,unit,value,comment,source,source_id
0,PROD-1f126a43-2023-1,2023,Site-specific,Canada,Agnico Eagle Mines Limited,Canadian Malartic,ON-MAIN-1f126a43,-,,mining,...,"Gold, silver",Ore,2,Ore processed,Production,t,1.959493e+07,,2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
1,PROD-1f126a44-2023-1,2023,Site-specific,Canada,Agnico Eagle Mines Limited,Canadian Malartic,ON-MAIN-1f126a44,-,,mining,...,"Gold, silver",Gold,4,Intermediate metal,Production,oz,6.846400e+05,"""Production reflects Agnico Eagle's 50% intere...",2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
2,PROD-1f126a45-2023-1,2023,Site-specific,Canada,Agnico Eagle Mines Limited,Canadian Malartic,ON-MAIN-1f126a45,-,,mining,...,"Gold, silver",Silver,4,Intermediate metal,Production,koz,3.100000e+02,"""Production reflects Agnico Eagle's 50% intere...",2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
3,PROD-e7e6a960-2023-1,2023,Site-specific,Canada,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,,mining,...,Gold,Gold,2,Ore processed,Production,t,2.543485e+07,,2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
4,PROD-e7e6a963-2023-1,2023,Site-specific,Canada,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a963,-,,mining,...,Gold,Gold,4,Intermediate metal,Production,oz,6.774460e+05,Like intermediate metal,2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,PROD-c5fefb01-2023-1,2023,Site-specific,Canada,Wesdome Gold Mines Ltd,Kiena,ON-MAIN-c5fefb01,-,,mining,...,Gold,Gold,2,Head grade,Technical attribute,g/t,5.900000e+00,,2024-MDA-FINAL,SRC_WesdomeGoldMinesLtd_2024-MDA-FINAL
339,PROD-c5fefb01-2023-2,2023,Site-specific,Canada,Wesdome Gold Mines Ltd,Kiena,ON-MAIN-c5fefb01,-,,mining,...,Gold,Gold,2,Recovery rate,Technical attribute,%,9.830000e+01,,2024-MDA-FINAL,SRC_WesdomeGoldMinesLtd_2024-MDA-FINAL
340,PROD-c5fefb01-2023-3,2023,Site-specific,Canada,Wesdome Gold Mines Ltd,Kiena,ON-MAIN-c5fefb01,-,,mining,...,Gold,Gold,4,Intermediate metal,Production,Oz,3.553700e+04,,2024-MDA-FINAL,SRC_WesdomeGoldMinesLtd_2024-MDA-FINAL
341,PROD-c5fefb01-2023-4,2023,Site-specific,Canada,Wesdome Gold Mines Ltd,Kiena,ON-MAIN-c5fefb01,-,,mining,...,Gold,Gold,2,Ore milled,Production,t,1.911480e+05,,2024-MDA-FINAL,SRC_WesdomeGoldMinesLtd_2024-MDA-FINAL


In [107]:
production_df.rename(columns={'row_id': 'prod_id'}, inplace=True)
production_df.drop(columns=['level', 'reported_company_nrcan', 'name', 
                            'facility_type', 'mining_processing_type', 'commodities_nrcan', 'source'], inplace=True)

In [108]:
production_df

Unnamed: 0,prod_id,year,geography,main_id,facility_group_id,company_id,commodity,reference_point,material_type,data_type,unit,value,comment,source_id
0,PROD-1f126a43-2023-1,2023,Canada,ON-MAIN-1f126a43,-,,Ore,2,Ore processed,Production,t,1.959493e+07,,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
1,PROD-1f126a44-2023-1,2023,Canada,ON-MAIN-1f126a44,-,,Gold,4,Intermediate metal,Production,oz,6.846400e+05,"""Production reflects Agnico Eagle's 50% intere...",SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
2,PROD-1f126a45-2023-1,2023,Canada,ON-MAIN-1f126a45,-,,Silver,4,Intermediate metal,Production,koz,3.100000e+02,"""Production reflects Agnico Eagle's 50% intere...",SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
3,PROD-e7e6a960-2023-1,2023,Canada,QC-MAIN-e7e6a960,-,,Gold,2,Ore processed,Production,t,2.543485e+07,,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
4,PROD-e7e6a963-2023-1,2023,Canada,QC-MAIN-e7e6a963,-,,Gold,4,Intermediate metal,Production,oz,6.774460e+05,Like intermediate metal,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,PROD-c5fefb01-2023-1,2023,Canada,ON-MAIN-c5fefb01,-,,Gold,2,Head grade,Technical attribute,g/t,5.900000e+00,,SRC_WesdomeGoldMinesLtd_2024-MDA-FINAL
339,PROD-c5fefb01-2023-2,2023,Canada,ON-MAIN-c5fefb01,-,,Gold,2,Recovery rate,Technical attribute,%,9.830000e+01,,SRC_WesdomeGoldMinesLtd_2024-MDA-FINAL
340,PROD-c5fefb01-2023-3,2023,Canada,ON-MAIN-c5fefb01,-,,Gold,4,Intermediate metal,Production,Oz,3.553700e+04,,SRC_WesdomeGoldMinesLtd_2024-MDA-FINAL
341,PROD-c5fefb01-2023-4,2023,Canada,ON-MAIN-c5fefb01,-,,Gold,2,Ore milled,Production,t,1.911480e+05,,SRC_WesdomeGoldMinesLtd_2024-MDA-FINAL


In [109]:
unit_conversion = {
    "t": 1,
    "kt": 1_000,
    "mt": 1_000_000,
    "kg": 1e-3,
    "g": 1e-6,
    "oz": 0.0000311035,
    "koz": 0.0311035,
    "moz": 31.1035,
    "oz au": 0.0000311035,
    "oz au eq": 0.0000311035,
    "mlbs": 453.592 / 1_000 * 1_000_000,
    "klbs": 453.592 / 1_000 * 1_000,
    "million lbs": 453.592 / 1_000 * 1_000_000,
    "lb": 0.000453592,
    "wmt": 1,
    "dmt": 1,
    "mwmt": 1_000_000,
    "mdmt": 1_000_000,
    "kct": 2e-7,
    "mcts": 2e-4,
}


In [110]:
# Apply a function to convert the 'value' column to tonnes
# But we need to do that only for the 'Production' data_type and don't do it for any other data_type
production_df['value_tonnes'] = production_df.apply(
    lambda row: row['value'] * unit_conversion.get(row['unit'], 1) if row['data_type'] == 'Production' else np.nan, 
    axis=1
)

### CTrace dataset

In [111]:
ctrace_prod_gdf

Unnamed: 0,ctrace_id,year,activity,activity_units,capacity,capacity_units,source_id,substance_id,compartment_id,main_id
6,1754055,2023,863000.0,t of copper ore,4624000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-6e9b2449
7,1754056,2023,985000.0,t of copper ore,35547000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-63b394c3
8,1754057,2023,14575000.0,t of copper ore,367000000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-599152a0
9,1754058,2023,406000.0,t of copper ore,11041000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-52224e1e
11,1754060,2023,650000.0,t of copper ore,4822000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-48fe2205
12,1754061,2023,88100000.0,t of copper ore,645000000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-6b4800fe
16,1754066,2023,72886000.0,t of copper ore,263100000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,BC-MAIN-bf503b6b
17,1754067,2023,0.0,t of copper ore,87879590.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,QC-MAIN-30c1828c
25,1754077,2023,1501481.0,t of copper ore,19882000.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,QC-MAIN-e51eda66
29,1754081,2023,527971.0,t of copper ore,87879590.0,tonnes,"Jolleys, M. et al (2024). Mineral Extraction s...",,CMPf3265e9fca,ON-MAIN-7001a391


In [112]:
# Save units before melting
activity_units = ctrace_prod_gdf['activity_units']
capacity_units = ctrace_prod_gdf['capacity_units']

# Melt
ctrace_prod_gdf = pd.melt(
    ctrace_prod_gdf,
    id_vars=['ctrace_id', 'year', 'source_id', 'main_id'],
    value_vars=['activity', 'capacity'],
    var_name='data_type',
    value_name='value'
)

# Set data_type labels
ctrace_prod_gdf['data_type'] = ctrace_prod_gdf['data_type'].replace({
    'activity': 'Production',
    'capacity': 'Capacity'
})

# Concatenate units in correct order (activity first, then capacity)
ctrace_prod_gdf['unit'] = list(activity_units) + list(capacity_units)

In [113]:
ctrace_prod_gdf

Unnamed: 0,ctrace_id,year,source_id,main_id,data_type,value,unit
0,1754055,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",ON-MAIN-6e9b2449,Production,863000.0,t of copper ore
1,1754056,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",ON-MAIN-63b394c3,Production,985000.0,t of copper ore
2,1754057,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",BC-MAIN-599152a0,Production,14575000.0,t of copper ore
3,1754058,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",ON-MAIN-52224e1e,Production,406000.0,t of copper ore
4,1754060,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",ON-MAIN-48fe2205,Production,650000.0,t of copper ore
5,1754061,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",BC-MAIN-6b4800fe,Production,88100000.0,t of copper ore
6,1754066,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",BC-MAIN-bf503b6b,Production,72886000.0,t of copper ore
7,1754067,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",QC-MAIN-30c1828c,Production,0.0,t of copper ore
8,1754077,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",QC-MAIN-e51eda66,Production,1501481.0,t of copper ore
9,1754081,2023,"Jolleys, M. et al (2024). Mineral Extraction s...",ON-MAIN-7001a391,Production,527971.0,t of copper ore


In [114]:
ctrace_prod_gdf.rename(columns={'ctrace_id': 'prod_id'}, inplace=True)

In [115]:
# We add the missing columns
ctrace_prod_gdf['geography'] = 'Canada'
ctrace_prod_gdf['reference_point'] = 'Ore processed'
ctrace_prod_gdf['value_tonnes'] = ctrace_prod_gdf['value'] # they are always in tonnes

### Concatenate all production data into a single table

In [116]:
# Concatenate production and ctrace data 
production_table = pd.concat([production_df, ctrace_prod_gdf], ignore_index=True)

In [117]:
production_table.columns

Index(['prod_id', 'year', 'geography', 'main_id', 'facility_group_id',
       'company_id', 'commodity', 'reference_point', 'material_type',
       'data_type', 'unit', 'value', 'comment', 'source_id', 'value_tonnes'],
      dtype='object')

In [118]:
column_order = ['prod_id', 'year', 'geography', 
                'commodity', 'reference_point', 'material_type', 'data_type', 
                'unit', 'value', 'value_tonnes', 'comment', 
                'main_id', 'facility_group_id','company_id', 'source_id']

In [119]:
production_table = production_table[column_order]

In [120]:
production_table

Unnamed: 0,prod_id,year,geography,commodity,reference_point,material_type,data_type,unit,value,value_tonnes,comment,main_id,facility_group_id,company_id,source_id
0,PROD-1f126a43-2023-1,2023,Canada,Ore,2,Ore processed,Production,t,1.959493e+07,1.959493e+07,,ON-MAIN-1f126a43,-,,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
1,PROD-1f126a44-2023-1,2023,Canada,Gold,4,Intermediate metal,Production,oz,6.846400e+05,2.129470e+01,"""Production reflects Agnico Eagle's 50% intere...",ON-MAIN-1f126a44,-,,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
2,PROD-1f126a45-2023-1,2023,Canada,Silver,4,Intermediate metal,Production,koz,3.100000e+02,9.642085e+00,"""Production reflects Agnico Eagle's 50% intere...",ON-MAIN-1f126a45,-,,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
3,PROD-e7e6a960-2023-1,2023,Canada,Gold,2,Ore processed,Production,t,2.543485e+07,2.543485e+07,,QC-MAIN-e7e6a960,-,,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
4,PROD-e7e6a963-2023-1,2023,Canada,Gold,4,Intermediate metal,Production,oz,6.774460e+05,2.107094e+01,Like intermediate metal,QC-MAIN-e7e6a963,-,,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
384,1754104,2023,Canada,,Ore processed,,Capacity,tonnes,2.453000e+06,2.453000e+06,,ON-MAIN-28f3f0fc,,,"Jolleys, M. et al (2024). Mineral Extraction s..."
385,1754105,2023,Canada,,Ore processed,,Capacity,tonnes,8.787959e+07,8.787959e+07,,QC-MAIN-b2148882,,,"Jolleys, M. et al (2024). Mineral Extraction s..."
386,25486533,2023,Canada,,Ore processed,,Capacity,tonnes,2.500000e+06,2.500000e+06,,ON-MAIN-f8313ebd,,,"Jolleys, M. et al (2024). Mineral Extraction s..."
387,25486534,2023,Canada,,Ore processed,,Capacity,tonnes,2.200000e+07,2.200000e+07,,ON-MAIN-a3c56a83,,,"Jolleys, M. et al (2024). Mineral Extraction s..."


## Protected & Indigenous table

In [121]:
protected_mapping = {
    "WDPA_PID": "protected_area_id",
    "NAME": "land_name",
    "DESIG": "land_type",
    "STATUS_YR": "date"
}

indigenous_mapping = {
    "indigenous_land_id": "protected_area_id",
    "Name": "land_name",
    "Category": "land_type",
    "Data_Date": "date"
}

In [122]:
protected_clean = wpda_gdf_polygons.rename(columns=protected_mapping)[["protected_area_id", "land_name", "land_type", "distance_km", "geometry", "main_id", "source_id"]].copy()

In [123]:
indigenous_clean = indigenous_can_gdf.rename(columns=indigenous_mapping)[["protected_area_id", "land_name", "land_type", "distance_km", "geometry", "main_id", "source_id"]].copy()

In [124]:
target_crs = "EPSG:4326"
protected_clean = protected_clean.to_crs(target_crs)
indigenous_clean = indigenous_clean.to_crs(target_crs)

In [125]:
protected_land_table = pd.concat([protected_clean, indigenous_clean], ignore_index=True)
protected_land_table = gpd.GeoDataFrame(protected_land_table, geometry="geometry", crs=wpda_gdf_polygons.crs)

In [126]:
protected_land_table

Unnamed: 0,protected_area_id,land_name,land_type,distance_km,geometry,main_id,source_id
0,611_B,Wood Buffalo National Park Of Canada,National Park,27.397,"POLYGON ((-112.87253 59.99877, -115.58450 59.9...",NT-MAIN-b99d4cb2,https://www.protectedplanet.net
1,615,Banff National Park Of Canada,National Park,2.153,"POLYGON ((-117.30592 52.07417, -115.17460 51.2...",AB-MAIN-8434dce0,https://www.protectedplanet.net
2,616,Nahanni National Park Reserve Of Canada,National Park,0.000,"MULTIPOLYGON (((-124.02111 60.89056, -128.7980...",NT-MAIN-b3681e03,https://www.protectedplanet.net
3,616,Nahanni National Park Reserve Of Canada,National Park,31.820,"MULTIPOLYGON (((-124.02111 60.89056, -128.7980...",YT-MAIN-03a159d7,https://www.protectedplanet.net
4,620,Pukaskwa National Park Of Canada,National Park,19.001,"MULTIPOLYGON (((-86.25133 48.57798, -85.62889 ...",ON-MAIN-6e9be24e,https://www.protectedplanet.net
...,...,...,...,...,...,...,...
8790,ZZ-IND-40a2ea30,Six Nations Indian Reserve No. 40,Indian Reserve,25.954,"MULTIPOLYGON Z (((-79.99308 43.08630 0.00003, ...",ON-MAIN-020a683c,http://www.landmarkmap.org
8791,ZZ-IND-40a2ea30,Six Nations Indian Reserve No. 40,Indian Reserve,20.924,"MULTIPOLYGON Z (((-79.99308 43.08630 0.00003, ...",ON-MAIN-b2e5c9bb,http://www.landmarkmap.org
8792,ZZ-IND-4e299f50,Mashteuiatsh,Indian Reserve,39.343,"MULTIPOLYGON Z (((-72.27116 48.58552 0.00003, ...",QC-MAIN-3aa117d8,http://www.landmarkmap.org
8793,ZZ-IND-74ac0e34,Nisga'a,First Nations Treaty Lands,16.458,"MULTIPOLYGON Z (((-129.07569 55.20853 0.00003,...",BC-MAIN-3ef4f421,http://www.landmarkmap.org


## Land cover table

In [127]:
land_cover_table = land_cover_gdf.copy()
land_cover_table['npv_biome_type'] = npv_gdf['biome_type']
land_cover_table['source_id'] = land_cover_table['source_id'].astype(str) + ';' + npv_gdf['source_id'].astype(str)

In [128]:
from db_creation_function import assign_row_id

In [129]:
land_cover_table = assign_row_id(land_cover_table,
                                 facility_id_col='main_id', 
                                 prefix='land_cover_id')

In [130]:
land_cover_table.drop(columns=['name', 'geometry'], inplace=True)
land_cover_table.rename(columns={'row_id': 'land_cover_id'}, inplace=True)
land_cover_table = land_cover_table[['land_cover_id', 'year', 'modis_land_cover',
       'esa_land_cover', 'npv_biome_type', 'main_id', 'source_id']]

In [131]:
land_cover_table

Unnamed: 0,land_cover_id,year,modis_land_cover,esa_land_cover,npv_biome_type,main_id,source_id
0,land_cover_id-5801b453-1,2021,Woody Savannas,Built-up,,QC-MAIN-5801b453,https://doi.org/10.5067/MODIS/MCD12Q1.061 + ht...
1,land_cover_id-d29e0839-1,2021,Woody Savannas,Bare/Sparse Vegetation,Cool Mixed Forest,QC-MAIN-d29e0839,https://doi.org/10.5067/MODIS/MCD12Q1.061 + ht...
2,land_cover_id-f1ff4920-1,2021,Woody Savannas,Tree Cover,Cool Mixed Forest,QC-MAIN-f1ff4920,https://doi.org/10.5067/MODIS/MCD12Q1.061 + ht...
3,land_cover_id-4ea8ac9d-1,2021,Grasslands,Moss & Lichen,,NU-MAIN-4ea8ac9d,https://doi.org/10.5067/MODIS/MCD12Q1.061 + ht...
4,land_cover_id-e7e6a960-1,2021,Grasslands,Bare/Sparse Vegetation,Cool Mixed Forest,QC-MAIN-e7e6a960,https://doi.org/10.5067/MODIS/MCD12Q1.061 + ht...
...,...,...,...,...,...,...,...
265,land_cover_id-c5fefb01-1,2021,Woody Savannas,Permanent Water Bodies,Cool Mixed Forest,ON-MAIN-c5fefb01,https://doi.org/10.5067/MODIS/MCD12Q1.061 + ht...
266,land_cover_id-feb13c30-1,2021,Woody Savannas,Tree Cover,Cool Evergreen Needleleaf Forest,BC-MAIN-feb13c30,https://doi.org/10.5067/MODIS/MCD12Q1.061 + ht...
267,land_cover_id-5e0d8b48-1,2021,Open Shrublands,Grassland,Cold Evergreen Needleleaf Forest,YT-MAIN-5e0d8b48,https://doi.org/10.5067/MODIS/MCD12Q1.061 + ht...
268,land_cover_id-12c68d49-1,2021,Woody Savannas,Grassland,Cool Mixed Forest,ON-MAIN-12c68d49,https://doi.org/10.5067/MODIS/MCD12Q1.061 + ht...


## Environmental intensity table

In [132]:
env_int_df

Unnamed: 0,row_id,year,reporting_level,reported_company_nrcan,name,main_id,facility_group_id,company_id,facility_type,mining_processing_type,commodity,compartment,flow_type,subflow_type,substance,unit,value,comment,source,source_id
0,ENV_INT-GRP-0a2c0d69-2023-1,2023,Facility-group,Agnico Eagle Mines Limited,Meadowbank,-,GRP-0a2c0d69,,mining,"Open-pit, underground, concentrator",Gold,Water,water,Freshwater withdrawn for use intensity,-,m3 of water/t of ore processed,0.252115,,Agnico-Eagle-2023-Sustainability-Performance-D...,SRC_AgnicoEagleMinesLimited_Agnico-Eagle-2023-...
1,ENV_INT-GRP-0a2c0d69-2023-2,2023,Facility-group,Agnico Eagle Mines Limited,Meadowbank,-,GRP-0a2c0d69,,mining,"Open-pit, underground, concentrator",Gold,Water,water,Freshwater withdrawn for use intensity,-,m3 of water/oz of gold,2.244304,,Agnico-Eagle-2023-Sustainability-Performance-D...,SRC_AgnicoEagleMinesLimited_Agnico-Eagle-2023-...
2,ENV_INT-4ea8ac9d-2023-1,2023,Site-specific,Agnico Eagle Mines Limited,LaRonde,NU-MAIN-4ea8ac9d,-,,mining,"Open-pit, underground, concentrator","Gold, zinc, copper, silver, cadmium",Water,water,Freshwater withdrawn for use intensity,-,m3 of water/t of ore processed,0.559341,,Agnico-Eagle-2023-Sustainability-Performance-D...,SRC_AgnicoEagleMinesLimited_Agnico-Eagle-2023-...
3,ENV_INT-4ea8ac9d-2023-2,2023,Site-specific,Agnico Eagle Mines Limited,LaRonde,NU-MAIN-4ea8ac9d,-,,mining,"Open-pit, underground, concentrator","Gold, zinc, copper, silver, cadmium",Water,water,Freshwater withdrawn for use intensity,-,m3 of water/oz of gold,4.849037,,Agnico-Eagle-2023-Sustainability-Performance-D...,SRC_AgnicoEagleMinesLimited_Agnico-Eagle-2023-...
4,ENV_INT-730aefe3-2023-1,2023,Site-specific,Agnico Eagle Mines Limited,Macassa,NU-MAIN-730aefe3,-,,mining,Concentrator,"Gold, silver",Water,water,Freshwater withdrawn for use intensity,-,m3 of water/t of ore processed,0.437068,,Agnico-Eagle-2023-Sustainability-Performance-D...,SRC_AgnicoEagleMinesLimited_Agnico-Eagle-2023-...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,ENV_INT-9de9bb0d-2023-2,2023,Site-specific,Wesdome Gold Mines Ltd,Eagle River,QC-MAIN-9de9bb0d,-,,mining,"Underground, concentrator",Gold,Air,ghg,scope1+2,-,tCO2eq/oz produced,0.170000,,2023-ESG-Data-Tables.xlsx,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables
92,ENV_INT-9de9bb0d-2023-3,2023,Site-specific,Wesdome Gold Mines Ltd,Eagle River,QC-MAIN-9de9bb0d,-,,mining,"Underground, concentrator",Gold,Water,water,Water intensity,-,m3/t milled,0.130000,,2023-ESG-Data-Tables.xlsx,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables
93,ENV_INT-9de9bb0d-2023-4,2023,Site-specific,Wesdome Gold Mines Ltd,Eagle River,QC-MAIN-9de9bb0d,-,,mining,"Underground, concentrator",Gold,Water,water,Water intensity,-,m3/oz produced,0.330000,,2023-ESG-Data-Tables.xlsx,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables
94,ENV_INT-9de9bb0d-2023-5,2023,Site-specific,Wesdome Gold Mines Ltd,Eagle River,QC-MAIN-9de9bb0d,-,,mining,"Underground, concentrator",Gold,Air,ghg,scope1+2,-,tCO2eq/t milled,0.070000,,2023-ESG-Data-Tables.xlsx,SRC_WesdomeGoldMinesLtd_2023-ESG-Data-Tables


## Reserves and resources table

In [137]:
reserves_df

Unnamed: 0,row_id,year,reporting_level,reported_company_nrcan,name,main_id,facility_group_id,facility_type,mining_processing_type,commodity,...,ore_unit,grade,grade_unit,metal_content,metal_content_unit,norm,recovery_rate,comment,source,Unnamed: 21
0,RES-d29e0839-2022-1,2022,Site-specific,Abcourt Mines Inc,Sleeping Giant,QC-MAIN-d29e0839,-,mining,Concentrator,Gold,...,Mt,7.14,g/t,173300.0,oz,,,,TR_Abcourt_Geant_Dormant_2023,
1,RES-d29e0839-2022-2,2022,Site-specific,Abcourt Mines Inc,Sleeping Giant,QC-MAIN-d29e0839,-,mining,Concentrator,Gold,...,Mt,8.74,g/t,248300.0,oz,,,,TR_Abcourt_Geant_Dormant_2023,
2,RES-e7e6a960-2024-1,2024,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,mining,"Open-pit, concentrator",Gold,...,kt,0.81,g/t,3333.0,kOz Au,,92.0,,2024_MRMR,
3,RES-e7e6a960-2024-2,2024,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,mining,"Open-pit, concentrator",Gold,...,kt,0.73,g/t,15718.0,kOz Au,,92.0,,2024_MRMR,
4,RES-e7e6a960-2024-3,2024,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,mining,"Open-pit, concentrator",Gold,...,kt,1.1,g/t,1201.0,kOz Au,,,,2024_MRMR,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,RES-dd723db4-2023-12,2023,Site-specific,Iron Ore Company of Canada Inc.,Carol Lake,NL-MAIN-dd723db4,-,mining,"Open-pit, concentrator",Phosphorus,...,,0.03,% P,,,,,Reported by Rio Tinto,rt-fact-book-addendum.xlsx,
390,RES-dd723db4-2023-13,2023,Site-specific,Iron Ore Company of Canada Inc.,Carol Lake,NL-MAIN-dd723db4,-,mining,"Open-pit, concentrator",Iron,...,,38.2,% Fe,,,,,Reported by Rio Tinto,rt-fact-book-addendum.xlsx,
391,RES-dd723db4-2023-14,2023,Site-specific,Iron Ore Company of Canada Inc.,Carol Lake,NL-MAIN-dd723db4,-,mining,"Open-pit, concentrator",Silicon dioxide,...,,37.8,%SiO2,,,,,Reported by Rio Tinto,rt-fact-book-addendum.xlsx,
392,RES-dd723db4-2023-15,2023,Site-specific,Iron Ore Company of Canada Inc.,Carol Lake,NL-MAIN-dd723db4,-,mining,"Open-pit, concentrator",Aluminum oxide,...,,0.2,% Al2O3,,,,,Reported by Rio Tinto,rt-fact-book-addendum.xlsx,


## Energy table

In [253]:
energy_df

Unnamed: 0,row_id,year,reporting_level,reported_company_nrcan,name,main_id,facility_group_id,company_id,facility_type,mining_processing_type,commodity,energy_type,unit,value,source,source_id
0,NRJ-e7e6a960-2023-1,2023,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,,mining,"Open-pit, concentrator",Gold,Fuel consumption,GJ,4266454,2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
1,NRJ-e7e6a960-2023-2,2023,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,,mining,"Open-pit, concentrator",Gold,Diesel,GJ,3947008,2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
2,NRJ-e7e6a960-2023-3,2023,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,,mining,"Open-pit, concentrator",Gold,Light Fuel & Gasoline,GJ,71904,2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
3,NRJ-e7e6a960-2023-4,2023,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,,mining,"Open-pit, concentrator",Gold,Propane,GJ,182468,2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
4,NRJ-e7e6a960-2023-5,2023,Site-specific,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,,mining,"Open-pit, concentrator",Gold,Explosives,GJ,65074,2023-Sustainability-Performance_data,SRC_AgnicoEagleMinesLimited_2023-Sustainabilit...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,NRJ-c5fefb01-2023-7,2023,Site-specific,Wesdome Gold Mines Ltd.,Kiena,ON-MAIN-c5fefb01,-,,mining,"Open-pit, concentrator",Gold,Energy intensity,GJ/t milled,1.07,ESG_data_2023.xlsx,SRC_WesdomeGoldMinesLtd._ESG_data_2023
265,NRJ-c5fefb01-2023-8,2023,Site-specific,Wesdome Gold Mines Ltd.,Kiena,ON-MAIN-c5fefb01,-,,mining,"Open-pit, concentrator",Gold,Energy intensity,GJ/oz,5.78,ESG_data_2023.xlsx,SRC_WesdomeGoldMinesLtd._ESG_data_2023
266,NRJ-CMP-d5f1c66b-2023-18,2023,Company-level,Glencore,-,-,-,CMP-d5f1c66b,mining and manufacturing,x,x,Energy intensity of metals mining,GJ/t Cu-eq,40.3,2024+ESG+data+book.xlsx,SRC_Glencore_2024+ESG+data+book
267,NRJ-CMP-d5f1c66b-2023-19,2023,Company-level,Glencore,-,-,-,CMP-d5f1c66b,mining and manufacturing,x,x,Energy intensity of metals smelting,GJ/t Cu-eq,30.3,2024+ESG+data+book.xlsx,SRC_Glencore_2024+ESG+data+book


## Tailings table

In [254]:
tailings_gdf

Unnamed: 0,row_id,main_id,year,tailing_id,tsf_name,status,construction_year,raise_type,current_maximum_height,current_tailings_storage,planned_storage_5_years,hazard_categorization,classification_system,geometry,source_id
0,TAIL-12.0-2019-1,NU-MAIN-730aefe3,2019,12.0,North Cell,Active,2009.0,"Downstream, Upstream",31.0,14400000,14400000,Medium,Canadian Dam Association,POINT (-96.04333 65.02156),https://tailing.grida.no/
1,TAIL-13.0-2019-1,NU-MAIN-730aefe3,2019,13.0,South Cell,Active,2012.0,Downstream,49.0,10420000,10800000,Medium,Canadian Dam Association,POINT (-96.03339 65.01378),https://tailing.grida.no/
2,TAIL-14.0-2019-1,NU-MAIN-730aefe3,2019,14.0,In pit,Active,2009.0,,,850000,12500000,Low,,POINT (-96.03254 65.00190),https://tailing.grida.no/
3,TAIL-15.0-2019-1,NU-MAIN-8b0264c9,2019,15.0,Meliadine TSF,Active,2019.0,,5.0,89000,4354000,Medium,Canadian Dam Association,POINT (-92.14034 63.02083),https://tailing.grida.no/
4,TAIL-16.0-2019-1,QC-MAIN-c0660aec,2019,16.0,South TSF,Active,2007.0,Not Raised,5.0,1664000,2500000,Medium,Canadian Dam Association,POINT (-77.84198 48.05434),https://tailing.grida.no/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215,TAIL-1126.0-2019-1,ON-MAIN-f080c409,2019,1126.0,R1 Tailings,Active,1985.0,Compacted\nOuter Shell,14.0,2900000,4000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/
216,TAIL-1127.0-2019-1,ON-MAIN-f080c409,2019,1127.0,R2 Tailings,Active,1985.0,Compacted\nOuter Shell,13.0,18000000,18000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/
217,TAIL-1128.0-2019-1,ON-MAIN-f080c409,2019,1128.0,R3 Tailings,Active,1985.0,Compacted\nOuter Shell,27.0,39000000,42000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/
218,TAIL-1129.0-2019-1,ON-MAIN-f080c409,2019,1129.0,R4 Tailings,Active,1990.0,Compacted\nOuter Shell,22.0,59000000,63000000,Extreme,Canadian Dam Association,POINT (-81.14058 46.47142),https://tailing.grida.no/


## Land occupation table

In [255]:
polygon_gdf

Unnamed: 0,tang_id,area_km2,source_id,geometry,main_id,tailing_id,distance_km,relation_type
0,6383,0.026130,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((1795479.979 -83564.531 0.000, 1795...",,,,
1,6388,0.671399,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((1796123.448 -80625.526 0.000, 1796...",,,,
2,6390,0.050909,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((1796791.634 -80270.697 0.000, 1796...",,,,
3,45954,2.622800,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((2466485.124 185586.547 0.000, 2466...",NS-MAIN-c9c77a59,,0.000000,one-to-many
4,45955,0.441869,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((2465809.682 185862.507 0.000, 2465...",NS-MAIN-c9c77a59,,1.186584,one-to-many
...,...,...,...,...,...,...,...,...
1915,3399,0.541024,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((-717210.659 1915423.857 0.000, -71...",,,,
1916,3401,1.053173,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((-715648.337 1916385.463 0.000, -71...",,,,
1917,26364,0.079894,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((-717332.782 1916999.286 0.000, -71...",,,,
1918,43739,1.519488,https://doi.org/10.1038/s43247-023-00805-6,"POLYGON Z ((-713032.430 1919630.086 0.000, -71...",,,,


## By-product 

## Archetypes table

In [256]:
archetypes_df

Unnamed: 0,row_id,reported_company_nrcan,name,main_id,facility_group_id,facility_type,mining_processing_type,commodities_nrcan,deposit_type,mining_depth,mining_method,processing_method,source,source_id
0,ARCH-d29e0839-1,Abcourt Mines Inc,Sleeping Giant,QC-MAIN-d29e0839,-,mining,Concentrator,"Gold, silver",VMS,,Shrinkage stoping\nRoom-and-pillar\nLonghole s...,,MDO,SRC_AbcourtMinesInc_MDO
1,ARCH-1f126a43-1,Agnico Eagle Mines Limited,Canadian Malartic,ON-MAIN-1f126a43,-,mining,"Underground, concentrator","Gold, silver",Disseminated gold deposit,,"Open pit, conventional truck and shovel; remot...","Conventional crushing, grinding, gravity conce...",TR_2021,SRC_AgnicoEagleMinesLimited_TR_2021
2,ARCH-e7e6a960-1,Agnico Eagle Mines Limited,Detour Lake,QC-MAIN-e7e6a960,-,mining,"Open-pit, concentrator",Gold,Disseminated and structurally controlled lode ...,,"Open pit, conventional truck and shovel method","Crushing, grinding, gravity concentration, cya...",TR_2021,SRC_AgnicoEagleMinesLimited_TR_2021
3,ARCH-8b0264c9-1,Agnico Eagle Mines Limited,Goldex,NU-MAIN-8b0264c9,-,mining,"Underground, concentrator","Gold, silver",Vein / narrow vein\nSkarn\nHydrothermal,,Truck & Shovel / Loader\nLonghole stoping,,MDO,SRC_AgnicoEagleMinesLimited_MDO
4,ARCH-4ea8ac9d-1,Agnico Eagle Mines Limited,LaRonde,NU-MAIN-4ea8ac9d,-,mining,"Open-pit, underground, concentrator","Gold, zinc, copper, silver, cadmium",Polymetallic massive sulphide and lode gold de...,,Longitudinal and transverse longhole open stop...,,TR_2023,SRC_AgnicoEagleMinesLimited_TR_2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,ARCH-63b394c3-1,Vale,Totten,ON-MAIN-63b394c3,-,mining,Refinery,"Nickel, copper, platinum group metals, gold, s...",Magmatic,,Slot-Slash\nMechanized Cut & Fill\nTransverse ...,,MDO,SRC_Vale_MDO
73,ARCH-e05ed9fe-1,Victoria Gold Corporation,Eagle (Dublin Gulch),YT-MAIN-e05ed9fe,-,mining,"Open-pit, concentrator",Gold,Vein / narrow vein\nIntrusion related,,Truck & Shovel / Loader,,MDO,SRC_VictoriaGoldCorporation_MDO
74,ARCH-9de9bb0d-1,Wesdome Gold Mines Ltd,Eagle River,QC-MAIN-9de9bb0d,-,mining,"Underground, concentrator",Gold,Mesothermal\nVein / narrow vein,,Longhole stoping\nAlimak\nSub-level open stopi...,,MDO,SRC_WesdomeGoldMinesLtd_MDO
75,ARCH-c5fefb01-1,Wesdome Gold Mines Ltd,Kiena,ON-MAIN-c5fefb01,-,mining,"Open-pit, concentrator",Gold,Vein / narrow vein\nBreccia pipe / Stockwork\n...,,Longhole stoping\nLongitudinal retreat,,MDO,SRC_WesdomeGoldMinesLtd_MDO


## Water risk table

In [257]:
water_risk_gdf

Unnamed: 0,row_id,main_id,name,indicator,value,year,scenario,source_id,geometry
0,water_risk-5801b453-2020-baseline-1,QC-MAIN-5801b453,Abcourt-Barvue,Water Stress Label,Low (<10%),2020,baseline,https://www.wri.org/data/aqueduct-global-maps-...,POINT (-77.67901 48.52154)
1,water_risk-5801b453-2020-baseline-2,QC-MAIN-5801b453,Abcourt-Barvue,Water Depletion Label,Low (<5%),2020,baseline,https://www.wri.org/data/aqueduct-global-maps-...,POINT (-77.67901 48.52154)
2,water_risk-5801b453-2020-baseline-3,QC-MAIN-5801b453,Abcourt-Barvue,Interannual Variability Label,Medium - High (0.50-0.75),2020,baseline,https://www.wri.org/data/aqueduct-global-maps-...,POINT (-77.67901 48.52154)
3,water_risk-5801b453-2020-baseline-4,QC-MAIN-5801b453,Abcourt-Barvue,Groundwater Table Decline Label,Insignificant Trend,2020,baseline,https://www.wri.org/data/aqueduct-global-maps-...,POINT (-77.67901 48.52154)
4,water_risk-5801b453-2020-baseline-5,QC-MAIN-5801b453,Abcourt-Barvue,Coastal Eutrophication Potential Label,Low (<-5),2020,baseline,https://www.wri.org/data/aqueduct-global-maps-...,POINT (-77.67901 48.52154)
...,...,...,...,...,...,...,...,...,...
8635,water_risk-891d3cb5-2080-pes-2,BC-MAIN-891d3cb5,Akie,Water Depletion Label,Low (<5%),2080,pes,https://www.wri.org/data/aqueduct-global-maps-...,POINT (-124.86599 57.38088)
8636,water_risk-891d3cb5-2080-pes-3,BC-MAIN-891d3cb5,Akie,Interannual Variability Label,Low - Medium (0.25-0.50),2080,pes,https://www.wri.org/data/aqueduct-global-maps-...,POINT (-124.86599 57.38088)
8637,water_risk-891d3cb5-2080-opt-1,BC-MAIN-891d3cb5,Akie,Water Stress Label,Low (<10%),2080,opt,https://www.wri.org/data/aqueduct-global-maps-...,POINT (-124.86599 57.38088)
8638,water_risk-891d3cb5-2080-opt-2,BC-MAIN-891d3cb5,Akie,Water Depletion Label,Low (<5%),2080,opt,https://www.wri.org/data/aqueduct-global-maps-...,POINT (-124.86599 57.38088)


## Climate categories table

In [258]:
climate_category_gdf

Unnamed: 0,row_id,main_id,name,year,scenario,category,source_id,geometry
0,CC-5801b453-1991_2020-historical-1,QC-MAIN-5801b453,Abcourt-Barvue,1991_2020,historical,"Dfb - Cold, no dry season, warm summer",https://doi.org/10.1038/s41597-023-02549-6,POINT (-77.67901 48.52154)
1,CC-d29e0839-1991_2020-historical-1,QC-MAIN-d29e0839,Sleeping Giant,1991_2020,historical,"Dfb - Cold, no dry season, warm summer",https://doi.org/10.1038/s41597-023-02549-6,POINT (-77.97434 49.13259)
2,CC-f1ff4920-1991_2020-historical-1,QC-MAIN-f1ff4920,Akasaba West,1991_2020,historical,"Dfb - Cold, no dry season, warm summer",https://doi.org/10.1038/s41597-023-02549-6,POINT (-77.58000 48.04000)
3,CC-4ea8ac9d-1991_2020-historical-1,NU-MAIN-4ea8ac9d,Amaruq,1991_2020,historical,"Dfc - Cold, no dry season, cold summer",https://doi.org/10.1038/s41597-023-02549-6,POINT (-96.69700 65.41500)
4,CC-e7e6a960-1991_2020-historical-1,QC-MAIN-e7e6a960,Canadian Malartic,1991_2020,historical,"Dfb - Cold, no dry season, warm summer",https://doi.org/10.1038/s41597-023-02549-6,POINT (-78.13082 48.12222)
...,...,...,...,...,...,...,...,...
2425,CC-c5fefb01-2071_2099-ssp585-1,ON-MAIN-c5fefb01,Mishi,2071_2099,ssp585,"Dfb - Cold, no dry season, warm summer",https://doi.org/10.1038/s41597-023-02549-6,POINT (-85.45250 48.11028)
2426,CC-feb13c30-2071_2099-ssp585-1,BC-MAIN-feb13c30,Record Ridge,2071_2099,ssp585,"Dsb - Cold, dry summer, warm summer",https://doi.org/10.1038/s41597-023-02549-6,POINT (-117.88400 49.08200)
2427,CC-5e0d8b48-2071_2099-ssp585-1,YT-MAIN-5e0d8b48,Casino,2071_2099,ssp585,"Dfc - Cold, no dry season, cold summer",https://doi.org/10.1038/s41597-023-02549-6,POINT (-138.83333 62.73333)
2428,CC-12c68d49-2071_2099-ssp585-1,ON-MAIN-12c68d49,Eagle's Nest,2071_2099,ssp585,"Dfa - Cold, no dry season, hot summer",https://doi.org/10.1038/s41597-023-02549-6,POINT (-86.30380 52.74202)


## Weather table

In [259]:
weather_gdf

Unnamed: 0,row_id,main_id,name,year,variable,value,unit,scenario,source_id,geometry
0,weather-5801b453-2025-ssp126-1,QC-MAIN-5801b453,Abcourt-Barvue,2025,Daily Temperature Range,11.632902,°C,ssp126,https://climate-scenarios.canada.ca/?page=CanD...,POINT (-77.67901 48.52154)
1,weather-5801b453-2030-ssp126-1,QC-MAIN-5801b453,Abcourt-Barvue,2030,Daily Temperature Range,11.683678,°C,ssp126,https://climate-scenarios.canada.ca/?page=CanD...,POINT (-77.67901 48.52154)
2,weather-5801b453-2035-ssp126-1,QC-MAIN-5801b453,Abcourt-Barvue,2035,Daily Temperature Range,11.695585,°C,ssp126,https://climate-scenarios.canada.ca/?page=CanD...,POINT (-77.67901 48.52154)
3,weather-5801b453-2040-ssp126-1,QC-MAIN-5801b453,Abcourt-Barvue,2040,Daily Temperature Range,11.721719,°C,ssp126,https://climate-scenarios.canada.ca/?page=CanD...,POINT (-77.67901 48.52154)
4,weather-5801b453-2045-ssp126-1,QC-MAIN-5801b453,Abcourt-Barvue,2045,Daily Temperature Range,11.564794,°C,ssp126,https://climate-scenarios.canada.ca/?page=CanD...,POINT (-77.67901 48.52154)
...,...,...,...,...,...,...,...,...,...,...
34555,weather-891d3cb5-2080-ssp585-2,BC-MAIN-891d3cb5,Akie,2080,Annual Precipitation,807.612915,mm,ssp585,https://climate-scenarios.canada.ca/?page=CanD...,POINT (-124.86599 57.38088)
34556,weather-891d3cb5-2085-ssp585-2,BC-MAIN-891d3cb5,Akie,2085,Annual Precipitation,790.696106,mm,ssp585,https://climate-scenarios.canada.ca/?page=CanD...,POINT (-124.86599 57.38088)
34557,weather-891d3cb5-2090-ssp585-2,BC-MAIN-891d3cb5,Akie,2090,Annual Precipitation,800.068237,mm,ssp585,https://climate-scenarios.canada.ca/?page=CanD...,POINT (-124.86599 57.38088)
34558,weather-891d3cb5-2095-ssp585-2,BC-MAIN-891d3cb5,Akie,2095,Annual Precipitation,817.156982,mm,ssp585,https://climate-scenarios.canada.ca/?page=CanD...,POINT (-124.86599 57.38088)


## Conflict table

In [260]:
conflict_gdf

Unnamed: 0,ej_atlas_id,main_id,case_name,start_date,end_date,conflict_description,conflict_details,population_affected,conflict_intensity,project_status,source_id
9,4135,NL-MAIN-2d8801d6,"Vale Mining in Voisey's Bay, Labrador, Canada",11/1994,,<p>Voiseyâ€™s Bay nickel mine in Labrador was ...,"Voiseyâ€™s Bay is a â€œ6,000 tonnes-per-day fa...",Rural,"MEDIUM (street protests, visible mobilization)",In operation,http://journals.librarypublishing.arizona.edu/...
10,4145,ON-MAIN-2e13aaa0,Port Colborne Class Action Lawsuit Against Val...,06/2000,04/2012,"<p>In Pearson (later Smith) v. Inco, the large...",Raw materials from Valeâ€™s Sudbury mining ope...,Urban,LOW (some local organising),In operation,http://journals.librarypublishing.arizona.edu/...
15,5599,QC-MAIN-d6748d40,Nouveau Mondeâ€™s Matawinie graphite mine in Q...,01/01/2017,,<p>Five kilometers southwest of the municipali...,The project includes 319 mining concessions (c...,Rural,"MEDIUM (street protests, visible mobilization)",Under construction,http://journals.librarypublishing.arizona.edu/...


## Population table

In [261]:
population_gdf

Unnamed: 0,row_id,main_id,name,year,buffer_size,total_population,source_id,geometry
0,population-5801b453-2025-1,QC-MAIN-5801b453,Abcourt-Barvue,2025,10km,1305.901860,https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F4...,POINT (-77.67901 48.52154)
1,population-5801b453-2025-2,QC-MAIN-5801b453,Abcourt-Barvue,2025,50km,63977.830574,https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F4...,POINT (-77.67901 48.52154)
2,population-5801b453-2030-1,QC-MAIN-5801b453,Abcourt-Barvue,2030,10km,1338.088985,https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F4...,POINT (-77.67901 48.52154)
3,population-5801b453-2030-2,QC-MAIN-5801b453,Abcourt-Barvue,2030,50km,65434.154098,https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F4...,POINT (-77.67901 48.52154)
4,population-d29e0839-2025-1,QC-MAIN-d29e0839,Sleeping Giant,2025,10km,0.000000,https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F4...,POINT (-77.97434 49.13259)
...,...,...,...,...,...,...,...,...
1075,population-12c68d49-2030-2,ON-MAIN-12c68d49,Eagle's Nest,2030,50km,0.000000,https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F4...,POINT (-86.30380 52.74202)
1076,population-891d3cb5-2025-1,BC-MAIN-891d3cb5,Akie,2025,10km,0.000000,https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F4...,POINT (-124.86599 57.38088)
1077,population-891d3cb5-2025-2,BC-MAIN-891d3cb5,Akie,2025,50km,297.622460,https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F4...,POINT (-124.86599 57.38088)
1078,population-891d3cb5-2030-1,BC-MAIN-891d3cb5,Akie,2030,10km,0.000000,https://doi.org/10.2905/2FF68A52-5B5B-4A22-8F4...,POINT (-124.86599 57.38088)


## Peatland table

In [262]:
peatland_gdf

Unnamed: 0,main_id,facility_name,longitude,latitude,peatland_presence,source_id,geometry
0,QC-MAIN-5801b453,Abcourt-Barvue,-77.679010,48.521540,No data,https://greifswaldmoor.de/global-peatland-data...,POINT (-77.67901 48.52154)
1,QC-MAIN-d29e0839,Sleeping Giant,-77.974340,49.132590,peat in soil mosaic,https://greifswaldmoor.de/global-peatland-data...,POINT (-77.97434 49.13259)
2,QC-MAIN-f1ff4920,Akasaba West,-77.580000,48.040000,peat dominated,https://greifswaldmoor.de/global-peatland-data...,POINT (-77.58000 48.04000)
3,NU-MAIN-4ea8ac9d,Amaruq,-96.697000,65.415000,No data,https://greifswaldmoor.de/global-peatland-data...,POINT (-96.69700 65.41500)
4,QC-MAIN-e7e6a960,Canadian Malartic,-78.130824,48.122223,No data,https://greifswaldmoor.de/global-peatland-data...,POINT (-78.13082 48.12222)
...,...,...,...,...,...,...,...
265,ON-MAIN-c5fefb01,Mishi,-85.452500,48.110280,No data,https://greifswaldmoor.de/global-peatland-data...,POINT (-85.45250 48.11028)
266,BC-MAIN-feb13c30,Record Ridge,-117.884000,49.082000,No data,https://greifswaldmoor.de/global-peatland-data...,POINT (-117.88400 49.08200)
267,YT-MAIN-5e0d8b48,Casino,-138.833330,62.733330,No data,https://greifswaldmoor.de/global-peatland-data...,POINT (-138.83333 62.73333)
268,ON-MAIN-12c68d49,Eagle's Nest,-86.303800,52.742020,peat dominated,https://greifswaldmoor.de/global-peatland-data...,POINT (-86.30380 52.74202)


# Exports 

## Exports tables to CSV

In [264]:
# Export all the tables to CSV
main_gdf.to_csv(r'data/Tables/CSV/main_table.csv', index=False)
production_table.to_csv(r'data/Tables/CSV/production_table.csv', index=False)
env_table.to_csv(r'data/Tables/CSV/env_table.csv', index=False)
land_cover_table.to_csv(r'data/Tables/CSV/land_cover_table.csv', index=False)
env_int_df.to_csv(r'data/Tables/CSV/env_int_table.csv', index=False)
reserves_df.to_csv(r'data/Tables/CSV/reserves_table.csv', index=False)
energy_df.to_csv(r'data/Tables/CSV/energy_table.csv', index=False)
tailings_gdf.to_csv(r'data/Tables/CSV/tailings_table.csv', index=False)
polygon_gdf.to_csv(r'data/Tables/CSV/land_occupation_table.csv', index=False)
archetypes_df.to_csv(r'data/Tables/CSV/archetypes_table.csv', index=False)
water_risk_gdf.to_csv(r'data/Tables/CSV/water_risk_table.csv', index=False)
climate_category_gdf.to_csv(r'data/Tables/CSV/climate_category_table.csv', index=False)
weather_gdf.to_csv(r'data/Tables/CSV/weather_table.csv', index=False)
conflict_gdf.to_csv(r'data/Tables/CSV/conflict_table.csv', index=False)
population_gdf.to_csv(r'data/Tables/CSV/population_table.csv', index=False)
peatland_gdf.to_csv(r'data/Tables/CSV/peatland_table.csv', index=False)

In [139]:
protected_land_table

Unnamed: 0,protected_area_id,land_name,land_type,distance_km,geometry,main_id,source_id
0,611_B,Wood Buffalo National Park Of Canada,National Park,27.397,"POLYGON ((-112.87253 59.99877, -115.58450 59.9...",NT-MAIN-b99d4cb2,https://www.protectedplanet.net
1,615,Banff National Park Of Canada,National Park,2.153,"POLYGON ((-117.30592 52.07417, -115.17460 51.2...",AB-MAIN-8434dce0,https://www.protectedplanet.net
2,616,Nahanni National Park Reserve Of Canada,National Park,0.000,"MULTIPOLYGON (((-124.02111 60.89056, -128.7980...",NT-MAIN-b3681e03,https://www.protectedplanet.net
3,616,Nahanni National Park Reserve Of Canada,National Park,31.820,"MULTIPOLYGON (((-124.02111 60.89056, -128.7980...",YT-MAIN-03a159d7,https://www.protectedplanet.net
4,620,Pukaskwa National Park Of Canada,National Park,19.001,"MULTIPOLYGON (((-86.25133 48.57798, -85.62889 ...",ON-MAIN-6e9be24e,https://www.protectedplanet.net
...,...,...,...,...,...,...,...
8790,ZZ-IND-40a2ea30,Six Nations Indian Reserve No. 40,Indian Reserve,25.954,"MULTIPOLYGON Z (((-79.99308 43.08630 0.00003, ...",ON-MAIN-020a683c,http://www.landmarkmap.org
8791,ZZ-IND-40a2ea30,Six Nations Indian Reserve No. 40,Indian Reserve,20.924,"MULTIPOLYGON Z (((-79.99308 43.08630 0.00003, ...",ON-MAIN-b2e5c9bb,http://www.landmarkmap.org
8792,ZZ-IND-4e299f50,Mashteuiatsh,Indian Reserve,39.343,"MULTIPOLYGON Z (((-72.27116 48.58552 0.00003, ...",QC-MAIN-3aa117d8,http://www.landmarkmap.org
8793,ZZ-IND-74ac0e34,Nisga'a,First Nations Treaty Lands,16.458,"MULTIPOLYGON Z (((-129.07569 55.20853 0.00003,...",BC-MAIN-3ef4f421,http://www.landmarkmap.org


In [151]:
# To avoid issues with large file size, we simplify the geometry of the protected land table before exporting
protected_land_table['geometry'] = protected_land_table['geometry'].simplify(tolerance=10, preserve_topology=True)

In [155]:
protected_land_table.to_parquet(r'data/Tables/CSV/protected_land_table.parquet', index=False)

In [None]:
#or gdf["geometry"] = gdf["geometry"].apply(lambda geom: geom.wkt)

## Export DB to SQLite

In [None]:
from matching_functions import create_and_populate_database

In [37]:
tables_dict = {
    "main": main_gdf,
    "tailings": tailings_gdf,
    "mincan": min_can_gdf,
    "conflict": conflict_gdf,
    "ghg": ghg_gdf,
    "pollution": pollutant_gdf,
    "climate_categories": climate_category_gdf,
    "weather": weather_gdf,
    "peatland": peatland_gdf,
    "population": population_gdf,
    "water_risk": water_risk_gdf,
    "land_cover": land_cover_gdf,
    "natural_potential_vegetation": npv_gdf,
    "indigenous_land": indigenous_can_gdf,
    "protected_land": wpda_gdf_polygons,
    "land_occupation": tang_canada_gdf
}

# Run the function
create_and_populate_database(
    db_path="data/Tables/metallican.sqlite",
    schema_path="schema.sql",
    tables_dict=tables_dict,
    keep_geometry_tables=["main", "tailings"]
)

ℹ️ No old database found at 'data/Tables/metallican.sqlite'
✅ Empty database structure created at 'data/Tables/metallican.sqlite'
🔄 Converting geometries...


  df["geometry"] = df["geometry"].to_wkt()


✅ Geometries handled (kept only where needed)
✅ New connection opened
✅ Inserted 305 rows into 'main'
✅ Inserted 227 rows into 'tailings'
✅ Inserted 939 rows into 'mincan'
✅ Inserted 20 rows into 'conflict'
✅ Inserted 184 rows into 'ghg'
✅ Inserted 24130 rows into 'pollution'
✅ Inserted 2745 rows into 'climate_categories'
✅ Inserted 39040 rows into 'weather'
✅ Inserted 305 rows into 'peatland'
✅ Inserted 1220 rows into 'population'
✅ Inserted 9760 rows into 'water_risk'
✅ Inserted 305 rows into 'land_cover'
✅ Inserted 305 rows into 'natural_potential_vegetation'
✅ Inserted 3258 rows into 'indigenous_land'
✅ Inserted 13468 rows into 'protected_land'
✅ Inserted 1885 rows into 'land_occupation'
✅ No foreign key problems found!
✅ Connection closed properly
