In [2]:
import pandas as pd
import os

# From BGS (British Geological Survey)

In [4]:
# Load the BGS minerals export for Canada.
df_bgs = pd.read_csv(
    filepath_or_buffer='data/BGS/Minerals_Data_Export_Canada_2020_2023.csv'
)

In [6]:
# Columns of the BGS export that are kept for the analysis.
col_to_keep = [
    'yearbook_table_trans',
    'units',
    'bgs_commodity_trans',
    'erml_group',
    'erml_commodity',
    'year',
    'cgi_commodity_url',
    'quantity',
]

In [7]:
# Restrict the BGS frame to the columns listed in col_to_keep (all rows kept).
df_bgs_can = df_bgs.loc[:, col_to_keep]

In [8]:
# Display the BGS frame restricted to the columns in col_to_keep.
df_bgs_can

Unnamed: 0,yearbook_table_trans,units,bgs_commodity_trans,erml_group,erml_commodity,year,cgi_commodity_url,quantity
0,Mine production of platinum group metals,kilograms (metal content),"platinum group metals, mine",Platinum group metals,"Platinum group metals (mine production, metal ...",2000-01-01,http://resource.geosciml.org/classifier/cgi/co...,6000.0
1,Production of magnesite,tonnes (metric),magnesite,Magnesite,Magnesite and magnesia,2000-01-01,http://resource.geosciml.org/classifier/cgi/co...,165000.0
2,Production of nepheline syenite,tonnes (metric),nepheline syenite,Nepheline syenite,Nepheline syenite,2000-01-01,http://resource.geosciml.org/classifier/cgi/co...,721000.0
3,Production of phosphate rock,tonnes (metric),phosphate rock,Phosphate rock,Phosphate rock,2000-01-01,http://resource.geosciml.org/classifier/cgi/co...,486000.0
4,Production of ferro-alloys,tonnes (metric),ferro-alloys,,,2000-01-01,,3300.0
...,...,...,...,...,...,...,...,...
1284,Production of ferro-alloys,tonnes (metric),ferro-alloys,,,2022-01-01,,7400.0
1285,Mine production of molybdenum,tonnes (metal content),"molybdenum, mine",Molybdenum,"Molybdenum (mine production, metal content)",2022-01-01,http://resource.geosciml.org/classifier/cgi/co...,1146.0
1286,Production of ferro-alloys,tonnes (metric),ferro-alloys,,,2022-01-01,,30000.0
1287,Production of refined copper,tonnes (metric),"copper, refined",Copper,"Copper, refined",2022-01-01,http://resource.geosciml.org/classifier/cgi/co...,278000.0


# From World Mining Data 2024

In [20]:
def get_canada_production_data_wmd(file_path):
    """
    Extract Canada's 2022 production figures from a multi-sheet Excel workbook.

    Every sheet is treated as one commodity (the sheet name is used as the
    commodity name). Each sheet is parsed with its first row skipped, so the
    second row provides the column headers. The first row whose 'Country' value
    contains "Canada" supplies the 'Production 2022' and 'Share in %' values.

    Parameters:
    - file_path (str): Path to the Excel file containing the data.

    Returns:
    - pd.DataFrame: One row per sheet, sorted alphabetically by commodity, with
      columns ['Commodity', 'Production Volume', 'World Share (%)', 'Unit'].
      Sheets without a Canada row get 0 for both numeric columns. 'Unit' is the
      first value of the sheet's 'unit' column, or 'tonnes' when that column is
      absent or the sheet has no data rows.

    Raises:
    - KeyError: If a sheet has no 'Country' column, or has a Canada row but no
      'Production 2022' / 'Share in %' column.
    """
    output_columns = ['Commodity', 'Production Volume', 'World Share (%)', 'Unit']
    default_unit = 'tonnes'
    records = []

    # The context manager guarantees the workbook handle is released, even if
    # one of the sheets fails to parse.
    with pd.ExcelFile(file_path) as excel_data:
        for sheet in excel_data.sheet_names:
            # Skip the first row: the real column headers are on the second one.
            df = excel_data.parse(sheet, skiprows=1)
            canada_row = df[df['Country'].str.contains("Canada", na=False)]

            # A sheet with headers but no rows has no first 'unit' value to read,
            # so it falls back to the default as well.
            if 'unit' in df.columns and not df.empty:
                unit = df['unit'].iloc[0]
            else:
                unit = default_unit

            if canada_row.empty:
                # Canada is not listed for this commodity: report zeros.
                production_volume = 0
                world_share = 0
            else:
                production_volume = float(canada_row['Production 2022'].values[0])
                world_share = float(canada_row['Share in %'].values[0])

            records.append({
                'Commodity': sheet,
                'Production Volume': production_volume,
                'World Share (%)': world_share,
                'Unit': unit,
            })

    # Explicit columns keep the frame well-formed even when no sheet was read.
    canada_production_df = (
        pd.DataFrame(records, columns=output_columns)
        .sort_values(by="Commodity")
        .reset_index(drop=True)
    )

    return canada_production_df

In [22]:
# Location of the World Mining Data workbook (share of world production by country, 2022).
file_path_wmd = (
    'data/WMD/6.5. Share_of_World_Mineral_Production_2022_by_Countries.xlsx'
)

In [23]:
# Build the Canada table from the WMD workbook and discard commodities
# whose production volume is exactly 0.
canada_production_df = (
    get_canada_production_data_wmd(file_path_wmd)
    .loc[lambda wmd: wmd['Production Volume'] != 0]
)
canada_production_df

Unnamed: 0,Commodity,Production Volume,World Share (%),Unit
0,Aluminium,3033966.0,4.408193,metr. t
1,Antimony,2.0,0.002409,metr. t
4,Baryte,50000.0,0.603986,metr. t
10,Cadmium,1800.0,7.825066,metr. t
12,Cobalt,3063.0,1.844847,metr. t
13,Coking Coal,26461240.0,2.53975,metr. t
14,Copper,510782.0,2.298189,metr. t
15,Diamonds (Gem),17728144.0,22.325758,ct
19,Fluorspar,20000.0,0.216534,metr. t
22,Gold,205831.0,6.193863,kg


In [None]:
# Convert every production volume to kilograms.
# Multipliers to kg per unit label. 'tonnes' is the fallback label assigned by
# get_canada_production_data_wmd when a sheet has no 'unit' column, and one
# metric carat ('ct') is 0.2 g.
KG_PER_UNIT = {'metr. t': 1000.0, 'tonnes': 1000.0, 'kg': 1.0, 'ct': 0.0002}

# Per-row multiplier; unit labels missing from KG_PER_UNIT map to NaN.
kg_factor = canada_production_df['Unit'].astype(str).str.strip().map(KG_PER_UNIT)

# Rows whose unit has no kg factor end up as NaN instead of being silently
# stored under a "(kg)" header once the 'Unit' column is dropped.
# A new frame is built (no in-place mutation), so canada_production_df is left
# untouched and the cell can be re-run safely.
canada_production_df_kg = (
    canada_production_df
    .assign(**{'Production Volume': canada_production_df['Production Volume'] * kg_factor})
    .rename(columns={'Production Volume': 'Production Volume (kg)'})
    .drop(columns=['Unit'])
)
canada_production_df_kg

In [None]:
# Write the kg-normalised WMD table to disk.
# to_csv does not create missing folders, so make sure the target directory
# exists before writing (a no-op when it is already there).
wmd_kg_csv_path = r'data/World_mining_data_2024/canada_production_wmd2024_kg.csv'
os.makedirs(os.path.dirname(wmd_kg_csv_path), exist_ok=True)
canada_production_df_kg.to_csv(wmd_kg_csv_path, index=False)

# From USGS Mineral Commodity Summaries 2025 (MCS2025)

In [14]:
# Load the USGS MCS2025 world data table.
df_usgs_mcs = pd.read_csv(
    filepath_or_buffer='data/USGS/Mineral_commodity_summary_2025/MCS2025_World_Data.csv'
)

In [15]:
# Display the USGS MCS2025 world table as loaded from the CSV.
df_usgs_mcs

Unnamed: 0,SOURCE,COMMODITY,COUNTRY,TYPE,UNIT_MEAS,PROD_2023,PROD_EST_ 2024,PROD_NOTES,CAP_2023,CAP_EST_ 2024,CAP_NOTES,RESERVES_2024,RESERVE_NOTES
0,MCS2025,Abrasives,United States,"Plant capacity, fused aluminum oxide",metric tons,,,estimated for 2023,,,,,
1,MCS2025,Abrasives,United States and Canada,"Plant capacity, fused aluminum oxide",metric tons,,,,60000.0,60000.0,,,
2,MCS2025,Abrasives,Australia,"Plant capacity, fused aluminum oxide",metric tons,,,,50000.0,50000.0,,,
3,MCS2025,Abrasives,Austria,"Plant capacity, fused aluminum oxide",metric tons,,,,90000.0,90000.0,,,
4,MCS2025,Abrasives,Brazil,"Plant capacity, fused aluminum oxide",metric tons,,,,50000.0,50000.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1245,MCS2025,Zirconium and Hafnium,Senegal,"Mine production, zirconium, gross weight",thousand metric tons,48.0,60.0,reported,,,,2600,ZrO2 content
1246,MCS2025,Zirconium and Hafnium,Sierra Leone,"Mine production, zirconium, gross weight",thousand metric tons,28.0,20.0,reported,,,,290,ZrO2 content
1247,MCS2025,Zirconium and Hafnium,South Africa,"Mine production, zirconium, gross weight",thousand metric tons,289.0,300.0,reported,,,,5300,ZrO2 content
1248,MCS2025,Zirconium and Hafnium,Other Countries,"Mine production, zirconium, gross weight",thousand metric tons,86.0,110.0,,,,,5700,ZrO2 content


In [18]:
# Keep the rows whose COUNTRY mentions "Canada" (case-insensitive; missing
# country values are treated as non-matching). Combined entries such as
# "United States and Canada" match as well.
df_usgs_mcs_can = df_usgs_mcs.loc[
    lambda mcs: mcs['COUNTRY'].str.contains('Canada', case=False, na=False)
]

In [19]:
# Display the USGS rows whose COUNTRY contains "Canada".
df_usgs_mcs_can

Unnamed: 0,SOURCE,COMMODITY,COUNTRY,TYPE,UNIT_MEAS,PROD_2023,PROD_EST_ 2024,PROD_NOTES,CAP_2023,CAP_EST_ 2024,CAP_NOTES,RESERVES_2024,RESERVE_NOTES
1,MCS2025,Abrasives,United States and Canada,"Plant capacity, fused aluminum oxide",metric tons,,,,60000.0,60000.0,,,
16,MCS2025,Abrasives,United States and Canada,"Plant capacity, silicon carbide",metric tons,,,,,,,,
34,MCS2025,Aluminum,Canada,"Smelter production, aluminum",thousand metric tons,3200.0,3300.0,estimated 2023,3270.0,3270.0,,,
48,MCS2025,Antimony,Canada,"Mine production, antimony content",metric tons,,,,,,,78000.0,
92,MCS2025,Bauxite,Canada,"Refinery production, alumina - calcined equiva...",thousand metric dry tons,1500.0,1500.0,,,,,,
113,MCS2025,Bauxite,Canada,"Mine production, bauxite, dry tons",thousand metric dry tons,,,,,,,,
169,MCS2025,Cadmium,Canada,"Refinery production, estimated",metric tons,1800.0,1700.0,,,,,,
265,MCS2025,Cobalt,Canada,"Mine production, cobalt content, estimated",metric tons,4220.0,4500.0,,,,,220000.0,
279,MCS2025,Copper,Canada,"Mine production, recoverable copper content",thousand metric tons,500.0,450.0,,,,,8300.0,
298,MCS2025,Copper,Canada,"Refinery production, copper",thousand metric tons,315.0,320.0,,,,,,
