In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [5]:
production_file_path = r'data/NRCan/Production, shipments and value of shipments of metallic and non-metallic minerals - 16100022-eng/16100022.csv'
energy_use_file_path = r'data/NRCan/Mining industries, energy consumption by NAICS - 16100029-eng/16100029.csv'

# Production data from 2019 to 2023

Production, shipments and value of shipments of metallic and non-metallic minerals, annual. Provided by NRCan can be dowloaded from [here](https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1610002201)

In [34]:
production_df = pd.read_csv(production_file_path)

In [35]:
production_df

Unnamed: 0,REF_DATE,GEO,DGUID,Products,Variables,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,2019,Canada,2016A000011124,Barite (tonnes),Quantity produced,Tonnes,287,units,0,v1277417550,1.76.1,,x,,,0
1,2019,Canada,2016A000011124,"Total metals recoverable, non-metals and aggre...",Value of shipments,Thousands of dollars,284,units,0,v1277495960,1.1.4,43838530.0,,,,0
2,2019,Canada,2016A000011124,"Metals, recoverable (Thousands of dollars)",Value of shipments,Thousands of dollars,284,units,0,v1277417446,1.2.4,29694819.0,,,,0
3,2019,Atlantic region,2016A00011,Barite (tonnes),Quantity produced,Tonnes,287,units,0,v1277419798,2.76.1,,..,,,0
4,2019,Canada,2016A000011124,"Cobalt, recoverable (tonnes)",Quantity shipped,Tonnes,287,units,0,v1277417465,1.3.2,4365.0,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21147,2023,Nunavut,2016A000262,Limestone (Thousands of dollars),Value of shipments,Thousands of dollars,284,units,0,v1277419390,16.145.4,0.0,,p,,0
21148,2023,Nunavut,2016A000262,Marble (Thousands of dollars),Value of shipments,Thousands of dollars,284,units,0,v1277419391,16.146.4,0.0,,p,,0
21149,2023,Nunavut,2016A000262,Sandstone (Thousands of dollars),Value of shipments,Thousands of dollars,284,units,0,v1277419392,16.147.4,0.0,,p,,0
21150,2023,Nunavut,2016A000262,Shale (Thousands of dollars),Value of shipments,Thousands of dollars,284,units,0,v1277419393,16.148.4,0.0,,p,,0


In [36]:
# Filter for Canada and select only the relevant columns
production_df_clean = production_df[
    (production_df['GEO'] == 'Canada') & (production_df['Variables'] == 'Quantity produced') & (production_df['UOM'] != 'Thousands of dollars') 
][['REF_DATE', 'GEO', 'Products', 'Variables', 'UOM', 'VALUE']]

# Sort products alphabetically
production_df_clean = production_df_clean.sort_values(by='Products').reset_index(drop=True)
production_df_clean.fillna({'VALUE': 0}, inplace=True)
production_df_clean

Unnamed: 0,REF_DATE,GEO,Products,Variables,UOM,VALUE
0,2019,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0
1,2020,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0
2,2021,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0
3,2022,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0
4,2023,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0
...,...,...,...,...,...,...
356,2022,Canada,"Zinc, recoverable (tonnes)",Quantity produced,Tonnes,180256.0
357,2019,Canada,"Zinc, recoverable (tonnes)",Quantity produced,Tonnes,269353.0
358,2020,Canada,"Zinc, recoverable (tonnes)",Quantity produced,Tonnes,246995.0
359,2021,Canada,"Zinc, recoverable (tonnes)",Quantity produced,Tonnes,230187.0


In [37]:
# Lists for categorizing product types
metals = ['cobalt', 'copper', 'gold', 'ilmenite', 'iridium', 'iron', 'lead', 'lithium', 'molybdenum', 'nickel', 'niobium', 
          'palladium', 'platinum', 'platinum group', 'rhodium', 'ruthenium', 'silver', 'spodumene', 'tantalum', 'titanium', 'uranium', 'zinc']
non_metals = ['barite', 'diamonds', 'gemstones', 'graphite', 'gypsum', 'marl', 'mica', 'peat', 'potash', 
              'potassium', 'pumice', 'salt', 'serpentine', 'soapstone', 'sodium', 'wollastonite', 'zeolite']
aggregates = ['clay', 'lime', 'granite', 'magnesite', 'marble', 'nepheline', 'sand', 'slate', 'stone']

# Add the 'TYPE' column based on product keywords
def categorize_product(product_name):
    product_name = product_name.lower()
    if any(metal in product_name for metal in metals):
        return 'Metals'
    elif any(non_metal in product_name for non_metal in non_metals):
        return 'Non_metals'
    elif any(aggregate in product_name for aggregate in aggregates):
        return 'Aggregates'
    return 'Other'

In [38]:
# Apply the categorization function
production_df_clean['TYPE'] = production_df_clean['Products'].apply(categorize_product)

In [39]:
production_df_clean

Unnamed: 0,REF_DATE,GEO,Products,Variables,UOM,VALUE,TYPE
0,2019,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0,Non_metals
1,2020,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0,Non_metals
2,2021,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0,Non_metals
3,2022,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0,Non_metals
4,2023,Canada,Barite (tonnes),Quantity produced,Tonnes,0.0,Non_metals
...,...,...,...,...,...,...,...
356,2022,Canada,"Zinc, recoverable (tonnes)",Quantity produced,Tonnes,180256.0,Metals
357,2019,Canada,"Zinc, recoverable (tonnes)",Quantity produced,Tonnes,269353.0,Metals
358,2020,Canada,"Zinc, recoverable (tonnes)",Quantity produced,Tonnes,246995.0,Metals
359,2021,Canada,"Zinc, recoverable (tonnes)",Quantity produced,Tonnes,230187.0,Metals


In [40]:
production_df_clean.to_csv(r'data/NRCan/Cleaned/qty_produced_2019_2023.csv', index=False)

# Energy consumption 

Data on the quantity of energy purchased and the energy expenses are presented at the national level, by energy source (electricity, heavy fuel oil, diesel, natural gas, etc.) and by North American Industry Classification System (NAICS). Data provided by NRCan, it can be dowloaded from [here](https://open.canada.ca/data/en/dataset/85727787-6faf-4fdd-865e-b80ee9760753)

In [29]:
energy_df = pd.read_csv(energy_use_file_path)

In [31]:
# Filter data for Canada, physical units, and keep relevant columns
energy_df_clean = energy_df[
    (energy_df['GEO'] == 'Canada') & (energy_df['UOM'] != 'Thousands of dollars')
][['REF_DATE', 'GEO', 'North American Industry Classification System (NAICS)', 'Energy types', 'UOM', 'VALUE']]

energy_df_clean

Unnamed: 0,REF_DATE,GEO,North American Industry Classification System (NAICS),Energy types,UOM,VALUE
1,2019,Canada,Metal ore mining [2122],"Coal, quantity purchased (tonnes)",Tonnes,207878.0
3,2019,Canada,Metal ore mining [2122],"Petroleum coke, quantity purchased (tonnes)",Tonnes,0.0
5,2019,Canada,Metal ore mining [2122],"Coke, quantity purchased (tonnes)",Tonnes,133875.0
7,2019,Canada,Metal ore mining [2122],"Natural gas, quantity purchased (cubic metres)",Cubic metres,120071766.0
9,2019,Canada,Metal ore mining [2122],"Gasoline - aviation, quantity purchased (litres)",Litres,13440643.0
...,...,...,...,...,...,...
3661,2022,Canada,"Shale, clay and refractory mineral mining and ...","Butane, quantity purchased (litres)",Litres,0.0
3663,2022,Canada,"Shale, clay and refractory mineral mining and ...","Electricity, quantity purchased (kilowatt-hour)",Kilowatt-hours,
3665,2022,Canada,"Shale, clay and refractory mineral mining and ...","Steam, quantity purchased (gigajoules)",Gigajoules,0.0
3667,2022,Canada,"Shale, clay and refractory mineral mining and ...","Wood, quantity purchased (metric tonnes)",Metric tonnes,0.0


In [41]:
energy_df_clean.to_csv(r'data/NRCan/Cleaned/energy_consumption_2019_2023.csv', index=False)

In [33]:
energy_df_clean['North American Industry Classification System (NAICS)'].unique()

array(['Metal ore mining [2122]', 'Iron ore mining [21221]',
       'Gold and silver ore mining [21222]',
       'Copper, nickel, lead and zinc ore mining [21223]',
       'Lead-zinc ore mining [212231]',
       'Nickel-copper ore mining [212232]',
       'Copper-zinc ore mining [212233]',
       'Other metal ore mining [21229]', 'Uranium ore mining [212291]',
       'All other metal ore mining [212299]',
       'Non-metallic mineral mining (excluding aggregates, clay and refractory mineral mining and quarrying) [2123]',
       'Other non-metallic mineral mining and quarrying [21239]',
       'Diamond mining [212392]', 'Salt mining [212393]',
       'Gypsum mining [212395]', 'Potash mining [212396]',
       'Peat extraction [212397]',
       'All other non-metallic mineral mining and quarrying [212398]',
       'Aggregates, clay and refractory mineral mining and quarrying (excluding non-metallic mineral mining) [2123]',
       'Stone mining and quarrying [21231]',
       'Granite minin

Possible to compute energy recipe (e.g. production/energy consumption) for iron, diamond, potash. For others, the co-production or by-production makes it difficult to know.