In [1]:
import pandas as pd

In [2]:
# Load the Canadian export records from the project-relative CSV file.
can_export_df = pd.read_csv(
    filepath_or_buffer=r'data/Prices/Canada_export_2013_2023.csv',
)

In [3]:
# Show the loaded export records as the cell's rich output
# (pandas elides the middle rows of a long frame when rendering).
can_export_df

Unnamed: 0,Period,Commodity,Province,Country,State,Value ($),Quantity,Unit of measure
0,01/01/2013,"2601.11.10 - Iron ores concentrated, non-agglo...",Canada,United Kingdom,,4877724,42326,Weight in metric tonne
1,01/01/2016,"2601.12.00 - Iron ores and concentrates, o/t r...",Canada,Australia,,60,0,Weight in metric tonne
2,01/01/2018,2601.20.00 - Roasted iron pyrites (pyrites cin...,Canada,Germany,,19,0,Weight in metric tonne
3,01/01/2019,2620.99.90 - Ash and residues containing metal...,Canada,"Korea, South",,1,0,Weight in kilograms
4,01/01/2021,"2604.00.90 - Nickel ores and concentrates, nes",Canada,"South Africa, Republic of",,200,0,Weight in kilograms
...,...,...,...,...,...,...,...,...
3698,01/01/2024,"2617.90.00 - Ores and concentrates, nes",Canada,China,,314316832,248696575,Weight in kilograms
3699,01/01/2015,2618.00.00 - Granulated slag (slag sand) from ...,Canada,United States,Michigan,6676825,280708000,Weight in kilograms
3700,01/01/2018,2618.00.00 - Granulated slag (slag sand) from ...,Canada,United States,Michigan,5887808,280717000,Weight in kilograms
3701,01/01/2013,2618.00.00 - Granulated slag (slag sand) from ...,Canada,United States,Michigan,4272901,288301000,Weight in kilograms


In [4]:
import pandas as pd
import numpy as np

def compute_price_per_kg_summary(df):
    """
    Compute a $/kg price for each trade record and summarize it by commodity.

    Processing steps:

    1. Normalize 'Unit of measure' (strip whitespace, lower-case) and convert
       'Quantity' to kilograms. Units without a known conversion factor give
       NaN and are therefore dropped in step 3.
    2. Parse 'Period' into datetimes (day-first; unparseable values -> NaT).
    3. Keep only rows with a strictly positive kilogram quantity and a
       non-missing 'Value ($)'.
    4. Compute Price_per_kg = 'Value ($)' / Quantity_kg.
    5. Aggregate per 'Commodity'.

    The caller's dataframe is never modified; all work happens on a copy.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain columns:
        ['Period', 'Commodity', 'Value ($)', 'Quantity', 'Unit of measure']

    Returns
    -------
    df_prices : pd.DataFrame
        Filtered copy of the input (original index labels preserved) in which
        'Unit of measure' is normalized, 'Period' is a datetime column, and
        two columns are added:
        - Quantity_kg (float)
        - Price_per_kg

    summary : pd.DataFrame
        Aggregated by 'Commodity' with columns:
        - mean_price_per_kg  (unweighted mean of per-record ratios, 4 decimals)
        - std_price_per_kg   (sample std, NaN for a single record, 4 decimals)
        - n_records
        - start_date
        - end_date
        - date_range_comment ("<start> to <end>", or "Unknown" if no valid date)

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``df``.
    """
    required_columns = [
        "Period", "Commodity", "Value ($)", "Quantity", "Unit of measure"
    ]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"Missing required column(s): {missing_columns}")

    df = df.copy()

    # --- 1. Clean and standardize units ---
    df["Unit of measure"] = df["Unit of measure"].str.strip().str.lower()

    # Kilograms per one unit of the reported measure.
    unit_factors = {
        "weight in metric tonne": 1000.0,
        "weight in kilograms": 1.0,
        "weight in grams": 0.001,
        "weight in pounds": 0.453592,
    }

    # Vectorized lookup: units absent from the table map to NaN, so the
    # resulting quantity is NaN and the row is removed by the filter below.
    kg_per_unit = df["Unit of measure"].map(unit_factors)
    df["Quantity_kg"] = df["Quantity"] * kg_per_unit

    # --- 2. Convert period to datetime ---
    df["Period"] = pd.to_datetime(df["Period"], errors="coerce", dayfirst=True)

    # --- 3. Filter out invalid values ---
    # NaN > 0 is False, so unknown units and missing quantities drop out here.
    is_valid = df["Quantity_kg"].gt(0) & df["Value ($)"].notna()
    # Explicit copy so the column assignment below writes to an independent
    # frame instead of a slice (avoids SettingWithCopyWarning).
    df = df.loc[is_valid].copy()

    # --- 4. Compute price ratio ---
    df["Price_per_kg"] = df["Value ($)"] / df["Quantity_kg"]

    # --- 5. Summarize by commodity ---
    summary = (
        df.groupby("Commodity")
        .agg(
            mean_price_per_kg=("Price_per_kg", "mean"),
            std_price_per_kg=("Price_per_kg", "std"),
            n_records=("Price_per_kg", "count"),
            start_date=("Period", "min"),
            end_date=("Period", "max"),
        )
        .reset_index()
    )

    # Built with a plain comprehension (the summary has one row per commodity)
    # so an empty summary simply yields an empty column.
    summary["date_range_comment"] = [
        f"{start.date()} to {end.date()}"
        if pd.notna(start) and pd.notna(end)
        else "Unknown"
        for start, end in zip(summary["start_date"], summary["end_date"])
    ]

    # Round for readability
    summary = summary.round({"mean_price_per_kg": 4, "std_price_per_kg": 4})

    return df, summary


In [5]:
# Derive the per-record $/kg table and the per-commodity summary from the
# loaded export records.
df, summary = compute_price_per_kg_summary(
    df=can_export_df,
)

In [6]:
# Show the filtered records returned by compute_price_per_kg_summary,
# including the derived Quantity_kg and Price_per_kg columns.
df

Unnamed: 0,Period,Commodity,Province,Country,State,Value ($),Quantity,Unit of measure,Quantity_kg,Price_per_kg
0,2013-01-01,"2601.11.10 - Iron ores concentrated, non-agglo...",Canada,United Kingdom,,4877724,42326,weight in metric tonne,42326000.0,0.115242
5,2013-01-01,2616.90.90 - Precious metal ores and concentra...,Canada,United States,Nevada,2737,1,weight in kilograms,1.0,2737.000000
6,2013-01-01,2616.90.90 - Precious metal ores and concentra...,Canada,Netherlands,,24,1,weight in kilograms,1.0,24.000000
7,2014-01-01,2620.99.90 - Ash and residues containing metal...,Canada,Madagascar,,16,1,weight in kilograms,1.0,16.000000
8,2015-01-01,2601.20.00 - Roasted iron pyrites (pyrites cin...,Canada,Chile,,82,1,weight in metric tonne,1000.0,0.082000
...,...,...,...,...,...,...,...,...,...,...
3698,2024-01-01,"2617.90.00 - Ores and concentrates, nes",Canada,China,,314316832,248696575,weight in kilograms,248696575.0,1.263857
3699,2015-01-01,2618.00.00 - Granulated slag (slag sand) from ...,Canada,United States,Michigan,6676825,280708000,weight in kilograms,280708000.0,0.023786
3700,2018-01-01,2618.00.00 - Granulated slag (slag sand) from ...,Canada,United States,Michigan,5887808,280717000,weight in kilograms,280717000.0,0.020974
3701,2013-01-01,2618.00.00 - Granulated slag (slag sand) from ...,Canada,United States,Michigan,4272901,288301000,weight in kilograms,288301000.0,0.014821


In [7]:
# Show the per-commodity price statistics (mean, std, record count, date range).
summary

Unnamed: 0,Commodity,mean_price_per_kg,std_price_per_kg,n_records,start_date,end_date,date_range_comment
0,"2601.11.10 - Iron ores concentrated, non-agglo...",0.1351,0.097,194,2013-01-01,2024-01-01,2013-01-01 to 2024-01-01
1,"2601.11.90 - Iron ores not concentrated, non-a...",0.175,0.1348,95,2013-01-01,2024-01-01,2013-01-01 to 2024-01-01
2,"2601.12.00 - Iron ores and concentrates, o/t r...",0.1573,0.0519,250,2013-01-01,2024-01-01,2013-01-01 to 2024-01-01
3,2601.20.00 - Roasted iron pyrites (pyrites cin...,0.2509,0.4451,6,2013-01-01,2019-01-01,2013-01-01 to 2019-01-01
4,2602.00.00 - Manganese ores and conc etc,6.238,12.3893,15,2014-01-01,2024-01-01,2014-01-01 to 2024-01-01
5,"2603.00.10 - Copper ores and concentrates, cop...",29.6705,300.3357,217,2013-01-01,2024-01-01,2013-01-01 to 2024-01-01
6,"2603.00.40 - Copper ores and concentrates, sil...",797.341,208.9266,83,2017-01-01,2024-01-01,2017-01-01 to 2024-01-01
7,"2603.00.50 - Copper ores and concentrates, gol...",62894.6262,18776.4027,83,2017-01-01,2024-01-01,2017-01-01 to 2024-01-01
8,"2603.00.90 - Copper ores and concentrates, nes",25.0965,22.8053,62,2013-01-01,2024-01-01,2013-01-01 to 2024-01-01
9,"2604.00.40 - Nickel ores and concentrates, nic...",19.6335,8.2158,46,2014-01-01,2024-01-01,2014-01-01 to 2024-01-01


In [8]:
# Persist the per-commodity summary next to the source data, without the
# positional index column.
summary.to_csv(
    path_or_buf=r'data/Prices/canada_export_price_per_kg_summary.csv',
    index=False,
)