In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
pd.options.display.max_rows = 20
pd.options.display.max_columns = 20

This notebook calculates unit-values (value per kg) for each CPC activity per exporter–importer pair, then aggregates per exporter–product to derive:
- **unit_value** = total value ÷ total quantity (sum)
- **mean_unit** = mean(unit_value across importers)
- **min_unit** = min(unit_value across importers)
- **max_unit** = max(unit_value across importers)


# 1. Load data

In [3]:
country_codes_fp = 'country_codes_V202501.csv'
cpc_hs_fp = 'cpc_hs_mapping.xlsx'
baci_fp = 'BACI_HS22_Y2023_V202501.csv'

In [4]:
country_codes = pd.read_csv(country_codes_fp)
country_codes

Unnamed: 0,country_code,country_name,country_iso2,country_iso3
0,4,Afghanistan,AF,AFG
1,8,Albania,AL,ALB
2,12,Algeria,DZ,DZA
3,16,American Samoa,AS,ASM
4,20,Andorra,AD,AND
...,...,...,...,...
233,876,Wallis and Futuna Isds,WF,WLF
234,882,Samoa,WS,WSM
235,887,Yemen,YE,YEM
236,891,Serbia and Montenegro (...2005),CS,SCG


In [5]:
cpc_hs = pd.read_excel(cpc_hs_fp, dtype={'CPC': str, 'HS': str})
cpc_hs

Unnamed: 0,CPC,CPC_description,HS,HS_description
0,1111,"Wheat, seed",100191,"Cereals: wheat and meslin, other than durum wh..."
1,1112,"Wheat, other",100199,"Cereals: wheat and meslin, other than durum wh..."
2,1121,"Maize (corn), seed",100510,"Cereals: maize (corn), seed"
3,1122,"Maize (corn), other",100590,"Cereals: maize (corn), other than seed"
4,113,Rice,100640,"Cereals: rice, broken"
...,...,...,...,...
655,9441,Site remediation and clean-up services,999999,Commodities not specified according to kind
656,971,"Washing, cleaning and dyeing services",999999,Commodities not specified according to kind
657,97110,Coin-operated laundry services,999999,Commodities not specified according to kind
658,9715,Dyeing and colouring services,999999,Commodities not specified according to kind


In [6]:
baci = pd.read_csv(baci_fp, dtype={'i': int, 'j': int, 'k': str, 'v': float, 'q': float})
baci

Unnamed: 0,t,i,j,k,v,q
0,2023,4,20,200290,8.013,5.830
1,2023,4,31,252620,9.808,77.150
2,2023,4,31,680221,33.788,76.655
3,2023,4,31,761510,6.604,2.368
4,2023,4,31,860900,1.500,2.200
...,...,...,...,...,...,...
11232734,2023,894,854,903180,8.144,0.101
11232735,2023,894,858,240120,619.897,99.000
11232736,2023,894,858,630900,0.981,0.446
11232737,2023,894,858,820840,4.743,0.191


# 2. Compute unit value for BACI data

In [7]:
# Sum total value and quantity per exporter and HS
grouped = (
    baci
    .groupby(['i', 'j', 'k'], as_index=False)
    .agg(v_sum=('v', 'sum'), q_sum=('q', 'sum'))
)

In [8]:
# Filter out zero or missing quantity to avoid division by zero
grouped = grouped[grouped['q_sum'] > 0].copy()

In [9]:
grouped['unit_value'] = grouped['v_sum'] / grouped['q_sum']
grouped

Unnamed: 0,i,j,k,v_sum,q_sum,unit_value
0,4,20,200290,8.013,5.830,1.374443
1,4,31,252620,9.808,77.150,0.127129
2,4,31,680221,33.788,76.655,0.440780
3,4,31,761510,6.604,2.368,2.788851
4,4,31,860900,1.500,2.200,0.681818
...,...,...,...,...,...,...
11232734,894,854,903180,8.144,0.101,80.633663
11232735,894,858,240120,619.897,99.000,6.261586
11232736,894,858,630900,0.981,0.446,2.199552
11232737,894,858,820840,4.743,0.191,24.832461


In [10]:
grouped_stats = (
    grouped
    .groupby(['i','k'], as_index=False)
    .agg(
        v_sum=('v_sum', 'sum'),
        q_sum=('q_sum', 'sum'),
        mean_unit=('unit_value', 'mean'),
        min_unit=('unit_value', 'min'),
        max_unit=('unit_value', 'max')
    )
)
grouped_stats['unit_value'] = grouped_stats['v_sum'] / grouped_stats['q_sum']
grouped_stats

Unnamed: 0,i,k,v_sum,q_sum,mean_unit,min_unit,max_unit,unit_value
0,4,010221,15.775,2.600,6.067308,6.067308,6.067308,6.067308
1,4,010619,5.157,2.060,17.577805,2.355610,32.800000,2.503398
2,4,010641,126.005,15.060,8.366866,8.366866,8.366866,8.366866
3,4,020120,1.000,0.216,4.629630,4.629630,4.629630,4.629630
4,4,020130,2.000,0.465,4.301075,4.301075,4.301075,4.301075
...,...,...,...,...,...,...,...,...
561475,894,970510,165.945,3.619,44.974351,4.211864,236.750000,45.853827
561476,894,970522,406.675,13.946,31.225558,1.538462,124.468813,29.160691
561477,894,970529,290.200,6.933,72.624637,2.930769,459.062500,41.857782
561478,894,970539,37.631,1.656,4391.362725,8.754843,8164.000000,22.724034


# 3. Merge CPC-HS mapping with the BACI data

In [11]:
cpc_hs['HS'] = cpc_hs['HS'].astype(str)
grouped_stats['k'] = grouped_stats['k'].astype(str)

In [12]:
grouped_stats = grouped_stats.merge(
    cpc_hs[['CPC', 'HS']],
    left_on='k',
    right_on='HS',
    how='inner'
)
grouped_stats

Unnamed: 0,i,k,v_sum,q_sum,mean_unit,min_unit,max_unit,unit_value,CPC,HS
0,4,100310,348.522,1829.000,0.190553,0.190553,0.190553,0.190553,1151,100310
1,4,100390,296.180,1283.000,0.230850,0.230850,0.230850,0.230850,1152,100390
2,4,100410,883.618,1893.000,0.466782,0.466782,0.466782,0.466782,1171,100410
3,4,100590,52.248,106.000,0.312000,0.124000,0.500000,0.492906,1122,100590
4,4,100640,9.920,24.800,0.400000,0.400000,0.400000,0.400000,113,100640
...,...,...,...,...,...,...,...,...,...,...
51153,894,940370,3.738,5.831,0.789230,0.078065,1.523084,0.641056,38140,940370
51154,894,940410,38.358,57.964,1.790684,0.403077,6.045833,0.661756,3815,940410
51155,894,940610,66.817,45.378,1.339327,0.267841,2.410812,1.472454,3870,940610
51156,894,940690,1495.198,669.982,6.809968,1.569955,11.635500,2.231699,38703,940690


Rows we lost, rows we didn't have in the CPC-HS mapping

# 4. Merge with country codes

In [13]:
merged = grouped_stats.merge(
    country_codes[['country_code', 'country_name', 'country_iso2']],
    left_on='i',
    right_on='country_code',
    how='left'
).rename(columns={'country_name': 'exporter',
                 'country_iso2': 'exporter_iso2'
                 })

In [14]:
merged

Unnamed: 0,i,k,v_sum,q_sum,mean_unit,min_unit,max_unit,unit_value,CPC,HS,country_code,exporter,exporter_iso2
0,4,100310,348.522,1829.000,0.190553,0.190553,0.190553,0.190553,1151,100310,4,Afghanistan,AF
1,4,100390,296.180,1283.000,0.230850,0.230850,0.230850,0.230850,1152,100390,4,Afghanistan,AF
2,4,100410,883.618,1893.000,0.466782,0.466782,0.466782,0.466782,1171,100410,4,Afghanistan,AF
3,4,100590,52.248,106.000,0.312000,0.124000,0.500000,0.492906,1122,100590,4,Afghanistan,AF
4,4,100640,9.920,24.800,0.400000,0.400000,0.400000,0.400000,113,100640,4,Afghanistan,AF
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51153,894,940370,3.738,5.831,0.789230,0.078065,1.523084,0.641056,38140,940370,894,Zambia,ZM
51154,894,940410,38.358,57.964,1.790684,0.403077,6.045833,0.661756,3815,940410,894,Zambia,ZM
51155,894,940610,66.817,45.378,1.339327,0.267841,2.410812,1.472454,3870,940610,894,Zambia,ZM
51156,894,940690,1495.198,669.982,6.809968,1.569955,11.635500,2.231699,38703,940690,894,Zambia,ZM


# 5. csv output

In [15]:
final_df = merged[['CPC', 'HS', 'i', 'exporter_iso2', 'exporter', 'unit_value', 'mean_unit', 'min_unit', 'max_unit']].copy()
final_df

Unnamed: 0,CPC,HS,i,exporter_iso2,exporter,unit_value,mean_unit,min_unit,max_unit
0,1151,100310,4,AF,Afghanistan,0.190553,0.190553,0.190553,0.190553
1,1152,100390,4,AF,Afghanistan,0.230850,0.230850,0.230850,0.230850
2,1171,100410,4,AF,Afghanistan,0.466782,0.466782,0.466782,0.466782
3,1122,100590,4,AF,Afghanistan,0.492906,0.312000,0.124000,0.500000
4,113,100640,4,AF,Afghanistan,0.400000,0.400000,0.400000,0.400000
...,...,...,...,...,...,...,...,...,...
51153,38140,940370,894,ZM,Zambia,0.641056,0.789230,0.078065,1.523084
51154,3815,940410,894,ZM,Zambia,0.661756,1.790684,0.403077,6.045833
51155,3870,940610,894,ZM,Zambia,1.472454,1.339327,0.267841,2.410812
51156,38703,940690,894,ZM,Zambia,2.231699,6.809968,1.569955,11.635500


In [16]:
output_fp = 'ecoinvent_cpc_unit_values.csv'
final_df.to_csv(output_fp, index=False)

# 6. Check units and add conversion factors

In [17]:
import bw2data as bd

In [18]:
bd.projects.set_current('bw25_intro')

In [19]:
bd.databases

Databases dictionary with 11 object(s):
	PV 2035
	PV 2050
	SIB
	biosphere
	ecoinvent-3.10-cutoff
	ei_cutoff_3.10_remind_SSP2-Base_2020 2025-05-01
	ei_cutoff_3.10_remind_SSP2-Base_2050 2025-05-01
	no more fossil fuels
	testing_mining
	testing_organic_RFB
	testing_tailings

In [20]:
db = bd.Database('ecoinvent-3.10-cutoff')

In [21]:
cpc_codes = set(final_df['CPC'].astype(int))
len(cpc_codes)

467

In [22]:
unit_info = {}
for act in db:
    if 'classifications' in act and act['classifications']:
        for cls_type, cls_value in act['classifications']:
            if cls_type == 'CPC':
                try:
                    code = int(cls_value.split(':')[0].strip())
                    if code in cpc_codes:
                        unit = act.get('unit', '').lower()
                        ref_product = act.get('reference product', '').lower()
                        entry = {
                            'name': act['name'],
                            'reference product': ref_product,
                            'unit': unit,
                            'location': act.get('location', ''),
                            'key': act.key
                        }
                        if code not in unit_info:
                            unit_info[code] = []
                        unit_info[code].append(entry)
                except ValueError:
                    continue

In [23]:
unit_info

{12020: [{'name': 'market group for natural gas, high pressure',
   'reference product': 'natural gas, high pressure',
   'unit': 'cubic meter',
   'location': 'Europe without Switzerland',
   'key': ('ecoinvent-3.10-cutoff', '41ef53848a24daa67db2886fbfeb42dd')},
  {'name': 'natural gas, liquefied, import from TT',
   'reference product': 'natural gas, liquefied',
   'unit': 'cubic meter',
   'location': 'CA',
   'key': ('ecoinvent-3.10-cutoff', 'e14a3c9317e5af97a0f804bc0b0899c0')},
  {'name': 'natural gas, liquefied, import from QA',
   'reference product': 'natural gas, liquefied',
   'unit': 'cubic meter',
   'location': 'GB',
   'key': ('ecoinvent-3.10-cutoff', 'a47cd6dd6545a72c1a385d77c3811a74')},
  {'name': 'petroleum and gas production, offshore',
   'reference product': 'natural gas, high pressure',
   'unit': 'cubic meter',
   'location': 'EC',
   'key': ('ecoinvent-3.10-cutoff', '1edc78c831c2500853c52852dfd82bc7')},
  {'name': 'natural gas, high pressure, import from US',
   

# 7. Handling non-kg units for the JSON

In [24]:
conversion_rules = {
    'ngas_highpressure': {
        'unit': 'cubic meter',
        'keyword': 'natural gas, high pressure',
        'factor': 0.735 # kg/m3 | Density stated in EI description
    }, 
    'ngas_liquified': {
        'unit': 'cubic meter',
        'keyword': 'natural gas, liquified',
        'factor': 0.735 # kg/m3 | EI: The reference flow refers to 1 standard cubic meter of natural gas in the gaseous state
    }, 
    'ngas_liquified': {
        'unit': 'cubic meter',
        'keyword': 'natural gas, liquefied',
        'factor': 0.735 # kg/m3 | EI: The reference flow refers to 1 standard cubic meter of natural gas in the gaseous state
    },
    'ngas_lowpressure': {
        'unit': 'cubic meter',
        'keyword': 'natural gas, low pressure',
        'factor': 0.735 # kg/m3 | EI: a density of 0.735kg/m3 for natural gas are applied, in line with the global statistics (referring to standard cubic meters, Sm3, measured at 15°C and 1013 mbar)
    }, 
    'ngas_vented': {
        'unit': 'cubic meter',
        'keyword': 'natural gas, vented',
        'factor': 0.735
    }, 
    'ngas_sweetening': {
        'unit': 'cubic meter',
        'keyword': 'sweetening, natural gas',
        'factor': 0.735
    },
    'biogas': {
        'unit': 'cubic meter',
        'keyword': 'biogas',
        'factor': 0.735
    },
    'electrolyte_nickel': {
        'unit': 'cubic meter',
        'keyword': 'electrolyte, nickel-rich',
        'factor': 1300 # Estimate - More dense than pure water
    },
    'hardwood': {
        'unit': 'cubic meter',
        'keyword': 'hardwood',
        'factor': 800 # Estimate
    },
    'softwood': {
        'unit': 'cubic meter',
        'keyword': 'softwood',
        'factor': 500 # Estimate 
    },
    'timber': {
        'unit': 'cubic meter',
        'keyword': 'timber',
        'factor': 650 # Estimate
    },
    'building_machine': {
        'unit': 'unit',
        'keyword': 'building machine',
        'factor': 7000 # 7000kg, 100% steel
    },
    'cookstove': {
        'unit': 'unit',
        'keyword': 'cookstove',
        'factor': 57.9 # EI: The appliance is 57.9 kg in mass 
    },
    'ng_biomethane': {
        'unit': 'cubic meter',
        'keyword': 'mixed natural gas and biomethane, high pressure',
        'factor': 0.752 # Biomethane density
    },
    'concrete': {
        'unit': 'cubic meter',
        'keyword': 'concrete',
        'factor': 2300 # Estimate 
    },
    'cement': {
        'unit': 'cubic meter',
        'keyword': 'cement',
        'factor': 1500 # Estimate 
    },
    'marine_engine': {
        'unit': 'unit',
        'keyword': 'marine engine',
        'factor':  1000 # EI: per 1000 kg of engine, based on an average marine engine composition.
    },
    'mattress': {
        'unit': 'unit',
        'keyword': 'mattress',
        'factor': 25 #  EI: dimension of 1 m width and 2 m length. Estimate: 1x2x0.25m3 x 50 kg/m3 = 25kg/unit
    },
    'cage': {
        'unit': 'meter',
        'keyword': 'cage',
        'factor': 150 # Estimate from materials in EI
    },
    'greenhouse_glass': {
        'unit': 'square meter-year',
        'keyword': 'greenhouse, glass walls',
        'factor': 1.2 # Estimate from materials in EI
    },
    'condensate_lightoil': {
        'unit': 'cubic meter',
        'keyword': 'condensate from light oil boiler',
        'factor': 1000 # Estimate: watertreatment
    },
    'ultraviolet_lamp': {
        'unit': 'unit',
        'keyword': 'ultraviolet lamp',
        'factor': 0.3824 # Estimate from materials in EI
    },
    'compact_fluorescent_lamp': {
        'unit': 'unit',
        'keyword': 'compact fluorescent lamp',
        'factor': 0.075 # EI: average compact fluorescent lamp which is circa 75 g in mass
    },
    'locomotive': {
        'unit': 'unit',
        'keyword': 'locomotive',
        'factor': 84000 # EI: It has a lifespan of 40 years and a total weight of 84 tons
    },
    'hard_coal': {
        'unit': 'megajoule',
        'keyword': 'hard coal',
        'factor': 0.03184713375 # EI: Low heating value is 31.4 MJ/kg
    },
    'lignite': {
        'unit': 'megajoule',
        'keyword': 'lignite',
        'factor': 0.05128205128 # EI: Low heating value is 19.5 MJ/kg
    },
    'peat_moss': {
        'unit': 'cubic meter',
        'keyword': 'peat moss',
        'factor': 100 # EI: A density of 100 kg/m³ is considered for dry peat moss in loose form
    },
    'strand_board': {
        'unit': 'cubic meter',
        'keyword': 'oriented strand board',
        'factor': 607 # EI
    },
    'energy_feed': {
        'unit': 'megajoule',
        'keyword': 'energy feed, gross',
        'factor': 0.05 # I take a gross average
    },
    'soybean_beverage': {
        'unit': 'litre',
        'keyword': 'soybean beverage',
        'factor': 1 # Estimate
    },
    'coal_gas_mj': {
        'unit': 'megajoule',
        'keyword': 'coal gas',
        'factor': 0.03496503496 # EI: Hard coal coke is assumed to have a low heating value 28.6 MJ/kg and bulk density is 530 kg/m3
    },
    'biomethane_mj': {
        'unit': 'megajoule',
        'keyword': 'biomethane',
        'factor': 0.02150537634 # EI: biomethane, low pressure' is a non-fossil fuel with a calorific value of 46.50 MJ/kg
    },
    'biogas_mj': {
        'unit': 'megajoule',
        'keyword': 'biogas',
        'factor': 0.05059392872 # EI: biogas' is a non-fossil fuel with a net calorific value of 22.73 MJ/m3. Density assumed: 1.15 kg/m3 -> 19.77 MJ/kg 
    },
    'synthetic_gas_mj': {
        'unit': 'megajoule',
        'keyword': 'synthetic gas', # EI: synthetic gas' is a non-fossil fuel with a calorific value of 6.21 MJ/m3. Density is 1.15 kg/Nm3 -> 5.4 MJ/kg
        'factor': 0.18518518518 # 
    },
    'md_fibreboard': {
        'unit': 'cubic meter',
        'keyword': 'medium density fibreboard',
        'factor': 684 # EI: 684 kg/m3
    },
    'fibreboard_soft_latex': {
        'unit': 'cubic meter',
        'keyword': 'fibreboard, soft, latex bonded',
        'factor': 240 # EI: 240 kg/m3
    },
    'fibreboard_soft': {
        'unit': 'cubic meter',
        'keyword': 'fibreboard, soft',
        'factor': 140 # EI: 140 kg/m3
    },
    'fibreboard_hard': {
        'unit': 'cubic meter',
        'keyword': 'fibreboard, hard',
        'factor': 956 # EI: 956 kg/m3
    },
    'plywood ': {
        'unit': 'cubic meter',
        'keyword': 'plywood',
        'factor': 600 # Estimate: 600kg/m3
    },
    'eur_pallet': {
        'unit': 'unit',
        'keyword': 'eur-flat pallet',
        'factor': 22 #  It represents an average pallet with a mass of 22 kg that it is mainly made out of wood.
    },
    'beverage_carton': {
        'unit': 'square meter',
        'keyword': 'beverage carton',
        'factor': 0.25 # Estimate: 250g/m2
    },
    'coke_mj': {
        'unit': 'megajoule',
        'keyword': 'coke',
        'factor': 0.03496503496 # EI: coke' is a fossil fuel with a calorific value of 28.6 MJ/kg
    },
    'window_frame_1.5': {
        'unit': 'square meter',
        'keyword': 'window frame, wood, u=1.5 w/m2k',
        'factor': 80.2 # EI: 1 m2 of visible wooden window frame weighs 80.2 kg
    },
    'door_inner_glasswood': {
        'unit': 'square meter',
        'keyword': 'door, inner, glass-wood',
        'factor': 27.6 # EI: 1 m2 of the wooden inner door weighs 27.6 kg
    },
    'joist_wood': {
        'unit': 'meter',
        'keyword': 'joist, engineered wood',
        'factor': 10 # Estimate: 10kg
    },
    'door_outer_woodglass': {
        'unit': 'square meter',
        'keyword': 'door, outer, wood-glass',
        'factor': 36.5 # EI:  m2 of the aluminium planked massive wood-glass outer door weighs 36.5 kg
    },
    'door_inner_wood': {
        'unit': '',
        'keyword': 'door, inner, wood',
        'factor': 27.6 # EI: 1 m2 of the wooden inner door weighs 27.6 kg
    },
    'diesel_fishingvessel_mj': {
        'unit': 'megajoule',
        'keyword': 'diesel, burned in fishing vessel',
        'factor': 0.02336448598 # EI: diesel' is a fossil fuel with a calorific value of 42.8 MJ/kg
    },
    'uranium_enriched': {
        'unit': 'unit',
        'keyword': 'uranium, enriched',
        'factor': 0.2 # Estimate
    },
    'spent_nuclear_fuel': {
        'unit': 'cubic meter',
        'keyword': 'conditioned spent nuclear fuel',
        'factor': 4000 # Estimate
    },
    'compressed_air_800': {
        'unit': 'cubic meter',
        'keyword': 'compressed air, 800 kpa gauge',
        'factor': 9.5 # Calculated with rho = P/RT -> 9.5 kg/m3
    },
    'compressed_air_1000': {
        'unit': 'cubic meter',
        'keyword': 'compressed air, 1000 kpa gauge',
        'factor': 11.89 # Calculated with rho = P/RT -> 11.89 kg/m3
    },
    'compressed_air_1200': {
        'unit': 'cubic meter',
        'keyword': 'compressed air, 1200 kpa gauge',
        'factor': 14.26 # Calculated with rho = P/RT -> 14.26 kg/m3
    },
    'compressed_air_700': {
        'unit': 'cubic meter',
        'keyword': 'compressed air, 700 kpa gauge',
        'factor': 8.32 # Calculated with rho = P/RT -> 8.32 kg/m3
    },
    'compressed_air_600': {
        'unit': 'cubic meter',
        'keyword': 'compressed air, 600 kpa gauge',
        'factor': 7.13 # Calculated with rho = P/RT -> 7.13 kg/m3
    },
    'meranti': {
        'unit': 'cubic meter',
        'keyword': 'meranti',
        'factor': 675 # Estimate: https://www.wood-database.com/dark-red-meranti/
    },    
    'paranapine': {
        'unit': 'cubic meter',
        'keyword': 'paraná pine',
        'factor': 545 # Estimate: https://www.wood-database.com/parana-pine/
    },  
    'azobe': {
        'unit': 'cubic meter',
        'keyword': 'azobe',
        'factor': 1065 # Estimate: https://www.wood-database.com/ekki/
    },  
    'eucalyptus': {
        'unit': 'cubic meter',
        'keyword': 'eucalyptus',
        'factor': 1130 # https://www.wood-database.com/?s=eucalyptus
    },  
    'synthetic_gas': {
        'unit': 'cubic meter',
        'keyword': 'synthetic gas',
        'factor': 1.15 # EI: Density is 1.15 kg/Nm3
    },  
    'biomethane': {
        'unit': 'cubic meter',
        'keyword': 'biomethane',
        'factor': 0.752 # EI: Density is 0.752 kg/Nm3
    },  
    'bf_gas_mj': {
        'unit': 'megajoule',
        'keyword': 'blast furnace gas',
        'factor': 0.5 # Estimate: 2 MJ/kg
    },  
    'particleboard': {
        'unit': 'cubic meter',
        'keyword': 'particleboard',
        'factor': 680 # EI: 680kg/m³
    },  
    'cladding_softwood': {
        'unit': 'square meter',
        'keyword': 'wood cladding, softwood',
        'factor': 6.9 # EI: For every 1 m2, there is an equivalent of 6.9 kg of wood cladding
    },  
    'door_inner_wood': {
        'unit': 'square meter',
        'keyword': 'door, inner, wood',
        'factor': 27.6 # EI: 1 m2 of the wooden inner door weighs 27.6 kg
    },  
    'low_radioactive_waste': {
        'unit': 'cubic meter',
        'keyword': 'low level radioactive waste',
        'factor': 500 # ESTIMATe
    },  
    'high_radioactive_waste': {
        'unit': 'cubic meter',
        'keyword': 'high level radioactive waste',
        'factor': 4000 # ESTIMATe
    },  
    'green_manure': {
        'unit': 'hectare',
        'keyword': 'green manure',
        'factor': 2300 # EI: The dry matter yield is 2300 kg/ha. 
    },  
    'tailing_uranium': {
        'unit': 'cubic meter',
        'keyword': 'tailing, from uranium milling',
        'factor': 1600 #  ESTIMATE
    },  
    'gluing_mill': {
        'unit': 'unit',
        'keyword': 'gluing mill',
        'factor': 0 # # BACI does not account for infrastructure  
    },  
    'ion-exchanger': {
        'unit': 'unit',
        'keyword': 'ion-exchanger for water treatment',
        'factor': 9900 # Estimate from the materials in EI
    },  
    'solar_glass': {
        'unit': 'square meter',
        'keyword': 'anti-reflex-coating, etching, solar glass',
        'factor': 0.15 # Estimate from the materials in EI
    },  
    'hydraulic_fracturing_fluid': {
        'unit': 'cubic meter',
        'keyword': 'hydraulic fracturing fluid',
        'factor': 1000 # proxy: water
    },  
    'glazing_0.5': {
        'unit': 'square meter',
        'keyword': 'glazing, triple, u<0.5 w/m2k',
        'factor': 30 # EI: 1 m2 visible glazing area has a final weight of 30 kg. 
    },  
    'glazing_11': {
        'unit': 'square meter',
        'keyword': 'glazing, double, u<1.1 w/m2k',
        'factor': 20 # EI: 1 m2 visible glazing area has a final weight of 20 kg.
    },  
    'horticultural_fleece': {
        'unit': 'square meter',
        'keyword': 'horticultural fleece',
        'factor': 0.017 #  EI: he fleece density is 17g/m2
    },  
    'window_16': {
        'unit': 'square meter',
        'keyword': 'window frame, poly vinyl chloride, u=1.6 w/m2k',
        'factor': 94.5 # EI: 1 m2 of visible plastic window frame weighs 94.5 kg. 
    },  
    'container_plastic': {
        'unit': 'unit',
        'keyword': 'container, for collection of post-consumer waste plastic for recycling',
        'factor': 0.02 # Estimate from the materials in EI
    },  
    'used_window_plastic': {
        'unit': 'square meter',
        'keyword': 'used window frame, plastic',
        'factor': 86.53 #  Estimate from the materials in EI
    },  
    'sealing_tape': {
        'unit': 'meter',
        'keyword': 'sealing tape, aluminium/pe, 50 mm wide',
        'factor': 0.057 #  EI: Weight 0.057 kg/m
    },  
    'layered_board': {
        'unit': 'cubic meter',
        'keyword': 'three and five layered board',
        'factor': 600 # Same ESTIMATE as plywood
    },  
    'bev_carton_1L': {
        'unit': 'unit',
        'keyword': 'beverage carton, 1 l',
        'factor': 0.03435 #  EI: The beverage carton's volume is 1 L and its mass is 34.35 g
    },  
    'pe_pipe_75': {
        'unit': 'meter',
        'keyword': 'polyethylene pipe, corrugated, dn 75',
        'factor': 0.33 #  EI: Extruded polyethylene tube with a weight of 0.33 kg/m
    },  
    'pe_pipe_200': {
        'unit': 'meter',
        'keyword': 'polyethylene pipe, dn 200, sdr 41',
        'factor': 3 # EI: Weight 3 kg/m
    },  
    'insulation_400': {
        'unit': 'meter',
        'keyword': 'insulation spiral-seam duct, rockwool, dn 400, 30 mm',
        'factor': 3.3 # EI: Weight 3.3 kg/m
    },  
    'stone_wool_factory': {
        'unit': 'unit',
        'keyword': 'stone wool factory',
        'factor': 0 # BACI does not account for infrastructure  
    },  
    'residual_wood': {
        'unit': 'cubic meter',
        'keyword': 'residual wood, dry',
        'factor': 500 # ESTIMATE  
    }, 
    'dust_multicyclone': {
        'unit': 'unit',
        'keyword': 'dust collector, multicyclone',
        'factor': 1500 #  EI bill of materials: 1500kg steel 
    }, 
    'dust_collector_industrial': {
        'unit': 'unit',
        'keyword': 'dust collector, electrostatic precipitator, for industrial use',
        'factor': 1682 #  EI bill of materials: 1500kg steel + 182kg   
    }, 
    'dust_collector_domestic': {
        'unit': 'unit',
        'keyword': 'dust collector, electrostatic precipitator, for domestic use',
        'factor': 6 # Estimate from EI bill of materials
    }, 
    'gh_walls_roof': {
        'unit': 'square meter-year',
        'keyword': 'greenhouse, plastic walls and roof',
        'factor': 1.5 # Estimate from bill of materials  
    }, 
    'used_window_wood': {
        'unit': 'square meter',
        'keyword': 'used window frame, wood',
        'factor': -77.4 # Estimated from materials in EI   
    }, 
    'used_silencer_315': {
        'unit': 'unit',
        'keyword': 'used silencer steel, dn 315',
        'factor': 20 # EI: Weight of element 20 kg  
    }, 
    'used_air_roof_400': {
        'unit': 'unit',
        'keyword': 'used exhaust air roof hood steel, dn 400',
        'factor': 17 # EI: Weight per element 17 kg
    }, 
    'used_room_overflow': {
        'unit': 'unit',
        'keyword': 'used room-connecting overflow element steel, approx. 40 m3/h',
        'factor': 1.1 #   Weight per element 1.1 kg
    }, 
    'used_silencer_125': {
        'unit': 'unit',
        'keyword': 'used silencer steel, dn 125',
        'factor': 8.3 # EI: Weight of element 8.3 kg
    }, 
    'used_air_intake_370': {
        'unit': 'unit',
        'keyword': 'used outside air intake stainless steel, dn 370',
        'factor': 32 #  Weight per element 32 kg
    }, 
    'used_air_plasticsteel': {
        'unit': 'unit',
        'keyword': 'used exhaust air valve in-wall housing, plastic/steel',
        'factor': 0.5 # Weight per element 0.5 kg 
    }, 
    'used_air_120m': {
        'unit': 'unit',
        'keyword': 'used air distribution terminal panel steel, 120 m3/h',
        'factor': 9.5 # Weight per element 9.5 kg
    }, 
    'used_duct_125': {
        'unit': 'meter',
        'keyword': 'used flexible duct aluminium/pet, dn of 125',
        'factor': 0.205 # Weight 0.205 kg/m
    }, 
    'used_tape_50': {
        'unit': 'meter',
        'keyword': 'used sealing tape aluminium/pe, 50 mm wide',
        'factor': 0.057 # Weight 0.057 kg/m
    }, 
    'zinc_coat_pieces': {
        'unit': 'square meter',
        'keyword': 'zinc coat, pieces',
        'factor': 16.67 # EI: Data given per tonne of coated product had to be transformed dividing it by the mean surface area of 60 m2/t
    }, 
    'zinc_coat_coils': {
        'unit': 'square meter',
        'keyword': 'zinc coat, coils',
        'factor': 15.625 #  Data given per tonne of coated product are transformed by dividing them by the mean surface area of 64 m2/t 
    }, 
    'vessel_25': {
        'unit': 'unit',
        'keyword': 'expansion vessel, 25l',
        'factor': 6 # Estimated from EI materials  
    }, 
    'vessel_80': {
        'unit': 'unit',
        'keyword': 'expansion vessel, 80l',
        'factor': 15.67 # Estimated from EI materials  
    }, 
    'sewage_sludge': {
        'unit': 'cubic meter',
        'keyword': 'sewage sludge',
        'factor': 1000 # Estimate: water   
    }, 
    'sawing_slurry': {
        'unit': 'litre',
        'keyword': ' spent sawing slurry from si-wafer cutting',
        'factor': 1.75 # EI: The specific weight of input slurry 1.75kg/l (wet mass).
    }, 
    'chromium_coat': {
        'unit': 'square meter',
        'keyword': 'hard chromium coat, electroplating, steel substrate, 0.14 mm thickness',
        'factor': 2.7 #   Estimate from EI material inputs
    }, 
    'leachate': {
        'unit': 'cubic meter',
        'keyword': 'leachate',
        'factor': 1000 # estimate  
    }, 
    'wastewater': {
        'unit': 'cubic meter',
        'keyword': 'wastewater',
        'factor': 1000 #   
    }, 
    'coat_steel': {
        'unit': 'square meter',
        'keyword': 'selective coat, stainless steel sheet, black chrome',
        'factor': 8.715 # Estimate from EI material inputs   
    }, 
    'powder_coat_steel': {
        'unit': 'square meter',
        'keyword': 'powder coat, steel',
        'factor': 0.45 # Estimate from EI material inputs 
    },
    'tin_sheet_2mm': {
        'unit': 'square meter',
        'keyword': 'tin plated chromium steel sheet, 2 mm',
        'factor': 15.6 # EI material inputs  
    }, 
    'spiral_steel_125': {
        'unit': 'meter',
        'keyword': 'spiral-seam duct, steel, dn 125',
        'factor': 1.9 # EI: Weight 1.9 kg/m
    }, 
    'spiral_steel_400': {
        'unit': 'meter',
        'keyword': 'spiral-seam duct, steel, dn 400',
        'factor': 6 # Weight 6 kg/m
    }, 
    'powder_coat_aluminium': {
        'unit': 'square meet',
        'keyword': 'powder coat, aluminium sheet',
        'factor': 0.18 # EI material inputs  
    }, 
    'aluminium_wire': {
        'unit': 'meter',
        'keyword': 'aluminium around steel bi-metal stranded cable, 3x3.67mm external diameter wire',
        'factor': 0.07 # EI material inputs   
    }, 
    'window_frame_woodmetal_16': {
        'unit': 'square meter',
        'keyword': 'window frame, wood-metal, u=1.6 w/m2k',
        'factor': 83.4 # EI: 1 m2 of visible wooden window frame weighs 83.4 kg.
    }, 
    'door_outer_woodaluminium': {
        'unit': 'square meter',
        'keyword': 'door, outer, wood-aluminium',
        'factor': 38.8 # EI: 1 m2 of the aluminium planked massive wood outer door weighs 38.8 kg
    }, 
    'window_aluminium_16': {
        'unit': 'square meter',
        'keyword': 'window frame, aluminium, u=1.6 w/m2k',
        'factor': 50.7 # EI: 1 m2 of visible aluminium window frame weighs 50.7 kg.
    }, 
    'cladding_aluminium': {
        'unit': 'square meter',
        'keyword': 'cladding, crossbar-pole, aluminium',
        'factor': 86.7 # EI: The aluminium cladding weighs 86.7 kg per m2.
    }, 
    'storage_tank_organics': {
        'unit': 'unit',
        'keyword': 'liquid storage tank, chemicals, organics',
        'factor': 0 # BACI does not include infrastructure
    },
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # },
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
    # '': {
    #     'unit': '',
    #     'keyword': '',
    #     'factor':  #   
    # }, 
}

In [25]:
entries = []
handled_cpcs = set()
base_handled = set()
matched_activities = set()

for _, row in final_df.iterrows():
    cpc_code = int(row['CPC'])
    base_entry = {
        'supplier': {
            'classification': {'CPC': [cpc_code]},
            'location': row['exporter_iso2'],
            'matrix': 'technosphere'
        },
        'unit_value': float(row['unit_value']),
        'min': float(row['min_unit']),
        'max': float(row['max_unit']),
        'mean': float(row['mean_unit']),
        'unit': 'kilogram'
    }
    entries.append(base_entry)
    base_handled.add(cpc_code)

    matched_any = False
    for entry in unit_info.get(cpc_code, []):
        for rule in conversion_rules.values():
            if rule['unit'] in entry['unit'] and rule['keyword'] in entry['reference product']:
                converted = base_entry.copy()
                converted['unit_value'] = base_entry['unit_value'] * rule['factor']
                converted['min'] = base_entry['min'] * rule['factor']
                converted['max'] = base_entry['max'] * rule['factor']
                converted['mean'] = base_entry['mean'] * rule['factor']
                converted['unit'] = rule['unit']
                converted['supplier'] = converted['supplier'].copy()
                converted['supplier']['reference product'] = entry['reference product']
                converted['supplier']['location'] = entry['location']
                entries.append(converted)
                matched_activities.add(entry['key'])
                matched_any = True
    if matched_any:
        handled_cpcs.add(cpc_code)

In [26]:
# Summary of CPCs where we applied conversion rules
print("✅ CPCs with conversion rules applied:")
for c in sorted(handled_cpcs):
    print(f"- {c}")

# Build unmatched_per_cpc: only activities with non-kg units that no rule matched
unmatched_per_cpc = {}
for cpc_code, acts in unit_info.items():
    # select non-kg activities
    non_kg_acts = [a for a in acts if a['unit'] != 'kilogram']
    if not non_kg_acts:
        continue
    for a in non_kg_acts:
        matched = False
        for rule in conversion_rules.values():
            if rule['unit'] == a['unit'] and rule['keyword'] in a['reference product']:
                matched = True
                break
        if not matched:
            unmatched_per_cpc.setdefault(cpc_code, []).append(a)

# Print only CPCs with truly unmatched activities
print("⚠️  CPCs with non-kg units but no conversion rule:")
for cpc_code, acts in sorted(unmatched_per_cpc.items()):
    print(f"CPC {cpc_code}:")
    for a in acts:
        print(f"  - {a['name']} -> {a['reference product']} [{a['unit']}]" )

✅ CPCs with conversion rules applied:
- 342
- 444
- 448
- 1202
- 3110
- 3120
- 3142
- 3733
- 3741
- 3815
- 3845
- 3870
- 3999
- 4312
- 4391
- 4393
- 4653
- 4951
- 11020
- 11040
- 11050
- 12020
- 17200
- 23319
- 24490
- 31101
- 31102
- 31220
- 31431
- 31432
- 31439
- 31441
- 31449
- 31450
- 31600
- 31700
- 32153
- 33100
- 33370
- 33620
- 33690
- 33720
- 34250
- 34659
- 34663
- 34790
- 35110
- 35499
- 36320
- 36920
- 36990
- 37114
- 37510
- 37540
- 37990
- 38703
- 39270
- 39282
- 39283
- 39310
- 39363
- 39365
- 39370
- 39920
- 39990
- 41117
- 41122
- 41283
- 41533
- 41603
- 42120
- 42190
- 42210
⚠️  CPCs with non-kg units but no conversion rule:
CPC 39920:
  - treatment of spent sawing slurry from Si-wafer cutting -> spent sawing slurry from si-wafer cutting [litre]
  - market for spent sawing slurry from Si-wafer cutting -> spent sawing slurry from si-wafer cutting [litre]
  - treatment of spent sawing slurry from Si-wafer cutting -> spent sawing slurry from si-wafer cutting [litre]
CPC

# 8. JSON output

In [27]:
with open('ecoinvent_cpc_costs.json', 'w') as f:
    json.dump(entries, f, indent=2)
print(f"Exported JSON config with {len(entries)} entries.")

Exported JSON config with 241730 entries.
