# Change the ecoinvent version of the mapping file

In [1]:
import pandas as pd
import bw2data as bd
from mescal import *
import itertools

In [2]:
# Select the Brightway project used by the rest of this notebook.
bd.projects.set_current("ecoinvent3.9.1")

In [3]:
def load_change_report_annex(v_from, v_to):
    """Load the 'Qualitative Changes' sheet of an ecoinvent change report annex.

    The workbook ``data/Change Report Annex v<v_from> - v<v_to>.xlsx`` is read,
    keeping only the reference product, unit, activity name and geography
    columns of both versions plus the "has been deleted" flag.

    Columns of ``v_from`` are renamed to 'Reference Product', 'Unit',
    'Activity Name' and 'Geography'; columns of ``v_to`` get the same names
    with a ' - new' suffix; the deletion flag becomes 'Deleted'.

    Args:
        v_from: version string of the older ecoinvent release (e.g. '3.8').
        v_to: version string of the newer ecoinvent release (e.g. '3.9').

    Returns:
        The renamed DataFrame, with two extra columns 'Version from' and
        'Version to' holding ``v_from`` and ``v_to``.
    """
    # Workbook column prefix -> short name used in the rest of the notebook.
    base_names = {
        'Reference Product': 'Reference Product',
        'Reference Product Unit': 'Unit',
        'Activity Name': 'Activity Name',
        'Geography': 'Geography',
    }

    renamed = {}
    for source, target in base_names.items():
        renamed[f'{source} - {v_from}'] = target
    for source, target in base_names.items():
        renamed[f'{source} - {v_to}'] = f'{target} - new'
    renamed[f'Dataset in version {v_from} has been deleted'] = 'Deleted'

    report = pd.read_excel(
        f"data/Change Report Annex v{v_from} - v{v_to}.xlsx",
        sheet_name="Qualitative Changes",
        usecols=list(renamed),
    )
    report = report.rename(columns=renamed)
    report['Version from'] = v_from
    report['Version to'] = v_to
    return report

In [4]:
# Load one change report per version step on the way from ecoinvent 3.8 to 3.9.1.
change_report_38_39 = load_change_report_annex('3.8', '3.9')
change_report_39_391 = load_change_report_annex('3.9', '3.9.1')
#change_report_391_310 = load_change_report_annex('3.9.1', '3.10')
# Mapping file to be migrated (its file name refers to ecoinvent 3.8).
mapping = pd.read_csv("energyscope_data/CA-QC/mapping_3.8_linked.csv")

## Changing database names

In [5]:
def change_ei_version_napping_file(row, version_from, version_to):
    """Rewrite the ``Database`` entry of one mapping row for a new ecoinvent version.

    Rows whose database name ends with '_comp_CA-QC' (the complementary
    database) are redirected to a truck database chosen from the activity
    name. For every other row, ``version_from`` is replaced by ``version_to``
    inside the database name.

    Args:
        row: mapping row exposing ``Database``, ``Activity`` and ``Name``.
        version_from: version substring to look for in the database name.
        version_to: version substring to put in its place.

    Returns:
        The same ``row`` object, modified in place.

    Raises:
        ValueError: if a complementary-database row is neither an
            'urban delivery' nor a 'regional delivery' activity.
    """
    comp_suffix = '_comp_CA-QC'

    # Regular databases: only the version string in the name changes.
    if row.Database[-len(comp_suffix):] != comp_suffix:
        row.Database = row.Database.replace(version_from, version_to)
        return row

    # Complementary database: pick the truck database from the activity name
    # ('urban delivery' is tested first, as in the original ordering).
    for delivery_kind in ('urban delivery', 'regional delivery'):
        if delivery_kind in row.Activity:
            row.Database = f'{delivery_kind}_truck'
            return row

    raise ValueError(f"Unknown truck database for {row.Name}")

In [6]:
# Rename the database of every mapping row (axis=1: one call per row), going from 3.8 to 3.9.1.
mapping = mapping.apply(change_ei_version_napping_file, axis=1, args=('3.8', '3.9.1'))

## Changing the ecoinvent version

In [7]:
# Stack the 3.8 -> 3.9 and 3.9 -> 3.9.1 reports into a single table.
# The original row labels are kept, so index labels are not unique at this point.
change_report = pd.concat([change_report_38_39, change_report_39_391])

In [8]:
# Remove rows that are identical in every column. The two reports carry different
# 'Version from' / 'Version to' values, so only duplicates within one report are removed.
change_report = change_report.drop_duplicates()

In [9]:
def _split_multi_value(value, count):
    """Split a semicolon/newline-separated cell, or return ``count`` 'nan' strings if it is missing."""
    if str(value) == 'nan':
        return ['nan'] * count
    return value.split(';\n')


def handle_multi_processes_ecoinvent(df):
    """Expand multi-product rows of a change report into one row per product.

    A row whose 'Reference Product' contains ';' lists several products
    separated by ';' followed by a newline. Such a row is replaced by one row
    per product, pairing up the entries of 'Reference Product', 'Unit',
    'Reference Product - new' and 'Unit - new' position by position. When a
    '- new' cell is missing (NaN), the string 'nan' is used for each product.
    All other columns are copied unchanged, and rows without ';' are kept as
    they are.

    Values are assigned by column name, so the result is correct whatever the
    column order of ``df`` is, and the output is built in a single pass
    instead of being grown row by row.

    Args:
        df: change report with at least the four columns named above.

    Returns:
        A new DataFrame with the same columns (in the same order) as ``df``
        and a fresh 0..n-1 index.
    """
    expanded = []

    for record in df.to_dict(orient='records'):
        if ';' not in str(record['Reference Product']):
            expanded.append(record)
            continue

        products = record['Reference Product'].split(';\n')
        units = record['Unit'].split(';\n')
        new_products = _split_multi_value(record['Reference Product - new'], len(products))
        new_units = _split_multi_value(record['Unit - new'], len(units))

        # zip stops at the shortest list, so entries without a counterpart in
        # every list are dropped (unchanged from the previous behaviour).
        for product, unit, new_product, new_unit in zip(products, units, new_products, new_units):
            expanded.append({
                **record,
                'Reference Product': product,
                'Unit': unit,
                'Reference Product - new': new_product,
                'Unit - new': new_unit,
            })

    return pd.DataFrame(expanded, columns=df.columns)

In [10]:
# Split rows listing several reference products into one row per product.
change_report = handle_multi_processes_ecoinvent(change_report)

In [11]:
# Renumber rows 0..n-1; the next cell appends rows at label len(change_report).
change_report = change_report.reset_index(drop=True)

In [12]:
# Changes whose location is 'GLO' both before and after the update.
df = change_report[
    (change_report['Geography'] == 'GLO')
    & (change_report['Geography - new'] == 'GLO')
]

# Add a new row for the same activity but with RoW as location.
# The two geography columns are addressed by name rather than by position, and
# the rows are appended in one step with a fresh 0..n-1 index, so nothing is
# overwritten even if the index of change_report was not reset beforehand.
row_changes = df.copy()
row_changes[['Geography', 'Geography - new']] = 'RoW'
change_report = pd.concat([change_report, row_changes], ignore_index=True)

In [13]:
# Only keep rows with changes in reference product, activity name or geography.
# A row counts as unchanged when all three '- new' values equal the old ones
# (a missing new value never compares equal, so such rows are kept).
unchanged = (
    change_report['Reference Product - new'].eq(change_report['Reference Product'])
    & change_report['Activity Name - new'].eq(change_report['Activity Name'])
    & change_report['Geography - new'].eq(change_report['Geography'])
)
change_report = change_report.drop(change_report[unchanged].index)

In [14]:
# Distinct [reference product, activity name, geography] triples that appear in the change report.
changed_activities = list(map(
    list,
    set(map(tuple, change_report[['Reference Product', 'Activity Name', 'Geography']].values.tolist())),
))

In [15]:
def update_mapping_file(df: pd.DataFrame, unit_to_change: list = None) -> tuple[pd.DataFrame, int, list]:
    """Apply one pass of the ecoinvent change report to a mapping table.

    Each row of ``df`` whose (product, activity, location) triple is listed in
    the module-level ``changed_activities`` gets its 'Product', 'Activity' and
    'Location' replaced by the first matching entry of the module-level
    ``change_report``. A 'CAZ' location is looked up as 'RoW'. Every other
    column, and every row without a match, is copied unchanged.

    Args:
        df: mapping table with at least the columns 'Name', 'Type',
            'Product', 'Activity' and 'Location'.
        unit_to_change: list collecting unit changes from earlier passes;
            a new list is created when omitted.

    Returns:
        A tuple ``(updated_df, counter, unit_to_change)``: the updated mapping
        (same columns as ``df``, index 0..n-1), the number of rows changed in
        this pass, and the list extended with one entry
        ``[(name, type), (product, activity, location), unit, new_unit]`` per
        changed row whose unit differs between the two versions.

    Raises:
        ValueError: if a matched activity has no new activity name and is
            flagged as deleted in the change report.
    """
    if unit_to_change is None:
        unit_to_change = []

    # Hash-based lookup instead of scanning the list for every mapping row.
    changed = {tuple(key) for key in changed_activities}
    report_columns = ['Activity Name - new', 'Reference Product - new', 'Geography - new',
                      'Unit', 'Unit - new', 'Deleted']

    updated_rows = []
    counter = 0

    for row in df.to_dict(orient='records'):
        activity_name = row['Activity']
        activity_prod = row['Product']
        activity_geo = row['Location']
        tech_name = row['Name']
        tech_type = row['Type']

        if activity_geo in ['CAZ']:
            activity_geo = 'RoW'

        if (activity_prod, activity_name, activity_geo) not in changed:
            updated_rows.append(row)
            continue

        counter += 1
        # Several report rows may match; the first one is used.
        activity_name_new, activity_prod_new, activity_geo_new, unit, unit_new, deleted = change_report[
            (change_report['Reference Product'] == activity_prod)
            & (change_report['Activity Name'] == activity_name)
            & (change_report['Geography'] == activity_geo)
        ][report_columns].iloc[0]

        if unit != unit_new:
            print(f"WARNING: unit changed for {activity_prod} - {activity_name} - {activity_geo}")
            unit_to_change.append([(tech_name, tech_type), (activity_prod, activity_name, activity_geo), unit, unit_new])

        # The check must look at the NEW activity name: the old one matched the
        # report and therefore can never be missing.
        if str(activity_name_new) == 'nan' and deleted == 1:
            raise ValueError(f"Activity {activity_prod} - {activity_name} - {activity_geo} has been deleted in the last ecoinvent version and should be replaced.")

        # Update by column name so that extra or reordered columns are preserved.
        updated_rows.append({
            **row,
            'Product': activity_prod_new,
            'Activity': activity_name_new,
            'Location': activity_geo_new,
        })

        print(tech_name, tech_type)
        print(f"Old: {activity_prod} - {activity_name} - {activity_geo}")
        print(f"New: {activity_prod_new} - {activity_name_new} - {activity_geo_new}")

    updated_df = pd.DataFrame(updated_rows, columns=df.columns)
    return updated_df, counter, unit_to_change

In [16]:
# First pass over the mapping loaded above.
updated_mapping, counter, unit_to_change = update_mapping_file(mapping)
# Keep re-applying the change report until a pass changes no row (counter == 0);
# presumably this follows activities that change in both version steps - confirm.
# unit_to_change is passed back in so unit changes accumulate across passes.
while counter > 0:
    updated_mapping, counter, unit_to_change = update_mapping_file(updated_mapping, unit_to_change)
    print(counter)

BATTERY Construction
Old: battery, Li-ion, rechargeable, prismatic - market for battery, Li-ion, rechargeable, prismatic - GLO
New: battery, Li-ion, LiMn2O4, rechargeable, prismatic - market for battery, Li-ion, LiMn2O4, rechargeable, prismatic - GLO
CEMENT_PROD Operation
Old: cement, alternative constituents 21-35% - cement production, alternative constituents 21-35% - RoW
New: cement, CEM II/B - cement production, CEM II/B - RoW
CEMENT_PROD_HP Operation
Old: cement, alternative constituents 21-35% - cement production, alternative constituents 21-35% - RoW
New: cement, CEM II/B - cement production, CEM II/B - RoW
ELEC_STO Construction
Old: battery, Li-ion, rechargeable, prismatic - market for battery, Li-ion, rechargeable, prismatic - GLO
New: battery, Li-ion, LiMn2O4, rechargeable, prismatic - market for battery, Li-ion, LiMn2O4, rechargeable, prismatic - GLO
NG_COMP_HE Operation
Old: natural gas, high pressure - market for natural gas, high pressure - CA-QC
New: natural gas, high pr

## Verification and saving of the mapping file

In [17]:
# Name of the database against which the updated mapping is verified below.
name_premise_db = 'ecoinvent_cutoff_3.9.1_remind_SSP2-Base_2020_with_CPC'

In [18]:
# Load the database named above. load_extract_db is not defined in this notebook
# (presumably provided by `from mescal import *`); create_pickle=True presumably
# caches the extracted database on disk - TODO confirm against mescal.
db = load_extract_db(name_premise_db, create_pickle=True)

In [19]:
# Build a lookup of the database; the verification below tests membership with
# (activity name, product, location, database) tuples, so the keys are assumed
# to have that shape - TODO confirm against database_list_to_dict.
db_dict_name = database_list_to_dict(db, 'name')

In [20]:
# Verification: every mapping row pointing at name_premise_db must exist in that
# database. Rows pointing at any other database are not checked.
unlinked = []
for activity_name, activity_prod, activity_loc, activity_database in zip(
    updated_mapping['Activity'],
    updated_mapping['Product'],
    updated_mapping['Location'],
    updated_mapping['Database'],
):
    if activity_database != name_premise_db:
        continue

    key = (activity_name, activity_prod, activity_loc, activity_database)
    if key not in db_dict_name:
        unlinked.append(key)
        print(f'Cant find {activity_name} - {activity_prod} - {activity_loc} - {activity_database}')

In [21]:
# Write the new mapping file only when the verification found no missing activity.
if not unlinked:
    updated_mapping.to_csv("energyscope_data/CA-QC/mapping_3.9.1.csv", index=False)

# Adapt the unit conversion file corresponding to the new mapping file

In [23]:
# Unit conversion factors that go with the old mapping (file name refers to ecoinvent 3.8).
unit_conversion = pd.read_csv("energyscope_data/CA-QC/unit_conversion_3.8.csv")

In [24]:
# Conversion factors for the technologies whose LCA unit is known to change.
known_conversions = {
    ('OCGT_LARGE', 'Operation'): 3.6e6 * 2.5,  # 2.5 MWh NG per MWh electricity
    ('OCGT_SMALL', 'Operation'): 3.6e6 * 2.5,  # 2.5 MWh NG per MWh electricity
}

for tech, _activity, old_unit, new_unit in unit_to_change:
    tech_name, tech_type = tech
    is_tech = (unit_conversion.Name == tech_name) & (unit_conversion.Type == tech_type)

    current_units = unit_conversion.loc[is_tech, ['From', 'To']]
    if current_units.empty:
        raise ValueError(f"No unit conversion found for {tech_name} - {tech_type}")
    unit_esm, unit_lca = current_units.values[0]

    if unit_lca != old_unit:
        raise ValueError(f'LCA unit for {tech_name} - {tech_type} is not the same as the one in the mapping file. {unit_lca} != {old_unit}')

    if tech not in known_conversions:
        raise ValueError(f"Unknown unit conversion for {tech}")
    new_value = known_conversions[tech]

    # Delete the current row, then renumber the index. Without the renumbering,
    # len(unit_conversion) is still an existing label after the drop and the
    # assignment below would overwrite an unrelated row instead of appending.
    unit_conversion = unit_conversion[~is_tech].reset_index(drop=True)

    # Add the new row.
    # NOTE(review): the row is written positionally as
    # [Name, Type, value, new LCA unit, ESM unit], while the current units are
    # read by label with 'From' as the ESM unit and 'To' as the LCA unit -
    # confirm that the column order of the CSV matches this.
    unit_conversion.loc[len(unit_conversion)] = [tech_name, tech_type, new_value, new_unit, unit_esm]

In [25]:
# Save the conversion table for the new mapping, ordered by technology name and type.
unit_conversion.sort_values(by=['Name', 'Type']).to_csv("energyscope_data/CA-QC/unit_conversion_3.9.1.csv", index=False)