# Consolidating several mappings

In [1]:
import pandas as pd
import numpy as np
import bw2data as bd
from mescal import *

In [2]:
bd.projects.set_current('ei3.8-mescal')

In [3]:
ES_region = 'CA-QC' # 'CH' or 'CA-QC'

In [4]:
tech_CH = pd.read_csv('energyscope_data/CA-QC/hidden/tech_CH.csv') # mapping from ecoinvent 3.8 for CH
tech_QC = pd.read_csv('energyscope_data/CA-QC/hidden/tech_QC.csv') # mapping from ecoinvent 3.8 and premise specific for QC
comp_CH = pd.read_excel('energyscope_data/CA-QC/hidden/techno_compositions_CH.xlsx') # list of compositions of technologies with premise mapping for CH
comp_QC = pd.read_excel('energyscope_data/CA-QC/hidden/techno_compositions_QC.xlsx') # list of compositions of technologies with premise mapping for QC
# dict_ES = pd.read_csv('energyscope_data/CA-QC/hidden/Technology_Dictionary_v2.csv')
# region_tech_ES = pd.read_excel('energyscope_data/CA-QC/hidden/Technologies_ES_version.xlsx')
layers_in_out_CH = pd.read_csv(f"energyscope_data/CA-QC/hidden/layers_in_out_CH.csv")
layers_in_out_QC = pd.read_csv(f"energyscope_data/CA-QC/hidden/layers_in_out_QC.csv")

In [5]:
assumptions_diff_CH = pd.read_excel('energyscope_data/CA-QC/hidden/assumptions_diff_CH.xlsx')
assumptions_diff_QC = pd.read_excel('energyscope_data/CA-QC/hidden/assumptions_diff_QC.xlsx')
efficiency = pd.read_csv(f"energyscope_data/CA-QC/hidden/efficiency_{ES_region[-2:]}.csv")
mob_model_private_CH = pd.read_csv(
    "energyscope_data/CA-QC/hidden/MODELS_OF_TECHNOLOGIES_OF_PRIVATEMOB_ALL_DISTANCES_CH.csv", sep=',')
mob_model_private_QC = pd.read_csv(
    "energyscope_data/CA-QC/hidden/MODELS_OF_TECHNOLOGIES_OF_PRIVATEMOB_ALL_DISTANCES_QC.csv", sep=',')
mob_model_public_QC = pd.read_csv(
    "energyscope_data/CA-QC/hidden/MODELS_OF_TECHNOLOGIES_OF_PUBLICMOB_ALL_DISTANCES_QC.csv", sep=',')
mob_model_freight_QC = pd.read_csv(
    "energyscope_data/CA-QC/hidden/MODELS_OF_TECHNOLOGIES_OF_FREIGHTMOB_ALL_DISTANCES_QC.csv", sep=',')

In [6]:
# # Allows to keep formulas in Excel files
# from openpyxl import load_workbook
# wb_CH = load_workbook(filename = 'energyscope_data/hidden/tech_unit_conversion_CH.xlsx')
# unit_conv_CH = pd.DataFrame(wb_CH[wb_CH.sheetnames[0]].values)
# wb_QC = load_workbook(filename = 'energyscope_data/hidden/tech_unit_conversion_QC.xlsx')
# unit_conv_QC = pd.DataFrame(wb_QC[wb_QC.sheetnames[0]].values)

In [7]:
# # setting first row as header
# new_header_CH = unit_conv_CH.iloc[0]
# unit_conv_CH = unit_conv_CH[1:]
# unit_conv_CH.columns = new_header_CH
# new_header_QC = unit_conv_QC.iloc[0]
# unit_conv_QC = unit_conv_QC[1:]
# unit_conv_QC.columns = new_header_QC

In [8]:
unit_conv_CH = pd.read_excel('energyscope_data/CA-QC/hidden/tech_unit_conversion_CH.xlsx')
unit_conv_QC = pd.read_excel('energyscope_data/CA-QC/hidden/tech_unit_conversion_QC.xlsx')

In [9]:
if 'Validation' in tech_CH.columns:
    tech_CH.drop(columns='Validation', inplace=True)
if 'Validation' in tech_QC.columns:
    tech_QC.drop(columns='Validation', inplace=True)

# Model file

In [10]:
# Reshape the wide ES-QC layers table into long format: the `ES_name` column becomes `Flow`,
# the remaining column headers become `Name`, and the cell values become `Amount`
# (zero amounts are discarded).
# The long table is stored under the same variable name because later cells read its `Name`
# column. The reshape is therefore guarded: once `ES_name` has been renamed, the frame is
# already in long format and re-running this cell must not try to melt it a second time.
if 'ES_name' in layers_in_out_QC.columns:
    layers_in_out_QC = (
        layers_in_out_QC
        .melt(id_vars=['ES_name'], value_vars=layers_in_out_QC.columns[1:])
        .loc[lambda long_df: long_df['value'] != 0]
        .rename(columns={'ES_name': 'Flow', 'variable': 'Name', 'value': 'Amount'})
    )

# Select the model table of the region being processed
if ES_region == 'CA-QC':
    model = layers_in_out_QC
elif ES_region == 'CH':
    model = layers_in_out_CH
else:
    raise ValueError('ES_region should be either CH or CA-QC')
model[['Name', 'Flow', 'Amount']].to_csv(f'energyscope_data/{ES_region}/model.csv', index=False)

## Mapping file

In [11]:
len(tech_CH.ES_name.unique())

267

### QC

We start from the consolidated CH mapping, add or replace the entries found in the additional `tech_QC` mapping, and then filter out the technologies that exist only for CH, using the list of technologies from ES-QC.

In [12]:
# region_tech_ES.dropna(subset=['ES_version'], inplace=True) # OTHER_BIOMASS to remove

In [13]:
# list_tech_QC = list(region_tech_ES[region_tech_ES.ES_version.str.contains('CA')].tech_name)
list_tech_QC = list(layers_in_out_QC.Name.unique())

In [14]:
# Distinct sub-component names appearing anywhere in the CH compositions (all columns but the first)
sub_comp_CH = list({name for row in comp_CH.iloc[:, 1:].values.tolist() for name in row})
# Distinct sub-component names appearing anywhere in the QC compositions (all columns but the first)
sub_comp_QC = list({name for row in comp_QC.iloc[:, 1:].values.tolist() for name in row})

In [15]:
def replace_mob_submodel_by_model_CH(row):
    """Map a CH private-mobility sub-model name back to its main technology.

    ``row.ES_name`` is looked up in the ``Model_1`` column of ``mob_model_private_CH`` and
    then in its ``Model_2`` column. On the first hit, the ``Main_tech`` value of the first
    matching row is returned. A name found in neither column is returned unchanged.
    """
    for model_col in ('Model_1', 'Model_2'):
        if row.ES_name in list(mob_model_private_CH[model_col]):
            hits = mob_model_private_CH[mob_model_private_CH[model_col] == row.ES_name]
            return hits.Main_tech.iloc[0]
    return row.ES_name

In [16]:
def replace_mob_model_names(row):
    """Return the harmonised name for ``row.ES_name``.

    Five mobility technology names are renamed through a fixed lookup table; every other
    name is returned unchanged.
    """
    renamed = {
        'CAR_HY_GASOLINE': 'CAR_HEV',
        'CAR_PHEV_GASOLINE': 'CAR_PHEV',
        'TRAIN_FREIGHT': 'TRAIN_FREIGHT_ELEC',
        'TRAIN_FREIGHT_LOC': 'TRAIN_FREIGHT_ELEC_LOC',
        'TRAIN_FREIGHT_WAG': 'TRAIN_FREIGHT_ELEC_WAG',
    }
    return renamed.get(row.ES_name, row.ES_name)

In [17]:
def flatten(xss):
    """Concatenate the items of every iterable in ``xss`` into one flat list (one level deep)."""
    flat = []
    for xs in xss:
        flat.extend(xs)
    return flat

In [18]:
tech_CH['ES_name'] = tech_CH.apply(replace_mob_submodel_by_model_CH, axis=1)
unit_conv_CH['ES_name'] = unit_conv_CH.apply(replace_mob_submodel_by_model_CH, axis=1)
assumptions_diff_CH['ES_name'] = assumptions_diff_CH.apply(replace_mob_submodel_by_model_CH, axis=1)

In [19]:
tech_QC['ES_name'] = tech_QC.apply(replace_mob_model_names, axis=1)
unit_conv_QC['ES_name'] = unit_conv_QC.apply(replace_mob_model_names, axis=1)
assumptions_diff_QC['ES_name'] = assumptions_diff_QC.apply(replace_mob_model_names, axis=1)

In [20]:
tech_CH['ES_name'] = tech_CH.apply(replace_mob_model_names, axis=1)
unit_conv_CH['ES_name'] = unit_conv_CH.apply(replace_mob_model_names, axis=1)
assumptions_diff_CH['ES_name'] = assumptions_diff_CH.apply(replace_mob_model_names, axis=1)

In [21]:
tech_CH.drop_duplicates(inplace=True)
unit_conv_CH.drop_duplicates(inplace=True)
assumptions_diff_CH.drop_duplicates(inplace=True)

In [22]:
# Replace every mobility sub-model name of the ES-QC technology list by its main technology.
# Each mobility table is paired with the model columns that are searched in it.
mob_model_tables_QC = [
    (mob_model_private_QC, ['Model_1', 'Model_2', 'Model_3', 'Model_4']),
    (mob_model_public_QC, ['Model_1', 'Model_2', 'Model_3']),
    (mob_model_freight_QC, ['Model_1', 'Model_2']),
]

list_tech_QC_updated = list_tech_QC.copy()
for tech_name in list_tech_QC:
    for mob_table, model_cols in mob_model_tables_QC:
        # Rows of the table in which the name appears as one of the models
        is_model_row = mob_table[model_cols].isin([tech_name]).any(axis=1)
        if not is_model_row.any():
            continue
        # A name listed in several tables must only be removed once: a second
        # list.remove() on the same name would raise ValueError.
        if tech_name in list_tech_QC_updated:
            list_tech_QC_updated.remove(tech_name)
        list_tech_QC_updated.append(mob_table[is_model_row].Main_tech.iloc[0])

In [23]:
list_tech_QC_updated = sorted(list(set(list_tech_QC_updated)))

In [24]:
list_tech_QC = list_tech_QC_updated

In [25]:
# Collect the CH technologies that are not part of ES-QC
operation_names_CH = list(tech_CH[tech_CH.type == 'Operation'].ES_name)
construction_names_CH = list(tech_CH[tech_CH.type == 'Construction'].ES_name)

# Operation: every technology absent from the ES-QC technology list
tech_not_QC = [name for name in operation_names_CH if name not in list_tech_QC]

# Construction: sub-components listed in the CH compositions but in no QC composition
tech_not_QC.extend(
    name for name in construction_names_CH
    if name in sub_comp_CH and name not in sub_comp_QC
)

In [26]:
tech_CH_filtered = tech_CH.drop(index=tech_CH[tech_CH.ES_name.isin(tech_not_QC)].index)

In [27]:
# Technologies whose LCI datasets are redefined in the QC mapping: these CH entries are
# later dropped from the CH list so that the QC version replaces them.
# The name lists are built once per type instead of being re-filtered on every iteration,
# and the CH-side membership test (always true for a name taken from that same CH list)
# is no longer evaluated.
constr_names_CH = list(tech_CH[tech_CH.type == 'Construction'].ES_name)
constr_names_QC = set(tech_QC[tech_QC.type == 'Construction'].ES_name)
update_constr = [tech for tech in constr_names_CH if tech in constr_names_QC]

op_names_CH = list(tech_CH[tech_CH.type == 'Operation'].ES_name)
op_names_QC = set(tech_QC[tech_QC.type == 'Operation'].ES_name)
update_op = [tech for tech in op_names_CH if tech in op_names_QC]

In [28]:
tech_CH_filtered.drop(index=tech_CH_filtered[(tech_CH_filtered.ES_name.isin(update_constr)) & (tech_CH_filtered.type == 'Construction')].index, inplace=True)
tech_CH_filtered.drop(index=tech_CH_filtered[(tech_CH_filtered.ES_name.isin(update_op)) & (tech_CH_filtered.type == 'Operation')].index, inplace=True)

In [29]:
tech_consolidated_QC = pd.concat([tech_CH_filtered, tech_QC])

In [30]:
tech_consolidated_QC.duplicated(subset=['ES_name', 'type']).sum()

0

## Unit conversion file

### CH

In [31]:
unit_conv_CH = unit_conv_CH[['ES_name', 'ei_constr_unit', 'ES_constr_unit', 'ei_use_unit', 'ES_use_unit', 'capacity', 'conversion', 'ei_constr_unit_size', 'ES_constr_unit_size', 'Assumptions & Sources']]

In [32]:
# Drop the rows where both the capacity and conversion factors are missing.
# The table is read with pd.read_excel, which encodes empty cells as NaN rather than None,
# so the rows are selected with isna() (true for both NaN and None) instead of `== None`,
# which never matches NaN.
unit_conv_CH = unit_conv_CH[~(unit_conv_CH.conversion.isna() & unit_conv_CH.capacity.isna())]

### QC

In [33]:
unit_conv_QC = unit_conv_QC[unit_conv_CH.columns]

In [34]:
unit_conv_QC.dropna(how='all', axis=0, inplace=True)

In [35]:
# Some technologies exist in both files with different conversion factors. The QC factors
# must win, so every CH row whose technology also appears in the QC file is left out here.
names_in_QC = unit_conv_QC.ES_name.dropna().unique()
unit_conv_CH_overwrite = unit_conv_CH[~unit_conv_CH.ES_name.isin(names_in_QC)].copy()

In [36]:
unit_conv_QC_consolidated = pd.concat([unit_conv_CH_overwrite.drop(unit_conv_CH_overwrite[unit_conv_CH_overwrite.ES_name.isin(tech_not_QC)].index), unit_conv_QC])

# Duplicate mapping for mobility models

In [37]:
if ES_region == 'CA-QC':
    tech_ecoinvent = tech_consolidated_QC.copy(deep=True)
    tech_unit_conversion = unit_conv_QC_consolidated.copy(deep=True)
    comp = comp_QC.copy(deep=True)
    assumptions_diff = assumptions_diff_QC.copy(deep=True)
elif ES_region == 'CH':
    tech_ecoinvent = tech_CH.copy(deep=True)
    tech_unit_conversion = unit_conv_CH.copy(deep=True)
    comp = comp_CH.copy(deep=True)
    assumptions_diff = assumptions_diff_CH.copy(deep=True)
else:
    raise ValueError('ES_region should be either CH or CA-QC')

In [38]:
tech_ecoinvent.reset_index(drop=True, inplace=True)
tech_unit_conversion.reset_index(drop=True, inplace=True)
comp.reset_index(drop=True, inplace=True)

In [39]:
# Gather all non-NaN components of each technology into a list.
# The component columns are all columns after the first one (the technology name), minus a
# `Components` column left over from a previous run of this cell, so re-running the cell
# does not feed the list column back into itself.
component_cols = [col for col in comp.columns[1:] if col != 'Components']
comp['Components'] = [
    [e for e in row if e == e]  # e == e is False only for NaN
    for row in comp[component_cols].values.tolist()
]
comp_dict = dict(zip(comp.ES_name, comp.Components))
# Maximum number of subcomponents in the compositions file, derived from the number of
# component columns so that rows appended later always match the width of `comp`.
N_sub_comp_max = len(component_cols)

In [40]:
def gen_df_mob_models(df: pd.DataFrame) -> pd.DataFrame:
    """Build the mapping rows of the mobility models listed in ``df``.

    ``df`` is read through its ``Main_tech`` column and its ``Model_1``, ``Model_2``, ...
    columns. For every main technology that is present in ``tech_ecoinvent.ES_name``, the
    model columns are walked until a NaN cell is met or the columns are exhausted. Each
    model gets copies of the main technology's rows, with the name replaced by the model
    name.

    Side effects: besides the returned frame, rows are appended to the module-level
    ``tech_unit_conversion``, ``assumptions_diff``, ``efficiency`` (only when the technology
    is listed there) and ``comp`` frames, and entries are added to ``comp_dict``.

    Parameters
    ----------
    df : pd.DataFrame
        Mobility-model table with a ``Main_tech`` column followed by ``Model_<j>`` columns.

    Returns
    -------
    pd.DataFrame
        New rows with the same columns as ``tech_ecoinvent``, one set per mobility model.
    """
    df_mobility_models = pd.DataFrame(columns=tech_ecoinvent.columns)

    for i in range(len(df)):
        tech = df.Main_tech.iloc[i]

        if tech in list(tech_ecoinvent.ES_name):

            j = 1
            # The model names are read from the FIRST row whose Main_tech equals `tech`
            # (not necessarily row i); a NaN cell becomes the string 'nan' and stops the walk.
            model = str(df[df.Main_tech == tech][f'Model_{j}'].iloc[0])
            # j < df.shape[1] bounds j by the number of columns
            # NOTE(review): this presumes df only holds Main_tech + Model_<j> columns — confirm
            while (model != 'nan') & (j < df.shape[1]):
                # Next free integer label of the output frame (1 when it is still empty)
                if str(df_mobility_models.index.max()) == 'nan':
                    idx = 1
                else:
                    idx = df_mobility_models.index.max() + 1
                # Copy of the first tech_ecoinvent row of the main technology
                # NOTE(review): relies on that first row being the operation row — confirm ordering
                df_mobility_models.loc[idx] = [model] + list(tech_ecoinvent[tech_ecoinvent.ES_name == tech].iloc[0, 1:])  # operation
                tech_unit_conversion.loc[tech_unit_conversion.index.max() + 1] = [model] + list(tech_unit_conversion[tech_unit_conversion.ES_name == tech].iloc[0,1:])  # add a unit conversion row for the mobility model
                # dict_ES.loc[dict_ES.index.max() + 1] = [model] + list(dict_ES[dict_ES['Programming name'] == tech].iloc[0,1:])  # update technology dictionary Excel file with additional rows for mobility models
                assumptions_diff.loc[assumptions_diff.index.max() + 1] = [model] + list(assumptions_diff[assumptions_diff.ES_name == tech].iloc[0,1:])  # add an assumptions row for the mobility model
                if tech in list(efficiency.ES_name.unique()):
                    efficiency.loc[efficiency.index.max() + 1] = [model] + list(efficiency[efficiency.ES_name == tech].iloc[0,1:])  # add an efficiency row for the mobility model

                if tech in comp_dict.keys():
                    # The main technology is a composition: duplicate each of its sub-components

                    N_sub_comp = len(comp_dict[tech])
                    subscript_comp_list = []

                    # NOTE: this loop rebinds `i`, the index of the outer loop; the outer
                    # `for` reassigns it on its next iteration and the value is not used here.
                    for i, sub_comp in enumerate(comp_dict[tech]):
                        # Suffix of the sub-component = its name with the main technology name removed
                        subscript_comp = sub_comp.replace(tech, '')
                        subscript_comp_list.append(subscript_comp)
                        df_mobility_models.loc[df_mobility_models.index.max() + 1] = [model + subscript_comp] + list(tech_ecoinvent[tech_ecoinvent.ES_name == sub_comp].iloc[0,1:])  # construction component idx
                        tech_unit_conversion.loc[tech_unit_conversion.index.max() + 1] = [model + subscript_comp] + list(tech_unit_conversion[tech_unit_conversion.ES_name == sub_comp].iloc[0,1:])  # add a unit conversion row for the sub-component
                        assumptions_diff.loc[assumptions_diff.index.max() + 1] = [model + subscript_comp] + list(assumptions_diff[assumptions_diff.ES_name == sub_comp].iloc[0,1:])  # add an assumptions row for the sub-component
                        if sub_comp in list(efficiency.ES_name.unique()):
                            efficiency.loc[efficiency.index.max() + 1] = [model + subscript_comp] + list(efficiency[efficiency.ES_name == sub_comp].iloc[0,1:])  # add an efficiency row for the sub-component

                    # New composition row: name, sub-component names padded with NaN up to
                    # N_sub_comp_max entries, then the list of sub-components (Components column)
                    comp.loc[comp.index.max() + 1] = [model] + [model + a for a in subscript_comp_list] + [np.nan] * (N_sub_comp_max - N_sub_comp) + [[model + a for a in subscript_comp_list]]  # update the compositions Excel files
                    comp_dict[model] = [model + a for a in subscript_comp_list]

                else:
                    # No composition: copy the second tech_ecoinvent row of the main technology
                    # NOTE(review): raises IndexError if the technology has a single row — confirm
                    df_mobility_models.loc[idx + 1] = [model] + list(
                        tech_ecoinvent[tech_ecoinvent.ES_name == tech].iloc[1, 1:])  # construction
                j += 1
                if j < df.shape[1]:
                    model = str(df[df.Main_tech == tech][f'Model_{j}'].iloc[0])

    return df_mobility_models

In [41]:
if ES_region == 'CH':
    mob_model_private = mob_model_private_CH
elif ES_region == 'CA-QC':
    mob_model_private = mob_model_private_QC
    mob_model_public = mob_model_public_QC
    mob_model_freight = mob_model_freight_QC
else:
    raise ValueError('ES_region should be either CH or CA-QC')

In [42]:
# Names to drop once the mobility models have been generated: the main technologies of
# every mobility-model table of the region...
if ES_region == 'CA-QC':
    basic_tech_to_remove = list(mob_model_private.Main_tech) + list(mob_model_public.Main_tech) + list(
        mob_model_freight.Main_tech)
else:
    basic_tech_to_remove = list(mob_model_private.Main_tech)

# ...plus their sub-components.
# NOTE: the list is extended while it is being iterated, so the appended sub-components are
# visited too and are themselves looked up in comp_dict.
for tech in basic_tech_to_remove:
    if tech in comp_dict.keys():  # add the subcomponents to the list of technologies to remove
        for sub_comp in comp_dict[tech]:
            basic_tech_to_remove.append(sub_comp)

In [43]:
# Create df of mapping with mobility models
df_mobility_models_private = gen_df_mob_models(mob_model_private)
if ES_region == 'CA-QC':
    df_mobility_models_public = gen_df_mob_models(mob_model_public)
    df_mobility_models_freight = gen_df_mob_models(mob_model_freight)

# Remove the mobility basic technologies
tech_ecoinvent.drop(tech_ecoinvent[tech_ecoinvent.ES_name.isin(basic_tech_to_remove)].index, inplace=True)
tech_unit_conversion.drop(tech_unit_conversion[tech_unit_conversion.ES_name.isin(basic_tech_to_remove)].index, inplace=True)
comp.drop(comp[comp.ES_name.isin(basic_tech_to_remove)].index, inplace=True)
assumptions_diff.drop(assumptions_diff[assumptions_diff.ES_name.isin(basic_tech_to_remove)].index, inplace=True)
efficiency.drop(efficiency[efficiency.ES_name.isin(basic_tech_to_remove)].index, inplace=True)

In [44]:
# Main mobility technologies of the region (private only for CH, all three tables otherwise)
mob_tech_list = list(mob_model_private.Main_tech)
if ES_region != 'CH':
    mob_tech_list = mob_tech_list + list(mob_model_public.Main_tech) + list(
        mob_model_freight.Main_tech)

# Components of the mobility technologies that are compositions (to remove)
mob_model_comp = [
    component
    for main_tech in mob_tech_list
    if main_tech in comp_dict.keys()
    for component in comp_dict[main_tech]
]

rows_to_drop = tech_ecoinvent[tech_ecoinvent.ES_name.isin(mob_model_comp)].index
tech_ecoinvent.drop(rows_to_drop, inplace=True)

In [45]:
# Append the generated mobility-model rows to the mapping (public and freight models exist
# only outside CH), then order the result by technology name
mapping_parts = [tech_ecoinvent, df_mobility_models_private]
if ES_region != 'CH':
    mapping_parts += [df_mobility_models_public, df_mobility_models_freight]
tech_ecoinvent = pd.concat(mapping_parts).sort_values('ES_name').reset_index(drop=True)

# Mapping file with both technologies and resources

In [46]:
res = pd.read_csv(f"energyscope_data/CA-QC/hidden/res_ecoinvent_{ES_region[-2:]}.csv")
flows = pd.read_csv('energyscope_data/CA-QC/hidden/flows_ecoinvent.csv') 

In [47]:
db_flows = Database(db_names=list(flows.Database.unique()))

Loaded ecoinvent3.8 cut-off from pickle!
Loaded biofuels from pickle!
Loaded Carma CCS from pickle!


In [48]:
flows['Type'] = len(flows) * ['Flow']

In [49]:
# Define the user-defined ranking (only ecoinvent regions, no IAMs).
# The list is ordered from the most to the least preferred location; it is passed to ESM as
# `locations_ranking`.
if ES_region == 'CA-QC':
    my_ranking = [
        'CA-QC',  # Quebec
        'CA',  # Canada
        'CA-ON',  # Other Canadian provinces
        'CA-AB',
        'CA-BC',
        'CA-MB',
        'CA-NB',
        'CA-NF',
        'CA-NS',
        'CA-NT',
        'CA-NU',
        'CA-PE',
        # NOTE(review): 'CA-SK' and 'CA-YK' do not appear in this list — confirm whether
        # they are left out on purpose.
        'RNA',  # North America
        'US',  # United States
        'GLO',  # Global average 
        'RoW',  # Rest of the world
    ]
elif ES_region == 'CH':
    my_ranking = [
        'CH', 
        'RER', 
        'IAI Area, EU27 & EFTA',
        'GLO',
        'RoW'
    ]
else:
    raise ValueError('ES_region should be either CH or CA-QC')

In [50]:
esm = ESM(
    mapping=flows,
    locations_ranking=my_ranking,
    esm_location=ES_region,
    main_database=db_flows,
    model=model,
    unit_conversion=pd.DataFrame(),
    mapping_esm_flows_to_CPC_cat=pd.DataFrame(),
    esm_db_name='',
)

In [51]:
# flows = change_location_mapping_file(
#     flows,
#     my_ranking,
#     db_flows,
#     ES_region,
# )
esm.change_location_mapping_file()

In [52]:
# Resources: drop the free-text description, keep only the mapped rows, tag them as 'Resource'
res = (
    res
    .drop(columns=['Description'])
    .dropna(subset=['product_name'])
    .assign(type='Resource')
)

# Technologies and resources share one table, renamed to the mapping column convention
mapping_column_names = {
    'ES_name': 'Name',
    'type': 'Type',
    'product_name': 'Product',
    'activity_name': 'Activity',
    'region': 'Location',
    'unit': 'Unit',
    'database': 'Database',
}
tech_and_res = pd.concat([tech_ecoinvent, res], ignore_index=True).rename(columns=mapping_column_names)

# Finally append the flows mapping held by the ESM object
mapping = pd.concat([tech_and_res, esm.mapping])

## New premise names for mobility

In [53]:
def change_mobility_name(row):
    """Rewrite the Product/Activity strings of one mapping row to the new mobility names.

    The row is modified in place and returned. Rows whose ``Name`` starts with ``LCV_``,
    ``SEMI_SH_`` or ``TRUCK_SH_`` keep the ``', 2020'`` year in their activity name, and
    activities containing 'urban delivery' or 'regional delivery' keep their
    ``'NMC-622 battery, '`` fragment.
    """
    carculator_prefixes = ('LCV_', 'SEMI_SH_', 'TRUCK_SH_')

    # Remove the year, except for carculator names
    if not row.Name.startswith(carculator_prefixes):
        row.Activity = row.Activity.replace(', 2020', '')

    # Cars and SUVs inventories
    for old, new in (('-TEMP', ''), ('EURO-6d', 'EURO-6ab')):
        row.Product = row.Product.replace(old, new)
        row.Activity = row.Activity.replace(old, new)

    # Battery electric vehicles (should not be applied to carculator datasets)
    if 'urban delivery' not in row.Activity and 'regional delivery' not in row.Activity:
        row.Activity = row.Activity.replace('NMC-622 battery, ', '')

    return row

In [54]:
mapping = mapping.apply(change_mobility_name, axis=1)

## Adapting trucks to carculator names

In [55]:
def change_truck_name(row):
    """Adapt one mapping row of a light/short-haul truck to the carculator naming.

    Only rows whose ``Name`` starts with ``LCV_``, ``SEMI_SH_`` or ``TRUCK_SH_`` are
    touched; any other row is returned as is. For the affected rows the Product and
    Activity strings are rewritten, 'RER' is replaced by 'CH' in the Location, and the
    Database name is switched according to the delivery type found in the Activity.

    Raises
    ------
    ValueError
        If the activity of an affected row mentions neither 'urban delivery' nor
        'regional delivery'.
    """
    if not row.Name.startswith(('LCV_', 'SEMI_SH_', 'TRUCK_SH_')):
        return row

    # Substitutions that depend on the type of the row...
    if row.Type == 'Operation':
        substitutions = [('freight, lorry', 'truck')]
    elif row.Type == 'Construction':
        substitutions = [('Light duty ', ''), ('Medium duty ', '')]
    else:
        substitutions = []
    # ...followed by the ones applied to every truck row
    substitutions += [(' gross weight', ''), ('EURO-VI', 'Euro-6')]

    for old, new in substitutions:
        row.Product = row.Product.replace(old, new)
        row.Activity = row.Activity.replace(old, new)

    row.Location = row.Location.replace('RER', 'CH')

    if 'urban delivery' in row.Activity:
        target_db = 'urban delivery_truck'
    elif 'regional delivery' in row.Activity:
        target_db = 'regional delivery_truck'
    else:
        raise ValueError('Truck type not recognized')
    row.Database = row.Database.replace('lci-long_haul_trucks', target_db)

    return row

In [56]:
mapping = mapping.apply(change_truck_name, axis=1)

## Filtering the mapping file

In [57]:
# (Name, Type) pairs that are filtered out of the mapping and of the unit conversion table.
# The pairs with Type 'Construction' / 'Operation' are also used to filter the lifetime /
# efficiency tables by name.
to_remove = [
    # Removing NG and H2 transport LCI datasets (operation) to be fair with electricity transport technologies that have no operation LCI datasets either 
    ('HP_NG_GRID', 'Operation'), 
    ('EHP_NG_GRID', 'Operation'),
    ('LP_NG_GRID', 'Operation'),
    ('MP_NG_GRID', 'Operation'),
    ('HP_H2_GRID', 'Operation'),
    ('EHP_H2_GRID', 'Operation'),
    ('LP_H2_GRID', 'Operation'),
    ('MP_H2_GRID', 'Operation'),
    
    # Removed because not used in practice
    ('H2_COMP_100', 'Construction'),
    ('H2_COMP_100_350', 'Construction'),
    ('H2_COMP_200', 'Construction'),
    ('H2_COMP_200_350', 'Construction'),
    ('H2_COMP_100', 'Operation'),
    ('H2_COMP_100_350', 'Operation'),
    ('H2_COMP_200', 'Operation'),
    ('H2_COMP_200_350', 'Operation'),
    ('AFC_OG', 'Construction'),
    ('PAFC_OG', 'Construction'),
    ('PEMFC_OG', 'Construction'),
    ('SOFC_OG', 'Construction'),
    ('AEC_OG', 'Construction'),
    ('PEMEC_OG', 'Construction'),
    ('SOEC_OG', 'Construction'),
    ('AEC_OG', 'Construction'),  # NOTE(review): same pair as four lines above — was another technology intended?
    ('AFC_OG', 'Operation'),
    ('PAFC_OG', 'Operation'),
    ('PEMFC_OG', 'Operation'),
    ('SOFC_OG', 'Operation'),
    ('AEC_OG', 'Operation'),
    ('PEMEC_OG', 'Operation'),
    ('SOEC_OG', 'Operation'),
    ('AEC_OG', 'Operation'),  # NOTE(review): same pair as four lines above — was another technology intended?
    ('NG_GEN', 'Construction'),
    ('NG_GEN', 'Operation'),
    ('DIESEL_GEN', 'Construction'),
    ('DIESEL_GEN', 'Operation'),
    ('PV_EHV', 'Construction'),
    ('PV_EHV', 'Operation'),
    ('H2_Haber_Bosch', 'Construction'),
    ('H2_Haber_Bosch', 'Operation'),
    ('DEC_DEEP_GEO', 'Construction'),
    ('DEC_DEEP_GEO', 'Operation'),
    ('BIOMASS_ETHANOL', 'Construction'),
    ('BIOMASS_ETHANOL', 'Operation'),
]

# Additional removals that only apply to the CH region
if ES_region == 'CH':
    to_remove += [
        ('PROPANE', 'Resource'),
        ('BIO_DIESEL', 'Resource'),
        ('BIO_DIESEL', 'Flow'),
    ]

In [58]:
# For every removed construction entry that is a composition, drop the composition itself
# and schedule its sub-components for removal too. The loop runs over a snapshot of the
# list, so the pairs appended here are not examined again.
for removed_name, removed_type in list(to_remove):
    if removed_type != 'Construction':
        continue
    if removed_name not in list(comp.ES_name):
        continue
    composition_rows = comp[comp.ES_name == removed_name]
    comp_list = composition_rows.Components.iloc[0]
    comp.drop(composition_rows.index, inplace=True)  # remove from composition file
    to_remove.extend((sub_comp, 'Construction') for sub_comp in comp_list)

In [59]:
mapping.set_index(['Name', 'Type'], inplace=True)
mapping = mapping[~mapping.index.isin(to_remove)]
mapping.reset_index(inplace=True)

In [60]:
mapping.to_csv(f"energyscope_data/{ES_region}/mapping_3.8.csv", index=False)

# Composition file

In [61]:
comp.rename(columns={'ES_name': 'Name'}, inplace=True)
comp[['Name', 'Components']].to_csv(f"energyscope_data/{ES_region}/technology_compositions.csv", index=False)

# Unit conversion and assumptions files

In [62]:
res_unit_conversion = pd.read_excel("energyscope_data/CA-QC/hidden/res_unit_conversion.xlsx")
other_unit_conversion = pd.read_csv("energyscope_data/CA-QC/hidden/other_unit_conversion.csv")
lifetime = assumptions_diff.copy(deep=True)

In [63]:
# Long-format table of the technology conversion factors: one row per (Name, Type), where
# Type is 'Construction' (from the `capacity` column) or 'Operation' (from the `conversion`
# column) and Value is the factor. Rows without a factor are dropped.
# NOTE: the four unit columns selected here are neither id_vars nor value_vars of the melt,
# so they are absent from the result.
tech_unit_conversion_melted = tech_unit_conversion[['ES_name', 'capacity', 'conversion', 'ei_constr_unit', 'ES_constr_unit', 'ei_use_unit', 'ES_use_unit']].rename(
    columns={'ES_name': 'Name', 'capacity': 'Construction', 'conversion': 'Operation'}
).melt(
    id_vars='Name',
    value_vars=['Construction', 'Operation'],
    var_name='Type',
    value_name='Value'
).sort_values('Name').dropna(subset='Value')

In [64]:
tech_unit_conversion_melted_constr = tech_unit_conversion_melted[tech_unit_conversion_melted.Type == 'Construction']
tech_unit_conversion_melted_op = tech_unit_conversion_melted[tech_unit_conversion_melted.Type == 'Operation']
tech_unit_conversion_melted_constr = tech_unit_conversion_melted_constr.merge(tech_unit_conversion[['ES_name', 'ei_constr_unit', 'ES_constr_unit']], left_on='Name', right_on='ES_name').rename(columns={'ei_constr_unit': 'LCA', 'ES_constr_unit': 'ESM'}).drop(columns='ES_name')
tech_unit_conversion_melted_op = tech_unit_conversion_melted_op.merge(tech_unit_conversion[['ES_name', 'ei_use_unit', 'ES_use_unit']], left_on='Name', right_on='ES_name').rename(columns={'ei_use_unit': 'LCA', 'ES_use_unit': 'ESM'}).drop(columns='ES_name')
tech_unit_conversion_melted = pd.concat([tech_unit_conversion_melted_constr, tech_unit_conversion_melted_op], ignore_index=True).sort_values('Name')

In [65]:
res_unit_conversion_melted = res_unit_conversion[['ES_name', 'conversion', 'ei_unit', 'ES_unit']].rename(
    columns={'ES_name': 'Name', 'conversion': 'Resource'}
).melt(
    id_vars='Name', 
    value_vars=['Resource'],
    var_name='Type', 
    value_name='Value'
).sort_values('Name').dropna(subset='Value')

In [66]:
res_unit_conversion_melted = res_unit_conversion_melted.merge(res_unit_conversion[['ES_name', 'ei_unit', 'ES_unit']], left_on='Name', right_on='ES_name').rename(columns={'ei_unit': 'LCA', 'ES_unit': 'ESM'}).drop(columns='ES_name')

In [67]:
unit_conversion = pd.concat([tech_unit_conversion_melted, 
                             res_unit_conversion_melted, 
                             other_unit_conversion[['Name', 'Value', 'Type', 'ESM', 'LCA']],
                             ], ignore_index=True).sort_values('Name')

In [68]:
def change_unit_convention(row):
    """Express the ESM unit of one conversion row in its base unit.

    Rows whose ``ESM`` unit is one of the scaled units below get the matching base unit and
    their ``Value`` divided by 1e6. Rows already in 'unit', 'm3', 'kg', 'MJ' or 'kWh' are
    left untouched. The row is modified in place and returned.

    Raises
    ------
    ValueError
        If the ESM unit belongs to neither group.
    """
    scaled_to_base = {
        'GWh': 'kWh',
        'kt': 'kg',
        'Mpkm': 'pkm',
        'Mtkm': 'tkm',
        'GW': 'kW',
        'kt/h': 'kg/h',
        'Mtkm/h': 'tkm/h',
        'Mpkm/h': 'pkm/h',
    }
    already_base = ('unit', 'm3', 'kg', 'MJ', 'kWh')

    if row.ESM in scaled_to_base:
        row.ESM = scaled_to_base[row.ESM]
        row.Value /= 1e6
    elif row.ESM not in already_base:
        raise ValueError(f'Unit {row.ESM} not recognized')
    return row

In [69]:
unit_conversion = unit_conversion.apply(change_unit_convention, axis=1)

In [70]:
unit_conversion['ESM'] = unit_conversion['ESM'].apply(ecoinvent_unit_convention)
unit_conversion['LCA'] = unit_conversion['LCA'].apply(ecoinvent_unit_convention)

## Filtering

In [71]:
unit_conversion.set_index(['Name', 'Type'], inplace=True)
unit_conversion = unit_conversion[~unit_conversion.index.isin(to_remove)]
unit_conversion.reset_index(inplace=True)

In [72]:
# Remove from the lifetime table the technologies whose 'Construction' entry is in to_remove.
# Only the matching names are collected, so the membership list contains no None
# placeholders for the pairs of another type.
construction_names_to_remove = [name for name, entry_type in to_remove if entry_type == 'Construction']
lifetime = lifetime.set_index(['ES_name'])
lifetime = lifetime[~lifetime.index.isin(construction_names_to_remove)].reset_index()

In [73]:
# Remove from the efficiency table the technologies whose 'Operation' entry is in to_remove.
# Only the matching names are collected, so the membership list contains no None
# placeholders for the pairs of another type.
operation_names_to_remove = [name for name, entry_type in to_remove if entry_type == 'Operation']
efficiency = efficiency.set_index(['ES_name'])
efficiency = efficiency[~efficiency.index.isin(operation_names_to_remove)].reset_index()
efficiency = efficiency.sort_values('ES_name')

In [74]:
unit_conversion.to_csv(f"energyscope_data/{ES_region}/unit_conversion_3.8.csv", index=False)

In [75]:
lifetime[['ES_name', 'lifetime_ES', 'lifetime_ei']].rename(columns={'ES_name': 'Name', 'lifetime_ES': 'ESM', 'lifetime_ei': 'LCA'}).to_csv(f"energyscope_data/{ES_region}/lifetime.csv", index=False)

In [76]:
efficiency.rename(columns={'ES_name': 'Name'}).to_csv(f"energyscope_data/{ES_region}/efficiency.csv", index=False)

# Relink mapping file

In [77]:
premise_changes = pd.read_csv("data/premise_change_report.csv")

In [78]:
name_premise_db = 'ecoinvent_cutoff_3.8_remind_SSP2-Base_2020'
name_premise_comp_db = name_premise_db + f'_comp_{ES_region}'

In [79]:
premise_db = Database(name_premise_db, create_pickle=True)

Loaded ecoinvent_cutoff_3.8_remind_SSP2-Base_2020 from pickle!


In [80]:
mapping_linked_to_premise = premise_db.create_complementary_database(
    df_mapping=mapping, 
    main_db_name=name_premise_db, 
    complement_db_name=name_premise_comp_db, 
    premise_changes=premise_changes
)

Loaded fuel_cell from pickle!
Loaded ecoinvent_cutoff_3.8_remind_SSP2-Base_2020 from pickle!
Loaded h2_electrolysis from pickle!
Loaded ecoinvent3.8 cut-off from pickle!
Loaded hydrogen-smr-natgas from pickle!
Loaded Hydrogen from biogas SMR and ATR from pickle!
Loaded biogas from pickle!
Loaded Hydrogen from woody biomass gasification from pickle!
Loaded lci-buses from pickle!
Loaded carbon fiber from pickle!
Loaded batteries from pickle!
Loaded lci-pass_cars from pickle!
Loaded lithium from pickle!
Loaded graphite from pickle!
Loaded cobalt from pickle!
Loaded Carma CCS from pickle!
Loaded synfuel from electrolysis from pickle!
Loaded cement CCS-CCU from pickle!
Loaded Methanol-based fuels from electrolysis from pickle!
Loaded Hydrogen from coal Gasification from pickle!
Loaded biofuels from pickle!
Loaded synfuel from wood gasification from pickle!
Loaded direct air capture from pickle!
Loaded geothermal from pickle!
Loaded Methanol-based fuels from coal from pickle!
Getting activit

100%|██████████| 939/939 [00:00<00:00, 121737.50it/s]


Adding exchange data to activities


100%|██████████| 30812/30812 [00:01<00:00, 27464.67it/s]


Filling out exchange data


100%|██████████| 939/939 [00:00<00:00, 1091.35it/s]


Loaded urban delivery_truck from brightway!
Loaded syngas from pickle!
Loaded h2_pyrolysis from pickle!
Loaded PV from pickle!
Loaded lci-long_haul_trucks from pickle!
Getting activity data


100%|██████████| 935/935 [00:00<00:00, 93306.55it/s]


Adding exchange data to activities


100%|██████████| 30702/30702 [00:00<00:00, 31748.65it/s]


Filling out exchange data


100%|██████████| 935/935 [00:00<00:00, 1628.57it/s]


Loaded regional delivery_truck from brightway!
Loaded wave_energy from pickle!
Loaded Methanol-based fuels from wood from pickle!
No inventory in the premise database for ('LCV_BIODIESEL_B100_MD', 'Construction')
No inventory in the premise database for ('LCV_BIODIESEL_B100_MD', 'Operation')
No inventory in the premise database for ('LCV_BIODIESEL_B100_SD', 'Construction')
No inventory in the premise database for ('LCV_BIODIESEL_B100_SD', 'Operation')
No inventory in the premise database for ('LCV_BIODIESEL_B20_MD', 'Operation')
No inventory in the premise database for ('LCV_BIODIESEL_B20_MD', 'Construction')
No inventory in the premise database for ('LCV_BIODIESEL_B20_SD', 'Operation')
No inventory in the premise database for ('LCV_BIODIESEL_B20_SD', 'Construction')
No inventory in the premise database for ('LCV_CNG_MD', 'Construction')
No inventory in the premise database for ('LCV_CNG_MD', 'Operation')
No inventory in the premise database for ('LCV_CNG_SD', 'Operation')
No inventory

Writing activities to SQLite3 database:
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


Title: Writing activities to SQLite3 database:
  Started: 10/04/2024 17:22:45
  Finished: 10/04/2024 17:22:45
  Total time elapsed: 00:00:00
  CPU %: 66.50
  Memory %: 23.30


In [81]:
# create a concatenated database of all databases in the mapping dataframe (including background requirements, except biosphere databases)
base_db = Database(db_names=list(mapping_linked_to_premise.Database.unique()))

Loaded ecoinvent_cutoff_3.8_remind_SSP2-Base_2020 from pickle!
Getting activity data


100%|██████████| 85/85 [00:00<?, ?it/s]


Adding exchange data to activities


100%|██████████| 1686/1686 [00:00<00:00, 22570.25it/s]


Filling out exchange data


100%|██████████| 85/85 [00:00<00:00, 261.47it/s]


Loaded ecoinvent_cutoff_3.8_remind_SSP2-Base_2020_comp_CA-QC from brightway!


In [82]:
esm = ESM(
    mapping=mapping_linked_to_premise,
    locations_ranking=my_ranking,
    main_database=base_db,
    esm_location=ES_region,
    unit_conversion=pd.DataFrame(),
    model=pd.DataFrame(),
    mapping_esm_flows_to_CPC_cat=pd.DataFrame(),
    esm_db_name='',
)

In [83]:
# Update mapping dataframe with better locations
esm.change_location_mapping_file()
mapping_linked_to_premise = esm.mapping

No location found in your ranking for hydrogen, gaseous, 20 bar - hydrogen production, gaseous, 20 bar, from AEC electrolysis, from grid electricity
--> Have to keep the initial location: CH
No location found in your ranking for biomethane, high pressure - biomethane, gaseous, 5 bar, from sewage sludge fermentation, at fuelling station
--> Have to keep the initial location: RER
No location found in your ranking for hydrogen, gaseous, low pressure - hydrogen production, steam methane reforming, from biomethane
--> Have to keep the initial location: RER
No location found in your ranking for hydrogen, gaseous, low pressure - hydrogen production, steam methane reforming, from biomethane, with CCS
--> Have to keep the initial location: RER
No location found in your ranking for hydrogen, gaseous, 25 bar - hydrogen production, gaseous, 25 bar, from gasification of woody biomass in entrained flow gasifier, at gasification plant
--> Have to keep the initial location: RER
No location found in yo

In [84]:
unlinked = base_db.test_mapping_file(mapping_linked_to_premise)

Mapping successfully linked to the database


In [85]:
if len(unlinked) == 0:
    mapping_linked_to_premise.to_csv(f"energyscope_data/{ES_region}/mapping_3.8_linked.csv", index=False)
else:
    print(f"Unlinked flows: {unlinked}")