# Add CPC categories to a database 

In [1]:
from mescal import *
import pandas as pd
import bw2data as bd

In [2]:
# Make "ei3.8-mescal" the current Brightway (bw2data) project for the rest of the notebook.
bd.projects.set_current("ei3.8-mescal")

In [3]:
# Product-to-CPC mapping table; it is passed to
# create_new_database_with_CPC_categories in the cells below.
mapping_product_to_CPC = pd.read_csv('data/mapping_product_to_CPC.csv')

In [4]:
# Name of the main database and its loaded content.
# load_extract_db comes from the `mescal` star import; later cells concatenate its
# results with `+` and iterate over them as dict-like activities.
name_premise_db = "ecoinvent_cutoff_3.8_remind_SSP2-Base_2020"
premise_db = load_extract_db(name_premise_db)

In [5]:
# The complementary database is named after the main one, with a '_comp_QC' suffix.
name_premise_comp_db = f'{name_premise_db}_comp_QC'
premise_comp_db = load_extract_db(name_premise_comp_db)

## Create a new database with additional CPC categories

In [6]:
# Names given to the copies of both databases that carry CPC categories.
name_premise_with_CPC_db = f'{name_premise_db}_with_CPC'
name_premise_comp_with_CPC_db = f'{name_premise_comp_db}_with_CPC'

In [7]:
# Create the CPC-enriched counterpart of the main database under its new name.
create_new_database_with_CPC_categories(
    db=premise_db,
    new_db_name=name_premise_with_CPC_db,
    mapping_product_to_CPC=mapping_product_to_CPC,
)

In [8]:
# Create the CPC-enriched counterpart of the complementary database under its new name.
create_new_database_with_CPC_categories(
    db=premise_comp_db,
    new_db_name=name_premise_comp_with_CPC_db,
    mapping_product_to_CPC=mapping_product_to_CPC,
)

Relink the complementary database with the newly created main database

In [9]:
# Load the complementary database that was just created with CPC categories.
# NOTE(review): create_pickle=False presumably skips caching the extract on disk,
# which would go stale after the relink below — confirm against mescal.
premise_comp_db_with_CPC = load_extract_db(name_premise_comp_with_CPC_db, create_pickle=False)

In [10]:
# Relink the complementary database from the original main database to its
# CPC-enriched copy; the arguments appear to be (database, name to replace, new name).
# NOTE(review): whether relink_database mutates the list in place, writes to the
# Brightway project, or both is not visible here — confirm against mescal.
relink_database(premise_comp_db_with_CPC, name_premise_db, name_premise_with_CPC_db)

## Change the mapping file accordingly

In [11]:
# Mapping file whose 'Database' column is updated and written back in the next cells.
mapping_technologies = pd.read_csv('energyscope_data/mapping_linked.csv')

In [12]:
# Point every mapped entry at the CPC-enriched database by appending '_with_CPC'.
# The mapping file is overwritten in place by the next cell, so the suffix is only
# added where it is not already present: re-running the notebook must not produce
# names such as '..._with_CPC_with_CPC'.
cpc_suffix = '_with_CPC'
# na=True treats missing database names as "nothing to do"; they stay missing,
# exactly as they did with the unconditional `+=`.
already_suffixed = (
    mapping_technologies['Database'].str.endswith(cpc_suffix, na=True).astype(bool)
)
mapping_technologies.loc[~already_suffixed, 'Database'] += cpc_suffix

In [13]:
# Overwrites the file that was read above (same path); index=False keeps the
# DataFrame's row index out of the CSV.
mapping_technologies.to_csv('energyscope_data/mapping_linked.csv', index=False)

## Test the new database

In [14]:
# Load the newly created main database (the one with CPC categories).
premise_db_with_CPC = load_extract_db(name_premise_with_CPC_db)

In [15]:
# Combined content of the main and complementary CPC-enriched databases.
# NOTE(review): premise_comp_db_with_CPC is the object loaded before relink_database
# was called; it only reflects the relink if that function mutates it in place — confirm.
db = premise_db_with_CPC + premise_comp_db_with_CPC

In [16]:
# Index every activity by its (name, reference product, location) triple for direct lookup.
# If two activities share the same triple, the later one in `db` wins.
premise_db_with_CPC_dict = {
    (dataset['name'], dataset['reference product'], dataset['location']): dataset
    for dataset in db
}

In [17]:
# Relink your mapping with the premise database before running the following cells
# The check below looks up each (Activity, Product, Location) row of this file in the
# combined database and raises KeyError for any row that is not found there.
mapping_linked_to_premise = pd.read_csv('energyscope_data/mapping_QC_linked.csv')

In [18]:
# Check that every 'Operation' activity of the mapping, and every technosphere
# input of those activities, has a CPC classification in the new database.
# Each dataset without one is printed and collected in act_wo_CPC_cat as a
# (name, reference product, location) tuple; duplicates are kept.
act_wo_CPC_cat = []
df = mapping_linked_to_premise[mapping_linked_to_premise['Type'] == 'Operation']


def _record_if_no_CPC(dataset):
    """Print and record `dataset` when its classifications hold no 'CPC' entry.

    A dataset with no 'classifications' field at all is reported the same way,
    since both cases surface as a KeyError.
    """
    try:
        dict(dataset['classifications'])['CPC']
    except KeyError:
        name = dataset['name']
        ref_product = dataset['reference product']
        loc = dataset['location']
        print(f'No CPC category for: {ref_product} - {name} - {loc}')
        act_wo_CPC_cat.append((name, ref_product, loc))


# Iterate over the three columns directly instead of building a row Series with
# df.iloc[i] three times per iteration.
for activity, product, location in zip(df['Activity'], df['Product'], df['Location']):
    # Raises KeyError if the mapping row does not exist in the combined database.
    act = premise_db_with_CPC_dict[(activity, product, location)]
    _record_if_no_CPC(act)

    for exc in get_technosphere_flows(act):
        # Each technosphere exchange is resolved to the dataset that supplies it.
        act_exc = premise_db_with_CPC_dict[(exc['name'], exc['product'], exc['location'])]
        _record_if_no_CPC(act_exc)

In [19]:
# Distinct reference products among the datasets that lack a CPC category.
list({ref_product for _, ref_product, _ in act_wo_CPC_cat})