In [1]:
from utilities.utils import SSPModelForCalibration, HelperFunctions
from utilities.diff_reports import DiffReportUtils
from sisepuede.manager.sisepuede_examples import SISEPUEDEExamples
import pandas as pd
import warnings
import os
# Silence every warning category from this point on.
# NOTE(review): a blanket "ignore" also hides pandas deprecation and dtype
# warnings; consider narrowing it to the specific categories that are noisy.
warnings.filterwarnings("ignore")

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Instantiate the project helper collection.
# NOTE(review): the only visible use (helper_functions.add_missing_cols) is in a
# commented-out cell further down, so this object is currently unused.
helper_functions = HelperFunctions()

In [4]:
# Directory layout, all derived from the current working directory.
# The working directory is taken to be the scripts folder; its parent is the
# project root and holds the optimisation output.
SCRIPTS_DIR_PATH = os.getcwd()
ROOT_DIR_PATH = os.path.dirname(SCRIPTS_DIR_PATH)
OUTPUT_DIR_PATH = os.path.join(ROOT_DIR_PATH, 'output/pso')

# Everything else lives under <scripts>/misc.
MISC_DIR_PATH = os.path.join(SCRIPTS_DIR_PATH, 'misc')
DUMMY_DIR_PATH, SECTORAL_REPORT_MAPPING_DIR_PATH, SECTORAL_REPORTS_DIR_PATH = (
    os.path.join(MISC_DIR_PATH, sub_dir)
    for sub_dir in ('dummy', 'sectoral_report_mapping', 'sectoral_reports')
)

In [5]:
# Define parameters
# ISO alpha-3 country code handed to DiffReportUtils.
iso_alpha_3 = 'HRV'
# Selects which dummy SSP output file is loaded and is also passed to DiffReportUtils.
energy_model_flag = False
# Identifier of the optimisation run; used as the run sub-directory name and as
# the suffix of the report files read and written below.
run_id = '20250318181548'
# File name of the SISEPUEDE/EDGAR crosswalk inside the sectoral report mapping directory.
ssp_edgar_cw_file_name = 'sisepuede_edgar_active_crosswalk.csv'


In [6]:
# Directory that holds the artefacts of the selected run: <OUTPUT_DIR_PATH>/<run_id>.
RUN_DIR_PATH = os.path.join(OUTPUT_DIR_PATH, run_id)

In [7]:
# input_df = pd.read_csv(os.path.join(RUN_DIR_PATH, f'best_input_df_{run_id}.csv'))
# input_df.head()

In [8]:
# # Load input dataset
# examples = SISEPUEDEExamples()
# cr = examples("input_data_frame")

# # Add missing columns and reformat the input data
# df_input = input_df.rename(columns={'period': 'time_period'})
# df_input = helper_functions.add_missing_cols(cr, df_input.copy())
# df_input = df_input.drop(columns='iso_code3')

# # Subset df_input to the input rows amount
# df_input = df_input.iloc[:10]

# df_input

In [9]:
# # Initialize the SSP model
# ssp_model = SSPModelForCalibration(energy_model_flag=False)

In [10]:
# output_df = ssp_model.run_ssp_simulation(input_df)

In [11]:
# output_df

In [12]:
# output_df.to_csv('misc/dummy/ssp_output_dummy.csv', index=False)

In [13]:
# Read the pre-computed SSP output that matches the energy-model setting.
output_file_name = (
    'ssp_output_w_energy_dummy.csv'
    if energy_model_flag
    else 'ssp_output_no_energy_dummy.csv'
)
output_df_path = os.path.join(DUMMY_DIR_PATH, output_file_name)

output_df = pd.read_csv(output_df_path)
output_df.head()

Unnamed: 0,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,area_agrc_crops_other_woody_perennial,area_agrc_crops_pulses,...,yield_agrc_fruits_tonne,yield_agrc_herbs_and_other_perennial_crops_tonne,yield_agrc_nuts_tonne,yield_agrc_other_annual_tonne,yield_agrc_other_woody_perennial_tonne,yield_agrc_pulses_tonne,yield_agrc_rice_tonne,yield_agrc_sugar_cane_tonne,yield_agrc_tubers_tonne,yield_agrc_vegetables_and_vines_tonne
0,0,128992.475401,286925.570284,59534.988647,155176.845408,200241.524314,109285.29166,144565.284932,96468.731604,66425.612333,...,860714.235864,5781814.0,65829.816853,279108.581486,50211.9748,175242.943363,0.0,0.0,2050396.0,725849.288265
1,1,128305.935425,285398.458926,59218.124042,154350.943684,199175.773688,108703.639735,143795.86139,95955.293587,66072.073584,...,857478.660027,5906717.0,52166.876709,331312.854435,70450.376552,215192.338859,2736.0,2449.0,2713049.0,699533.070417
2,2,127621.990578,283877.120067,58902.45719,153528.163879,198114.051612,108124.186462,143029.346278,95443.796372,65719.871216,...,660924.245615,5951148.0,38967.956801,296921.771405,82224.830575,210243.344677,2736.0,2449.0,2331104.0,761148.281366
3,3,126940.639041,282361.549661,58587.98725,152708.503803,197056.355263,107546.930298,142265.737557,94934.238599,65369.004292,...,957822.768981,5919376.0,17239.772927,378730.362142,93529.211867,213539.618942,2736.0,2449.0,2111415.0,729155.981592
4,4,126261.879236,280851.744198,58274.713494,151891.96156,196002.68219,106971.869908,141505.03346,94426.619087,65019.472,...,892950.504956,5887725.0,21001.25236,359927.486275,85408.876964,264501.812874,2736.0,2449.0,2234268.0,737682.556359


In [14]:
# Full path of the SISEPUEDE/EDGAR crosswalk CSV.
edgar_ssp_cw_path = os.path.join(SECTORAL_REPORT_MAPPING_DIR_PATH, ssp_edgar_cw_file_name)

# Build the diff-report utility from the country code, the crosswalk path, the
# sectoral reports directory and the energy-model flag (passed positionally).
# NOTE(review): parameter names are not visible from here; confirm the order
# against DiffReportUtils.__init__.
dru = DiffReportUtils(iso_alpha_3, edgar_ssp_cw_path, SECTORAL_REPORTS_DIR_PATH, energy_model_flag)

In [15]:
# Path of the EDGAR emissions file used as the calibration reference.
edgar_emission_db_path = os.path.join(SECTORAL_REPORT_MAPPING_DIR_PATH, 'CSC-GHG_emissions-April2024_to_calibrate.csv')
# Run the project's ETL over that file. The displayed result has the columns
# iso_alpha_3, edgar_class, edgar_emission and year.
# NOTE(review): presumably filtered to iso_alpha_3 inside the ETL; confirm in DiffReportUtils.
edgar_emission_df = dru.edgar_emission_db_etl(edgar_emission_db_path)
edgar_emission_df.head()

Unnamed: 0,iso_alpha_3,edgar_class,edgar_emission,year
0,HRV,AG - Livestock:CH4,1.682916,2015
1,HRV,AG - Livestock:N2O,0.110193,2015
2,HRV,AG - Crops:CH4,0.000227,2015
3,HRV,AG - Crops:CO2,0.095857,2015
4,HRV,AG - Crops:N2O,1.038055,2015


In [16]:
# Compare the SSP output with the EDGAR reference and pull the three entries of
# the returned dictionary into their own variables.
# NOTE(review): model_failed_flag is extracted but never checked in this notebook.
report_dict = dru.run_report_generator(edgar_emission_df, output_df)

sectoral_emission_report, subsector_emission_report, model_failed_flag = (
    report_dict['sectoral_emission_report'],
    report_dict['subsector_emission_report'],
    report_dict['model_failed_flag'],
)

In [17]:
# Work on a copy so the following cells leave sectoral_emission_report untouched.
# NOTE(review): "og" presumably stands for the original (pre-optimisation) report.
og_report = sectoral_emission_report.copy()
og_report.head()

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
0,inen,EN - Manufacturing/Construction,4.069753,HRV,2.15734,2015,2.157341,0.886467,3.657318,2.157341,0.078574,1.14973
1,scoe,EN - Building,7.547604,HRV,3.247697,2015,3.247698,1.323986,18.489194,3.247698,0.118287,1.446377
2,trns,EN - Transportation,3.826507,HRV,5.958179,2015,5.95818,-0.357772,4.544028,5.95818,0.217008,1.939918
3,agrc,AG - Crops:CH4,0.214505,HRV,0.000227,2015,0.000228,941.29607,0.045915,0.000228,8e-06,0.000227
4,agrc,AG - Crops:CO2,0.100414,HRV,0.095857,2015,0.095858,0.047525,2.1e-05,0.095858,0.003491,0.091537


In [18]:
# Show the whole frame rather than just the head.
og_report

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
0,inen,EN - Manufacturing/Construction,4.069753,HRV,2.15734,2015,2.157341,0.886467,3.657318,2.157341,0.078574,1.14973
1,scoe,EN - Building,7.547604,HRV,3.247697,2015,3.247698,1.323986,18.48919,3.247698,0.118287,1.446377
2,trns,EN - Transportation,3.826507,HRV,5.958179,2015,5.95818,-0.357772,4.544028,5.95818,0.217008,1.939918
3,agrc,AG - Crops:CH4,0.214505,HRV,0.000227,2015,0.000228,941.29607,0.04591488,0.000228,8e-06,0.000227
4,agrc,AG - Crops:CO2,0.100414,HRV,0.095857,2015,0.095858,0.047525,2.075444e-05,0.095858,0.003491,0.091537
5,agrc,AG - Crops:N2O,0.247102,HRV,1.038055,2015,1.038056,-0.761957,0.6256084,1.038056,0.037808,0.711996
16,frst,LULUCF - Forest Land:CO2,-1.100202,HRV,-6.547104,2015,-6.547103,-0.831956,29.66873,6.547105,0.238458,2.021164
17,ippu,IN - Industrial Processes:CH4,0.034023,HRV,0.001673,2015,0.001674,19.323149,0.001046427,0.001674,6.1e-05,0.001672
18,ippu,IN - Industrial Processes:CO2,7.361243,HRV,2.415891,2015,2.415892,2.047009,24.4565,2.415892,0.087991,1.228438
19,ippu,IN - Industrial Processes:HFC,0.609568,HRV,0.514149,2015,0.51415,0.185584,0.009104622,0.51415,0.018726,0.414854


In [19]:
# Put the rows in (subsector, edgar_class) order, record each row's position as
# numeric_id, and keep only the columns used in the comparison.
og_columns = ['numeric_id', 'subsector', 'edgar_class', 'rel_error', 'squared_diff']
og_report = (
    og_report
    .sort_values(by=['subsector', 'edgar_class'])
    .reset_index(drop=True)
    .assign(numeric_id=lambda frame: frame.index)
    .loc[:, og_columns]
)
og_report

Unnamed: 0,numeric_id,subsector,edgar_class,rel_error,squared_diff
0,0,agrc,AG - Crops:CH4,941.29607,0.04591488
1,1,agrc,AG - Crops:CO2,0.047525,2.075444e-05
2,2,agrc,AG - Crops:N2O,-0.761957,0.6256084
3,3,frst,LULUCF - Forest Land:CO2,-0.831956,29.66873
4,4,inen,EN - Manufacturing/Construction,0.886467,3.657318
5,5,ippu,IN - Industrial Processes:CH4,19.323149,0.001046427
6,6,ippu,IN - Industrial Processes:CO2,2.047009,24.4565
7,7,ippu,IN - Industrial Processes:HFC,0.185584,0.009104622
8,8,ippu,IN - Industrial Processes:N2O,1.698437,2.582262
9,9,ippu,IN - Industrial Processes:PFC,7.248265,5.253735e-11


In [20]:
# Read the detailed diff report saved for the selected run.
opt_report_file_name = f'best_detailed_diff_report_{run_id}.csv'
opt_detailed_report_path = os.path.join(OUTPUT_DIR_PATH, run_id, opt_report_file_name)
# Alternative source kept for local testing:
# opt_detailed_report_path = os.path.join(DUMMY_DIR_PATH, 'sectoral_emission_report_dummy.csv')
opt_report = pd.read_csv(opt_detailed_report_path)
opt_report.head()

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
0,agrc,AG - Crops:CH4,0.128703,HRV,0.000227,2015,0.000228,564.377642,0.01650595,0.000228,8e-06,0.000227
1,agrc,AG - Crops:CO2,0.095757,HRV,0.095857,2015,0.095858,-0.00105,1.013865e-08,0.095858,0.003491,0.091537
2,agrc,AG - Crops:N2O,0.465428,HRV,1.038055,2015,1.038056,-0.551635,0.3279026,1.038056,0.037808,0.711996


In [21]:
# Show the whole frame rather than just the head.
opt_report

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
0,agrc,AG - Crops:CH4,0.128703,HRV,0.000227,2015,0.000228,564.377642,0.01650595,0.000228,8e-06,0.000227
1,agrc,AG - Crops:CO2,0.095757,HRV,0.095857,2015,0.095858,-0.00105,1.013865e-08,0.095858,0.003491,0.091537
2,agrc,AG - Crops:N2O,0.465428,HRV,1.038055,2015,1.038056,-0.551635,0.3279026,1.038056,0.037808,0.711996


In [22]:
# Same preparation as for the og report: order by (subsector, edgar_class),
# record each row's position as numeric_id, and keep the comparison columns
# plus the EDGAR value and its normalised weight.
opt_columns = [
    'numeric_id',
    'subsector',
    'edgar_class',
    'edgar_emission_epsilon',
    'norm_weight',
    'rel_error',
    'squared_diff',
]
opt_report = (
    opt_report
    .sort_values(by=['subsector', 'edgar_class'])
    .reset_index(drop=True)
    .assign(numeric_id=lambda frame: frame.index)
    .loc[:, opt_columns]
)
opt_report

Unnamed: 0,numeric_id,subsector,edgar_class,edgar_emission_epsilon,norm_weight,rel_error,squared_diff
0,0,agrc,AG - Crops:CH4,0.000228,8e-06,564.377642,0.01650595
1,1,agrc,AG - Crops:CO2,0.095858,0.003491,-0.00105,1.013865e-08
2,2,agrc,AG - Crops:N2O,1.038056,0.037808,-0.551635,0.3279026


In [23]:
# Join the original (og) and optimised (opt) reports on their natural key.
#
# numeric_id is only the row position each report got after sorting, so it is
# not a safe join key: as soon as one report contains a (subsector, edgar_class)
# row the other lacks, every later position shifts and an inner join that
# includes numeric_id silently drops rows that really do match.
# The opt-side numeric_id is removed before merging so the result keeps a single
# numeric_id column (the og one), exactly as before.
merge_keys = ['subsector', 'edgar_class']
merged_df = pd.merge(
    og_report,
    opt_report.drop(columns='numeric_id', errors='ignore'),
    how='inner',
    on=merge_keys,
    # Name the overlapping error columns directly instead of renaming _x/_y afterwards.
    suffixes=('_og', '_opt'),
    # Each key must appear at most once per report; raises pandas MergeError
    # otherwise instead of silently multiplying rows.
    validate='one_to_one',
)

# Remaining opt-only columns get their report-facing names.
new_col_names = {
    'edgar_emission_epsilon': 'edgar_emission_value',
    'norm_weight': 'emission_contribution_per'
}
merged_df = merged_df.rename(columns=new_col_names)

# Weight each relative error by the class's normalised weight.
merged_df['weighted_rel_error_opt'] = merged_df['rel_error_opt'] * merged_df['emission_contribution_per']
merged_df['weighted_rel_error_og'] = merged_df['rel_error_og'] * merged_df['emission_contribution_per']


In [24]:
# numeric_id is a helper column that is not part of the final report.
# errors='ignore' keeps this cell re-runnable: after the first run the column is
# already gone and a plain drop would raise KeyError.
merged_df = merged_df.drop(columns='numeric_id', errors='ignore')

# Optional rounding of the squared differences to 6 decimal places (disabled).
# merged_df['squared_diff_og'] = merged_df['squared_diff_og'].round(6)
# merged_df['squared_diff_opt'] = merged_df['squared_diff_opt'].round(6)

# Relative change of the squared difference from og to opt; negative values mean
# the opt squared difference is smaller. A zero squared_diff_og yields inf/NaN.
merged_df['relative_change_squared_diff'] = (
    (merged_df['squared_diff_opt'] - merged_df['squared_diff_og'])
    / merged_df['squared_diff_og']
)

# Final column order of the evaluation report.
final_columns = [
    'subsector',
    'edgar_class',
    'edgar_emission_value',
    'emission_contribution_per',
    'rel_error_og',
    'rel_error_opt',
    'weighted_rel_error_og',
    'weighted_rel_error_opt',
    'squared_diff_og',
    'squared_diff_opt',
    'relative_change_squared_diff',
]
merged_df = merged_df[final_columns]
merged_df

Unnamed: 0,subsector,edgar_class,edgar_emission_value,emission_contribution_per,rel_error_og,rel_error_opt,weighted_rel_error_og,weighted_rel_error_opt,squared_diff_og,squared_diff_opt,relative_change_squared_diff
0,agrc,AG - Crops:CH4,0.000228,8e-06,941.29607,564.377642,0.00777,0.004659,0.045915,0.01650595,-0.64051
1,agrc,AG - Crops:CO2,0.095858,0.003491,0.047525,-0.00105,0.000166,-4e-06,2.1e-05,1.013865e-08,-0.999511
2,agrc,AG - Crops:N2O,1.038056,0.037808,-0.761957,-0.551635,-0.028808,-0.020856,0.625608,0.3279026,-0.475866


In [25]:
# Total weighted relative error per report: the sum of the absolute weighted
# relative errors over all rows of merged_df.
total_w_rel_error_og, total_w_rel_error_opt = (
    merged_df[column].abs().sum()
    for column in ('weighted_rel_error_og', 'weighted_rel_error_opt')
)

# Single-row frame holding both totals side by side.
total_w_rel_error_df = pd.DataFrame([{
    'total_w_rel_error_og': total_w_rel_error_og,
    'total_w_rel_error_opt': total_w_rel_error_opt,
}])

total_w_rel_error_df

Unnamed: 0,total_w_rel_error_og,total_w_rel_error_opt
0,0.036744,0.025519


In [26]:
# Write both result frames into one workbook inside the run directory,
# one sheet per frame, without the index column.
opt_evaluation_path = os.path.join(OUTPUT_DIR_PATH, run_id, f'opt_evaluation_{run_id}.xlsx')

sheets = {
    'evaluation_report': merged_df,
    'accumulated_error': total_w_rel_error_df,
}
with pd.ExcelWriter(opt_evaluation_path) as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)