In [1]:
from utilities.utils import SSPModelForCalibration, HelperFunctions
from utilities.diff_reports import DiffReportUtils
from sisepuede.manager.sisepuede_examples import SISEPUEDEExamples
import pandas as pd
import warnings
import os
# Suppress every Python warning for the rest of the session. This also hides
# deprecation and dtype warnings raised by pandas or the project modules, so
# comment it out when debugging unexpected results.
warnings.filterwarnings("ignore")

In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the project code are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Initialize helper functions
# In the cells visible here, helper_functions is referenced only by the
# optional (commented-out) baseline-generation section below.
helper_functions = HelperFunctions()

In [4]:
# Project layout. The current working directory is taken to be the scripts
# directory, so the notebook must be launched from there.
SCRIPTS_DIR_PATH = os.getcwd()
ROOT_DIR_PATH = os.path.dirname(SCRIPTS_DIR_PATH)

# Folders that sit directly under the project root
DATA_DIR_PATH, OUTPUT_DIR_PATH = (
    os.path.join(ROOT_DIR_PATH, folder) for folder in ('data', 'output')
)

# Folders that sit under <scripts>/misc
MISC_DIR_PATH = os.path.join(SCRIPTS_DIR_PATH, 'misc')
DUMMY_DIR_PATH, SECTORAL_REPORT_MAPPING_DIR_PATH, SECTORAL_REPORTS_DIR_PATH = (
    os.path.join(MISC_DIR_PATH, folder)
    for folder in ('dummy', 'sectoral_report_mapping', 'sectoral_reports')
)

In [5]:
# Define parameters
# Country code handed to DiffReportUtils
iso_alpha_3 = 'UGA'
# Region name; used to build the folder and file names below and in later cells
region = 'uganda'
# Selects the "*_energy" baseline output file and is passed to DiffReportUtils
energy_model_flag = False
# Identifier of the calibration run; its reports live in output/<region>/<run_id>
run_id = '20250530015729'
# True: load the EDGAR emissions database file; False: load the per-region targets file
use_edgar_db_flag = False
# Crosswalk file looked up in misc/sectoral_report_mapping
ssp_edgar_cw_file_name = 'sisepuede_edgar_active_crosswalk.csv'

# Folder holding the artifacts of the selected run
RUN_DIR_PATH = os.path.join(OUTPUT_DIR_PATH, region, run_id)
# Input CSV for the optional baseline simulation. The name is derived from
# `region` so that switching region does not keep pointing at another
# country's input file.
INPUT_DATA_PATH = os.path.join(DATA_DIR_PATH, f"input_ssp_{region}_250522.csv")

## Create the baseline output file used to generate the baseline report

### Skip this section if the baseline output file already exists.

In [6]:
# input_df = pd.read_csv(INPUT_DATA_PATH)
# input_df.head()

In [7]:
# # Load input dataset
# examples = SISEPUEDEExamples()
# cr = examples("input_data_frame")

# # Add missing columns and reformat the input data
# df_input = input_df.rename(columns={'period': 'time_period'})
# df_input = helper_functions.add_missing_cols(cr, df_input.copy())
# df_input = df_input.drop(columns='iso_code3', errors='ignore')

# # Subset df_input to the input rows amount
# df_input = df_input.iloc[:10]

# df_input

In [8]:
# # Initialize the SSP model
# ssp_model = SSPModelForCalibration(energy_model_flag=False)

In [9]:
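# # NOTE(review): the reformatted frame prepared above is named df_input; confirm
# # whether it, rather than the raw input_df, should be passed to the simulation.
# output_df = ssp_model.run_ssp_simulation(input_df)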

In [10]:
# output_df.head()

In [11]:
# if energy_model_flag:
#     output_df.to_csv(f'misc/dummy/ssp_{region}_output_dummy_energy.csv', index=False)

# else:
#     output_df.to_csv(f'misc/dummy/ssp_{region}_output_dummy.csv', index=False)

## Create the baseline and optimized reports and compare them to see how much the calibration improves each subsector

In [12]:
# Pick the baseline simulation output that matches the energy-model setting
output_file_name = (
    f'ssp_{region}_output_dummy_energy.csv'
    if energy_model_flag
    else f'ssp_{region}_output_dummy.csv'
)
output_df_path = os.path.join(DUMMY_DIR_PATH, output_file_name)

# Read it and preview the first rows
output_df = pd.read_csv(output_df_path)
output_df.head()

Unnamed: 0,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,area_agrc_crops_other_woody_perennial,area_agrc_crops_pulses,...,yield_agrc_fruits_tonne,yield_agrc_herbs_and_other_perennial_crops_tonne,yield_agrc_nuts_tonne,yield_agrc_other_annual_tonne,yield_agrc_other_woody_perennial_tonne,yield_agrc_pulses_tonne,yield_agrc_rice_tonne,yield_agrc_sugar_cane_tonne,yield_agrc_tubers_tonne,yield_agrc_vegetables_and_vines_tonne
0,0,1110674.0,2470537.0,512618.657191,1336131.0,1724155.0,940987.489485,1244761.0,830632.082001,571949.517471,...,9851960.0,3660909.0,717057.782374,1223273.0,305174.162968,448689.809992,1088927.0,59049290.0,2194701.0,1187277.0
1,1,1099568.0,2445834.0,507493.032449,1322771.0,1706915.0,931578.645912,1232315.0,822326.671554,566230.649153,...,9536923.0,3706026.0,667169.430627,1213181.0,302122.75581,455151.199769,1105636.0,58965350.0,2227650.0,1195600.0
2,2,1108967.0,2466741.0,511831.054023,1334078.0,1721506.0,939541.727186,1242849.0,829355.873165,571070.756532,...,9588445.0,3678309.0,724448.535785,1242594.0,304705.28394,461647.924279,1147618.0,59318580.0,2564269.0,1186876.0
3,3,1097881.0,2442080.0,506714.112877,1320741.0,1704295.0,930148.82364,1230424.0,821064.53336,565361.576856,...,9473999.0,3637027.0,692803.554751,1773441.0,301659.046334,454988.571853,1156682.0,58362010.0,2735081.0,1177157.0
4,4,1107827.0,2464205.0,511304.810668,1332707.0,1719736.0,938575.729546,1241571.0,828503.164027,570483.605385,...,9621189.0,3735378.0,429980.347008,1897525.0,304391.998669,456280.825701,1170637.0,59613260.0,2366263.0,1194025.0


In [13]:
# output_df[[col for col in output_df.columns if "subsector" in col and "frst" in col]]

In [14]:
# Location of the crosswalk file inside misc/sectoral_report_mapping
edgar_ssp_cw_path = os.path.join(SECTORAL_REPORT_MAPPING_DIR_PATH, ssp_edgar_cw_file_name)

# Report utility configured for the selected country and flags
dru = DiffReportUtils(
    iso_alpha_3,
    edgar_ssp_cw_path,
    SECTORAL_REPORTS_DIR_PATH,
    energy_model_flag,
    use_edgar_db_flag=use_edgar_db_flag,
)

In [15]:
# Load the EDGAR reference emissions that the SSP output is compared against.
if use_edgar_db_flag:
    # EDGAR emissions database file, processed by the DiffReportUtils ETL step
    edgar_emission_db_path = os.path.join(SECTORAL_REPORT_MAPPING_DIR_PATH, 'CSC-GHG_emissions-April2024_to_calibrate.csv')
    edgar_emission_df = dru.edgar_emission_db_etl(edgar_emission_db_path)
else:
    # Per-region targets file. The name follows `region`, like the other
    # region-specific files in this notebook, instead of hard-coding one country.
    edgar_emission_targets_path = os.path.join(SECTORAL_REPORT_MAPPING_DIR_PATH, f'emission_targets_{region}.csv')
    edgar_emission_df = dru.get_edgar_region_df(edgar_emission_targets_path)

edgar_emission_df.head()

Unnamed: 0,iso_alpha_3,subsector,edgar_class,edgar_emission,year
0,UGA,lvst,AG - Livestock:CH4,9.972244,2015
1,UGA,lsmm,AG - Livestock:CH4,9.972244,2015
2,UGA,lsmm,AG - Livestock:N2O,0.299059,2015
3,UGA,agrc,AG - Crops:CO2,0.0,2015
4,UGA,agrc,AG - Crops:CH4,0.576887,2015


In [16]:
# Build the baseline reports from the EDGAR reference values and the SSP output
report_dict = dru.run_report_generator(edgar_emission_df, output_df)

# Unpack the three entries of the returned dictionary into their own names
sectoral_emission_report, subsector_emission_report, model_failed_flag = (
    report_dict[key]
    for key in (
        'sectoral_emission_report',
        'subsector_emission_report',
        'model_failed_flag',
    )
)

In [17]:
# Work on a copy so the reshaping in the following cells leaves
# sectoral_emission_report untouched. og_report is the baseline-side report
# that is later merged with the optimized report.
og_report = sectoral_emission_report.copy()
og_report.head()

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
0,inen,EN - Manufacturing/Construction,26.088885,UGA,1.663096,2015,1.663097,14.68693,596.619146,1.663097,0.013778,0.979489
1,scoe,EN - Building,7.794163,UGA,3.489266,2015,3.489267,1.233753,18.532123,3.489267,0.028907,1.501689
2,trns,EN - Transportation,1.835655,UGA,3.903503,2015,3.903504,-0.529742,4.276003,3.903504,0.032339,1.58995
3,agrc,AG - Crops:CH4,1.771977,UGA,0.576887,2015,0.576888,2.071615,1.428239,0.576888,0.004779,0.455452
4,agrc,AG - Crops:CO2,0.864601,UGA,0.0,2015,1e-06,864599.899911,0.747533,1e-06,0.0,0.0


In [18]:
# Display the baseline report itself rather than only its head
og_report

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
0,inen,EN - Manufacturing/Construction,26.088885,UGA,1.663096,2015,1.663097,14.68693,596.6191,1.663097,0.013778,0.979489
1,scoe,EN - Building,7.794163,UGA,3.489266,2015,3.489267,1.233753,18.53212,3.489267,0.028907,1.501689
2,trns,EN - Transportation,1.835655,UGA,3.903503,2015,3.903504,-0.5297419,4.276003,3.903504,0.032339,1.58995
3,agrc,AG - Crops:CH4,1.771977,UGA,0.576887,2015,0.576888,2.071615,1.428239,0.576888,0.004779,0.455452
4,agrc,AG - Crops:CO2,0.864601,UGA,0.0,2015,1e-06,864599.9,0.747533,1e-06,0.0,0.0
5,agrc,AG - Crops:N2O,0.56528,UGA,8.092843,2015,8.092844,-0.9301507,56.66422,8.092844,0.067045,2.207488
15,frst,LULUCF - Forest Land:CH4,0.104575,UGA,0.0,2015,1e-06,104574.4,0.01093581,1e-06,0.0,0.0
16,frst,LULUCF - Forest Land:CO2,-6.028699,UGA,-9.61,2015,-9.609999,-0.3726639,12.82571,9.610001,0.079614,2.361797
17,ippu,IN - Industrial Processes:CH4,0.006068,UGA,0.0,2015,1e-06,6067.313,3.681229e-05,1e-06,0.0,0.0
18,ippu,IN - Industrial Processes:CO2,10.386787,UGA,1.765612,2015,1.765613,4.882823,74.32464,1.765613,0.014627,1.017262


In [19]:
# Put the baseline report into a deterministic order, tag every row with its
# position (numeric_id, used when merging with the optimized report) and keep
# only the columns needed for the comparison.
og_report = (
    og_report
    .sort_values(by=['subsector', 'edgar_class'])
    .reset_index(drop=True)
    .assign(numeric_id=lambda report: report.index)
    .loc[:, ['numeric_id', 'subsector', 'ssp_emission', 'edgar_class', 'rel_error']]
)
og_report

Unnamed: 0,numeric_id,subsector,ssp_emission,edgar_class,rel_error
0,0,agrc,1.771977,AG - Crops:CH4,2.071615
1,1,agrc,0.864601,AG - Crops:CO2,864599.9
2,2,agrc,0.56528,AG - Crops:N2O,-0.9301507
3,3,frst,0.104575,LULUCF - Forest Land:CH4,104574.4
4,4,frst,-6.028699,LULUCF - Forest Land:CO2,-0.3726639
5,5,inen,26.088885,EN - Manufacturing/Construction,14.68693
6,6,ippu,0.006068,IN - Industrial Processes:CH4,6067.313
7,7,ippu,10.386787,IN - Industrial Processes:CO2,4.882823
8,8,ippu,0.0,IN - Industrial Processes:HFC,-1.0
9,9,ippu,0.528751,IN - Industrial Processes:N2O,-0.01095647


In [20]:
# Detailed diff report of the selected calibration run. RUN_DIR_PATH is the
# run folder defined with the parameters (output/<region>/<run_id>), reused
# here so the folder is composed in one place only.
opt_detailed_report_path = os.path.join(RUN_DIR_PATH, f'best_detailed_diff_report_{run_id}.csv')
# opt_detailed_report_path = os.path.join(DUMMY_DIR_PATH, 'sectoral_emission_report_dummy.csv')
opt_report = pd.read_csv(opt_detailed_report_path)
opt_report.head()

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
0,inen,EN - Manufacturing/Construction,13.581495,UGA,1.663096,2022,1.663097,7.166388,142.048214,1.663097,0.013778,0.979489
1,scoe,EN - Building,5.162666,UGA,3.489266,2022,3.489267,0.479585,2.800264,3.489267,0.028907,1.501689
2,trns,EN - Transportation,4.529091,UGA,3.903503,2022,3.903504,0.160263,0.391358,3.903504,0.032339,1.58995
3,agrc,AG - Crops:CH4,1.446536,UGA,0.576887,2022,0.576888,1.507483,0.756288,0.576888,0.004779,0.455452
4,agrc,AG - Crops:CO2,0.927151,UGA,0.0,2022,1e-06,927150.151068,0.859607,1e-06,0.0,0.0


In [21]:
# Order the optimized report by subsector and edgar_class, tag every row with
# its position after sorting (numeric_id, used when merging with the baseline
# report) and keep only the columns needed for the comparison.
opt_report = (
    opt_report
    .sort_values(by=['subsector', 'edgar_class'])
    .reset_index(drop=True)
    .assign(numeric_id=lambda report: report.index)
    .loc[:, ['numeric_id', 'subsector', 'edgar_class', 'ssp_emission', 'edgar_emission_epsilon', 'norm_weight', 'rel_error']]
)
opt_report

Unnamed: 0,numeric_id,subsector,edgar_class,ssp_emission,edgar_emission_epsilon,norm_weight,rel_error
0,0,agrc,AG - Crops:CH4,1.446536,0.576888,0.004779,1.507483
1,1,agrc,AG - Crops:CO2,0.9271512,1e-06,0.0,927150.2
2,2,agrc,AG - Crops:N2O,0.9012919,8.092844,0.067045,-0.888631
3,3,frst,LULUCF - Forest Land:CH4,0.1297134,1e-06,0.0,129712.4
4,4,frst,LULUCF - Forest Land:CO2,-9.520294,-9.609999,0.079614,-0.009334522
5,5,inen,EN - Manufacturing/Construction,13.5815,1.663097,0.013778,7.166388
6,6,ippu,IN - Industrial Processes:CH4,0.006280294,1e-06,0.0,6279.294
7,7,ippu,IN - Industrial Processes:CO2,1.050635,1.765613,0.014627,-0.404946
8,8,ippu,IN - Industrial Processes:HFC,0.0,1e-06,0.0,-1.0
9,9,ippu,IN - Industrial Processes:N2O,0.6114137,0.534608,0.004429,0.1436665


In [22]:
# Pair each baseline row with its optimized counterpart.
# The join key is the (subsector, edgar_class) pair. numeric_id is only a
# positional counter assigned after sorting each report, so it is kept out of
# the key: if one report had an extra or missing row, every later numeric_id
# would shift and the inner join would silently drop or mis-pair rows.
# validate='one_to_one' fails loudly if a key is duplicated in either report.
# NOTE(review): `year` is not part of the comparison; the previews above show
# 2015 for the baseline report and 2022 for the optimized one - confirm both
# reports are evaluated for the intended year.
merge_keys = ['subsector', 'edgar_class']
merged_df = pd.merge(
    og_report,
    opt_report.drop(columns='numeric_id', errors='ignore'),
    how='inner',
    on=merge_keys,
    validate='one_to_one',
)

# Make rows that exist in only one of the reports visible instead of losing them quietly
n_unmatched = (len(og_report) - len(merged_df)) + (len(opt_report) - len(merged_df))
if n_unmatched:
    print(f'{n_unmatched} report row(s) had no counterpart in the other report and were left out of the comparison')

# "_x" columns come from the baseline report, "_y" columns from the optimized one
new_col_names = {
    'rel_error_x': 'rel_error_og',
    'rel_error_y': 'rel_error_opt',
    'edgar_emission_epsilon': 'edgar_emission_value',
    'norm_weight': 'emission_contribution_per',
    'ssp_emission_x': 'ssp_emission_value_og',
    'ssp_emission_y': 'ssp_emission_value_opt'
}
merged_df = merged_df.rename(columns=new_col_names)

# Add weighted_rel_error columns: relative error scaled by the row's normalized weight
merged_df['weighted_rel_error_opt'] = merged_df['rel_error_opt'] * merged_df['emission_contribution_per']
merged_df['weighted_rel_error_og'] = merged_df['rel_error_og'] * merged_df['emission_contribution_per']

merged_df.head()

Unnamed: 0,numeric_id,subsector,ssp_emission_value_og,edgar_class,rel_error_og,ssp_emission_value_opt,edgar_emission_value,emission_contribution_per,rel_error_opt,weighted_rel_error_opt,weighted_rel_error_og
0,0,agrc,1.771977,AG - Crops:CH4,2.071615,1.446536,0.576888,0.004779,1.507483,0.007205,0.009901
1,1,agrc,0.864601,AG - Crops:CO2,864599.899911,0.927151,1e-06,0.0,927150.151068,0.0,0.0
2,2,agrc,0.56528,AG - Crops:N2O,-0.930151,0.901292,8.092844,0.067045,-0.888631,-0.059578,-0.062362
3,3,frst,0.104575,LULUCF - Forest Land:CH4,104574.439057,0.129713,1e-06,0.0,129712.353173,0.0,0.0
4,4,frst,-6.028699,LULUCF - Forest Land:CO2,-0.372664,-9.520294,-9.609999,0.079614,-0.009335,-0.000743,-0.029669


In [23]:
# Drop the numeric_id helper column before export.
# errors='ignore' keeps this cell re-runnable: on a second run the column is
# already gone and a plain drop would raise KeyError.
merged_df = merged_df.drop(columns='numeric_id', errors='ignore')

# Final column order of the evaluation report: identifiers, emission values,
# weight, then baseline/optimized errors side by side.
report_columns = [
    'subsector',
    'edgar_class',
    'ssp_emission_value_og',
    'ssp_emission_value_opt',
    'edgar_emission_value',
    'emission_contribution_per',
    'rel_error_og',
    'rel_error_opt',
    'weighted_rel_error_og',
    'weighted_rel_error_opt',
]
merged_df = merged_df[report_columns]
merged_df

Unnamed: 0,subsector,edgar_class,ssp_emission_value_og,ssp_emission_value_opt,edgar_emission_value,emission_contribution_per,rel_error_og,rel_error_opt,weighted_rel_error_og,weighted_rel_error_opt
0,agrc,AG - Crops:CH4,1.771977,1.446536,0.576888,0.004779,2.071615,1.507483,0.009901,0.007205
1,agrc,AG - Crops:CO2,0.864601,0.9271512,1e-06,0.0,864599.9,927150.2,0.0,0.0
2,agrc,AG - Crops:N2O,0.56528,0.9012919,8.092844,0.067045,-0.9301507,-0.888631,-0.062362,-0.059578
3,frst,LULUCF - Forest Land:CH4,0.104575,0.1297134,1e-06,0.0,104574.4,129712.4,0.0,0.0
4,frst,LULUCF - Forest Land:CO2,-6.028699,-9.520294,-9.609999,0.079614,-0.3726639,-0.009334522,-0.029669,-0.000743
5,inen,EN - Manufacturing/Construction,26.088885,13.5815,1.663097,0.013778,14.68693,7.166388,0.202355,0.098738
6,ippu,IN - Industrial Processes:CH4,0.006068,0.006280294,1e-06,0.0,6067.313,6279.294,0.0,0.0
7,ippu,IN - Industrial Processes:CO2,10.386787,1.050635,1.765613,0.014627,4.882823,-0.404946,0.071422,-0.005923
8,ippu,IN - Industrial Processes:HFC,0.0,0.0,1e-06,0.0,-1.0,-1.0,-0.0,-0.0
9,ippu,IN - Industrial Processes:N2O,0.528751,0.6114137,0.534608,0.004429,-0.01095647,0.1436665,-4.9e-05,0.000636


In [24]:
# Overall score per report: the sum of |weighted relative error| over all rows
total_w_rel_error_og = merged_df['weighted_rel_error_og'].abs().sum()
total_w_rel_error_opt = merged_df['weighted_rel_error_opt'].abs().sum()

# One-row frame holding both totals, baseline first
total_w_rel_error_df = pd.DataFrame(
    [[total_w_rel_error_og, total_w_rel_error_opt]],
    columns=['total_w_rel_error_og', 'total_w_rel_error_opt'],
)

total_w_rel_error_df

Unnamed: 0,total_w_rel_error_og,total_w_rel_error_opt
0,1.42717,0.740517


In [25]:
# Write the evaluation workbook into the run folder. RUN_DIR_PATH is the
# folder defined with the parameters (output/<region>/<run_id>), reused here
# so the folder is composed in one place only.
opt_evaluation_path = os.path.join(RUN_DIR_PATH, f'opt_evaluation_{run_id}.xlsx')

# One sheet with the per-row comparison, one with the accumulated error totals
with pd.ExcelWriter(opt_evaluation_path) as writer:
    merged_df.to_excel(writer, sheet_name='evaluation_report', index=False)
    total_w_rel_error_df.to_excel(writer, sheet_name='accumulated_error', index=False)