In [1]:
from utilities.utils import SSPModelForCalibration, HelperFunctions
from utilities.diff_reports import DiffReportUtils
from sisepuede.manager.sisepuede_examples import SISEPUEDEExamples
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Initialize helper functions (provides `add_missing_cols`, used when preparing the model input)
helper_functions = HelperFunctions()

In [4]:
# Load the raw input data and preview the first rows
input_df = pd.read_csv('../data/real_data.csv')
input_df.head()

Unnamed: 0,region,iso_code3,period,area_gnrl_country_ha,avgload_trns_freight_tonne_per_vehicle_aviation,avgload_trns_freight_tonne_per_vehicle_rail_freight,avgload_trns_freight_tonne_per_vehicle_road_heavy_freight,avgload_trns_freight_tonne_per_vehicle_water_borne,avgmass_lvst_animal_buffalo_kg,avgmass_lvst_animal_cattle_dairy_kg,...,yf_agrc_fruits_tonne_ha,yf_agrc_herbs_and_other_perennial_crops_tonne_ha,yf_agrc_nuts_tonne_ha,yf_agrc_other_annual_tonne_ha,yf_agrc_other_woody_perennial_tonne_ha,yf_agrc_pulses_tonne_ha,yf_agrc_rice_tonne_ha,yf_agrc_sugar_cane_tonne_ha,yf_agrc_tubers_tonne_ha,yf_agrc_vegetables_and_vines_tonne_ha
0,croatia,HRV,0,8807000,70,2923,31.751466,6468,315,508,...,5.546667,28.8742,0.602367,1.930675,0.5205,2.638183,0,0,35.7648,21.067738
1,croatia,HRV,1,8807000,70,2923,31.751466,6468,315,508,...,5.555383,29.6558,0.4799,2.30405,0.7342,3.256933,0,0,47.5766,20.412554
2,croatia,HRV,2,8807000,70,2923,31.751466,6468,315,508,...,4.304906,30.039,0.3604,2.07595,0.8615,3.199083,0,0,41.0978,22.329531
3,croatia,HRV,3,8807000,70,2923,31.751466,6468,315,508,...,6.272229,30.039,0.1603,2.662133,0.9852,3.26668,0,0,37.42445,21.5058
4,croatia,HRV,4,8807000,70,2923,31.751466,6468,315,508,...,5.878853,30.039,0.196325,2.543567,0.9045,4.06804,0,0,39.8149,21.874247


In [5]:
# Load the example input data frame
examples = SISEPUEDEExamples()
cr = examples("input_data_frame")

# Reformat the input data in one pass:
#   1. rename `period` to `time_period`
#   2. add the missing columns (helper receives the example frame and a copy of the input)
#   3. drop the ISO code column
#   4. keep only the first 10 rows
df_input = (
    helper_functions
    .add_missing_cols(cr, input_df.rename(columns={'period': 'time_period'}).copy())
    .drop(columns='iso_code3')
    .iloc[:10]
)

df_input

Unnamed: 0,region,time_period,area_gnrl_country_ha,avgload_trns_freight_tonne_per_vehicle_aviation,avgload_trns_freight_tonne_per_vehicle_rail_freight,avgload_trns_freight_tonne_per_vehicle_road_heavy_freight,avgload_trns_freight_tonne_per_vehicle_water_borne,avgmass_lvst_animal_buffalo_kg,avgmass_lvst_animal_cattle_dairy_kg,avgmass_lvst_animal_cattle_nondairy_kg,...,nemomod_entc_input_activity_ratio_fuel_production_fp_hydrogen_electrolysis_water,nemomod_entc_input_activity_ratio_fuel_production_fp_hydrogen_reformation_ccs_electricity,energydensity_gravimetric_enfu_gj_per_tonne_fuel_ammonia,energydensity_gravimetric_enfu_gj_per_tonne_fuel_water,frac_trns_fuelmix_water_borne_ammonia,nemomod_entc_output_activity_ratio_fuel_production_fp_ammonia_production_ammonia,nemomod_entc_output_activity_ratio_fuel_production_fp_hydrogen_reformation_ccs_hydrogen,nemomod_entc_frac_min_share_production_fp_hydrogen_reformation_ccs,nemomod_entc_input_activity_ratio_fuel_production_fp_hydrogen_reformation_ccs_natural_gas,nemomod_entc_input_activity_ratio_fuel_production_fp_hydrogen_reformation_ccs_oil
0,croatia,0,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
1,croatia,1,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
2,croatia,2,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
3,croatia,3,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
4,croatia,4,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
5,croatia,5,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
6,croatia,6,8807000,70,4082,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
7,croatia,7,8807000,70,4082,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
8,croatia,8,8807000,70,4082,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
9,croatia,9,8807000,70,4082,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0


In [6]:
# Initialize the SSP model.
# NOTE(review): energy_model_flag=False is also passed to DiffReportUtils further down;
# presumably the two flags must agree — confirm before changing either one.
ssp_model = SSPModelForCalibration(energy_model_flag=False)

2025-02-20 17:00:35,035 - INFO - Successfully initialized SISEPUEDEFileStructure.
2025-02-20 17:00:35,042 - INFO - 	Setting export engine to 'csv'.
2025-02-20 17:00:35,045 - INFO - Successfully instantiated table ANALYSIS_METADATA
2025-02-20 17:00:35,049 - INFO - Successfully instantiated table ATTRIBUTE_DESIGN
2025-02-20 17:00:35,053 - INFO - Successfully instantiated table ATTRIBUTE_LHC_SAMPLES_EXOGENOUS_UNCERTAINTIES
2025-02-20 17:00:35,058 - INFO - Successfully instantiated table ATTRIBUTE_LHC_SAMPLES_LEVER_EFFECTS
2025-02-20 17:00:35,061 - INFO - Successfully instantiated table ATTRIBUTE_PRIMARY
2025-02-20 17:00:35,064 - INFO - Successfully instantiated table ATTRIBUTE_STRATEGY
2025-02-20 17:00:35,067 - INFO - Successfully instantiated table MODEL_BASE_INPUT_DATABASE
2025-02-20 17:00:35,070 - INFO - Successfully instantiated table MODEL_INPUT
2025-02-20 17:00:35,074 - INFO - Successfully instantiated table MODEL_OUTPUT
2025-02-20 17:00:35,075 - INFO - SISEPUEDEOutputDatabase succe

In [7]:
# Run the SSP simulation on the prepared input frame.
# NOTE(review): the log recorded for this cell shows an ERROR in the Fugitive Emissions step
# ('NoneType' object has no attribute 'to_numpy') — confirm the output is complete enough for the report.
output_df = ssp_model.run_ssp_simulation(df_input)

2025-02-20 17:01:19,951 - INFO - Running AFOLU model
2025-02-20 17:01:20,100 - INFO - AFOLU model run successfully completed
2025-02-20 17:01:20,101 - INFO - Running CircularEconomy model
2025-02-20 17:01:20,157 - INFO - CircularEconomy model run successfully completed
2025-02-20 17:01:20,158 - INFO - Running IPPU model
2025-02-20 17:01:20,251 - INFO - IPPU model run successfully completed
2025-02-20 17:01:20,252 - INFO - Running Energy model (EnergyConsumption without Fugitive Emissions)
2025-02-20 17:01:20,272 - DEBUG - Missing elasticity information found in 'project_energy_consumption_by_fuel_from_effvars': using specified future demands.
2025-02-20 17:01:20,375 - INFO - EnergyConsumption without Fugitive Emissions model run successfully completed
2025-02-20 17:01:20,376 - INFO - Running Energy (Fugitive Emissions)
2025-02-20 17:01:20,401 - ERROR - Error running Fugitive Emissions from Energy model: 'NoneType' object has no attribute 'to_numpy'
2025-02-20 17:01:20,402 - INFO - Appe

In [8]:
# Display the simulation output
output_df

Unnamed: 0,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,area_agrc_crops_other_woody_perennial,area_agrc_crops_pulses,...,yield_agrc_fruits_tonne,yield_agrc_herbs_and_other_perennial_crops_tonne,yield_agrc_nuts_tonne,yield_agrc_other_annual_tonne,yield_agrc_other_woody_perennial_tonne,yield_agrc_pulses_tonne,yield_agrc_rice_tonne,yield_agrc_sugar_cane_tonne,yield_agrc_tubers_tonne,yield_agrc_vegetables_and_vines_tonne
0,0,128992.475401,286925.570284,59534.988647,155176.845408,200241.524314,109285.29166,144565.284932,96468.731604,66425.612333,...,860714.235864,5781814.0,65829.816853,279108.581486,50211.9748,175242.943363,0.0,0.0,2050396.0,725849.288265
1,1,128305.935425,285398.458926,59218.124042,154350.943684,199175.773688,108703.639735,143795.86139,95955.293587,66072.073584,...,857478.660027,5906717.0,52166.876709,331312.854435,70450.376552,215192.338859,2736.0,2449.0,2713049.0,699533.070417
2,2,127621.990578,283877.120067,58902.45719,153528.163879,198114.051612,108124.186462,143029.346278,95443.796372,65719.871216,...,660924.245615,5951148.0,38967.956801,296921.771405,82224.830575,210243.344677,2736.0,2449.0,2331104.0,761148.281366
3,3,126940.639041,282361.549661,58587.98725,152708.503803,197056.355263,107546.930298,142265.737557,94934.238599,65369.004292,...,957822.768981,5919376.0,17239.772927,378730.362142,93529.211867,213539.618942,2736.0,2449.0,2111415.0,729155.981592
4,4,126261.879236,280851.744198,58274.713494,151891.96156,196002.68219,106971.869908,141505.03346,94426.619087,65019.472,...,892950.504956,5887725.0,21001.25236,359927.486275,85408.876964,264501.812874,2736.0,2449.0,2234268.0,737682.556359
5,5,125585.709808,279347.700663,57962.635296,151078.535517,194953.03029,106399.004143,140747.232467,93920.936822,64671.273641,...,888168.492874,5856194.0,20888.784488,357999.968928,84951.487356,263085.328021,2736.0,2449.0,2222303.0,733732.049721
6,6,124912.129604,277849.416491,57651.752125,150268.224289,193907.397772,105828.332026,139992.333285,93417.190943,64324.408621,...,883404.792326,5824784.0,20776.747285,356079.832533,84495.849208,261674.267246,2736.0,2449.0,2210384.0,729796.670572
7,7,124241.137657,276356.889531,57342.063534,149461.026711,192865.783136,105259.852737,139240.334831,92915.380726,63978.876443,...,878659.396471,5793495.0,20665.140589,354167.074332,84041.961867,260268.628525,2736.0,2449.0,2198510.0,725876.413261
8,8,123572.733166,274870.118005,57033.569154,148656.941822,191828.18514,104693.565599,138491.236209,92415.505573,63634.676695,...,873932.299657,5762327.0,20553.964266,352261.692047,83589.824791,258868.410181,2736.0,2449.0,2186683.0,721971.273119
9,9,122906.915486,273389.100472,56726.268686,147855.968843,190794.602779,104129.470065,137745.036693,91917.565,63291.809043,...,869223.497306,5731279.0,20443.21821,350363.683832,83139.437543,257473.61086,2736.0,2449.0,2174901.0,718081.246363


In [9]:
# output_df.to_csv('misc/dummy/ssp_output_dummy.csv', index=False)

In [10]:
# Build the diff-report helper for 'HRV'; arguments are passed positionally in the order the constructor expects
dru = DiffReportUtils(
    'HRV',
    'misc/sectoral_report_mapping/edgar_ssp_cw.csv',
    'misc/sectoral_reports',
    energy_model_flag=False,
)

In [11]:
# Run the EDGAR emissions ETL on the calibration CSV and preview the result
edgar_emission_df = dru.edgar_emission_db_etl(
    'misc/sectoral_report_mapping/CSC-GHG_emissions-April2024_to_calibrate.csv'
)
edgar_emission_df.head()

Unnamed: 0,iso_alpha_3,edgar_class,edgar_emission,year
0,HRV,AG - Livestock:CH4,1.682916,2015
1,HRV,AG - Livestock:N2O,0.110193,2015
2,HRV,AG - Crops:CH4,0.000227,2015
3,HRV,AG - Crops:CO2,0.095857,2015
4,HRV,AG - Crops:N2O,1.038055,2015


In [12]:
# Generate the sectoral report from the EDGAR emissions and the simulation output.
# The result is read from `dru.sectoral_emission_report` in the next cell
# (presumably populated by this call — confirm in DiffReportUtils).
dru.run_report_generator(edgar_emission_df, output_df)

In [13]:
# Take a copy of the generated report so later reshaping does not touch the one held by `dru`
og_report = dru.sectoral_emission_report.copy()
og_report

Unnamed: 0,subsector,gas,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff
0,lvst,ch4,AG - Livestock:CH4,1.938988,HRV,0.841458,2015,0.841459,0.841459,0.025337,0.610558,1.304317,1.204570
1,lsmm,ch4,AG - Livestock:CH4,0.040259,HRV,0.841458,2015,0.841459,0.841459,0.025337,0.610558,-0.952156,0.641921
2,lsmm,n2o,AG - Livestock:N2O,0.165996,HRV,0.110193,2015,0.110194,0.110194,0.003318,0.104534,0.506401,0.003114
3,agrc,co2,AG - Crops:CO2,1.316953,HRV,0.095857,2015,0.095858,0.095858,0.002886,0.091537,12.738568,1.491074
4,agrc,ch4,AG - Crops:CH4,0.214505,HRV,0.000227,2015,0.000228,0.000228,0.000007,0.000227,941.296070,0.045915
...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,soil,co2,LULUCF - Organic Soil:CO2,0.110122,HRV,0.092259,2015,0.092260,0.092260,0.002778,0.088248,0.193601,0.000319
64,soil,n2o,LULUCF - Organic Soil:N2O,1.031985,,,2015,,,,,,
65,ccsq,ch4,CCSQ:CH4,0.000000,,,2015,,,,,,
66,ccsq,co2,CCSQ:CO2,0.000000,,,2015,,,,,,


In [None]:
# Keep only the comparison columns, keyed by a positional id running from 0 to n-1.
# Any `numeric_id` left over from a previous execution is dropped first, so re-running this
# cell cannot produce a duplicate `numeric_id` column (which would corrupt the later merge key).
og_report = (
    og_report
    .drop(columns='numeric_id', errors='ignore')
    .reset_index(drop=True)
    .rename_axis('numeric_id')
    .reset_index()
    .loc[:, ['numeric_id', 'subsector', 'edgar_class', 'diff', 'squared_diff']]
)

og_report

Unnamed: 0,numeric_id,subsector,edgar_class,diff,squared_diff
0,0,lvst,AG - Livestock:CH4,1.304317,1.20457
1,1,lsmm,AG - Livestock:CH4,-0.952156,0.641921
2,2,lsmm,AG - Livestock:N2O,0.506401,0.003113876
3,3,agrc,AG - Crops:CO2,12.738568,1.491074
4,4,agrc,AG - Crops:CH4,941.29607,0.04591488
5,5,agrc,AG - Crops:N2O,-0.761957,0.6256084
6,6,scoe,EN - Building:CO2,1.748488,22.73282
7,7,scoe,EN - Building:CH4,-0.954123,0.1580794
8,8,scoe,EN - Building:N2O,-0.676113,0.004955972
9,9,entc,EN - Electricity/Heat:CO2,,


In [15]:
# Load the best detailed diff report saved under output/pso/20250219230520 and preview it.
# NOTE(review): the run id is hard-coded in the path — update it when comparing against another run.
report_to_compare = pd.read_csv('../output/pso/20250219230520/best_detailed_diff_report_20250219230520.csv')
report_to_compare.head()

Unnamed: 0,subsector,gas,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff
0,lvst,ch4,AG - Livestock:CH4,0.969494,HRV,0.841458,2015,0.841459,0.841459,0.025337,0.610558,0.152158,0.016393
1,lsmm,ch4,AG - Livestock:CH4,0.048434,HRV,0.841458,2015,0.841459,0.841459,0.025337,0.610558,-0.942441,0.628889
2,lsmm,n2o,AG - Livestock:N2O,0.197186,HRV,0.110193,2015,0.110194,0.110194,0.003318,0.104534,0.789449,0.007568
3,agrc,co2,AG - Crops:CO2,0.954669,HRV,0.095857,2015,0.095858,0.095858,0.002886,0.091537,8.959186,0.737556
4,agrc,ch4,AG - Crops:CH4,0.200463,HRV,0.000227,2015,0.000228,0.000228,7e-06,0.000227,879.608085,0.040094


In [None]:
# Set a positional id from 0 to n-1 and keep only the comparison columns.
# Any `numeric_id` left over from a previous execution is dropped first, so re-running this
# cell cannot produce a duplicate `numeric_id` column (which would corrupt the later merge key).
report_to_compare = (
    report_to_compare
    .drop(columns='numeric_id', errors='ignore')
    .reset_index(drop=True)
    .rename_axis('numeric_id')
    .reset_index()
    .loc[:, ['numeric_id', 'subsector', 'edgar_class', 'diff', 'squared_diff']]
)
report_to_compare

Unnamed: 0,numeric_id,subsector,edgar_class,diff,squared_diff
0,0,lvst,AG - Livestock:CH4,0.152158,0.01639299
1,1,lsmm,AG - Livestock:CH4,-0.942441,0.6288887
2,2,lsmm,AG - Livestock:N2O,0.789449,0.007567654
3,3,agrc,AG - Crops:CO2,8.959186,0.737556
4,4,agrc,AG - Crops:CH4,879.608085,0.040094
5,5,agrc,AG - Crops:N2O,-0.636467,0.436509
6,6,scoe,EN - Building:CO2,0.446752,1.484091
7,7,scoe,EN - Building:CH4,-0.978267,0.1661809
8,8,scoe,EN - Building:N2O,-0.846841,0.007774873
9,9,entc,EN - Electricity/Heat:CO2,,


In [None]:
# Join the two reports on position id, subsector and EDGAR class.
# The overlapping `diff` / `squared_diff` columns get the `_og` suffix for `og_report`
# and the `_opt` suffix for `report_to_compare`.
merged_df = pd.merge(
    og_report,
    report_to_compare,
    how='inner',
    on=['numeric_id', 'subsector', 'edgar_class'],
    suffixes=('_og', '_opt'),
)

# Positive values mean the squared difference is smaller in `report_to_compare`
squared_diff_delta = merged_df['squared_diff_og'] - merged_df['squared_diff_opt']
merged_df['squared_diff_change'] = squared_diff_delta.round(3)

# Largest reductions first
merged_df = merged_df.sort_values(by='squared_diff_change', ascending=False)
merged_df

Unnamed: 0,numeric_id,subsector,edgar_class,diff_og,squared_diff_og,diff_opt,squared_diff_opt,squared_diff_change
21,21,ippu,IN - Industrial Processes:CO2,2.047009,24.4565,0.512473,1.532841,22.924
6,6,scoe,EN - Building:CO2,1.748488,22.73282,0.446752,1.484091,21.249
39,39,frst,LULUCF - Forest Land:CO2,-0.809532,28.09094,-0.700021,21.00492,7.086
18,18,trns,EN - Transportation:CO2,-0.362987,4.563752,0.00103,3.673823e-05,4.564
15,15,inen,EN - Manufacturing/Construction:CO2,0.881612,3.587734,0.163448,0.1233167,3.464
23,23,ippu,IN - Industrial Processes:N2O,1.698437,2.582262,0.349219,0.1091682,2.473
0,0,lvst,AG - Livestock:CH4,1.304317,1.20457,0.152158,0.01639299,1.188
3,3,agrc,AG - Crops:CO2,12.738568,1.491074,8.959186,0.737556,0.754
5,5,agrc,AG - Crops:N2O,-0.761957,0.6256084,-0.636467,0.436509,0.189
38,38,trww,Waste - Wastewater Treatment:N2O,3.400672,0.0436381,1.403592,0.007433923,0.036


In [18]:
# Write the comparison table (without the index) next to the sectoral reports
merged_df.to_csv('misc/sectoral_reports/opt_evaluation.csv', index=False)

In [None]:
# NOTE(review): this paired t-test is currently disabled; the output recorded for this cell
# comes from an earlier execution of the code below. Re-enable it or clear the stale output.
# from scipy.stats import ttest_rel

# # Drop rows with NaN in either column
# paired_data = merged_df.dropna(subset=['diff_og', 'diff_opt'])
# t_stat, p_value = ttest_rel(paired_data['diff_og'], paired_data['diff_opt'])
# print("t-statistic:", t_stat, "p-value:", p_value)

t-statistic: -0.43552443058483925 p-value: 0.665782076014129
