In [48]:
import pandas as pd
import numpy as np
import os
from utilities.utils import ErrorFunctions, SSPModelForCalibration
from utilities.diff_reports import DiffReportUtils

In [49]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
# Directory layout: everything is resolved relative to the directory the
# notebook kernel was started in.
curr_dir = os.getcwd()
misc_dir = os.path.join(curr_dir, 'misc')

# Sub-folders of misc/ used by the cells below (reports, dummy inputs, mapping files).
sectoral_report_dir, dummy_files_dir, sectoral_report_mapping_dir = (
    os.path.join(misc_dir, folder)
    for folder in ('sectoral_reports', 'dummy', 'sectoral_report_mapping')
)


## Test and Sketch the new DiffReport Methods

In [51]:
# Diff-report helper used by every cell below.
# Positional arguments, in order: the string 'HRV' (presumably the ISO alpha-3
# code of the country under study — it matches the iso_alpha_3 values in the
# EDGAR table), the path to edgar_ssp_cw.csv in the mapping folder, and the
# sectoral report directory. The energy model flag is switched off.
dru = DiffReportUtils(
    'HRV',
    os.path.join(sectoral_report_mapping_dir, 'edgar_ssp_cw.csv'),
    sectoral_report_dir,
    energy_model_flag=False,
)

In [52]:
# Run the helper's EDGAR ETL step on the calibration CSV from the mapping
# folder, keep the result for the merge further down, and display it.
edgar_emissions = dru.edgar_emission_db_etl(
    os.path.join(
        sectoral_report_mapping_dir,
        'CSC-GHG_emissions-April2024_to_calibrate.csv',
    )
)
edgar_emissions

Unnamed: 0,iso_alpha_3,edgar_class,edgar_emission,year
0,HRV,AG - Livestock:CH4,1.682916,2015
1,HRV,AG - Livestock:N2O,0.110193,2015
2,HRV,AG - Crops:CH4,0.000227,2015
3,HRV,AG - Crops:CO2,0.095857,2015
4,HRV,AG - Crops:N2O,1.038055,2015
5,HRV,EN - Building:CH4,0.416708,2015
6,HRV,EN - Building:CO2,2.726867,2015
7,HRV,EN - Building:N2O,0.104122,2015
8,HRV,EN - Electricity/Heat:CH4,0.004396,2015
9,HRV,EN - Electricity/Heat:CO2,3.297328,2015


In [53]:
# Load the dummy SSP model output (the "no energy" variant, going by the file
# name) that serves as the simulation input for the report methods below.
ssp_out = pd.read_csv(os.path.join(dummy_files_dir, 'ssp_output_no_energy_dummy.csv'))

In [54]:
# Preview the first rows of the raw SSP output.
ssp_out.head()

Unnamed: 0,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,area_agrc_crops_other_woody_perennial,area_agrc_crops_pulses,...,yield_agrc_fruits_tonne,yield_agrc_herbs_and_other_perennial_crops_tonne,yield_agrc_nuts_tonne,yield_agrc_other_annual_tonne,yield_agrc_other_woody_perennial_tonne,yield_agrc_pulses_tonne,yield_agrc_rice_tonne,yield_agrc_sugar_cane_tonne,yield_agrc_tubers_tonne,yield_agrc_vegetables_and_vines_tonne
0,0,128992.475401,286925.570284,59534.988647,155176.845408,200241.524314,109285.29166,144565.284932,96468.731604,66425.612333,...,860714.235864,5781814.0,65829.816853,279108.581486,50211.9748,175242.943363,0.0,0.0,2050396.0,725849.288265
1,1,128305.935425,285398.458926,59218.124042,154350.943684,199175.773688,108703.639735,143795.86139,95955.293587,66072.073584,...,857478.660027,5906717.0,52166.876709,331312.854435,70450.376552,215192.338859,2736.0,2449.0,2713049.0,699533.070417
2,2,127621.990578,283877.120067,58902.45719,153528.163879,198114.051612,108124.186462,143029.346278,95443.796372,65719.871216,...,660924.245615,5951148.0,38967.956801,296921.771405,82224.830575,210243.344677,2736.0,2449.0,2331104.0,761148.281366
3,3,126940.639041,282361.549661,58587.98725,152708.503803,197056.355263,107546.930298,142265.737557,94934.238599,65369.004292,...,957822.768981,5919376.0,17239.772927,378730.362142,93529.211867,213539.618942,2736.0,2449.0,2111415.0,729155.981592
4,4,126261.879236,280851.744198,58274.713494,151891.96156,196002.68219,106971.869908,141505.03346,94426.619087,65019.472,...,892950.504956,5887725.0,21001.25236,359927.486275,85408.876964,264501.812874,2736.0,2449.0,2234268.0,737682.556359


In [55]:
# Interactive help lookup (IPython `?`): shows the signature and docstring of
# clean_ssp_out_data.
# NOTE(review): development aid — consider dropping this cell before sharing
# the notebook.
?dru.clean_ssp_out_data

Signature: dru.clean_ssp_out_data(ssp_out_df)
Docstring:
Cleans the SSP output data by adding a year column and filtering for the comparison year.
Args:
    ssp_out_df (pd.DataFrame): The SSP output data frame to be cleaned.
Returns:
    pd.DataFrame: The cleaned SSP output data frame containing only the data for the comparison year.
Raises:
    ValueError: If the comparison year is not present in the simulation data.
File:      ~/decision_sciences/ssp_data_calibration/src/utilities/diff_reports.py
Type:      method

In [56]:
# Try the cleaning step on its own: per its docstring it adds a `year` column
# and keeps only the comparison-year data. The result is only displayed here,
# not stored, so `ssp_out` itself is not rebound by this cell.
dru.clean_ssp_out_data(ssp_out)

Unnamed: 0,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,area_agrc_crops_other_woody_perennial,area_agrc_crops_pulses,...,yield_agrc_herbs_and_other_perennial_crops_tonne,yield_agrc_nuts_tonne,yield_agrc_other_annual_tonne,yield_agrc_other_woody_perennial_tonne,yield_agrc_pulses_tonne,yield_agrc_rice_tonne,yield_agrc_sugar_cane_tonne,yield_agrc_tubers_tonne,yield_agrc_vegetables_and_vines_tonne,year
0,0,128992.475401,286925.570284,59534.988647,155176.845408,200241.524314,109285.29166,144565.284932,96468.731604,66425.612333,...,5781814.0,65829.816853,279108.581486,50211.9748,175242.943363,0.0,0.0,2050396.0,725849.288265,2015


In [57]:
# Build the SSP emissions report from the raw `ssp_out` frame (not from the
# cleaned frame displayed in the previous cell) and display it.
ssp_emissions_report = dru.generate_ssp_emissions_report(ssp_out)
ssp_emissions_report

Unnamed: 0,subsector,gas,edgar_class,ssp_emission
0,lvst,ch4,AG - Livestock:CH4,1.938988
1,lsmm,ch4,AG - Livestock:CH4,0.040259
2,lsmm,n2o,AG - Livestock:N2O,0.165996
3,agrc,co2,AG - Crops:CO2,1.316953
4,agrc,ch4,AG - Crops:CH4,0.214505
...,...,...,...,...
63,soil,co2,LULUCF - Organic Soil:CO2,0.110122
64,soil,n2o,LULUCF - Organic Soil:N2O,1.031985
65,ccsq,ch4,CCSQ:CH4,0.000000
66,ccsq,co2,CCSQ:CO2,0.000000


In [58]:
# Inspect the first 20 rows of the per-gas SSP report.
# NOTE(review): the recorded output shows a blank ssp_emission for entc / co2
# — confirm that a missing value is expected for this dummy input.
ssp_emissions_report.head(20)

Unnamed: 0,subsector,gas,edgar_class,ssp_emission
0,lvst,ch4,AG - Livestock:CH4,1.938988
1,lsmm,ch4,AG - Livestock:CH4,0.040259
2,lsmm,n2o,AG - Livestock:N2O,0.165996
3,agrc,co2,AG - Crops:CO2,1.316953
4,agrc,ch4,AG - Crops:CH4,0.214505
5,agrc,n2o,AG - Crops:N2O,0.247102
6,scoe,co2,EN - Building:CO2,7.494763
7,scoe,ch4,EN - Building:CH4,0.019117
8,scoe,n2o,EN - Building:N2O,0.033724
9,entc,co2,EN - Electricity/Heat:CO2,


In [59]:
# Group the report variables: in the recorded output the `gas` column is gone
# and the rows are keyed by subsector / edgar_class.
# NOTE(review): this rebinds `ssp_emissions_report` to the grouped frame, so
# re-running the cell feeds already-grouped data back into the method. Run it
# exactly once after the generate cell, or consider a separate name for the
# grouped result.
ssp_emissions_report = dru.group_ssp_emissions_report_vars(ssp_emissions_report)
ssp_emissions_report

Unnamed: 0,subsector,edgar_class,ssp_emission
0,agrc,AG - Crops:CH4,0.214505
1,agrc,AG - Crops:CO2,1.316953
2,agrc,AG - Crops:N2O,0.247102
3,ccsq,CCSQ:CH4,0.0
4,ccsq,CCSQ:CO2,0.0
5,ccsq,CCSQ:N2O,0.0
6,entc,EN - Electricity/Heat:CH4,0.0
7,entc,EN - Electricity/Heat:CO2,0.0
8,entc,EN - Electricity/Heat:N2O,0.0
9,fgtv,EN - Fugitive Emissions:CH4,0.0


In [60]:
# Restrict the grouped SSP report to the energy subsectors listed here; the
# list is reused further down when inspecting the sectoral report.
energy_subsectors = ['trns', 'entc', 'fgtv', 'scoe', 'inen', 'ccsq']
ssp_emissions_report.loc[ssp_emissions_report['subsector'].isin(energy_subsectors)]

Unnamed: 0,subsector,edgar_class,ssp_emission
3,ccsq,CCSQ:CH4,0.0
4,ccsq,CCSQ:CO2,0.0
5,ccsq,CCSQ:N2O,0.0
6,entc,EN - Electricity/Heat:CH4,0.0
7,entc,EN - Electricity/Heat:CO2,0.0
8,entc,EN - Electricity/Heat:N2O,0.0
9,fgtv,EN - Fugitive Emissions:CH4,0.0
10,fgtv,EN - Fugitive Emissions:CO2,0.0
11,fgtv,EN - Fugitive Emissions:N2O,0.0
14,inen,EN - Manufacturing/Construction:CH4,0.010258


In [61]:
# Inspect the helper's `model_failed_flag` attribute after building the report.
dru.model_failed_flag

False

In [62]:
# Smallest grouped SSP emission value; a negative minimum means at least one
# row of the report carries a negative emission.
ssp_emissions_report['ssp_emission'].min()

-1.2470138668192876

In [63]:
# Show the EDGAR table ordered alphabetically by edgar_class (ascending is the
# default) to make it easy to compare against the grouped SSP report.
edgar_emissions.sort_values('edgar_class')

Unnamed: 0,iso_alpha_3,edgar_class,edgar_emission,year
2,HRV,AG - Crops:CH4,0.000227,2015
3,HRV,AG - Crops:CO2,0.095857,2015
4,HRV,AG - Crops:N2O,1.038055,2015
0,HRV,AG - Livestock:CH4,1.682916,2015
1,HRV,AG - Livestock:N2O,0.110193,2015
5,HRV,EN - Building:CH4,0.416708,2015
6,HRV,EN - Building:CO2,2.726867,2015
7,HRV,EN - Building:N2O,0.104122,2015
8,HRV,EN - Electricity/Heat:CH4,0.004396,2015
9,HRV,EN - Electricity/Heat:CO2,3.297328,2015


In [64]:
# Combine the grouped SSP report with the EDGAR reference emissions. In the
# recorded output the result carries the EDGAR columns plus weight
# (direct / norm / log) and diff / squared_diff columns.
df_report = dru.merge_ssp_with_edgar(ssp_emissions_report, edgar_emissions)
df_report


Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff
0,agrc,AG - Crops:CH4,0.214505,HRV,0.000227,2015,0.000228,0.000228,6.82447e-06,0.000227,941.29607,0.04591488
1,agrc,AG - Crops:CO2,1.316953,HRV,0.095857,2015,0.095858,0.095858,0.002886389,0.091537,12.738568,1.491074
2,agrc,AG - Crops:N2O,0.247102,HRV,1.038055,2015,1.038056,1.038056,0.03125726,0.711996,-0.761957,0.6256084
3,ccsq,CCSQ:CH4,0.0,,,2015,,,,,,
4,ccsq,CCSQ:CO2,0.0,,,2015,,,,,,
5,ccsq,CCSQ:N2O,0.0,,,2015,,,,,,
6,entc,EN - Electricity/Heat:CH4,0.0,HRV,0.004396,2015,0.004397,0.004397,0.0001323562,0.004386,-1.0,1.93297e-05
7,entc,EN - Electricity/Heat:CO2,0.0,HRV,3.297328,2015,3.297329,3.297329,0.09928705,1.457993,-1.0,10.87238
8,entc,EN - Electricity/Heat:N2O,0.0,HRV,0.011222,2015,0.011223,0.011223,0.0003379238,0.01116,-1.0,0.0001259662
9,fgtv,EN - Fugitive Emissions:CH4,0.0,HRV,1.046727,2015,1.046728,1.046728,0.03151837,0.716242,-1.0,1.095639


In [65]:
# List every row whose edgar_class occurs more than once in the merged report
# (keep=False marks all members of a duplicate group, not just the repeats).
# An empty result means each edgar_class appears exactly once.
df_report[df_report.duplicated(subset=['edgar_class'], keep=False)]

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff


In [66]:
# Sum of the norm_weight column over all rows (missing values are skipped by
# default). Presumably expected to equal 1 up to floating-point error if the
# weights are normalised — TODO confirm against merge_ssp_with_edgar.
df_report['norm_weight'].sum()

1.0000000000000002

In [67]:
# Run the report generator for the 'trns' subsector. Nothing is captured from
# the call here; the following cells read the results from attributes on
# `dru` (sectoral_emission_report, subsector_emission_report).
dru.run_report_generator(edgar_emissions, ssp_out, subsector_to_calibrate='trns')

In [68]:
# Rows of the generated sectoral report that belong to the energy subsectors.
dru.sectoral_emission_report[dru.sectoral_emission_report['subsector'].isin(energy_subsectors)]

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff
33,trns,EN - Transportation:CH4,0.012943,HRV,0.013863,2015,0.013864,0.013864,0.000417,0.013768,-0.066422,8.480464e-07
34,trns,EN - Transportation:CO2,3.749032,HRV,5.885325,2015,5.885326,5.885326,0.177215,1.929392,-0.362987,4.563752
35,trns,EN - Transportation:N2O,0.064532,HRV,0.05899,2015,0.058991,0.058991,0.001776,0.057316,0.093917,3.069482e-05


In [69]:
# Last (up to) 10 rows of the generated sectoral report.
dru.sectoral_emission_report.tail(10)

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff
33,trns,EN - Transportation:CH4,0.012943,HRV,0.013863,2015,0.013864,0.013864,0.000417,0.013768,-0.066422,8.480464e-07
34,trns,EN - Transportation:CO2,3.749032,HRV,5.885325,2015,5.885326,5.885326,0.177215,1.929392,-0.362987,4.563752
35,trns,EN - Transportation:N2O,0.064532,HRV,0.05899,2015,0.058991,0.058991,0.001776,0.057316,0.093917,3.069482e-05


In [70]:
# Check whether the sectoral report contains any rows for the 'ippu' subsector.
dru.sectoral_emission_report[dru.sectoral_emission_report['subsector'] == 'ippu']

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff


In [71]:
# Subsector-level summary produced by run_report_generator (one row per
# subsector in the recorded output).
dru.subsector_emission_report

Unnamed: 0,subsector,ssp_emission,edgar_emission_epsilon,diff,squared_diff,year
12,trns,3.826507,5.958182,-0.357773,4.544037,2015
