In [167]:
import pandas as pd
import numpy as np
import os
from utilities.utils import ErrorFunctions, SSPModelForCalibration
from utilities.diff_reports import DiffReportUtils

In [168]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [169]:
# Directory layout: all input/output folders sit under <cwd>/misc.
curr_dir = os.getcwd()
misc_dir = os.path.join(curr_dir, 'misc')
sectoral_report_dir, dummy_files_dir, sectoral_report_mapping_dir = (
    os.path.join(misc_dir, folder_name)
    for folder_name in ('sectoral_reports', 'dummy', 'sectoral_report_mapping')
)


In [170]:
# Selects the EDGAR input used below: True runs `dru.edgar_emission_db_etl` on the
# 'CSC-GHG_emissions-April2024_to_calibrate.csv' file, False reads
# 'emission_targets_uganda.csv' through `dru.get_edgar_region_df`.
# The same flag is also forwarded to the DiffReportUtils constructor.
use_edgar_db_flag = False

In [171]:
# Create the diff-report helper.
# NOTE(review): 'UGA' is presumably the ISO alpha-3 region code (the reports
# below carry iso_alpha_3 == 'UGA') — confirm against DiffReportUtils.
dru = DiffReportUtils(
    'UGA',
    os.path.join(sectoral_report_mapping_dir, 'sisepuede_edgar_active_crosswalk.csv'),
    sectoral_report_dir,
    energy_model_flag=False,
    use_edgar_db_flag=use_edgar_db_flag,
    comparison_year=2022,
    sim_init_year=2022,
)

In [172]:
# Build the EDGAR reference table from whichever source the flag selects.
if not use_edgar_db_flag:
    # Region-specific targets file.
    edgar_emissions = dru.get_edgar_region_df(os.path.join(sectoral_report_mapping_dir, 'emission_targets_uganda.csv'))
else:
    # EDGAR database file, passed through the helper's ETL step.
    edgar_emissions = dru.edgar_emission_db_etl(os.path.join(sectoral_report_mapping_dir, 'CSC-GHG_emissions-April2024_to_calibrate.csv'))

edgar_emissions.head()

Unnamed: 0,iso_alpha_3,subsector,edgar_class,edgar_emission,year
0,UGA,lvst,AG - Livestock:CH4,9.972244,2022
1,UGA,lsmm,AG - Livestock:CH4,9.972244,2022
2,UGA,lsmm,AG - Livestock:N2O,0.299059,2022
3,UGA,agrc,AG - Crops:CO2,0.0,2022
4,UGA,agrc,AG - Crops:CH4,0.576887,2022


In [173]:
# Show the last rows of the EDGAR table to complement the head() preview above.
edgar_emissions.tail()

Unnamed: 0,iso_alpha_3,subsector,edgar_class,edgar_emission,year
63,UGA,soil,LULUCF - Organic Soil:CO2,0.0,2022
64,UGA,soil,LULUCF - Organic Soil:N2O,0.0,2022
65,UGA,ccsq,CCSQ:CH4,0.0,2022
66,UGA,ccsq,CCSQ:CO2,0.0,2022
67,UGA,ccsq,CCSQ:N2O,0.0,2022


## Test Diff Report Class Methods

In [174]:
# Read the dummy SSP model output used to exercise the report methods.
ssp_out_path = os.path.join(dummy_files_dir, 'ssp_uganda_output_dummy.csv')
ssp_out = pd.read_csv(ssp_out_path)

In [175]:
# Preview the first rows of the SSP output (one row per time_period).
ssp_out.head()

Unnamed: 0,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,area_agrc_crops_other_woody_perennial,area_agrc_crops_pulses,...,yield_agrc_fruits_tonne,yield_agrc_herbs_and_other_perennial_crops_tonne,yield_agrc_nuts_tonne,yield_agrc_other_annual_tonne,yield_agrc_other_woody_perennial_tonne,yield_agrc_pulses_tonne,yield_agrc_rice_tonne,yield_agrc_sugar_cane_tonne,yield_agrc_tubers_tonne,yield_agrc_vegetables_and_vines_tonne
0,0,1110674.0,2470537.0,512618.657191,1336131.0,1724155.0,940987.489485,1244761.0,830632.082001,571949.517471,...,9851960.0,3660909.0,717057.782374,1223273.0,305174.162968,448689.809992,1088927.0,59049290.0,2194701.0,1187277.0
1,1,1099568.0,2445834.0,507493.032449,1322771.0,1706915.0,931578.645912,1232315.0,822326.671554,566230.649153,...,9536923.0,3706026.0,667169.430627,1213181.0,302122.75581,455151.199769,1105636.0,58965350.0,2227650.0,1195600.0
2,2,1108967.0,2466741.0,511831.054023,1334078.0,1721506.0,939541.727186,1242849.0,829355.873165,571070.756532,...,9588445.0,3678309.0,724448.535785,1242594.0,304705.28394,461647.924279,1147618.0,59318580.0,2564269.0,1186876.0
3,3,1097881.0,2442080.0,506714.112877,1320741.0,1704295.0,930148.82364,1230424.0,821064.53336,565361.576856,...,9473999.0,3637027.0,692803.554751,1773441.0,301659.046334,454988.571853,1156682.0,58362010.0,2735081.0,1177157.0
4,4,1107827.0,2464205.0,511304.810668,1332707.0,1719736.0,938575.729546,1241571.0,828503.164027,570483.605385,...,9621189.0,3735378.0,429980.347008,1897525.0,304391.998669,456280.825701,1170637.0,59613260.0,2366263.0,1194025.0


In [176]:
# Load the SSP/EDGAR crosswalk through the helper and show its last 20 rows.
# NOTE(review): presumably this reads the crosswalk CSV passed to the
# DiffReportUtils constructor — confirm in utilities.diff_reports.
ssp_edgar_df = dru.load_ssp_edgar_cw()
ssp_edgar_df.tail(20)

Unnamed: 0,subsector,gas,edgar_class,edgar_subsector,edgar_sector,ignore,note,need_better_information_on_what_is_contained,vars
23,ippu,n2o,IN - Industrial Processes:N2O,IN - Industrial Processes,Industrial Processes,,,,emission_co2e_n2o_ippu_production_chemicals:em...
24,ippu,hfcs,IN - Industrial Processes:HFC,IN - Industrial Processes,Industrial Processes,,NOTE: can pick this set (HFCs aggregate) or sp...,,emission_co2e_hfcs_ippu_product_use_product_us...
25,ippu,other_fcs,IN - Industrial Processes:HFC,IN - Industrial Processes,Industrial Processes,,NOTE: these aren't HFCs,,
26,ippu,pfcs,IN - Industrial Processes:PFC,IN - Industrial Processes,Industrial Processes,,,,emission_co2e_pfcs_ippu_product_use_product_us...
27,ippu,sf6,IN - Industrial Processes:SF6,IN - Industrial Processes,Industrial Processes,,,,emission_co2e_sf6_ippu_production_chemicals:em...
28,waso,co2,Waste - Solid Waste:CO2,Waste - Solid Waste,Waste,,,,emission_co2e_co2_waso_incineration
29,waso,ch4,Waste - Solid Waste:CH4,Waste - Solid Waste,Waste,,,,emission_co2e_ch4_waso_biogas_food:emission_co...
30,waso,n2o,Waste - Solid Waste:N2O,Waste - Solid Waste,Waste,,,,emission_co2e_n2o_waso_compost_food:emission_c...
31,trww,ch4,Waste - Wastewater Treatment:CH4,Waste - Wastewater Treatment,Waste,,,,emission_co2e_ch4_trww_treated_advanced_aerobi...
32,trww,n2o,Waste - Wastewater Treatment:N2O,Waste - Wastewater Treatment,Waste,,,,emission_co2e_n2o_trww_treated_advanced_aerobi...


In [177]:
# ssp_edgar_df[ssp_edgar_df.ignore == 1]

In [178]:
# Build the SSP emissions report from the model output. The helper returns
# the report together with a flag, which is displayed in a later cell.
ssp_emissions_report, model_failed_flag = dru.generate_ssp_emissions_report(ssp_out)
ssp_emissions_report

Unnamed: 0,subsector,gas,edgar_class,ssp_emission
0,lvst,ch4,AG - Livestock:CH4,59.862185
1,lsmm,ch4,AG - Livestock:CH4,2.511765
2,lsmm,n2o,AG - Livestock:N2O,6.355724
3,agrc,co2,AG - Crops:CO2,0.864601
4,agrc,ch4,AG - Crops:CH4,1.771977
5,agrc,n2o,AG - Crops:N2O,0.56528
6,scoe,co2,EN - Building:CO2,7.702788
7,scoe,ch4,EN - Building:CH4,0.033125
8,scoe,n2o,EN - Building:N2O,0.05825
9,entc,co2,EN - Electricity/Heat:CO2,


In [179]:
# Group the SSP report; the result replaces the ungrouped report under the same name.
# NOTE(review): because the input name is overwritten, re-running this cell alone
# feeds the already-grouped frame back into the helper. Re-run the previous
# cell first, or consider a separate name for the grouped result.
ssp_emissions_report = dru.group_ssp_emissions_report_vars(ssp_emissions_report)
ssp_emissions_report

Unnamed: 0,subsector,edgar_class,ssp_emission
0,agrc,AG - Crops:CH4,1.771977
1,agrc,AG - Crops:CO2,0.864601
2,agrc,AG - Crops:N2O,0.56528
3,ccsq,CCSQ:CH4,0.0
4,ccsq,CCSQ:CO2,0.0
5,ccsq,CCSQ:N2O,0.0
6,entc,EN - Electricity/Heat:CH4,0.0
7,entc,EN - Electricity/Heat:CO2,0.0
8,entc,EN - Electricity/Heat:N2O,0.0
9,fgtv,EN - Fugitive Emissions:CH4,0.0


In [180]:
# Restrict the grouped SSP report to the energy subsectors.
energy_subsectors = ['trns', 'entc', 'fgtv', 'scoe', 'inen', 'ccsq']
ssp_emissions_report.loc[ssp_emissions_report['subsector'].isin(energy_subsectors)]

Unnamed: 0,subsector,edgar_class,ssp_emission
3,ccsq,CCSQ:CH4,0.0
4,ccsq,CCSQ:CO2,0.0
5,ccsq,CCSQ:N2O,0.0
6,entc,EN - Electricity/Heat:CH4,0.0
7,entc,EN - Electricity/Heat:CO2,0.0
8,entc,EN - Electricity/Heat:N2O,0.0
9,fgtv,EN - Fugitive Emissions:CH4,0.0
10,fgtv,EN - Fugitive Emissions:CO2,0.0
11,fgtv,EN - Fugitive Emissions:N2O,0.0
14,inen,EN - Manufacturing/Construction:CH4,0.108583


In [181]:
# Show the flag returned alongside the SSP emissions report.
model_failed_flag

False

In [182]:
# View the EDGAR table ordered alphabetically by class (ascending is the default).
edgar_emissions.sort_values('edgar_class')

Unnamed: 0,iso_alpha_3,subsector,edgar_class,edgar_emission,year
4,UGA,agrc,AG - Crops:CH4,0.576887,2022
3,UGA,agrc,AG - Crops:CO2,0.000000,2022
5,UGA,agrc,AG - Crops:N2O,8.092843,2022
0,UGA,lvst,AG - Livestock:CH4,9.972244,2022
1,UGA,lsmm,AG - Livestock:CH4,9.972244,2022
...,...,...,...,...,...
54,UGA,waso,Waste - Solid Waste:CH4,0.595829,2022
53,UGA,waso,Waste - Solid Waste:CO2,0.000000,2022
55,UGA,waso,Waste - Solid Waste:N2O,0.000000,2022
56,UGA,trww,Waste - Wastewater Treatment:CH4,7.170170,2022


In [183]:
# Display the EDGAR table in its original row order (pandas truncates the middle rows).
edgar_emissions

Unnamed: 0,iso_alpha_3,subsector,edgar_class,edgar_emission,year
0,UGA,lvst,AG - Livestock:CH4,9.972244,2022
1,UGA,lsmm,AG - Livestock:CH4,9.972244,2022
2,UGA,lsmm,AG - Livestock:N2O,0.299059,2022
3,UGA,agrc,AG - Crops:CO2,0.000000,2022
4,UGA,agrc,AG - Crops:CH4,0.576887,2022
...,...,...,...,...,...
63,UGA,soil,LULUCF - Organic Soil:CO2,0.000000,2022
64,UGA,soil,LULUCF - Organic Soil:N2O,0.000000,2022
65,UGA,ccsq,CCSQ:CH4,0.000000,2022
66,UGA,ccsq,CCSQ:CO2,0.000000,2022


In [184]:
# When the region targets file is the EDGAR source, group it with the same
# helper used for the SSP report and tag every row with the region code.
if not use_edgar_db_flag:
    edgar_emissions_groupped = dru.group_ssp_emissions_report_vars(edgar_emissions, emissions_col_name="edgar_emission")
    edgar_emissions_groupped["iso_alpha_3"] = dru.iso_alpha_3
    # A bare expression inside an `if` block is not rendered, so call display()
    # explicitly; unlike print() it uses the rich DataFrame representation.
    display(edgar_emissions_groupped)

    subsector                          edgar_class  edgar_emission iso_alpha_3
0        agrc                       AG - Crops:CH4        0.576887         UGA
1        agrc                       AG - Crops:CO2        0.000000         UGA
2        agrc                       AG - Crops:N2O        8.092843         UGA
3        ccsq                             CCSQ:CH4        0.000000         UGA
4        ccsq                             CCSQ:CO2        0.000000         UGA
5        ccsq                             CCSQ:N2O        0.000000         UGA
6        entc            EN - Electricity/Heat:CH4        0.003233         UGA
7        entc            EN - Electricity/Heat:CO2        0.056306         UGA
8        entc            EN - Electricity/Heat:N2O        0.004118         UGA
9        fgtv          EN - Fugitive Emissions:CH4        7.845444         UGA
10       fgtv          EN - Fugitive Emissions:CO2        0.000000         UGA
11       fgtv          EN - Fugitive Emissions:N2O  

In [185]:
# Merge the grouped SSP report with the EDGAR reference emissions.
if use_edgar_db_flag:
    df_report = dru.merge_ssp_with_edgar(ssp_emissions_report, edgar_emissions)

else:
    # `subsector` is dropped from the EDGAR side before the merge.
    # errors="ignore" keeps this cell re-runnable: on a second run the column
    # is already gone and a plain drop() would raise KeyError.
    edgar_emissions_groupped = edgar_emissions_groupped.drop(columns="subsector", errors="ignore")
    df_report = dru.merge_ssp_with_edgar(ssp_emissions_report, edgar_emissions_groupped)

df_report

Unnamed: 0,subsector,edgar_class,ssp_emission,edgar_emission,iso_alpha_3,year
0,agrc,AG - Crops:CH4,1.771977,0.576887,UGA,2022
1,agrc,AG - Crops:CO2,0.864601,0.0,UGA,2022
2,agrc,AG - Crops:N2O,0.56528,8.092843,UGA,2022
3,ccsq,CCSQ:CH4,0.0,0.0,UGA,2022
4,ccsq,CCSQ:CO2,0.0,0.0,UGA,2022
5,ccsq,CCSQ:N2O,0.0,0.0,UGA,2022
6,entc,EN - Electricity/Heat:CH4,0.0,0.003233,UGA,2022
7,entc,EN - Electricity/Heat:CO2,0.0,0.056306,UGA,2022
8,entc,EN - Electricity/Heat:N2O,0.0,0.004118,UGA,2022
9,fgtv,EN - Fugitive Emissions:CH4,0.0,7.845444,UGA,2022


In [186]:
# Display the helper's result for the merged report; the return value is not stored.
# In the output below, inen/scoe/trns appear as one row each, without a gas suffix.
dru.group_trns_scoe_inen(df_report)

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year
0,inen,EN - Manufacturing/Construction,26.088885,UGA,1.663096,2022
1,scoe,EN - Building,7.794163,UGA,3.489266,2022
2,trns,EN - Transportation,1.835655,UGA,3.903503,2022
3,agrc,AG - Crops:CH4,1.771977,UGA,0.576887,2022
4,agrc,AG - Crops:CO2,0.864601,UGA,0.0,2022
5,agrc,AG - Crops:N2O,0.56528,UGA,8.092843,2022
6,ccsq,CCSQ:CH4,0.0,UGA,0.0,2022
7,ccsq,CCSQ:CO2,0.0,UGA,0.0,2022
8,ccsq,CCSQ:N2O,0.0,UGA,0.0,2022
9,entc,EN - Electricity/Heat:CH4,0.0,UGA,0.003233,2022


In [187]:
# List every row whose edgar_class occurs more than once (all occurrences kept).
df_report.loc[df_report['edgar_class'].duplicated(keep=False)]

Unnamed: 0,subsector,edgar_class,ssp_emission,edgar_emission,iso_alpha_3,year


In [188]:
# The call is the same for both EDGAR input modes, so there is no need to
# branch on `use_edgar_db_flag` here.
reports_dict = dru.run_report_generator(edgar_emissions, ssp_out)

# Unpack the two reports returned by the generator.
sectoral_emission_report = reports_dict['sectoral_emission_report']
subsector_emission_report = reports_dict['subsector_emission_report']

In [189]:
# Energy-subsector rows of the sectoral report.
sectoral_emission_report.loc[lambda report: report['subsector'].isin(energy_subsectors)]

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
0,inen,EN - Manufacturing/Construction,26.088885,UGA,1.663096,2022,1.663097,14.68693,596.619146,1.663097,0.013778,0.979489
1,scoe,EN - Building,7.794163,UGA,3.489266,2022,3.489267,1.233753,18.532123,3.489267,0.028907,1.501689
2,trns,EN - Transportation,1.835655,UGA,3.903503,2022,3.903504,-0.529742,4.276003,3.903504,0.032339,1.58995


In [190]:
# Display the sectoral report, including the error and weight columns.
sectoral_emission_report

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
0,inen,EN - Manufacturing/Construction,26.088885,UGA,1.663096,2022,1.663097,14.68693,596.6191,1.663097,0.013778,0.979489
1,scoe,EN - Building,7.794163,UGA,3.489266,2022,3.489267,1.233753,18.53212,3.489267,0.028907,1.501689
2,trns,EN - Transportation,1.835655,UGA,3.903503,2022,3.903504,-0.5297419,4.276003,3.903504,0.032339,1.58995
3,agrc,AG - Crops:CH4,1.771977,UGA,0.576887,2022,0.576888,2.071615,1.428239,0.576888,0.004779,0.455452
4,agrc,AG - Crops:CO2,0.864601,UGA,0.0,2022,1e-06,864599.9,0.747533,1e-06,0.0,0.0
5,agrc,AG - Crops:N2O,0.56528,UGA,8.092843,2022,8.092844,-0.9301507,56.66422,8.092844,0.067045,2.207488
15,frst,LULUCF - Forest Land:CH4,0.104575,UGA,0.0,2022,1e-06,104574.4,0.01093581,1e-06,0.0,0.0
16,frst,LULUCF - Forest Land:CO2,-6.028699,UGA,-9.61,2022,-9.609999,-0.3726639,12.82571,9.610001,0.079614,2.361797
17,ippu,IN - Industrial Processes:CH4,0.006068,UGA,0.0,2022,1e-06,6067.313,3.681229e-05,1e-06,0.0,0.0
18,ippu,IN - Industrial Processes:CO2,10.386787,UGA,1.765612,2022,1.765613,4.882823,74.32464,1.765613,0.014627,1.017262


In [191]:
# Show the last 10 rows of the sectoral report.
sectoral_emission_report.tail(10)

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
25,lndu,LULUCF - Other Land:CO2,3e-06,UGA,0.0,2022,1e-06,2.395024,5.736138e-12,1e-06,0.0,0.0
26,lsmm,AG - Livestock:N2O,6.355724,UGA,0.299059,2022,0.29906,20.25235,36.68318,0.29906,0.002478,0.26164
27,lvst-lsmm,AG - Livestock:CH4,62.37395,UGA,19.944488,2022,19.944489,2.127378,1800.259,19.944489,0.16523,3.041876
28,soil,LULUCF - Organic Soil:CO2,27.401065,UGA,0.0,2022,1e-06,27401060.0,750.8183,1e-06,0.0,0.0
29,soil,LULUCF - Organic Soil:N2O,0.929888,UGA,0.0,2022,1e-06,929887.2,0.8646903,1e-06,0.0,0.0
30,trww,Waste - Wastewater Treatment:CH4,4.011817,UGA,7.17017,2022,7.170171,-0.4404852,9.975199,7.170171,0.059401,2.10049
31,trww,Waste - Wastewater Treatment:N2O,2.655611,UGA,0.452025,2022,0.452026,4.874914,4.855789,0.452026,0.003745,0.372959
32,waso,Waste - Solid Waste:CH4,7.906004,UGA,0.595829,2022,0.59583,12.2689,53.43865,0.59583,0.004936,0.467393
33,waso,Waste - Solid Waste:CO2,0.665792,UGA,0.0,2022,1e-06,665791.1,0.4432778,1e-06,0.0,0.0
34,waso,Waste - Solid Waste:N2O,0.012123,UGA,0.0,2022,1e-06,12121.69,0.0001469353,1e-06,0.0,0.0


In [192]:
# Rows of the sectoral report that belong to the ippu subsector.
sectoral_emission_report.loc[sectoral_emission_report['subsector'].eq('ippu')]

Unnamed: 0,subsector,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,rel_error,squared_diff,direct_weight,norm_weight,log_weight
17,ippu,IN - Industrial Processes:CH4,0.006068,UGA,0.0,2022,1e-06,6067.313151,3.681229e-05,1e-06,0.0,0.0
18,ippu,IN - Industrial Processes:CO2,10.386787,UGA,1.765612,2022,1.765613,4.882823,74.32464,1.765613,0.014627,1.017262
19,ippu,IN - Industrial Processes:HFC,0.0,UGA,0.0,2022,1e-06,-1.0,1e-12,1e-06,0.0,0.0
20,ippu,IN - Industrial Processes:N2O,0.528751,UGA,0.534607,2022,0.534608,-0.010956,3.43094e-05,0.534608,0.004429,0.428275
21,ippu,IN - Industrial Processes:PFC,2e-06,UGA,0.0,2022,1e-06,1.366543,1.86744e-12,1e-06,0.0,0.0
22,ippu,IN - Industrial Processes:SF6,0.0,UGA,0.0,2022,1e-06,-1.0,1e-12,1e-06,0.0,0.0


In [193]:
# Display the subsector-level report (one row per subsector in the output below).
subsector_emission_report

Unnamed: 0,subsector,ssp_emission,edgar_emission_epsilon,rel_error,squared_diff,year
0,agrc,3.201858,8.669733,-0.6306855,29.897656,2022
1,frst,-5.924124,-9.609998,-0.3835458,13.585667,2022
2,inen,26.088885,1.663097,14.68693,596.619146,2022
3,ippu,10.921608,2.300225,3.748061,74.32825,2022
4,lndu,3.227681,62.610003,-0.9484478,3526.26016,2022
5,lsmm,6.355724,0.29906,20.25235,36.683184,2022
6,lvst-lsmm,62.37395,19.944489,2.127378,1800.2592,2022
7,scoe,7.794163,3.489267,1.233753,18.532123,2022
8,soil,28.330953,2e-06,14165480.0,802.642812,2022
9,trns,1.835655,3.903504,-0.5297419,4.276003,2022


### Test new error function

In [None]:
# Read the dummy SSP output through `dummy_files_dir`, matching how the other
# dummy file is loaded earlier, rather than repeating a hard-coded relative path.
input_df = pd.read_csv(os.path.join(dummy_files_dir, 'ssp_output_dummy.csv'))
input_df.head()

In [None]:
# Run the report generator on the second dummy output. The returned dict is only
# displayed; the next cell reads the report from `dru.sectoral_emission_report`.
# NOTE(review): presumably the generator stores its result on that attribute — confirm.
dru.run_report_generator(edgar_emissions, input_df)

In [None]:
# Work on a copy so the sorting below does not touch the frame held by `dru`.
df = dru.sectoral_emission_report.copy()
df.head()

In [None]:
# Write the report into `dummy_files_dir`, the directory used for the other
# dummy files, rather than repeating a hard-coded relative path.
df.to_csv(os.path.join(dummy_files_dir, 'sectoral_emission_report_dummy.csv'), index=False)

In [None]:
# Order the rows from the largest to the smallest normalised weight.
df = df.sort_values('norm_weight', ascending=False)

In [None]:
# Display the report after sorting by norm_weight.
df

In [None]:
# Normalised weights scaled by 100.
df['norm_weight'].mul(100)

In [None]:
# Compute the 'wmape' error for the sorted report with the project's error helper.
# The next cell recomputes a weighted absolute relative error by hand for comparison.
ef = ErrorFunctions()
ef.calculate_error(error_type='wmape', dataframe=df)

In [None]:
# Manual cross-check: sum of norm_weight * |rel_error|, scaled by 100.
(df['norm_weight'] * df['rel_error'].abs()).sum() * 100