In [216]:
import pandas as pd
import numpy as np
import os
from utilities.utils import SectoralDiffReport, ErrorFunctions, SSPModelForCalibration
from utilities.diff_reports import DiffReportUtils

In [217]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [218]:
# Define paths
curr_dir = os.getcwd()
misc_dir = os.path.join(curr_dir, 'misc')
sectoral_report_dir = os.path.join(misc_dir, 'sectoral_reports')
dummy_files_dir = os.path.join(misc_dir, 'dummy')
sectoral_report_mapping_dir = os.path.join(misc_dir, 'sectoral_report_mapping')


## Explore the new emissions targets csv

In [219]:
# Read emission targets csv
emission_targets = pd.read_csv(os.path.join(sectoral_report_mapping_dir, '2022_emission_targets.csv'))
emission_targets.head()


Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,Vars,ids,IND,PRY,KAZ,...,SLE,BEN,ETH,MRT,NER,TGO,IRN,HRV,GEO,Year
0,lvst,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,1:lvst:ch4,216.832747,12.036521,13.135347,...,0.517069,1.488482,45.03151,3.446201,12.146617,0.787989,9.529176,0.805474,0.996739,2022
1,lsmm,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,2:lsmm:ch4,216.832747,12.036521,13.135347,...,0.517069,1.488482,45.03151,3.446201,12.146617,0.787989,9.529176,0.805474,0.996739,2022
2,lsmm,n2o,AG - Livestock:N2O,AG - Livestock,Agriculture,emission_co2e_n2o_lsmm_anaerobic_digester:emis...,3:lsmm:n2o,2.169403,0.125988,1.269783,...,0.032553,0.06131,0.280803,0.010149,0.084756,0.10027,0.210769,0.100881,0.088307,2022
3,agrc,co2,AG - Crops:CO2,AG - Crops,Agriculture,emission_co2e_co2_agrc_biomass_bevs_and_spices...,4:agrc:co2,32.634819,0.076804,0.454098,...,0.0,0.0,0.46514,0.0,0.0,0.0,1.8583,0.106978,0.100935,2022
4,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...,5:agrc:ch4,127.221401,1.101916,0.833077,...,1.327563,0.527888,0.64565,0.264313,0.309672,0.38962,3.137081,0.000115,0.00306,2022


In [220]:
# Cols to keep
cols_to_keep = ['Subsector', 'Gas', 'Edgar_Class', 'Edgar_Subsector', 'Edgar_Sector', 'Vars', 'HRV']
# Filter df
emission_targets = emission_targets[cols_to_keep]

In [221]:
# Edgar_Class is repeated for some rows
emission_targets.sort_values(by='Edgar_Class').head(20)

Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,Vars,HRV
4,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...,0.000115
3,agrc,co2,AG - Crops:CO2,AG - Crops,Agriculture,emission_co2e_co2_agrc_biomass_bevs_and_spices...,0.106978
5,agrc,n2o,AG - Crops:N2O,AG - Crops,Agriculture,emission_co2e_n2o_agrc_biomass_burning:emissio...,1.300449
0,lvst,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,0.805474
1,lsmm,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,0.805474
2,lsmm,n2o,AG - Livestock:N2O,AG - Livestock,Agriculture,emission_co2e_n2o_lsmm_anaerobic_digester:emis...,0.100881
65,ccsq,ch4,CCSQ:CH4,CCSQ,CCSQ,emission_co2e_ch4_ccsq_direct_air_capture,0.0
66,ccsq,co2,CCSQ:CO2,CCSQ,CCSQ,emission_co2e_co2_ccsq_direct_air_capture,0.0
67,ccsq,n2o,CCSQ:N2O,CCSQ,CCSQ,emission_co2e_n2o_ccsq_direct_air_capture,0.0
7,scoe,ch4,EN - Building:CH4,EN - Building,Energy,emission_co2e_ch4_scoe_commercial_municipal:em...,0.332792


In [222]:
# Check the highest emissions
emission_targets.sort_values(by='HRV', ascending=False).head(20)

Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,Vars,HRV
18,trns,co2,EN - Transportation:CO2,EN - Transportation,Energy,emission_co2e_co2_trns_aviation:emission_co2e_...,6.654934
6,scoe,co2,EN - Building:CO2,EN - Building,Energy,emission_co2e_co2_scoe_commercial_municipal:em...,2.603821
9,entc,co2,EN - Electricity/Heat:CO2,EN - Electricity/Heat,Energy,emission_co2e_co2_entc_generation_pp_biogas:em...,2.522588
15,inen,co2,EN - Manufacturing/Construction:CO2,EN - Manufacturing/Construction,Energy,emission_co2e_co2_inen_agriculture_and_livesto...,2.37007
21,ippu,co2,IN - Industrial Processes:CO2,IN - Industrial Processes,Industrial Processes,emission_co2e_co2_ippu_product_use_product_use...,2.168521
5,agrc,n2o,AG - Crops:N2O,AG - Crops,Agriculture,emission_co2e_n2o_agrc_biomass_burning:emissio...,1.300449
54,waso,ch4,Waste - Solid Waste:CH4,Waste - Solid Waste,Waste,emission_co2e_ch4_waso_biogas_food:emission_co...,1.009457
1,lsmm,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,0.805474
0,lvst,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,0.805474
12,fgtv,co2,EN - Fugitive Emissions:CO2,EN - Fugitive Emissions,Energy,emission_co2e_co2_fgtv_fuel_coal:emission_co2e...,0.798061


In [223]:
emission_targets['weights'] = emission_targets['HRV'] / emission_targets['HRV'].sum()

In [224]:
emission_targets.sort_values(by='weights', ascending=False).head(20)

Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,Vars,HRV,weights
18,trns,co2,EN - Transportation:CO2,EN - Transportation,Energy,emission_co2e_co2_trns_aviation:emission_co2e_...,6.654934,0.357225
6,scoe,co2,EN - Building:CO2,EN - Building,Energy,emission_co2e_co2_scoe_commercial_municipal:em...,2.603821,0.139768
9,entc,co2,EN - Electricity/Heat:CO2,EN - Electricity/Heat,Energy,emission_co2e_co2_entc_generation_pp_biogas:em...,2.522588,0.135408
15,inen,co2,EN - Manufacturing/Construction:CO2,EN - Manufacturing/Construction,Energy,emission_co2e_co2_inen_agriculture_and_livesto...,2.37007,0.127221
21,ippu,co2,IN - Industrial Processes:CO2,IN - Industrial Processes,Industrial Processes,emission_co2e_co2_ippu_product_use_product_use...,2.168521,0.116402
5,agrc,n2o,AG - Crops:N2O,AG - Crops,Agriculture,emission_co2e_n2o_agrc_biomass_burning:emissio...,1.300449,0.069806
54,waso,ch4,Waste - Solid Waste:CH4,Waste - Solid Waste,Waste,emission_co2e_ch4_waso_biogas_food:emission_co...,1.009457,0.054186
1,lsmm,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,0.805474,0.043236
0,lvst,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,0.805474,0.043236
12,fgtv,co2,EN - Fugitive Emissions:CO2,EN - Fugitive Emissions,Energy,emission_co2e_co2_fgtv_fuel_coal:emission_co2e...,0.798061,0.042838


## Sketch the new diff report format

In [225]:
og_diff_report = pd.read_csv(os.path.join(sectoral_report_dir, 'detailed_diff_report_all-sectors.csv'))
og_diff_report.head()

Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,diff
0,2022,agrc,ch4,AG - Crops:CH4,0.20536,0.000115,1781.46456
1,2022,agrc,co2,AG - Crops:CO2,1.268444,0.106978,10.85702
2,2022,agrc,n2o,AG - Crops:N2O,0.223638,1.300449,-0.82803
3,2022,ccsq,ch4,CCSQ:CH4,0.0,0.0,
4,2022,ccsq,co2,CCSQ:CO2,0.0,0.0,


In [226]:
sdr = SectoralDiffReport(sectoral_report_mapping_dir, 'HRV', init_year=2015, ref_year=2015)

In [227]:
diff_report_with_w = sdr.calculate_weights(og_diff_report)
diff_report_with_w.head()

Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,diff,Weights
0,2022,agrc,ch4,AG - Crops:CH4,0.20536,0.000115,1781.46456,4e-06
1,2022,agrc,co2,AG - Crops:CO2,1.268444,0.106978,10.85702,0.00341
2,2022,agrc,n2o,AG - Crops:N2O,0.223638,1.300449,-0.82803,0.041447
3,2022,ccsq,ch4,CCSQ:CH4,0.0,0.0,,0.0
4,2022,ccsq,co2,CCSQ:CO2,0.0,0.0,,0.0


In [228]:
diff_report_with_w.drop(columns=['diff'], inplace=True)
diff_report = sdr.calculate_difference(diff_report_with_w)

In [229]:
diff_report

Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,Weights,diff,squared_diff
0,2022,agrc,ch4,AG - Crops:CH4,0.205360,0.000115,0.000004,1781.464560,0.042125
1,2022,agrc,co2,AG - Crops:CO2,1.268444,0.106978,0.003410,10.857020,1.349004
2,2022,agrc,n2o,AG - Crops:N2O,0.223638,1.300449,0.041447,-0.828030,1.159523
3,2022,ccsq,ch4,CCSQ:CH4,0.000000,0.000000,0.000000,,0.000000
4,2022,ccsq,co2,CCSQ:CO2,0.000000,0.000000,0.000000,,0.000000
...,...,...,...,...,...,...,...,...,...
63,2022,trww,ch4,Waste - Wastewater Treatment:CH4,0.438294,0.428962,0.013672,0.021754,0.000087
64,2022,trww,n2o,Waste - Wastewater Treatment:N2O,0.273529,0.065935,0.002101,3.148446,0.043095
65,2022,waso,ch4,Waste - Solid Waste:CH4,1.145807,1.009457,0.032173,0.135072,0.018591
66,2022,waso,co2,Waste - Solid Waste:CO2,0.574465,0.000000,0.000000,inf,0.330011


In [230]:
# Check for NaNs or inf in squared_diff
diff_report[diff_report['squared_diff'].isna()]



Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,Weights,diff,squared_diff


In [231]:
diff_report[diff_report['squared_diff'] == np.inf]

Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,Weights,diff,squared_diff


In [232]:
# diff_report.drop(columns=['Weights', 'diff', 'squared_diff'])

In [233]:
ef = ErrorFunctions()

In [234]:
ef.wmse(diff_report)

12.798983030648392

## Test and Sketch the new DiffReport Methods

In [235]:
dru = DiffReportUtils('HRV', 2022, os.path.join(sectoral_report_mapping_dir, 'edgar_ssp_cw.csv'))

In [236]:
edgar_emissions = dru.edgar_emission_db_etl(os.path.join(sectoral_report_mapping_dir, 'CSC-GHG_emissions-April2024_to_calibrate.csv'))
edgar_emissions

Unnamed: 0,iso_alpha_3,edgar_class,edgar_emission,year
0,HRV,AG - Livestock:CH4,1.610948,2022
1,HRV,AG - Livestock:N2O,0.100881,2022
2,HRV,AG - Crops:CH4,0.000115,2022
3,HRV,AG - Crops:CO2,0.106978,2022
4,HRV,AG - Crops:N2O,1.300449,2022
5,HRV,EN - Building:CH4,0.332792,2022
6,HRV,EN - Building:CO2,2.603821,2022
7,HRV,EN - Building:N2O,0.106908,2022
8,HRV,EN - Electricity/Heat:CH4,0.010481,2022
9,HRV,EN - Electricity/Heat:CO2,2.522588,2022


In [237]:
ssp_out = pd.read_csv(os.path.join(dummy_files_dir, 'ssp_output_w_energy_dummy.csv'))

In [238]:
dru.clean_ssp_out_data(ssp_out)

Unnamed: 0,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,area_agrc_crops_other_woody_perennial,area_agrc_crops_pulses,...,yield_agrc_herbs_and_other_perennial_crops_tonne,yield_agrc_nuts_tonne,yield_agrc_other_annual_tonne,yield_agrc_other_woody_perennial_tonne,yield_agrc_pulses_tonne,yield_agrc_rice_tonne,yield_agrc_sugar_cane_tonne,yield_agrc_tubers_tonne,yield_agrc_vegetables_and_vines_tonne,year
0,0,128992.475401,286925.570284,59534.988647,155176.845408,200241.524314,109285.29166,144565.284932,96468.731604,66425.612333,...,5781814.0,65829.816853,279108.581486,50211.9748,175242.943363,0.0,0.0,2050396.0,725849.288265,2015


In [239]:
ssp_emissions_report = dru.generate_ssp_emissions_report(ssp_out)
ssp_emissions_report

Unnamed: 0,subsector,gas,edgar_class,ssp_emission
0,lvst,ch4,AG - Livestock:CH4,1.938988
1,lsmm,ch4,AG - Livestock:CH4,0.040259
2,lsmm,n2o,AG - Livestock:N2O,0.165996
3,agrc,co2,AG - Crops:CO2,1.316953
4,agrc,ch4,AG - Crops:CH4,0.214505
...,...,...,...,...
63,soil,co2,LULUCF - Organic Soil:CO2,0.110122
64,soil,n2o,LULUCF - Organic Soil:N2O,1.031985
65,ccsq,ch4,CCSQ:CH4,0.000000
66,ccsq,co2,CCSQ:CO2,0.000000


In [240]:
ssp_emissions_report['ssp_emission'].min()

-1.2470138668192876

In [241]:
ssp_emissions_report.sort_values(by=['subsector', 'gas']).tail(20)

Unnamed: 0,subsector,gas,edgar_class,ssp_emission
52,ippu,sf6,IN - Industrial Processes:SF6,0.0
61,lndu,ch4,LULUCF - Deforestation:CH4,0.417046
60,lndu,co2,LULUCF - Deforestation:CO2,0.242741
62,lndu,co2,LULUCF - Other Land:CO2,0.025623
1,lsmm,ch4,AG - Livestock:CH4,0.040259
2,lsmm,n2o,AG - Livestock:N2O,0.165996
0,lvst,ch4,AG - Livestock:CH4,1.938988
7,scoe,ch4,EN - Building:CH4,0.019117
6,scoe,co2,EN - Building:CO2,7.494763
8,scoe,n2o,EN - Building:N2O,0.033724


In [242]:
df_report = dru.merge_ssp_with_edgar(ssp_emissions_report, edgar_emissions)
df_report

Updating AG - Livestock:CH4 with 0.805474041
Updating IN - Industrial Processes:HFC with 0.020415280666666667


Unnamed: 0,subsector,gas,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff
0,lvst,ch4,AG - Livestock:CH4,1.938988,HRV,0.805474,2022,0.805475,0.805475,0.025671,0.590823,1.407260,1.284851
1,lsmm,ch4,AG - Livestock:CH4,0.040259,HRV,0.805474,2022,0.805475,0.805475,0.025671,0.590823,-0.950018,0.585556
2,lsmm,n2o,AG - Livestock:N2O,0.165996,HRV,0.100881,2022,0.100882,0.100882,0.003215,0.096111,0.645442,0.004240
3,agrc,co2,AG - Crops:CO2,1.316953,HRV,0.106978,2022,0.106979,0.106979,0.003410,0.101634,11.310352,1.464037
4,agrc,ch4,AG - Crops:CH4,0.214505,HRV,0.000115,2022,0.000116,0.000116,0.000004,0.000115,1844.825435,0.045963
...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,soil,co2,LULUCF - Organic Soil:CO2,0.110122,HRV,0.092259,2022,0.092260,0.092260,0.002940,0.088248,0.193601,0.000319
64,soil,n2o,LULUCF - Organic Soil:N2O,1.031985,-999,-999.000000,2022,-999.000000,-999.000000,-999.000000,-999.000000,-999.000000,-999.000000
65,ccsq,ch4,CCSQ:CH4,0.000000,-999,-999.000000,2022,-999.000000,-999.000000,-999.000000,-999.000000,-999.000000,-999.000000
66,ccsq,co2,CCSQ:CO2,0.000000,-999,-999.000000,2022,-999.000000,-999.000000,-999.000000,-999.000000,-999.000000,-999.000000


In [249]:
# print the df with -999 values anywhere
df_report[df_report['edgar_emission'] == -999]

Unnamed: 0,subsector,gas,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff
59,frst,ch4,LULUCF - Forest Land:CH4,0.126491,-999,-999.0,2022,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
61,lndu,ch4,LULUCF - Deforestation:CH4,0.417046,-999,-999.0,2022,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
64,soil,n2o,LULUCF - Organic Soil:N2O,1.031985,-999,-999.0,2022,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
65,ccsq,ch4,CCSQ:CH4,0.0,-999,-999.0,2022,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
66,ccsq,co2,CCSQ:CO2,0.0,-999,-999.0,2022,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
67,ccsq,n2o,CCSQ:N2O,0.0,-999,-999.0,2022,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0


In [244]:
# get duplicated edgar_class
df_report[df_report.duplicated(subset=['edgar_class'], keep=False)]

Unnamed: 0,subsector,gas,edgar_class,ssp_emission,iso_alpha_3,edgar_emission,year,edgar_emission_epsilon,direct_weight,norm_weight,log_weight,diff,squared_diff
0,lvst,ch4,AG - Livestock:CH4,1.938988,HRV,0.805474,2022,0.805475,0.805475,0.025671,0.590823,1.40726,1.284851
1,lsmm,ch4,AG - Livestock:CH4,0.04025905,HRV,0.805474,2022,0.805475,0.805475,0.025671,0.590823,-0.950018,0.585556
24,ippu,c2f6,IN - Industrial Processes:HFC,0.0,HRV,0.020415,2022,0.020416,0.020416,0.000651,0.02021,-1.0,0.000417
25,ippu,c2h3f3,IN - Industrial Processes:HFC,0.2203381,HRV,0.020415,2022,0.020416,0.020416,0.000651,0.02021,9.792276,0.039969
26,ippu,c2hf5,IN - Industrial Processes:HFC,0.2003794,HRV,0.020415,2022,0.020416,0.020416,0.000651,0.02021,8.814688,0.032387
27,ippu,c3f8,IN - Industrial Processes:HFC,0.0,HRV,0.020415,2022,0.020416,0.020416,0.000651,0.02021,-1.0,0.000417
28,ippu,c3h2f6,IN - Industrial Processes:HFC,0.0,HRV,0.020415,2022,0.020416,0.020416,0.000651,0.02021,-1.0,0.000417
29,ippu,c3h3f5,IN - Industrial Processes:HFC,0.0,HRV,0.020415,2022,0.020416,0.020416,0.000651,0.02021,-1.0,0.000417
30,ippu,c3hf7,IN - Industrial Processes:HFC,0.0,HRV,0.020415,2022,0.020416,0.020416,0.000651,0.02021,-1.0,0.000417
31,ippu,c4f10,IN - Industrial Processes:HFC,0.0,HRV,0.020415,2022,0.020416,0.020416,0.000651,0.02021,-1.0,0.000417


In [245]:
df_report['norm_weight'].sum()

-5993.0