In [1]:
import pandas as pd
import numpy as np
import os
from utilities.utils import SectoralDiffReport, ErrorFunctions

In [2]:
# Reload imported modules automatically so edits to project code are picked up live
%load_ext autoreload
%autoreload 2

In [3]:
# Locate the input folders relative to the notebook's working directory:
#   <cwd>/misc/sectoral_reports
curr_dir = os.getcwd()
misc_dir = os.path.join(curr_dir, 'misc')
sectoral_report_dir = os.path.join(curr_dir, 'misc', 'sectoral_reports')


## Explore the new emission targets CSV

In [4]:
# Load the 2022 emission targets table and preview its first rows
targets_csv_path = os.path.join(sectoral_report_dir, '2022_emission_targets.csv')
emission_targets = pd.read_csv(targets_csv_path)
emission_targets.head()


Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,Vars,ids,IND,PRY,KAZ,...,SLE,BEN,ETH,MRT,NER,TGO,IRN,HRV,GEO,Year
0,lvst,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,1:lvst:ch4,216.832747,12.036521,13.135347,...,0.517069,1.488482,45.03151,3.446201,12.146617,0.787989,9.529176,0.805474,0.996739,2022
1,lsmm,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,2:lsmm:ch4,216.832747,12.036521,13.135347,...,0.517069,1.488482,45.03151,3.446201,12.146617,0.787989,9.529176,0.805474,0.996739,2022
2,lsmm,n2o,AG - Livestock:N2O,AG - Livestock,Agriculture,emission_co2e_n2o_lsmm_anaerobic_digester:emis...,3:lsmm:n2o,2.169403,0.125988,1.269783,...,0.032553,0.06131,0.280803,0.010149,0.084756,0.10027,0.210769,0.100881,0.088307,2022
3,agrc,co2,AG - Crops:CO2,AG - Crops,Agriculture,emission_co2e_co2_agrc_biomass_bevs_and_spices...,4:agrc:co2,32.634819,0.076804,0.454098,...,0.0,0.0,0.46514,0.0,0.0,0.0,1.8583,0.106978,0.100935,2022
4,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...,5:agrc:ch4,127.221401,1.101916,0.833077,...,1.327563,0.527888,0.64565,0.264313,0.309672,0.38962,3.137081,0.000115,0.00306,2022


In [5]:
# Keep only the descriptive columns plus the HRV target values
cols_to_keep = ['Subsector', 'Gas', 'Edgar_Class', 'Edgar_Subsector', 'Edgar_Sector', 'Vars', 'HRV']
# Take an explicit copy: a later cell adds a 'weights' column to this frame,
# and assigning into a bare column selection triggers SettingWithCopyWarning
emission_targets = emission_targets[cols_to_keep].copy()

In [6]:
# Order by Edgar_Class so rows sharing the same class sit next to each other
# (e.g. lvst and lsmm both map to "AG - Livestock:CH4")
emission_targets.sort_values('Edgar_Class').head(n=20)

Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,Vars,HRV
4,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...,0.000115
3,agrc,co2,AG - Crops:CO2,AG - Crops,Agriculture,emission_co2e_co2_agrc_biomass_bevs_and_spices...,0.106978
5,agrc,n2o,AG - Crops:N2O,AG - Crops,Agriculture,emission_co2e_n2o_agrc_biomass_burning:emissio...,1.300449
0,lvst,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,0.805474
1,lsmm,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,0.805474
2,lsmm,n2o,AG - Livestock:N2O,AG - Livestock,Agriculture,emission_co2e_n2o_lsmm_anaerobic_digester:emis...,0.100881
65,ccsq,ch4,CCSQ:CH4,CCSQ,CCSQ,emission_co2e_ch4_ccsq_direct_air_capture,0.0
66,ccsq,co2,CCSQ:CO2,CCSQ,CCSQ,emission_co2e_co2_ccsq_direct_air_capture,0.0
67,ccsq,n2o,CCSQ:N2O,CCSQ,CCSQ,emission_co2e_n2o_ccsq_direct_air_capture,0.0
7,scoe,ch4,EN - Building:CH4,EN - Building,Energy,emission_co2e_ch4_scoe_commercial_municipal:em...,0.332792


In [7]:
# Rank rows by the HRV column, largest values first, and show the top 20
(
    emission_targets
    .sort_values(by='HRV', ascending=False)
    .head(20)
)

Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,Vars,HRV
18,trns,co2,EN - Transportation:CO2,EN - Transportation,Energy,emission_co2e_co2_trns_aviation:emission_co2e_...,6.654934
6,scoe,co2,EN - Building:CO2,EN - Building,Energy,emission_co2e_co2_scoe_commercial_municipal:em...,2.603821
9,entc,co2,EN - Electricity/Heat:CO2,EN - Electricity/Heat,Energy,emission_co2e_co2_entc_generation_pp_biogas:em...,2.522588
15,inen,co2,EN - Manufacturing/Construction:CO2,EN - Manufacturing/Construction,Energy,emission_co2e_co2_inen_agriculture_and_livesto...,2.37007
21,ippu,co2,IN - Industrial Processes:CO2,IN - Industrial Processes,Industrial Processes,emission_co2e_co2_ippu_product_use_product_use...,2.168521
5,agrc,n2o,AG - Crops:N2O,AG - Crops,Agriculture,emission_co2e_n2o_agrc_biomass_burning:emissio...,1.300449
54,waso,ch4,Waste - Solid Waste:CH4,Waste - Solid Waste,Waste,emission_co2e_ch4_waso_biogas_food:emission_co...,1.009457
1,lsmm,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,0.805474
0,lvst,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,0.805474
12,fgtv,co2,EN - Fugitive Emissions:CO2,EN - Fugitive Emissions,Energy,emission_co2e_co2_fgtv_fuel_coal:emission_co2e...,0.798061


In [8]:
# Express each row's HRV value as a fraction of the column total
emission_targets['weights'] = emission_targets['HRV'].div(emission_targets['HRV'].sum())

In [9]:
# Largest contributors first: top 20 rows by their share of the HRV total
(
    emission_targets
    .sort_values(by='weights', ascending=False)
    .head(20)
)

Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,Vars,HRV,weights
18,trns,co2,EN - Transportation:CO2,EN - Transportation,Energy,emission_co2e_co2_trns_aviation:emission_co2e_...,6.654934,0.357225
6,scoe,co2,EN - Building:CO2,EN - Building,Energy,emission_co2e_co2_scoe_commercial_municipal:em...,2.603821,0.139768
9,entc,co2,EN - Electricity/Heat:CO2,EN - Electricity/Heat,Energy,emission_co2e_co2_entc_generation_pp_biogas:em...,2.522588,0.135408
15,inen,co2,EN - Manufacturing/Construction:CO2,EN - Manufacturing/Construction,Energy,emission_co2e_co2_inen_agriculture_and_livesto...,2.37007,0.127221
21,ippu,co2,IN - Industrial Processes:CO2,IN - Industrial Processes,Industrial Processes,emission_co2e_co2_ippu_product_use_product_use...,2.168521,0.116402
5,agrc,n2o,AG - Crops:N2O,AG - Crops,Agriculture,emission_co2e_n2o_agrc_biomass_burning:emissio...,1.300449,0.069806
54,waso,ch4,Waste - Solid Waste:CH4,Waste - Solid Waste,Waste,emission_co2e_ch4_waso_biogas_food:emission_co...,1.009457,0.054186
1,lsmm,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,0.805474,0.043236
0,lvst,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,0.805474,0.043236
12,fgtv,co2,EN - Fugitive Emissions:CO2,EN - Fugitive Emissions,Energy,emission_co2e_co2_fgtv_fuel_coal:emission_co2e...,0.798061,0.042838


## Sketch the new diff report format

In [10]:
# Load the existing all-sectors diff report and preview it
og_report_path = os.path.join(sectoral_report_dir, 'detailed_diff_report_all-sectors.csv')
og_diff_report = pd.read_csv(og_report_path)
og_diff_report.head()

Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,diff
0,2022,agrc,ch4,AG - Crops:CH4,0.20536,0.000115,1781.46456
1,2022,agrc,co2,AG - Crops:CO2,1.268444,0.106978,10.85702
2,2022,agrc,n2o,AG - Crops:N2O,0.223638,1.300449,-0.82803
3,2022,ccsq,ch4,CCSQ:CH4,0.0,0.0,
4,2022,ccsq,co2,CCSQ:CO2,0.0,0.0,


In [11]:
# Build the report helper for the HRV column, pointing it at the sectoral report folder
sdr = SectoralDiffReport(
    sectoral_report_dir,
    'HRV',
    init_year=2015,
    ref_year=2015,
)

In [12]:
# Run the original report through calculate_weights and preview the result
# (the saved output shows an added 'Weights' column)
diff_report_with_w = sdr.calculate_weights(og_diff_report)
diff_report_with_w.head(5)

Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,diff,Weights
0,2022,agrc,ch4,AG - Crops:CH4,0.20536,0.000115,1781.46456,4e-06
1,2022,agrc,co2,AG - Crops:CO2,1.268444,0.106978,10.85702,0.00341
2,2022,agrc,n2o,AG - Crops:N2O,0.223638,1.300449,-0.82803,0.041447
3,2022,ccsq,ch4,CCSQ:CH4,0.0,0.0,,0.0
4,2022,ccsq,co2,CCSQ:CO2,0.0,0.0,,0.0


In [13]:
# Recompute the difference columns: strip the 'diff' column that came with the
# CSV before handing the frame to calculate_difference.
# drop() without inplace returns a new frame, so this cell can be re-run safely
# (an in-place drop raises KeyError on the second run, once 'diff' is gone).
diff_report = sdr.calculate_difference(diff_report_with_w.drop(columns=['diff']))

In [14]:
# Display the recomputed report (the saved output shows the new diff and squared_diff columns)
diff_report

Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,Weights,diff,squared_diff
0,2022,agrc,ch4,AG - Crops:CH4,0.205360,0.000115,0.000004,1781.464560,0.042125
1,2022,agrc,co2,AG - Crops:CO2,1.268444,0.106978,0.003410,10.857020,1.349004
2,2022,agrc,n2o,AG - Crops:N2O,0.223638,1.300449,0.041447,-0.828030,1.159523
3,2022,ccsq,ch4,CCSQ:CH4,0.000000,0.000000,0.000000,,0.000000
4,2022,ccsq,co2,CCSQ:CO2,0.000000,0.000000,0.000000,,0.000000
...,...,...,...,...,...,...,...,...,...
63,2022,trww,ch4,Waste - Wastewater Treatment:CH4,0.438294,0.428962,0.013672,0.021754,0.000087
64,2022,trww,n2o,Waste - Wastewater Treatment:N2O,0.273529,0.065935,0.002101,3.148446,0.043095
65,2022,waso,ch4,Waste - Solid Waste:CH4,1.145807,1.009457,0.032173,0.135072,0.018591
66,2022,waso,co2,Waste - Solid Waste:CO2,0.574465,0.000000,0.000000,inf,0.330011


In [None]:
# Rows where squared_diff is missing (NaN); infinite values are checked in the next cell
# NOTE(review): the saved output lists rows with NaN in `diff` but 0.0 in `squared_diff`,
# so it appears to predate this version of the cell — re-run to refresh.
diff_report.loc[diff_report['squared_diff'].isna()]



Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,Weights,diff,squared_diff
3,2022,ccsq,ch4,CCSQ:CH4,0.0,0.0,0.0,,0.0
4,2022,ccsq,co2,CCSQ:CO2,0.0,0.0,0.0,,0.0
5,2022,ccsq,n2o,CCSQ:N2O,0.0,0.0,0.0,,0.0


In [None]:
# Rows whose squared_diff equals +inf
# NOTE(review): the saved output lists rows with inf in `diff` and finite `squared_diff`,
# so it appears to predate this version of the cell — re-run to refresh.
diff_report[diff_report['squared_diff'].eq(np.inf)]

Unnamed: 0,Year,Subsector,Gas,Edgar_Class,Simulation_Values,Edgar_Values,Weights,diff,squared_diff
12,2022,frst,ch4,LULUCF - Forest Land:CH4,0.127166,0.0,0.0,inf,0.01617124
47,2022,ippu,pfcs,IN - Industrial Processes:PFC,8e-06,0.0,0.0,inf,6.984258e-11
49,2022,lndu,ch4,LULUCF - Deforestation:CH4,0.454797,0.0,0.0,inf,0.2068404
59,2022,soil,n2o,LULUCF - Organic Soil:N2O,1.062639,0.0,0.0,inf,1.129201
66,2022,waso,co2,Waste - Solid Waste:CO2,0.574465,0.0,0.0,inf,0.3300106


In [16]:
# diff_report.drop(columns=['Weights', 'diff', 'squared_diff'])

In [17]:
# Instantiate ErrorFunctions (imported from utilities.utils) with its default arguments
ef = ErrorFunctions()

In [18]:
# NOTE(review): in the saved run this call failed with
# "AttributeError: 'ErrorFunctions' object has no attribute 'weighted_mse'".
# TODO: confirm the intended method name in utilities.utils (or add it there)
# so the notebook survives Restart & Run All.
ef.weighted_mse(diff_report)

AttributeError: 'ErrorFunctions' object has no attribute 'weighted_mse'