In [74]:
import pandas as pd
import os

In [75]:
# The data and output folders are resolved relative to the current working directory
SCRIPT_DIR_PATH = os.getcwd()
DATA_DIR_PATH, OUTPUT_DIR_PATH = (
    os.path.join(SCRIPT_DIR_PATH, folder_name) for folder_name in ("data", "output")
)

## Load Inventory and Crosswalk Files

In [76]:
# Region being processed: name used in the output file name, inventory year
# (selected as a column of the inventory) and iso3 code (matched against "Code")
REGION = "libya"
YEAR = 2022
ISO3 = "LBY"

# Input files, both read from the data directory
CROSSWALK_FILE = os.path.join(DATA_DIR_PATH, "sisepuede_edgar_active_crosswalk.csv")
INVENTORY_FILE = os.path.join(DATA_DIR_PATH, "CSC-GHG_emissions-April2024_to_calibrate.csv")

In [77]:
# Load both inputs; the inventory file is decoded as latin1, the crosswalk with the default encoding
crosswalk_df = pd.read_csv(CROSSWALK_FILE)
inventory_df = pd.read_csv(INVENTORY_FILE, encoding="latin1")

In [78]:
# Preview the first five rows of the raw inventory
inventory_df.head(5)

Unnamed: 0,Code,Country,EDGAR Country Code,Income group,Lending category,Region,CSC Sector,CSC Subsector,Gas,Units,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,ABW,Aruba,ABW,High income,,Latin America & Caribbean,Energy,EN - Building,CH4,MtCO2e,...,0.000554,0.000538,0.0005,0.00057,0.000542,0.000574,0.000471,0.000616,0.000617,0.000622
1,ABW,Aruba,ABW,High income,,Latin America & Caribbean,Energy,EN - Building,CO2,MtCO2e,...,0.04709,0.037332,0.035079,0.039403,0.035488,0.044616,0.050051,0.044068,0.042466,0.044068
2,ABW,Aruba,ABW,High income,,Latin America & Caribbean,Energy,EN - Building,N2O,MtCO2e,...,0.00016,0.000138,0.00013,0.000137,0.000135,0.000235,0.000264,0.00024,0.000234,0.000241
3,ABW,Aruba,ABW,High income,,Latin America & Caribbean,Energy,EN - Electricity/Heat,CH4,MtCO2e,...,0.000114,0.00012,0.000126,0.000127,0.000122,0.000132,0.000156,0.000135,0.000132,0.000136
4,ABW,Aruba,ABW,High income,,Latin America & Caribbean,Energy,EN - Electricity/Heat,CO2,MtCO2e,...,0.213816,0.222204,0.229352,0.23974,0.223555,0.232325,0.27429,0.232537,0.224081,0.232537


In [79]:
# Preview the first five rows of the raw crosswalk
crosswalk_df.head(5)

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,ignore,note,need_better_information_on_what_is_contained
0,agrc,ch4,Agriculture,AG - Crops,AG - Crops,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...,,may include fertilizer application,1.0
1,agrc,co2,Agriculture,AG - Crops,AG - Crops,emission_co2e_co2_agrc_biomass_bevs_and_spices...,,may include liming and urea,1.0
2,agrc,n2o,Agriculture,AG - Crops,AG - Crops,emission_co2e_n2o_agrc_biomass_burning:emissio...,,may include fertilizer application,1.0
3,lvst,ch4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,,,
4,lsmm,ch4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,,,


## Cleaning dataframes before merge

In [80]:
# Keep only the rows of the selected region and the columns used downstream.
# The selected year's column is renamed to the iso3 code so that it becomes
# the value column of the region.
inventory_region_year_df = inventory_df.loc[
    inventory_df["Code"] == ISO3,
    ["Code", "CSC Subsector", "Gas", str(YEAR)],
].rename(columns={str(YEAR): ISO3})
inventory_region_year_df.head(5)

Unnamed: 0,Code,CSC Subsector,Gas,LBY
3057,LBY,AG - Livestock,CH4,1.909586
3058,LBY,AG - Livestock,N2O,0.007607
3059,LBY,AG - Crops,CH4,0.000293
3060,LBY,AG - Crops,CO2,0.01588
3061,LBY,AG - Crops,N2O,0.759204


In [81]:
# Upper-case the gas names in both frames so that they agree when building the merge key
for frame in (crosswalk_df, inventory_region_year_df):
    frame["Gas"] = frame["Gas"].str.upper()

In [82]:
# Drop the annotation columns that are not needed for the merge.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise a KeyError.
crosswalk_df = crosswalk_df.drop(
    columns=["ignore", "note", "need_better_information_on_what_is_contained"],
    errors="ignore",
)
crosswalk_df.head()

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars
0,agrc,CH4,Agriculture,AG - Crops,AG - Crops,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...
1,agrc,CO2,Agriculture,AG - Crops,AG - Crops,emission_co2e_co2_agrc_biomass_bevs_and_spices...
2,agrc,N2O,Agriculture,AG - Crops,AG - Crops,emission_co2e_n2o_agrc_biomass_burning:emissio...
3,lvst,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lvst_entferm_buffalo:emissio...
4,lsmm,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lsmm_anaerobic_digester:emis...


In [83]:
# Build an "<subsector> - <gas>" id column in both frames to merge on
crosswalk_df["id"] = crosswalk_df["Edgar_Subsector"].str.cat(crosswalk_df["Gas"], sep=" - ")
inventory_region_year_df["id"] = inventory_region_year_df["CSC Subsector"].str.cat(
    inventory_region_year_df["Gas"], sep=" - "
)

In [84]:
# Inspect the inventory rows together with their new id column
inventory_region_year_df.head(5)

Unnamed: 0,Code,CSC Subsector,Gas,LBY,id
3057,LBY,AG - Livestock,CH4,1.909586,AG - Livestock - CH4
3058,LBY,AG - Livestock,N2O,0.007607,AG - Livestock - N2O
3059,LBY,AG - Crops,CH4,0.000293,AG - Crops - CH4
3060,LBY,AG - Crops,CO2,0.01588,AG - Crops - CO2
3061,LBY,AG - Crops,N2O,0.759204,AG - Crops - N2O


In [85]:
# Inspect the crosswalk rows together with their new id column
crosswalk_df.head(5)

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id
0,agrc,CH4,Agriculture,AG - Crops,AG - Crops,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...,AG - Crops - CH4
1,agrc,CO2,Agriculture,AG - Crops,AG - Crops,emission_co2e_co2_agrc_biomass_bevs_and_spices...,AG - Crops - CO2
2,agrc,N2O,Agriculture,AG - Crops,AG - Crops,emission_co2e_n2o_agrc_biomass_burning:emissio...,AG - Crops - N2O
3,lvst,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,AG - Livestock - CH4
4,lsmm,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,AG - Livestock - CH4


In [86]:
# Count repeated merge keys in each frame; an id repeated in the crosswalk
# receives the same inventory value on every one of its rows after the merge
for label, ids in (
    ("Duplicated IDs in inventory_region_year_df:", inventory_region_year_df["id"]),
    ("Duplicated IDs in crosswalk_df:", crosswalk_df["id"]),
):
    print(label, ids.duplicated().sum())

Duplicated IDs in inventory_region_year_df: 0
Duplicated IDs in crosswalk_df: 2


## Merge Data

In [87]:
# List the inventory columns; "id" and the ISO3 value column are the ones used by the merge
inventory_region_year_df.columns

Index(['Code', 'CSC Subsector', 'Gas', 'LBY', 'id'], dtype='object')

In [88]:
# Attach the inventory value of the region to every crosswalk row that shares its id.
# A left join keeps crosswalk rows without an inventory match (their value is NaN).
# validate="many_to_one" makes pandas raise a MergeError if an id is repeated on
# the inventory side, which would otherwise silently multiply crosswalk rows.
merged_df = pd.merge(
    crosswalk_df,
    inventory_region_year_df[["id", ISO3]],
    how="left",
    on="id",
    validate="many_to_one",
)
merged_df.head()

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id,LBY
0,agrc,CH4,Agriculture,AG - Crops,AG - Crops,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...,AG - Crops - CH4,0.000293
1,agrc,CO2,Agriculture,AG - Crops,AG - Crops,emission_co2e_co2_agrc_biomass_bevs_and_spices...,AG - Crops - CO2,0.01588
2,agrc,N2O,Agriculture,AG - Crops,AG - Crops,emission_co2e_n2o_agrc_biomass_burning:emissio...,AG - Crops - N2O,0.759204
3,lvst,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,AG - Livestock - CH4,1.909586
4,lsmm,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,AG - Livestock - CH4,1.909586


In [89]:
# Collect every merged row whose id occurs more than once
# (keep=False marks all occurrences, not only the repeats)
duplicated_ids = merged_df.loc[merged_df["id"].duplicated(keep=False)]
duplicated_ids

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id,LBY
3,lvst,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,AG - Livestock - CH4,1.909586
4,lsmm,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,AG - Livestock - CH4,1.909586
62,frst,CO2,"Land Use, Land Use Change, and Forestry",LULUCF - Forest Land,LULUCF - Forest Land Removals,emission_co2e_co2_entc_bmass_processing_and_re...,LULUCF - Forest Land - CO2,
63,frst,CO2,"Land Use, Land Use Change, and Forestry",LULUCF - Forest Land,LULUCF - Forest Land Sequestration,emission_co2e_co2_frst_sequestration_mangroves...,LULUCF - Forest Land - CO2,


## Deal with NaNs

In [90]:
# merged_df[merged_df[ISO3].isna()]

### Check IPPU

In [91]:
# Inventory rows reported under the industrial-processes subsector
inventory_region_year_df.loc[inventory_region_year_df["CSC Subsector"].eq("IN - Industrial Processes")]

Unnamed: 0,Code,CSC Subsector,Gas,LBY,id
3077,LBY,IN - Industrial Processes,CH4,0.003465,IN - Industrial Processes - CH4
3078,LBY,IN - Industrial Processes,CO2,2.499458,IN - Industrial Processes - CO2
3079,LBY,IN - Industrial Processes,HFC,4.191501,IN - Industrial Processes - HFC
3080,LBY,IN - Industrial Processes,N2O,0.360349,IN - Industrial Processes - N2O
3081,LBY,IN - Industrial Processes,SF6,0.560506,IN - Industrial Processes - SF6


In [92]:
# Inner-join the industrial-processes rows of both frames to see which ids match
crosswalk_df[crosswalk_df["Edgar_Subsector"] == "IN - Industrial Processes"].merge(
    inventory_region_year_df[inventory_region_year_df["CSC Subsector"] == "IN - Industrial Processes"],
    how="inner",
    on="id",
)

Unnamed: 0,Subsector,Gas_x,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id,Code,CSC Subsector,Gas_y,LBY
0,ippu,CH4,Industrial Processes,IN - Industrial Processes,IN - Industrial Processes,emission_co2e_ch4_ippu_production_chemicals:em...,IN - Industrial Processes - CH4,LBY,IN - Industrial Processes,CH4,0.003465
1,ippu,CO2,Industrial Processes,IN - Industrial Processes,IN - Industrial Processes,emission_co2e_co2_ippu_product_use_product_use...,IN - Industrial Processes - CO2,LBY,IN - Industrial Processes,CO2,2.499458
2,ippu,N2O,Industrial Processes,IN - Industrial Processes,IN - Industrial Processes,emission_co2e_n2o_ippu_production_chemicals:em...,IN - Industrial Processes - N2O,LBY,IN - Industrial Processes,N2O,0.360349
3,ippu,SF6,Industrial Processes,IN - Industrial Processes,IN - Industrial Processes,emission_co2e_sf6_ippu_production_chemicals:em...,IN - Industrial Processes - SF6,LBY,IN - Industrial Processes,SF6,0.560506


In [93]:
# Get the emission value that the inventory reports for HFC under industrial processes.
# Summing the selection (instead of taking .values[0]) returns the same number when
# the row exists and 0.0 when the region has no HFC row, rather than raising an
# IndexError on the empty selection.
fc_gas_total = inventory_region_year_df.loc[
    (inventory_region_year_df["CSC Subsector"] == "IN - Industrial Processes")
    & (inventory_region_year_df["Gas"] == "HFC"),
    ISO3,
].sum()
fc_gas_total

np.float64(4.191500967)

In [94]:
# The inventory labels the gas "HFC" while the crosswalk row is "HFCS", so the ids
# never match in the merge; write the inventory value onto the HFCS row directly
merged_df.loc[
    merged_df["Edgar_Subsector"].eq("IN - Industrial Processes") & merged_df["Gas"].eq("HFCS"),
    ISO3,
] = fc_gas_total
merged_df.loc[merged_df["Edgar_Subsector"].eq("IN - Industrial Processes") & merged_df["Gas"].eq("HFCS")]


Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id,LBY
54,ippu,HFCS,Industrial Processes,IN - Industrial Processes,IN - Industrial Processes,emission_co2e_hfcs_ippu_product_use_product_us...,IN - Industrial Processes - HFCS,4.191501


In [95]:
# # divide the fc_gas_total between the Industrial Processes NaN rows
# num_of_ippu_nan_rows = merged_df[(merged_df["Edgar_Subsector"] == "IN - Industrial Processes") & (merged_df[ISO3].isna())].shape[0]
# fc_gas_per_row = fc_gas_total / num_of_ippu_nan_rows
# fc_gas_per_row

In [96]:
# # replace the NaNs in the Industrial Processes rows with the calculated fc_gas_per_row
# merged_df.loc[(merged_df["Edgar_Subsector"] == "IN - Industrial Processes") & (merged_df[ISO3].isna()), ISO3] = fc_gas_per_row

In [97]:
# Crosswalk rows that still have no inventory value
merged_df.loc[merged_df[ISO3].isna()]

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id,LBY
6,ccsq,CH4,CCSQ,CCSQ,CCSQ,emission_co2e_ch4_ccsq_direct_air_capture,CCSQ - CH4,
7,ccsq,CO2,CCSQ,CCSQ,CCSQ,emission_co2e_co2_ccsq_direct_air_capture,CCSQ - CO2,
8,ccsq,N2O,CCSQ,CCSQ,CCSQ,emission_co2e_n2o_ccsq_direct_air_capture,CCSQ - N2O,
21,scoe,CH4,Energy,EN - Other,EN - Other,emission_co2e_ch4_scoe_other_se,EN - Other - CH4,
22,scoe,CO2,Energy,EN - Other,EN - Other,emission_co2e_co2_scoe_nbmass_other_se,EN - Other - CO2,
23,scoe,N2O,Energy,EN - Other,EN - Other,emission_co2e_n2o_scoe_other_se,EN - Other - N2O,
27,ippu,C2F6,Industrial Processes,IN - Industrial Processes,IN - Industrial Processes,emission_co2e_c2f6_ippu_product_use_product_us...,IN - Industrial Processes - C2F6,
28,ippu,C2H3F3,Industrial Processes,IN - Industrial Processes,IN - Industrial Processes,emission_co2e_c2h3f3_ippu_product_use_product_...,IN - Industrial Processes - C2H3F3,
29,ippu,C2HF5,Industrial Processes,IN - Industrial Processes,IN - Industrial Processes,emission_co2e_c2hf5_ippu_product_use_product_u...,IN - Industrial Processes - C2HF5,
30,ippu,C3F8,Industrial Processes,IN - Industrial Processes,IN - Industrial Processes,emission_co2e_c3f8_ippu_production_chemicals:e...,IN - Industrial Processes - C3F8,


In [98]:
# Rows without an inventory value get a value of zero; then confirm no NaN is left in any column
merged_df.loc[merged_df[ISO3].isna(), ISO3] = 0
merged_df.isna().sum()

Subsector                    0
Gas                          0
Edgar_Sector                 0
Edgar_Subsector              0
Edgar_Subsector_Synthetic    0
Vars                         0
id                           0
LBY                          0
dtype: int64

In [99]:
# Compare the inventory total with the merged total; they differ while
# duplicated crosswalk ids still carry the full inventory value on each row
for label, frame in (
    ("Original inventory total:", inventory_region_year_df),
    ("Merged dataframe total:", merged_df),
):
    print(label, frame[ISO3].sum())

Original inventory total: 104.513880277
Merged dataframe total: 106.42346668999998


## Fix discrepancies between totals in original inventory and merged inventory

In [101]:
# Rows captured right after the merge, i.e. before NaNs were filled with zero
duplicated_ids.head(5)

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id,LBY
3,lvst,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,AG - Livestock - CH4,1.909586
4,lsmm,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,AG - Livestock - CH4,1.909586
62,frst,CO2,"Land Use, Land Use Change, and Forestry",LULUCF - Forest Land,LULUCF - Forest Land Removals,emission_co2e_co2_entc_bmass_processing_and_re...,LULUCF - Forest Land - CO2,
63,frst,CO2,"Land Use, Land Use Change, and Forestry",LULUCF - Forest Land,LULUCF - Forest Land Sequestration,emission_co2e_co2_frst_sequestration_mangroves...,LULUCF - Forest Land - CO2,


In [102]:
# Distinct ids that appear on more than one crosswalk row
duplicated_ids["id"].unique()

array(['AG - Livestock - CH4', 'LULUCF - Forest Land - CO2'], dtype=object)

In [103]:
# Current state of the merged rows that share a duplicated id
merged_df.loc[merged_df["id"].isin(duplicated_ids["id"].unique())]

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id,LBY
3,lvst,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,AG - Livestock - CH4,1.909586
4,lsmm,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,AG - Livestock - CH4,1.909586
62,frst,CO2,"Land Use, Land Use Change, and Forestry",LULUCF - Forest Land,LULUCF - Forest Land Removals,emission_co2e_co2_entc_bmass_processing_and_re...,LULUCF - Forest Land - CO2,0.0
63,frst,CO2,"Land Use, Land Use Change, and Forestry",LULUCF - Forest Land,LULUCF - Forest Land Sequestration,emission_co2e_co2_frst_sequestration_mangroves...,LULUCF - Forest Land - CO2,0.0


In [104]:
# The crosswalk repeats some ids, so the merge copied the same inventory value onto
# each of those rows; the value has to be shared out equally among them.
# Read the livestock CH4 value from the first matching row.
lvst_ch4_value = merged_df.loc[
    merged_df["Subsector"].eq("lvst") & merged_df["Gas"].eq("CH4"),
    ISO3,
].iloc[0]
lvst_ch4_value

np.float64(1.909586413)

In [105]:
# Equal share of the livestock CH4 value for each crosswalk row carrying that id.
# The divisor is the actual number of rows with the id rather than a hard-coded 2,
# so the split stays correct if the crosswalk gains or loses a row for this id.
value_to_distribute = lvst_ch4_value / (merged_df["id"] == "AG - Livestock - CH4").sum()
value_to_distribute

np.float64(0.9547932065)

In [106]:
# Write the equal share onto every row that carries the livestock CH4 id
merged_df.loc[merged_df["id"].eq("AG - Livestock - CH4"), ISO3] = value_to_distribute

In [107]:
# Re-inspect the rows that share a duplicated id after the update
merged_df.loc[merged_df["id"].isin(duplicated_ids["id"].unique())]

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id,LBY
3,lvst,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,AG - Livestock - CH4,0.954793
4,lsmm,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,AG - Livestock - CH4,0.954793
62,frst,CO2,"Land Use, Land Use Change, and Forestry",LULUCF - Forest Land,LULUCF - Forest Land Removals,emission_co2e_co2_entc_bmass_processing_and_re...,LULUCF - Forest Land - CO2,0.0
63,frst,CO2,"Land Use, Land Use Change, and Forestry",LULUCF - Forest Land,LULUCF - Forest Land Sequestration,emission_co2e_co2_frst_sequestration_mangroves...,LULUCF - Forest Land - CO2,0.0


In [108]:
# Re-check the totals after sharing out the duplicated livestock CH4 value
print("Original inventory total:", inventory_region_year_df[ISO3].sum())
print("Merged dataframe total:", merged_df[ISO3].sum())

# Enforce the reconciliation instead of relying on reading the two printed numbers:
# stop here if the merged targets double count or lose emissions. The tolerance
# only absorbs floating-point summation noise.
assert abs(inventory_region_year_df[ISO3].sum() - merged_df[ISO3].sum()) < 1e-9, (
    "Merged emission targets do not add up to the original inventory total"
)

Original inventory total: 104.513880277
Merged dataframe total: 104.51388027699998


## Double check IPPU

In [109]:
# Inventory values for industrial processes, shown again as the reference for the checks below
inventory_region_year_df.loc[inventory_region_year_df["CSC Subsector"].eq("IN - Industrial Processes")]

Unnamed: 0,Code,CSC Subsector,Gas,LBY,id
3077,LBY,IN - Industrial Processes,CH4,0.003465,IN - Industrial Processes - CH4
3078,LBY,IN - Industrial Processes,CO2,2.499458,IN - Industrial Processes - CO2
3079,LBY,IN - Industrial Processes,HFC,4.191501,IN - Industrial Processes - HFC
3080,LBY,IN - Industrial Processes,N2O,0.360349,IN - Industrial Processes - N2O
3081,LBY,IN - Industrial Processes,SF6,0.560506,IN - Industrial Processes - SF6


In [112]:
# Total held by the industrial-processes rows whose gas is not CH4, CO2, N2O or SF6;
# to be compared with the HFC value of the inventory
merged_df.loc[
    merged_df["Edgar_Subsector"].eq("IN - Industrial Processes")
    & ~merged_df["Gas"].isin(["CH4", "CO2", "N2O", "SF6"]),
    ISO3,
].sum()

np.float64(4.191500967)

In [114]:
# Values of the industrial-processes rows for CH4, CO2, N2O and SF6;
# to be compared with the corresponding inventory rows
merged_df.loc[
    merged_df["Edgar_Subsector"].eq("IN - Industrial Processes")
    & merged_df["Gas"].isin(["CH4", "CO2", "N2O", "SF6"]),
    ISO3,
]

49    0.003465
53    2.499458
55    0.360349
58    0.560506
Name: LBY, dtype: float64

## Save

In [115]:
# Add an "<Edgar subsector>:<gas>" label column before saving
merged_df["Edgar_Class"] = merged_df["Edgar_Subsector"].str.cat(merged_df["Gas"], sep=":")
merged_df.head(5)

Unnamed: 0,Subsector,Gas,Edgar_Sector,Edgar_Subsector,Edgar_Subsector_Synthetic,Vars,id,LBY,Edgar_Class
0,agrc,CH4,Agriculture,AG - Crops,AG - Crops,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...,AG - Crops - CH4,0.000293,AG - Crops:CH4
1,agrc,CO2,Agriculture,AG - Crops,AG - Crops,emission_co2e_co2_agrc_biomass_bevs_and_spices...,AG - Crops - CO2,0.01588,AG - Crops:CO2
2,agrc,N2O,Agriculture,AG - Crops,AG - Crops,emission_co2e_n2o_agrc_biomass_burning:emissio...,AG - Crops - N2O,0.759204,AG - Crops:N2O
3,lvst,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lvst_entferm_buffalo:emissio...,AG - Livestock - CH4,0.954793,AG - Livestock:CH4
4,lsmm,CH4,Agriculture,AG - Livestock,AG - Livestock,emission_co2e_ch4_lsmm_anaerobic_digester:emis...,AG - Livestock - CH4,0.954793,AG - Livestock:CH4


In [116]:
# Create the output folder if it is missing; to_csv does not create parent
# directories and would fail on a fresh checkout without an "output" folder.
os.makedirs(OUTPUT_DIR_PATH, exist_ok=True)

# Save the emission targets of the region/year, without the DataFrame index
merged_df.to_csv(os.path.join(OUTPUT_DIR_PATH, f"emission_targets_{REGION}_{YEAR}.csv"), index=False)