In [1]:
import pandas as pd
import os

In [2]:
# Define the current directory and output data path
SCRIPT_DIR_PATH = os.getcwd()
PARENT_DIR_PATH = os.path.dirname(SCRIPT_DIR_PATH)
TABLEAU_DATA_PATH = os.path.join(PARENT_DIR_PATH, "Tableau/data")
OUTPUTS_DATA_DIR_PATH = os.path.join(PARENT_DIR_PATH, "ssp_run_output")
MAPPING_CORRECTED_PATH = os.path.join(SCRIPT_DIR_PATH, "data/mapping_corrected_uganda.csv")

In [3]:
rescaled_df = pd.read_csv(os.path.join(TABLEAU_DATA_PATH, "emissions_uganda_sisepuede_run_2025-08-08T19;27;48.212333.csv"))
original_df = pd.read_csv(os.path.join(OUTPUTS_DATA_DIR_PATH, "sisepuede_run_2025-08-08T19;27;48.212333/sisepuede_run_2025-08-08T19;27;48.212333.csv"))

In [4]:
mapping_df = pd.read_csv(MAPPING_CORRECTED_PATH)
mapping_df.head()

Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,Vars
0,lvst,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lvst_entferm_buffalo:emissio...
1,lsmm,ch4,AG - Livestock:CH4,AG - Livestock,Agriculture,emission_co2e_ch4_lsmm_anaerobic_digester:emis...
2,lsmm,n2o,AG - Livestock:N2O,AG - Livestock,Agriculture,emission_co2e_n2o_lsmm_direct_anaerobic_digest...
3,agrc,co2,AG - Crops:CO2,AG - Crops,Agriculture,emission_co2e_co2_agrc_biomass_bevs_and_spices...
4,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,emission_co2e_ch4_agrc_anaerobicdom_rice:emiss...


In [5]:
rescaled_df.head()

Unnamed: 0,strategy_id,primary_id,Edgar_Class,CSC.Sector,CSC.Subsector,value,Year,Gas,design_id,future_id,strategy,Code,Contry,source
0,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.576887,2022,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
1,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.57962,2023,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
2,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.582325,2024,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
3,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.585001,2025,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
4,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.587649,2026,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE


In [6]:
original_df.head()

Unnamed: 0,primary_id,region,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,...,yf_agrc_herbs_and_other_perennial_crops_tonne_ha,yf_agrc_nuts_tonne_ha,yf_agrc_other_annual_tonne_ha,yf_agrc_other_woody_perennial_tonne_ha,yf_agrc_pulses_tonne_ha,yf_agrc_rice_tonne_ha,yf_agrc_sugar_cane_tonne_ha,yf_agrc_tubers_tonne_ha,yf_agrc_vegetables_and_vines_tonne_ha,yf_lndu_supremum_pastures_tonne_per_ha
0,0,uganda,0,747901.028486,2386299.0,183833.867477,1333131.0,595.32975,605228.344795,3433943.0,...,2.123306,0.762027,0.982737,0.3674,0.784492,2.755775,76.7943,4.44602,4.002223,92.81
1,0,uganda,1,748561.556254,2388407.0,183996.225021,1334309.0,595.855531,605762.867563,3436975.0,...,2.171183,0.716171,0.984473,0.3674,0.803826,2.826323,77.459654,4.558346,4.070985,92.81
2,0,uganda,2,750426.731475,2394358.0,184454.684578,1337633.0,597.340212,607272.234269,3445539.0,...,2.136681,0.771066,0.999795,0.3674,0.80839,2.908776,77.263223,5.202683,4.007029,92.81
3,0,uganda,3,754597.062701,2407664.0,185479.750849,1345067.0,600.659798,610647.015916,3464687.0,...,2.134035,0.744831,1.441325,0.3674,0.804774,2.961356,76.78493,5.605283,4.014348,92.81
4,0,uganda,4,759702.879272,2423955.0,186734.75916,1354168.0,604.724032,614778.825868,3488130.0,...,2.172065,0.45812,1.528325,0.3674,0.799814,2.970174,77.726965,4.805888,4.035314,92.81


In [7]:
import pandas as pd
from typing import List, Optional, Tuple, Dict

def edgar_totals_from_sim(
    sim_df: pd.DataFrame,
    crosswalk_df: pd.DataFrame,
    primary_id: int,
    vars_col: str = "Vars",
    meta_cols: Optional[List[str]] = None,
) -> Tuple[pd.DataFrame, Dict[int, List[str]]]:
    """
    Compute EDGAR-style totals from simulation output using a crosswalk.

    Parameters
    ----------
    sim_df : pd.DataFrame
        Simulation output with columns like ['primary_id','region','time_period', <many vars...>].
    crosswalk_df : pd.DataFrame
        Crosswalk with a column `vars_col` containing colon-separated variable names to sum.
        Other columns (e.g., 'Subsector','Gas','Edgar_Class','Edgar_Subsector','Edgar_Sector') are carried through.
    primary_id : int
        The primary_id to filter in sim_df.
    vars_col : str, default "Vars"
        Name of the column in crosswalk_df containing colon-separated variable names.
    meta_cols : list[str] or None
        Crosswalk columns to include in the output. If None, uses all columns except `vars_col`.

    Returns
    -------
    totals_df : pd.DataFrame
        Long dataframe with one row per (crosswalk row × region × time_period),
        including crosswalk metadata + ['primary_id','region','time_period','total'].
    missing_by_row : dict[int, list[str]]
        Dict mapping crosswalk row index -> list of missing columns (if any).
    """
    if meta_cols is None:
        meta_cols = [c for c in crosswalk_df.columns if c != vars_col]

    # 1) Filter sim_df by primary_id
    sdf = sim_df[sim_df["primary_id"] == primary_id].copy()
    if sdf.empty:
        raise ValueError(f"No rows found in sim_df for primary_id={primary_id}")

    # Ensure required id columns exist
    for needed in ["region", "time_period"]:
        if needed not in sdf.columns:
            raise ValueError(f"sim_df is missing required column: '{needed}'")

    # 2) Parse crosswalk vars into lists
    def _parse_vars(s: str) -> List[str]:
        if pd.isna(s):
            return []
        return [v.strip() for v in str(s).split(":") if v.strip()]

    cw = crosswalk_df.copy()
    cw["_vars_list"] = cw[vars_col].apply(_parse_vars)

    # 3) Build union of all referenced columns that actually exist in sim_df
    all_cols = set().union(*cw["_vars_list"]) if len(cw) else set()
    sim_cols = [c for c in all_cols if c in sdf.columns]

    # We'll keep track of missing columns per crosswalk row
    missing_by_row: Dict[int, List[str]] = {}

    # 4) For fast summing with missing handling, we’ll select only referenced columns
    #    (if none referenced exist, we'll create an empty frame with same index)
    if sim_cols:
        sim_slice = sdf[["region", "time_period"] + sim_cols].copy()
    else:
        sim_slice = sdf[["region", "time_period"]].copy()

    out_rows = []

    # 5) For each crosswalk row, sum the requested variables across columns (row-wise)
    for i, row in cw.iterrows():
        cols_i = row["_vars_list"]

        # Identify which requested columns are present / missing
        present = [c for c in cols_i if c in sdf.columns]
        missing = [c for c in cols_i if c not in sdf.columns]
        if missing:
            missing_by_row[i] = missing

        if present:
            # Sum across present columns; reindex not needed since we already filtered to existing columns
            totals = sdf[present].sum(axis=1)
        else:
            # If all requested columns are missing, the total is zero
            totals = pd.Series(0.0, index=sdf.index)

        # Build a small output block with crosswalk metadata
        meta_vals = {c: row[c] for c in meta_cols if c in cw.columns}
        block = pd.DataFrame({
            **meta_vals,
            "primary_id": primary_id,
            "region": sdf["region"].values,
            "time_period": sdf["time_period"].values,
            "total": totals.values,
        })
        out_rows.append(block)

    totals_df = pd.concat(out_rows, ignore_index=True) if out_rows else pd.DataFrame(
        columns=meta_cols + ["primary_id", "region", "time_period", "total"]
    )

    # Optional: sort for readability
    sort_keys = [c for c in ["Edgar_Sector", "Edgar_Subsector", "Edgar_Class", "Subsector", "Gas"]
                 if c in totals_df.columns]
    totals_df = totals_df.sort_values(sort_keys + ["region", "time_period"]).reset_index(drop=True)

    # Clean up helper column
    if "_vars_list" in totals_df.columns:
        totals_df.drop(columns=["_vars_list"], errors="ignore", inplace=True)

    return totals_df, missing_by_row


In [8]:
totals_baseline, missing = edgar_totals_from_sim(
    sim_df=original_df,                 # your big 3973-col sim DataFrame
    crosswalk_df=mapping_df,            # your crosswalk like the sample shown
    primary_id=0,               # pick the baseline or any scenario ID
    vars_col="Vars",            # column in crosswalk with colon-separated vars
    meta_cols=["Subsector","Gas","Edgar_Class","Edgar_Subsector","Edgar_Sector"],  # carried through
)

print(totals_baseline.head())
# Inspect any crosswalk rows that referenced columns not present in sim_df:
for i, missing_cols in missing.items():
    print(f"Crosswalk row {i} missing {len(missing_cols)} columns (e.g., {missing_cols[:3]}...)")


  Subsector  Gas     Edgar_Class Edgar_Subsector Edgar_Sector  primary_id  \
0      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture           0   
1      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture           0   
2      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture           0   
3      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture           0   
4      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture           0   

   region  time_period     total  
0  uganda            0  0.640436  
1  uganda            1  0.641693  
2  uganda            2  0.644717  
3  uganda            3  0.656427  
4  uganda            4  0.661354  


In [9]:
totals_strategy, missing = edgar_totals_from_sim(
    sim_df=original_df,                 # your big 3973-col sim DataFrame
    crosswalk_df=mapping_df,            # your crosswalk like the sample shown
    primary_id=73073,               # pick the baseline or any scenario ID
    vars_col="Vars",            # column in crosswalk with colon-separated vars
    meta_cols=["Subsector","Gas","Edgar_Class","Edgar_Subsector","Edgar_Sector"],  # carried through
)

print(totals_strategy.head())
# Inspect any crosswalk rows that referenced columns not present in sim_df:
for i, missing_cols in missing.items():
    print(f"Crosswalk row {i} missing {len(missing_cols)} columns (e.g., {missing_cols[:3]}...)")


  Subsector  Gas     Edgar_Class Edgar_Subsector Edgar_Sector  primary_id  \
0      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture       73073   
1      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture       73073   
2      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture       73073   
3      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture       73073   
4      agrc  ch4  AG - Crops:CH4      AG - Crops  Agriculture       73073   

   region  time_period     total  
0  uganda            0  0.640436  
1  uganda            1  0.641693  
2  uganda            2  0.644717  
3  uganda            3  0.656427  
4  uganda            4  0.661354  


In [26]:
totals_baseline[totals_baseline["Edgar_Subsector"] == 'EN - Fugitive Emissions'].isna().sum()


Subsector          0
Gas                0
Edgar_Class        0
Edgar_Subsector    0
Edgar_Sector       0
primary_id         0
region             0
time_period        0
total              0
dtype: int64

In [25]:
totals_strategy[totals_strategy["Edgar_Subsector"] == 'EN - Fugitive Emissions'].isna().sum()

Subsector          0
Gas                0
Edgar_Class        0
Edgar_Subsector    0
Edgar_Sector       0
primary_id         0
region             0
time_period        0
total              0
dtype: int64

In [27]:
totals_concat = pd.concat([totals_baseline, totals_strategy], ignore_index=True)
totals_concat

Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,primary_id,region,time_period,total
0,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,0,0.640436
1,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,1,0.641693
2,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,2,0.644717
3,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,3,0.656427
4,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,4,0.661354
...,...,...,...,...,...,...,...,...,...
7611,trww,n2o,Waste - Wastewater Treatment:N2O,Waste - Wastewater Treatment,Waste,73073,uganda,51,7.774600
7612,trww,n2o,Waste - Wastewater Treatment:N2O,Waste - Wastewater Treatment,Waste,73073,uganda,52,7.795886
7613,trww,n2o,Waste - Wastewater Treatment:N2O,Waste - Wastewater Treatment,Waste,73073,uganda,53,7.815411
7614,trww,n2o,Waste - Wastewater Treatment:N2O,Waste - Wastewater Treatment,Waste,73073,uganda,54,7.832774


In [28]:
totals_concat[totals_concat["Edgar_Subsector"] == 'EN - Fugitive Emissions'].isna().sum()

Subsector          0
Gas                0
Edgar_Class        0
Edgar_Subsector    0
Edgar_Sector       0
primary_id         0
region             0
time_period        0
total              0
dtype: int64

In [29]:
# add year column
totals_concat["Year"] = totals_concat["time_period"] + 2015
totals_concat

Unnamed: 0,Subsector,Gas,Edgar_Class,Edgar_Subsector,Edgar_Sector,primary_id,region,time_period,total,Year
0,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,0,0.640436,2015
1,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,1,0.641693,2016
2,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,2,0.644717,2017
3,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,3,0.656427,2018
4,agrc,ch4,AG - Crops:CH4,AG - Crops,Agriculture,0,uganda,4,0.661354,2019
...,...,...,...,...,...,...,...,...,...,...
7611,trww,n2o,Waste - Wastewater Treatment:N2O,Waste - Wastewater Treatment,Waste,73073,uganda,51,7.774600,2066
7612,trww,n2o,Waste - Wastewater Treatment:N2O,Waste - Wastewater Treatment,Waste,73073,uganda,52,7.795886,2067
7613,trww,n2o,Waste - Wastewater Treatment:N2O,Waste - Wastewater Treatment,Waste,73073,uganda,53,7.815411,2068
7614,trww,n2o,Waste - Wastewater Treatment:N2O,Waste - Wastewater Treatment,Waste,73073,uganda,54,7.832774,2069


In [30]:
# Keep relevant columns
totals_concat = totals_concat[["primary_id", "Edgar_Class", "Year", "total"]]
totals_concat

Unnamed: 0,primary_id,Edgar_Class,Year,total
0,0,AG - Crops:CH4,2015,0.640436
1,0,AG - Crops:CH4,2016,0.641693
2,0,AG - Crops:CH4,2017,0.644717
3,0,AG - Crops:CH4,2018,0.656427
4,0,AG - Crops:CH4,2019,0.661354
...,...,...,...,...
7611,73073,Waste - Wastewater Treatment:N2O,2066,7.774600
7612,73073,Waste - Wastewater Treatment:N2O,2067,7.795886
7613,73073,Waste - Wastewater Treatment:N2O,2068,7.815411
7614,73073,Waste - Wastewater Treatment:N2O,2069,7.832774


In [31]:
rescaled_df

Unnamed: 0,strategy_id,primary_id,Edgar_Class,CSC.Sector,CSC.Subsector,value,Year,Gas,design_id,future_id,strategy,Code,Contry,source
0,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.576887,2022,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
1,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.579620,2023,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
2,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.582325,2024,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
3,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.585001,2025,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
4,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.587649,2026,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4634,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.400586,2018,N2O,,,Historical,UGA,uganda,EDGAR
4635,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.413628,2019,N2O,,,Historical,UGA,uganda,EDGAR
4636,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.426982,2020,N2O,,,Historical,UGA,uganda,EDGAR
4637,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.439325,2021,N2O,,,Historical,UGA,uganda,EDGAR


In [32]:
merged_df = pd.merge(rescaled_df, totals_concat, on=["primary_id", "Edgar_Class", "Year"], how="left")
merged_df

Unnamed: 0,strategy_id,primary_id,Edgar_Class,CSC.Sector,CSC.Subsector,value,Year,Gas,design_id,future_id,strategy,Code,Contry,source,total
0,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.576887,2022,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.685441
1,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.579620,2023,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.688689
2,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.582325,2024,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.691902
3,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.585001,2025,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.695081
4,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.587649,2026,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.698228
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7280,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.400586,2018,N2O,,,Historical,UGA,uganda,EDGAR,
7281,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.413628,2019,N2O,,,Historical,UGA,uganda,EDGAR,
7282,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.426982,2020,N2O,,,Historical,UGA,uganda,EDGAR,
7283,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.439325,2021,N2O,,,Historical,UGA,uganda,EDGAR,


In [33]:
merged_df["CSC.Subsector"].unique()

array(['AG - Crops', 'AG - Livestock', 'CCSQ', 'EN - Building',
       'EN - Electricity/Heat', 'EN - Fugitive Emissions',
       'EN - Manufacturing/Construction', 'EN - Transportation',
       'IN - Industrial Processes', 'LULUCF - Deforestation',
       'LULUCF - Forest Land', 'LULUCF - Organic Soil',
       'LULUCF - Other Land', 'Waste - Solid Waste',
       'Waste - Wastewater Treatment'], dtype=object)

In [37]:
subsectors_to_update = ['EN - Electricity/Heat', 'EN - Fugitive Emissions']
merged_df[(merged_df["CSC.Subsector"].isin(subsectors_to_update)) & (merged_df["strategy"] != "Historical")]

Unnamed: 0,strategy_id,primary_id,Edgar_Class,CSC.Sector,CSC.Subsector,value,Year,Gas,design_id,future_id,strategy,Code,Contry,source,total
588,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.003233,2022,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.053458
589,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.003359,2023,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.055540
590,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.003476,2024,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.057472
591,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.003614,2025,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.059750
592,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.003765,2026,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.062256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4209,6006.0,73073.0,EN - Fugitive Emissions:N2O,Energy,EN - Fugitive Emissions,0.000000,2066,N2O,0.0,0.0,NDC_2,UGA,uganda,SISEPUEDE,0.000000
4210,6006.0,73073.0,EN - Fugitive Emissions:N2O,Energy,EN - Fugitive Emissions,0.000000,2067,N2O,0.0,0.0,NDC_2,UGA,uganda,SISEPUEDE,0.000000
4211,6006.0,73073.0,EN - Fugitive Emissions:N2O,Energy,EN - Fugitive Emissions,0.000000,2068,N2O,0.0,0.0,NDC_2,UGA,uganda,SISEPUEDE,0.000000
4212,6006.0,73073.0,EN - Fugitive Emissions:N2O,Energy,EN - Fugitive Emissions,0.000000,2069,N2O,0.0,0.0,NDC_2,UGA,uganda,SISEPUEDE,0.000000


In [38]:
mask = (
    merged_df["CSC.Subsector"].isin(subsectors_to_update)
    & (merged_df["strategy"] != "Historical")
)

# If you want to overwrite regardless:
merged_df.loc[mask, "value"] = merged_df.loc[mask, "total"]


In [39]:
merged_df[(merged_df["CSC.Subsector"].isin(subsectors_to_update)) & (merged_df["strategy"] != "Historical")]

Unnamed: 0,strategy_id,primary_id,Edgar_Class,CSC.Sector,CSC.Subsector,value,Year,Gas,design_id,future_id,strategy,Code,Contry,source,total
588,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.053458,2022,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.053458
589,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.055540,2023,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.055540
590,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.057472,2024,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.057472
591,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.059750,2025,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.059750
592,0.0,0.0,EN - Electricity/Heat:CH4,Energy,EN - Electricity/Heat,0.062256,2026,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.062256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4209,6006.0,73073.0,EN - Fugitive Emissions:N2O,Energy,EN - Fugitive Emissions,0.000000,2066,N2O,0.0,0.0,NDC_2,UGA,uganda,SISEPUEDE,0.000000
4210,6006.0,73073.0,EN - Fugitive Emissions:N2O,Energy,EN - Fugitive Emissions,0.000000,2067,N2O,0.0,0.0,NDC_2,UGA,uganda,SISEPUEDE,0.000000
4211,6006.0,73073.0,EN - Fugitive Emissions:N2O,Energy,EN - Fugitive Emissions,0.000000,2068,N2O,0.0,0.0,NDC_2,UGA,uganda,SISEPUEDE,0.000000
4212,6006.0,73073.0,EN - Fugitive Emissions:N2O,Energy,EN - Fugitive Emissions,0.000000,2069,N2O,0.0,0.0,NDC_2,UGA,uganda,SISEPUEDE,0.000000


In [40]:
merged_df

Unnamed: 0,strategy_id,primary_id,Edgar_Class,CSC.Sector,CSC.Subsector,value,Year,Gas,design_id,future_id,strategy,Code,Contry,source,total
0,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.576887,2022,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.685441
1,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.579620,2023,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.688689
2,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.582325,2024,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.691902
3,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.585001,2025,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.695081
4,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.587649,2026,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE,0.698228
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7280,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.400586,2018,N2O,,,Historical,UGA,uganda,EDGAR,
7281,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.413628,2019,N2O,,,Historical,UGA,uganda,EDGAR,
7282,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.426982,2020,N2O,,,Historical,UGA,uganda,EDGAR,
7283,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.439325,2021,N2O,,,Historical,UGA,uganda,EDGAR,


In [41]:
# drop total column
merged_df.drop(columns=["total"], inplace=True, errors="ignore")
merged_df

Unnamed: 0,strategy_id,primary_id,Edgar_Class,CSC.Sector,CSC.Subsector,value,Year,Gas,design_id,future_id,strategy,Code,Contry,source
0,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.576887,2022,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
1,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.579620,2023,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
2,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.582325,2024,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
3,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.585001,2025,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
4,0.0,0.0,AG - Crops:CH4,Agriculture,AG - Crops,0.587649,2026,CH4,0.0,0.0,Strategy TX:BASE,UGA,uganda,SISEPUEDE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7280,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.400586,2018,N2O,,,Historical,UGA,uganda,EDGAR
7281,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.413628,2019,N2O,,,Historical,UGA,uganda,EDGAR
7282,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.426982,2020,N2O,,,Historical,UGA,uganda,EDGAR
7283,,,Waste - Wastewater Treatment:N2O,Waste,Waste - Wastewater Treatment,0.439325,2021,N2O,,,Historical,UGA,uganda,EDGAR


In [42]:
# save the updated DataFrame
output_path = os.path.join(TABLEAU_DATA_PATH, "fgtv_entc_hotfix.csv")
merged_df.to_csv(output_path, index=False)