In [None]:
# Third-party packages
import pandas as pd

# Reload imported modules automatically before each cell runs, so edits to the
# project source are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

# Tell Python where to look for the project modules.
import sys

sys.path.append("../../src/")

import oge.download_data as download_data
import oge.load_data as load_data
import oge.emissions as emissions
import oge.create_nox_so2_factors
# NOTE(review): the star import presumably supplies `reference_table_folder`, which
# the export cell calls — confirm and consider importing it by name.
from oge.filepaths import *
from oge.logging_util import get_logger, configure_root_logger

from oge.constants import CLEAN_FUELS

# Set up project logging and request a logger under the name "test".
configure_root_logger()
logger = get_logger("test")


# Data year handed to the boiler firing type and boiler-generator association
# loaders in the coverage-check cells.
year = 2022
# Year-based path prefix; not referenced by the other cells shown in this notebook.
path_prefix = f"{year}/"

# Create the new factor tables

In [None]:
# Build the candidate NOx emission factor table with the project's generator function.
build_nox_table = oge.create_nox_so2_factors.generate_nox_emission_factor_reference_table
nox_factors = build_nox_table()

In [None]:
# Build the candidate SO2 emission factor table with the project's generator function.
build_so2_table = oge.create_nox_so2_factors.generate_so2_emission_factor_reference_table
so2_factors = build_so2_table()

### Check for duplicates

In [None]:
# Display every NOx factor row whose key combination appears more than once.
# Any rows shown here are duplicates that should be resolved before exporting.
nox_key_columns = [
    "prime_mover_code",
    "energy_source_code",
    "wet_dry_bottom",
    "boiler_firing_type",
]
# keep=False flags all members of a duplicated group, not just the repeats
nox_is_duplicated = nox_factors.duplicated(subset=nox_key_columns, keep=False)
nox_factors.loc[nox_is_duplicated].sort_values(by=nox_key_columns)

In [None]:
# Display every SO2 factor row whose key combination appears more than once.
# Any rows shown here are duplicates that should be resolved before exporting.
so2_key_columns = [
    "prime_mover_code",
    "energy_source_code",
    "boiler_firing_type",
]
# keep=False flags all members of a duplicated group, not just the repeats
so2_is_duplicated = so2_factors.duplicated(subset=so2_key_columns, keep=False)
so2_factors.loc[so2_is_duplicated].sort_values(by=so2_key_columns)

### Compare with existing factors

In [None]:
# Load the NOx factors currently used by the project so the new table can be
# compared against them.
ref_nox = load_data.load_nox_emission_factors()

nox_merge_keys = [
    "energy_source_code",
    "prime_mover_code",
    "wet_dry_bottom",
    "boiler_firing_type",
]

# Attach the existing factor to each new factor; the overlapping
# `emission_factor` column is split into `_new` and `_old`.
compare_nox = nox_factors.merge(
    ref_nox[nox_merge_keys + ["emission_factor"]],
    how="left",
    on=nox_merge_keys,
    suffixes=("_new", "_old"),
)

# Keep only rows that have an existing factor and whose value differs from it
# after rounding both sides to 4 decimal places.
nox_value_changed = (
    compare_nox["emission_factor_new"]
    .round(4)
    .ne(compare_nox["emission_factor_old"].round(4))
)
nox_has_old_value = compare_nox["emission_factor_old"].notna()
compare_nox = compare_nox[nox_value_changed & nox_has_old_value]
compare_nox

In [None]:
# Load the SO2 factors currently used by the project so the new table can be
# compared against them.
ref_so2 = load_data.load_so2_emission_factors()

so2_merge_keys = [
    "energy_source_code",
    "prime_mover_code",
    "boiler_firing_type",
]

# Attach the existing factor to each new factor; the overlapping
# `emission_factor` column is split into `_new` and `_old`.
compare_so2 = so2_factors.merge(
    ref_so2[so2_merge_keys + ["emission_factor"]],
    how="left",
    on=so2_merge_keys,
    suffixes=("_new", "_old"),
)

# Keep only rows that have an existing factor and whose value differs from it
# after rounding both sides to 4 decimal places.
so2_value_changed = (
    compare_so2["emission_factor_new"]
    .round(4)
    .ne(compare_so2["emission_factor_old"].round(4))
)
so2_has_old_value = compare_so2["emission_factor_old"].notna()
compare_so2 = compare_so2[so2_value_changed & so2_has_old_value]
compare_so2

### Check that all prime mover / boiler firing type (PM-BFT) combinations are covered

In [None]:
# Coverage check for SO2: list every (prime mover, boiler firing type, energy
# source) combination that has reported fuel consumption but no matching row in
# the newly generated `so2_factors` table. An empty result means full coverage.

# NOTE(review): unlike the boiler tables below, no year is passed to this load —
# presumably it returns every available year; confirm that is intended.
gen_keys_for_so2 = load_data.load_pudl_table(
    "out_eia923__monthly_generation_fuel_by_generator_energy_source",
    columns=[
        "report_date",
        "plant_id_eia",
        "generator_id",
        "prime_mover_code",
        "energy_source_code",
        "fuel_consumed_mmbtu",
    ],
)

# only records with positive fuel consumption are relevant
gen_keys_for_so2 = gen_keys_for_so2[gen_keys_for_so2["fuel_consumed_mmbtu"] > 0]
# reduce to the unique generator / prime mover / energy source keys
gen_keys_for_so2 = gen_keys_for_so2[
    [
        "plant_id_eia",
        "generator_id",
        "prime_mover_code",
        "energy_source_code",
    ]
].drop_duplicates()
# exclude the clean fuels and geothermal from the check
gen_keys_for_so2 = gen_keys_for_so2[
    ~gen_keys_for_so2["energy_source_code"].isin(CLEAN_FUELS + ["GEO"])
]

# load the boiler firing type info
boiler_firing_type = emissions.load_boiler_firing_type(year)
# drop the boiler bottom type data
boiler_firing_type = boiler_firing_type.drop(columns="wet_dry_bottom")
boiler_firing_type = boiler_firing_type.drop_duplicates()

# identify the boiler firing type for each generator
boiler_generator_assn = load_data.load_pudl_table(
    "core_eia860__assn_boiler_generator",
    year,
    columns=["plant_id_eia", "boiler_id", "generator_id"],
)
# associate each boiler record with generator_id s
# validate="1:m" raises if a (plant, boiler) key repeats on the left side
boiler_firing_type = boiler_firing_type.merge(
    boiler_generator_assn,
    how="left",
    on=["plant_id_eia", "boiler_id"],
    validate="1:m",
)

# merge the gen keys with the boiler firing types
gen_types = gen_keys_for_so2.merge(
    boiler_firing_type,
    how="left",
    on=["plant_id_eia", "generator_id"],
    validate="m:m",
)
# Restrict to the prime movers of interest. Take an explicit copy so the column
# assignment below acts on an independent frame instead of a filtered slice
# (which triggers pandas' SettingWithCopyWarning).
# NOTE(review): the original list contained "CS" twice; the duplicate is removed
# here. Confirm whether a different code was intended in its place.
gen_types = gen_types[
    gen_types["prime_mover_code"].isin(
        ["GT", "IC", "ST", "OT", "CA", "CT", "CS", "CE"]
    )
].copy()
# generators with no boiler firing type get the placeholder "none"
gen_types["boiler_firing_type"] = gen_types["boiler_firing_type"].fillna("none")

# look up the new SO2 factor for every generator key
missing_so2 = gen_types.merge(
    so2_factors[
        [
            "prime_mover_code",
            "energy_source_code",
            "boiler_firing_type",
            "emission_factor",
        ]
    ],
    how="left",
    on=[
        "prime_mover_code",
        "boiler_firing_type",
        "energy_source_code",
    ],
)
# keep only the unique key combinations for which no factor was found
missing_so2 = (
    missing_so2[missing_so2["emission_factor"].isna()][
        [
            "prime_mover_code",
            "boiler_firing_type",
            "energy_source_code",
        ]
    ]
    .drop_duplicates()
    .sort_values(
        by=[
            "energy_source_code",
            "prime_mover_code",
            "boiler_firing_type",
        ]
    )
)
missing_so2

In [None]:
# Coverage check for NOx: list every (prime mover, energy source, boiler firing
# type, wet/dry bottom) combination that has reported fuel consumption but no
# matching row in the newly generated `nox_factors` table. An empty result means
# full coverage.

# get a dataframe with all unique generator-pm-esc combinations for emitting energy
# source types with data reported
# NOTE(review): no year is passed to this load, unlike the boiler tables below —
# presumably it returns every available year; confirm that is intended.
gen_keys_for_nox = load_data.load_pudl_table(
    "out_eia923__monthly_generation_fuel_by_generator_energy_source",
    columns=[
        "report_date",
        "plant_id_eia",
        "generator_id",
        "prime_mover_code",
        "energy_source_code",
        "fuel_consumed_mmbtu",
    ],
)
gen_keys_for_nox = gen_keys_for_nox[(gen_keys_for_nox["fuel_consumed_mmbtu"] > 0)]
# `report_date` is deliberately left out of the key so that each generator /
# prime mover / energy source combination appears once rather than once per
# report date; the result of the coverage check is the same, with a much smaller
# merge.
gen_keys_for_nox = gen_keys_for_nox[
    [
        "plant_id_eia",
        "generator_id",
        "prime_mover_code",
        "energy_source_code",
    ]
].drop_duplicates()
# exclude the clean fuels and geothermal from the check
gen_keys_for_nox = gen_keys_for_nox[
    ~gen_keys_for_nox["energy_source_code"].isin(CLEAN_FUELS + ["GEO"])
]

# NOTE(review): the existing factors loaded here are not used by the coverage
# check below, which compares against the new `nox_factors` table. Kept so the
# variable remains available; remove if nothing else needs it.
nox_emission_factors = load_data.load_nox_emission_factors()
# remove duplicate factors
nox_emission_factors = nox_emission_factors.drop_duplicates(
    subset=[
        "prime_mover_code",
        "energy_source_code",
        "wet_dry_bottom",
        "boiler_firing_type",
    ]
)

# load the boiler firing type info
boiler_firing_type = emissions.load_boiler_firing_type(year)

# identify the boiler firing type for each generator
boiler_generator_assn = load_data.load_pudl_table(
    "core_eia860__assn_boiler_generator",
    year,
    columns=["plant_id_eia", "boiler_id", "generator_id"],
)
# associate each boiler record with generator_id s
# validate="1:m" raises if a (plant, boiler) key repeats on the left side
boiler_firing_type = boiler_firing_type.merge(
    boiler_generator_assn,
    how="left",
    on=["plant_id_eia", "boiler_id"],
    validate="1:m",
)

# merge the gen keys with the boiler firing types
gen_nox_factors = gen_keys_for_nox.merge(
    boiler_firing_type,
    how="left",
    on=["plant_id_eia", "generator_id"],
    validate="m:m",
)

# generators with no boiler information get the placeholder "none"
gen_nox_factors["wet_dry_bottom"] = gen_nox_factors["wet_dry_bottom"].fillna("none")
gen_nox_factors["boiler_firing_type"] = gen_nox_factors["boiler_firing_type"].fillna(
    "none"
)

# look up the new NOx factor for every unique key combination
missing_nox = (
    gen_nox_factors[
        [
            "prime_mover_code",
            "energy_source_code",
            "boiler_firing_type",
            "wet_dry_bottom",
        ]
    ]
    .drop_duplicates()
    .merge(
        nox_factors[
            [
                "prime_mover_code",
                "energy_source_code",
                "wet_dry_bottom",
                "boiler_firing_type",
                "emission_factor",
            ]
        ],
        how="left",
        on=[
            "prime_mover_code",
            "boiler_firing_type",
            "wet_dry_bottom",
            "energy_source_code",
        ],
    )
)
# keep only the combinations for which no factor was found; sorting on all four
# key columns makes the displayed order fully determined
missing_nox = (
    missing_nox[missing_nox["emission_factor"].isna()][
        [
            "prime_mover_code",
            "boiler_firing_type",
            "wet_dry_bottom",
            "energy_source_code",
        ]
    ]
    .drop_duplicates()
    .sort_values(
        by=[
            "prime_mover_code",
            "energy_source_code",
            "boiler_firing_type",
            "wet_dry_bottom",
        ]
    )
)
missing_nox

## Export the factors to the reference table folder

In [None]:
# Write both factor tables to the reference table folder as CSV files, without
# the DataFrame index. NOx is written first, then SO2.
factor_exports = (
    ("emission_factors_for_nox.csv", nox_factors),
    ("emission_factors_for_so2.csv", so2_factors),
)
for export_file_name, factor_table in factor_exports:
    factor_table.to_csv(reference_table_folder(export_file_name), index=False)