In [None]:
# import packages
import pandas as pd
import numpy as np
import os
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys

sys.path.append("../../src/")

import oge.download_data as download_data
import oge.load_data as load_data
from oge.column_checks import get_dtypes, apply_dtypes
from oge.filepaths import *
import oge.impute_hourly_profiles as impute_hourly_profiles
import oge.data_cleaning as data_cleaning
import oge.output_data as output_data
import oge.emissions as emissions
import oge.validation as validation
import oge.gross_to_net_generation as gross_to_net_generation
import oge.eia930 as eia930
from oge.logging_util import get_logger, configure_root_logger

# Set up logging through the project's logging helpers.
configure_root_logger()
logger = get_logger("test")


# Data year handed to the boiler firing type and boiler-generator association
# loaders further down in this notebook.
year = 2022
# NOTE(review): path_prefix is not referenced anywhere else in the visible notebook.
path_prefix = f"{year}/"

In [None]:
# todo: update synthesis factors

In [None]:
# download the nox and so2 tables
# so2: https://www.eia.gov/electricity/annual/xls/epa_a_01.xlsx
# nox: https://www.eia.gov/electricity/annual/xls/epa_a_02.xlsx

In [None]:
from oge.constants import ConversionFactors

In [None]:
import oge.load_data as load_data

In [None]:
import oge.create_nox_so2_factors

In [None]:
# Build the NOx emission factor table with the packaged generator function so that it
# can be checked for duplicates and compared against the reference table below.
nox_factors = oge.create_nox_so2_factors.generate_nox_emission_factor_reference_table()

In [None]:
# Build the SO2 emission factor table with the packaged generator function so that it
# can be checked for duplicates and compared against the reference table below.
so2_factors = oge.create_nox_so2_factors.generate_so2_emission_factor_reference_table()

### Check for duplicates

In [None]:
# Display every generated NOx factor whose key combination occurs more than once
# (all copies are shown). An empty result means there is nothing to drop.
nox_key_columns = [
    "prime_mover_code",
    "energy_source_code",
    "wet_dry_bottom",
    "boiler_firing_type",
]
nox_is_duplicated = nox_factors.duplicated(subset=nox_key_columns, keep=False)
nox_factors.loc[nox_is_duplicated].sort_values(by=nox_key_columns)

In [None]:
# Display every generated SO2 factor whose key combination occurs more than once
# (all copies are shown). An empty result means there is nothing to drop.
so2_key_columns = [
    "prime_mover_code",
    "energy_source_code",
    "boiler_firing_type",
]
so2_is_duplicated = so2_factors.duplicated(subset=so2_key_columns, keep=False)
so2_factors.loc[so2_is_duplicated].sort_values(by=so2_key_columns)

### Compare with existing factors

In [None]:
ref_nox = load_data.load_nox_emission_factors()

# Attach the currently used ("old") factor to each newly generated ("new") factor.
nox_compare_keys = [
    "energy_source_code",
    "prime_mover_code",
    "wet_dry_bottom",
    "boiler_firing_type",
]
compare = nox_factors.merge(
    ref_nox[nox_compare_keys + ["emission_factor"]],
    how="left",
    on=nox_compare_keys,
    suffixes=("_new", "_old"),
)

# Show only the rows that have an old factor and where it differs from the new one
# at 4 decimal places.
nox_factor_changed = compare["emission_factor_new"].round(4) != compare[
    "emission_factor_old"
].round(4)
nox_has_old_factor = compare["emission_factor_old"].notna()
compare[nox_factor_changed & nox_has_old_factor]

In [None]:
ref_so2 = load_data.load_so2_emission_factors()

# Attach the currently used ("old") factor to each newly generated ("new") factor.
so2_compare_keys = [
    "energy_source_code",
    "prime_mover_code",
    "boiler_firing_type",
]
compare = so2_factors.merge(
    ref_so2[so2_compare_keys + ["emission_factor"]],
    how="left",
    on=so2_compare_keys,
    suffixes=("_new", "_old"),
)

# Show only the rows that have an old factor and where it differs from the new one
# at 4 decimal places.
so2_factor_changed = compare["emission_factor_new"].round(4) != compare[
    "emission_factor_old"
].round(4)
so2_has_old_factor = compare["emission_factor_old"].notna()
compare[so2_factor_changed & so2_has_old_factor]

### Check that all PM-BFT combinations are covered

## SO2 factors

In [None]:
# Read the SO2 reference table and normalise it in one pass:
#   - multiply_by_sulfur_content: 1 where the raw factor text contains "*", else 0
#   - emission_factor: the numeric coefficient left after stripping the "*S" marker
#   - emission_factor_denominator: units standardised to lower case
ref_so2 = pd.read_csv(
    reference_table_folder("emission_factors_for_so2.csv"),
    dtype=get_dtypes(),
).assign(
    multiply_by_sulfur_content=lambda raw: raw["emission_factor"]
    .str.contains("*", regex=False)
    .astype(int),
    emission_factor=lambda raw: raw["emission_factor"]
    .str.replace("*S", "", regex=False)
    .astype(float),
    emission_factor_denominator=lambda raw: raw[
        "emission_factor_denominator"
    ].str.lower(),
)

In [None]:
"""
The EIA-provided factors are listed for a combination of boiler firing types (BFT) and prime movers.
Of the several dozen prime movers, only four types are associated with thermal generators that combust fuel:
steam turbines (ST), combustion turbines (GT), combined cycle turbines (CS, CT, CA, CC), internal combustion engines (IC), and possibly other (OT).
Prime-mover specific factors are provided for GT and IC prime movers, and are assumed to apply to all boiler firing types, if provided. 
Because combined cycle plants combust fuel in the combustion turbine part, GT factors are applied to all combined cycle prime mover components.
The BFT-specific factors are used only for ST and OT prime movers
"""


# columns of the EIA spreadsheet that hold emission factor values
factor_columns = [
    "cyclone_firing",
    "fluidized_bed_firing",
    "stoker",
    "tangential_firing",
    "other",
    "GT",
    "IC",
]

so2_factors_eia = pd.read_excel(
    downloads_folder("eia/epa_a_01.xlsx"),
    header=2,
    names=["fuel", "energy_source_code", "data_source", "unit"] + factor_columns,
    skipfooter=1,
)

# standardize the unit column: the first space-separated token is the numerator and
# the third is the denominator
so2_unit_tokens = so2_factors_eia["unit"].str.split(" ", expand=True)
so2_factors_eia["emission_factor_numerator"] = so2_unit_tokens[0].replace("Lbs", "lb")
so2_factors_eia["emission_factor_denominator"] = so2_unit_tokens[2]

# convert the denominator units into units used by EIA-923:
#   million cubic feet (MMCF) -> thousand cubic feet (mcf)
#   thousand gallons (MG)     -> barrels
for raw_unit, divisor, eia923_unit in [
    ("MMCF", 1000, "mcf"),
    ("MG", ConversionFactors.kgal_to_barrel, "barrel"),
]:
    in_raw_unit = so2_factors_eia["emission_factor_denominator"] == raw_unit
    so2_factors_eia.loc[in_raw_unit, factor_columns] /= divisor
    so2_factors_eia.loc[in_raw_unit, "emission_factor_denominator"] = eia923_unit


# identify which fuels need to be multiplied by the sulfur content
# where the fuel name has an "*" in the raw spreadsheet, this means the factor needs to
# be adjusted by the fuel sulfur content.
# The asterisk is matched literally (regex=False) rather than through the "\*"
# pattern, which is an invalid escape sequence in a normal string literal. Missing
# fuel names count as "no asterisk" (na=False) so they cannot break the flag.
so2_factors_eia["multiply_by_sulfur_content"] = (
    so2_factors_eia["fuel"].str.contains("*", regex=False, na=False).astype(int)
)

# Some fuels have no reported emission factors, so give each one the row of a proxy
# fuel:
#   MSB / MSN -> MSW
#   SC        -> RC (which is assumed to be the same as BIT)
so2_proxy_fuel_for = {"MSB": "MSW", "MSN": "MSW", "SC": "RC"}
so2_factors_eia = so2_factors_eia.set_index("energy_source_code")
for missing_fuel in so2_proxy_fuel_for:
    proxy_fuel = so2_proxy_fuel_for[missing_fuel]
    so2_factors_eia.loc[missing_fuel, :] = so2_factors_eia.loc[proxy_fuel, :]
so2_factors_eia = so2_factors_eia.reset_index()

# Boiler-specific factors: keep only the boiler firing type columns plus the
# descriptive columns (the GT and IC columns are handled separately).
so2_bft_id_columns = [
    "energy_source_code",
    "data_source",
    "emission_factor_numerator",
    "emission_factor_denominator",
    "multiply_by_sulfur_content",
]
so2_reported_firing_types = [
    "cyclone_firing",
    "fluidized_bed_firing",
    "stoker",
    "tangential_firing",
    "other",
]
so2_factors_eia_bft = so2_factors_eia[
    ["energy_source_code", "data_source"]
    + so2_reported_firing_types
    + [
        "emission_factor_numerator",
        "emission_factor_denominator",
        "multiply_by_sulfur_content",
    ]
].copy()

# Firing types that the spreadsheet does not break out get the "other" column's
# factor (the "All other Boiler Types" column).
other_firing_types = [
    "cell_burner",
    "duct_burner",
    "vertical_firing",
    "wall_fired",
]
for bft in other_firing_types:
    so2_factors_eia_bft[bft] = so2_factors_eia_bft["other"]

# Long form: one row per fuel and boiler firing type; rows without a factor are
# dropped.
so2_factors_eia_bft = so2_factors_eia_bft.melt(
    id_vars=so2_bft_id_columns,
    var_name="boiler_firing_type",
    value_name="emission_factor",
).dropna()

# These BFT-specific factors generally only apply to ST and OT prime movers, but
# combined cycle units can have duct burners or other boilers as part of a HRSG
# (https://www.eia.gov/todayinenergy/detail.php?id=52778), so those prime movers are
# added too. Joining on the numerator unit "lb" repeats each factor row once per
# prime mover.
so2_bft_prime_movers = pd.DataFrame(
    [("lb", pm_code) for pm_code in ("ST", "OT", "GT", "CA", "CS", "CT")],
    columns=["emission_factor_numerator", "prime_mover_code"],
)
so2_factors_eia_bft = so2_factors_eia_bft.merge(
    so2_bft_prime_movers,
    how="left",
    on="emission_factor_numerator",
)

# create a prime-mover specific table from the GT and IC columns
so2_factors_eia_pm = so2_factors_eia[
    [
        "energy_source_code",
        "data_source",
        "GT",
        "IC",
        "emission_factor_numerator",
        "emission_factor_denominator",
        "multiply_by_sulfur_content",
    ]
].copy()

# for combined cycle parts, apply the GT prime mover
# The guard checks the prime mover being added (`pm`). It previously tested `bft`,
# a stale loop variable left over from the boiler firing type loop, so it never
# looked at the column that was about to be written.
for pm in ["CA", "CC", "CS", "CT"]:
    if pm not in so2_factors_eia_pm.columns:
        so2_factors_eia_pm[pm] = so2_factors_eia_pm["GT"]

# melt into long form: one row per fuel and prime mover; rows without a factor are
# dropped
so2_factors_eia_pm = so2_factors_eia_pm.melt(
    id_vars=[
        "energy_source_code",
        "data_source",
        "emission_factor_numerator",
        "emission_factor_denominator",
        "multiply_by_sulfur_content",
    ],
    var_name="prime_mover_code",
    value_name="emission_factor",
).dropna()

# prime-mover specific factors carry no boiler firing type, so label it "none"
so2_factors_eia_pm["boiler_firing_type"] = "none"

# stack the boiler-specific and prime-mover-specific tables, keep the output columns
# (data_source is left out) and sort for readability
so2_output_columns = [
    "prime_mover_code",
    "energy_source_code",
    "boiler_firing_type",
    "emission_factor",
    "emission_factor_numerator",
    "emission_factor_denominator",
    "multiply_by_sulfur_content",
]
so2_factors_eia_combined = (
    pd.concat([so2_factors_eia_bft, so2_factors_eia_pm], axis=0)
    .loc[:, so2_output_columns]
    .sort_values(by=["energy_source_code", "prime_mover_code", "boiler_firing_type"])
)

In [None]:
# be sure to check for and drop duplicates
# This inspects the table that was just assembled from the EIA spreadsheet
# (`so2_factors_eia_combined`). `so2_factors` already has its own duplicate check
# earlier in the notebook, and the matching NOx cell checks
# `nox_factors_eia_combined`.
so2_combined_key_columns = [
    "prime_mover_code",
    "energy_source_code",
    "boiler_firing_type",
]
so2_factors_eia_combined[
    so2_factors_eia_combined.duplicated(subset=so2_combined_key_columns, keep=False)
].sort_values(by=so2_combined_key_columns)

In [None]:
# Left-join the reference factors onto the factors built from the EIA spreadsheet.
# `_x` is the EIA-derived factor and `_y` is the reference factor.
so2_eia_vs_ref_keys = ["energy_source_code", "prime_mover_code", "boiler_firing_type"]
compare = so2_factors_eia_combined.merge(
    ref_so2[so2_eia_vs_ref_keys + ["emission_factor"]],
    how="left",
    on=so2_eia_vs_ref_keys,
)
# rows where the two factors are not equal at 4 decimal places (this includes rows
# with no reference factor, since NaN never compares equal)
so2_eia_differs_from_ref = compare["emission_factor_x"].round(4) != compare[
    "emission_factor_y"
].round(4)
compare[so2_eia_differs_from_ref]

In [None]:
# The reverse comparison: start from the reference factors and left-join the
# EIA-derived table. `_x` is the reference factor and `_y` is the EIA-derived one.
so2_ref_vs_eia_keys = ["energy_source_code", "prime_mover_code", "boiler_firing_type"]
compare_2 = ref_so2[so2_ref_vs_eia_keys + ["emission_factor"]].merge(
    so2_factors_eia_combined,
    how="left",
    on=so2_ref_vs_eia_keys,
)
# keep the rows that found an EIA-derived factor which differs at 4 decimal places
so2_ref_differs_from_eia = compare_2["emission_factor_x"].round(4) != compare_2[
    "emission_factor_y"
].round(4)
so2_ref_has_eia_match = compare_2["emission_factor_y"].notna()
compare_2[so2_ref_differs_from_eia & so2_ref_has_eia_match]

## NOx factors

In [None]:
# columns of the EIA spreadsheet that hold emission factor values
factor_columns = [
    "cyclone_firing",
    "fluidized_bed_firing",
    "stoker",
    "tangential_firing_dry",
    "tangential_firing_wet",
    "other_dry",
    "other_wet",
    "GT",
    "IC",
]

nox_factors_eia = pd.read_excel(
    downloads_folder("eia/epa_a_02.xlsx"),
    header=3,
    names=["fuel", "energy_source_code", "data_source", "unit"] + factor_columns,
    skipfooter=1,
)

# standardize the unit column: the first space-separated token is the numerator and
# the third is the denominator
nox_unit_tokens = nox_factors_eia["unit"].str.split(" ", expand=True)
nox_factors_eia["emission_factor_numerator"] = nox_unit_tokens[0].replace("Lbs", "lb")
nox_factors_eia["emission_factor_denominator"] = nox_unit_tokens[2]

# convert the denominator units into units used by EIA-923:
#   million cubic feet (MMCF) -> thousand cubic feet (mcf)
#   thousand gallons (MG)     -> barrels
for raw_unit, divisor, eia923_unit in [
    ("MMCF", 1000, "mcf"),
    ("MG", ConversionFactors.kgal_to_barrel, "barrel"),
]:
    in_raw_unit = nox_factors_eia["emission_factor_denominator"] == raw_unit
    nox_factors_eia.loc[in_raw_unit, factor_columns] /= divisor
    nox_factors_eia.loc[in_raw_unit, "emission_factor_denominator"] = eia923_unit

# Some fuels have no reported emission factors, so give each one the row of a proxy
# fuel:
#   MSB / MSN -> MSW
#   SC        -> RC (which is assumed to be the same as BIT)
nox_proxy_fuel_for = {"MSB": "MSW", "MSN": "MSW", "SC": "RC"}
nox_factors_eia = nox_factors_eia.set_index("energy_source_code")
for missing_fuel in nox_proxy_fuel_for:
    proxy_fuel = nox_proxy_fuel_for[missing_fuel]
    nox_factors_eia.loc[missing_fuel, :] = nox_factors_eia.loc[proxy_fuel, :]
nox_factors_eia = nox_factors_eia.reset_index()

# The dry-bottom "all other" boiler factor is the fallback whenever boiler details
# are unavailable. This is consistent with the assumption made by the EIA,
# see: https://www.eia.gov/electricity/annual/pdf/tech_notes.pdf
nox_fallback_factor = nox_factors_eia["other_dry"]

# ST and OT prime movers with no BFT or WDB information take the fallback directly
nox_factors_eia["ST"] = nox_fallback_factor
nox_factors_eia["OT"] = nox_fallback_factor

# GT keeps its own factor where EIA provides one; combinations EIA does not provide
# (like a BIT-fueled GT) are backstopped with the fallback
nox_factors_eia["GT"] = nox_factors_eia["GT"].fillna(nox_fallback_factor)

# Boiler-specific factors: keep only the boiler firing type columns plus the
# descriptive columns (prime-mover columns are handled separately).
nox_bft_source_columns = [
    "energy_source_code",
    "data_source",
    "cyclone_firing",
    "fluidized_bed_firing",
    "stoker",
    "tangential_firing_dry",
    "tangential_firing_wet",
    "other_dry",
    "other_wet",
    "emission_factor_numerator",
    "emission_factor_denominator",
]
nox_factors_eia_bft = nox_factors_eia[nox_bft_source_columns].copy()

# Every combination of boiler firing type (BFT) and wet/dry bottom (WDB) needs a
# column named "<bft>_<wdb>"; a column with no suffix stands for WDB = none.
# Where the spreadsheet gives one factor regardless of WDB, wet = dry = none.
for firing_type in ("cyclone_firing", "fluidized_bed_firing", "stoker"):
    for bottom in ("wet", "dry"):
        nox_factors_eia_bft[f"{firing_type}_{bottom}"] = nox_factors_eia_bft[
            firing_type
        ]
# Where the spreadsheet distinguishes WDB, the "none" case uses the dry-bottom factor.
# This is consistent with the assumption EIA makes,
# see: https://www.eia.gov/electricity/annual/pdf/tech_notes.pdf
for firing_type in ("tangential_firing", "other"):
    nox_factors_eia_bft[f"{firing_type}_none"] = nox_factors_eia_bft[
        f"{firing_type}_dry"
    ]


# NOTE: This is where the nox and so2 are different with wet and dry
# Firing types that the spreadsheet does not break out get the factors of the
# "All other Boiler Types" columns for the matching WDB.
other_firing_types = [
    "cell_burner",
    "duct_burner",
    "vertical_firing",
    "wall_fired",
]
for firing_type in other_firing_types:
    for bottom in ("wet", "dry", "none"):
        nox_factors_eia_bft[f"{firing_type}_{bottom}"] = nox_factors_eia_bft[
            f"other_{bottom}"
        ]
# unknown BFTs ("none") get wet and dry columns as well
for bottom in ("wet", "dry"):
    nox_factors_eia_bft[f"none_{bottom}"] = nox_factors_eia_bft[f"other_{bottom}"]

# Long form: one row per fuel and "<bft>_<wdb>" column; rows without a factor are
# dropped.
nox_factors_eia_bft = nox_factors_eia_bft.melt(
    id_vars=[
        "energy_source_code",
        "data_source",
        "emission_factor_numerator",
        "emission_factor_denominator",
    ],
    var_name="boiler_firing_type",
    value_name="emission_factor",
).dropna()

# Split the melted column name into the firing type and the bottom type. The bottom
# type defaults to "none" and is overridden where a "_wet" or "_dry" marker is found.
nox_bft_labels = nox_factors_eia_bft["boiler_firing_type"].str.replace("_none", "")
nox_label_is_wet = nox_bft_labels.str.contains("_wet")
nox_label_is_dry = nox_bft_labels.str.contains("_dry")

nox_factors_eia_bft["wet_dry_bottom"] = "none"
nox_factors_eia_bft.loc[nox_label_is_wet, "wet_dry_bottom"] = "wet"
nox_factors_eia_bft.loc[nox_label_is_dry, "wet_dry_bottom"] = "dry"

# what remains once the markers are removed is the firing type itself
nox_factors_eia_bft["boiler_firing_type"] = nox_bft_labels.str.replace(
    "_dry", ""
).str.replace("_wet", "")


# These BFT-specific factors generally only apply to ST and OT prime movers, but
# combined cycle units can have duct burners or other boilers as part of a HRSG
# (https://www.eia.gov/todayinenergy/detail.php?id=52778), so those prime movers are
# added too. Joining on the numerator unit "lb" repeats each factor row once per
# prime mover.
nox_bft_prime_movers = pd.DataFrame(
    [("lb", pm_code) for pm_code in ("ST", "OT", "GT", "CA", "CS", "CT")],
    columns=["emission_factor_numerator", "prime_mover_code"],
)
nox_factors_eia_bft = nox_factors_eia_bft.merge(
    nox_bft_prime_movers,
    how="left",
    on="emission_factor_numerator",
)


# Prime-mover specific factors: one column per prime mover plus the descriptive
# columns.
nox_pm_id_columns = [
    "energy_source_code",
    "data_source",
    "emission_factor_numerator",
    "emission_factor_denominator",
]
nox_factors_eia_pm = nox_factors_eia[
    [
        "energy_source_code",
        "data_source",
        "GT",
        "IC",
        "ST",
        "OT",
        "emission_factor_numerator",
        "emission_factor_denominator",
    ]
].copy()

# combined cycle parts take the GT factor; so does the CE prime mover used for the
# CAES system at plant 7063
nox_factors_eia_pm = nox_factors_eia_pm.assign(
    **{
        gt_like_pm: nox_factors_eia_pm["GT"]
        for gt_like_pm in ("CA", "CC", "CS", "CT", "CE")
    }
)

# Long form: one row per fuel and prime mover, dropping rows without a factor. These
# factors carry no boiler information, so both boiler columns are set to "none".
nox_factors_eia_pm = (
    nox_factors_eia_pm.melt(
        id_vars=nox_pm_id_columns,
        var_name="prime_mover_code",
        value_name="emission_factor",
    )
    .dropna()
    .assign(boiler_firing_type="none", wet_dry_bottom="none")
)


# stack the boiler-specific and prime-mover-specific tables, put the columns in
# output order and sort for readability
nox_output_columns = [
    "prime_mover_code",
    "energy_source_code",
    "wet_dry_bottom",
    "boiler_firing_type",
    "emission_factor",
    "emission_factor_numerator",
    "emission_factor_denominator",
    "data_source",
]
nox_factors_eia_combined = (
    pd.concat([nox_factors_eia_bft, nox_factors_eia_pm], axis=0)
    .loc[:, nox_output_columns]
    .sort_values(by=["energy_source_code", "prime_mover_code", "boiler_firing_type"])
)

nox_factors_eia_combined

In [None]:
# NOTE(review): `gen_nox_factors` is not assigned anywhere above this cell; it is
# first built in the later "## NOx" section of this notebook. On a fresh kernel
# (Restart & Run All) this cell therefore raises NameError. The same check is
# repeated right after `gen_nox_factors` is built, so this cell only works once
# that later section has been run.
#
# Look up a NOx factor for every distinct prime mover / fuel / boiler combination
# found in `gen_nox_factors`.
test = (
    gen_nox_factors[
        [
            "prime_mover_code",
            "energy_source_code",
            "boiler_firing_type",
            "wet_dry_bottom",
        ]
    ]
    .drop_duplicates()
    .merge(
        nox_factors_eia_combined[
            [
                "prime_mover_code",
                "energy_source_code",
                "wet_dry_bottom",
                "boiler_firing_type",
                "emission_factor",
            ]
        ],
        how="left",
        on=[
            "prime_mover_code",
            "boiler_firing_type",
            "wet_dry_bottom",
            "energy_source_code",
        ],
    )
)
# Combinations that found no factor in the left join above.
test2 = (
    test[test["emission_factor"].isna()][
        [
            "prime_mover_code",
            "boiler_firing_type",
            "wet_dry_bottom",
            "energy_source_code",
        ]
    ]
    .drop_duplicates()
    .sort_values(
        by=[
            "energy_source_code",
            "prime_mover_code",
            "boiler_firing_type",
        ]
    )
)
test2

In [None]:
# Display every row of the EIA-derived NOx table whose key combination occurs more
# than once (all copies are shown). An empty result means there is nothing to drop.
nox_combined_key_columns = [
    "prime_mover_code",
    "energy_source_code",
    "wet_dry_bottom",
    "boiler_firing_type",
]
nox_combined_is_duplicated = nox_factors_eia_combined.duplicated(
    subset=nox_combined_key_columns, keep=False
)
nox_factors_eia_combined.loc[nox_combined_is_duplicated].sort_values(
    by=nox_combined_key_columns
)

In [None]:
# Reload the reference NOx emission factors and display them for inspection.
ref_nox = load_data.load_nox_emission_factors()
ref_nox

## Test that we have factors for all of the possible fuel-PM-BFT combos

In [None]:
# Unique plant / generator / prime mover / fuel combinations that report positive
# fuel consumption.
gen_keys_for_so2 = load_data.load_pudl_table(
    "out_eia923__monthly_generation_fuel_by_generator_energy_source",
    columns=[
        "report_date",
        "plant_id_eia",
        "generator_id",
        "prime_mover_code",
        "energy_source_code",
        "fuel_consumed_mmbtu",
    ],
)

so2_gen_key_columns = [
    "plant_id_eia",
    "generator_id",
    "prime_mover_code",
    "energy_source_code",
]
gen_keys_for_so2 = gen_keys_for_so2.loc[
    gen_keys_for_so2["fuel_consumed_mmbtu"] > 0, so2_gen_key_columns
].drop_duplicates()

# Load the boiler firing type info. The boiler bottom type is dropped here, and any
# rows that become identical as a result are collapsed.
boiler_firing_type = (
    emissions.load_boiler_firing_type(year)
    .drop(columns="wet_dry_bottom")
    .drop_duplicates()
)

# The boiler-generator association table links each boiler_id to generator_ids, which
# lets a firing type be identified for each generator.
boiler_generator_assn = load_data.load_pudl_table(
    "core_eia860__assn_boiler_generator",
    year,
    columns=["plant_id_eia", "boiler_id", "generator_id"],
)
so2_boiler_join_keys = ["plant_id_eia", "boiler_id"]
boiler_firing_type = boiler_firing_type.merge(
    boiler_generator_assn,
    how="left",
    on=so2_boiler_join_keys,
    validate="1:m",
)

# merge the gen keys with the boiler firing types
gen_types = gen_keys_for_so2.merge(
    boiler_firing_type,
    how="left",
    on=["plant_id_eia", "generator_id"],
    validate="m:m",
)
# Keep only the fuel-combusting prime movers that the SO2 factor tables cover.
# "CC" is included: the list previously named "CS" twice and left "CC" out, even
# though the prime-mover SO2 table is built for CA, CC, CS and CT.
# `.copy()` makes the filtered frame independent, so the column assignment below
# does not write into a slice of the merged frame (SettingWithCopyWarning).
gen_types = gen_types[
    gen_types["prime_mover_code"].isin(["GT", "IC", "ST", "OT", "CA", "CC", "CS", "CT"])
].copy()
# generators without a matched boiler get the placeholder firing type "none"
gen_types["boiler_firing_type"] = gen_types["boiler_firing_type"].fillna("none")

In [None]:
# Look up an SO2 factor for every generator record. Records left without a factor
# are combinations the EIA-derived table does not cover.
so2_lookup_keys = [
    "prime_mover_code",
    "boiler_firing_type",
    "energy_source_code",
]
test = gen_types.merge(
    so2_factors_eia_combined[
        [
            "prime_mover_code",
            "energy_source_code",
            "boiler_firing_type",
            "emission_factor",
        ]
    ],
    how="left",
    on=so2_lookup_keys,
)

# distinct uncovered combinations
so2_uncovered = test.loc[test["emission_factor"].isna(), so2_lookup_keys]
so2_uncovered.drop_duplicates().sort_values(by=so2_lookup_keys)

In [None]:
# Distinct prime mover / firing type / fuel combinations in `test` that have no
# emission factor, kept in `test2` and displayed.
uncovered_key_columns = [
    "prime_mover_code",
    "boiler_firing_type",
    "energy_source_code",
]
has_no_factor = test["emission_factor"].isna()
test2 = (
    test.loc[has_no_factor, uncovered_key_columns]
    .drop_duplicates()
    .sort_values(by=uncovered_key_columns)
)
test2

In [None]:
# Show the full rows of `test` (all columns) that did not get an emission factor.
test[test["emission_factor"].isna()]

## NOx

In [None]:
from oge.constants import CLEAN_FUELS

In [None]:
# Build a dataframe of all unique generator / prime mover / fuel combinations (per
# report date) for emitting energy source types with data reported.

gen_keys_for_nox = load_data.load_pudl_table(
    "out_eia923__monthly_generation_fuel_by_generator_energy_source",
    columns=[
        "report_date",
        "plant_id_eia",
        "generator_id",
        "prime_mover_code",
        "energy_source_code",
        "fuel_consumed_mmbtu",
    ],
)
nox_gen_key_columns = [
    "report_date",
    "plant_id_eia",
    "generator_id",
    "prime_mover_code",
    "energy_source_code",
]
# positive fuel consumption only -> key columns -> unique rows -> drop clean fuels
gen_keys_for_nox = (
    gen_keys_for_nox.loc[
        gen_keys_for_nox["fuel_consumed_mmbtu"] > 0, nox_gen_key_columns
    ]
    .drop_duplicates()
    .loc[lambda keys: ~keys["energy_source_code"].isin(CLEAN_FUELS)]
)

# Load the reference NOx factors, keeping only the first row for each
# prime mover / fuel / bottom type / firing type combination.
nox_reference_key_columns = [
    "prime_mover_code",
    "energy_source_code",
    "wet_dry_bottom",
    "boiler_firing_type",
]
nox_emission_factors = load_data.load_nox_emission_factors().drop_duplicates(
    subset=nox_reference_key_columns
)

# Load the boiler firing type info (the wet/dry bottom column is kept here).
boiler_firing_type = emissions.load_boiler_firing_type(year)

# The boiler-generator association table links each boiler_id to generator_ids, which
# lets a firing type be identified for each generator.
boiler_generator_assn = load_data.load_pudl_table(
    "core_eia860__assn_boiler_generator",
    year,
    columns=["plant_id_eia", "boiler_id", "generator_id"],
)
nox_boiler_join_keys = ["plant_id_eia", "boiler_id"]
boiler_firing_type = boiler_firing_type.merge(
    boiler_generator_assn,
    how="left",
    on=nox_boiler_join_keys,
    validate="1:m",
)

# attach the boiler information to each generator key
nox_generator_join_keys = ["plant_id_eia", "generator_id"]
gen_nox_factors = gen_keys_for_nox.merge(
    boiler_firing_type,
    how="left",
    on=nox_generator_join_keys,
    validate="m:m",
)

# generators without matched boiler information get the placeholder "none"
for boiler_column in ("wet_dry_bottom", "boiler_firing_type"):
    gen_nox_factors[boiler_column] = gen_nox_factors[boiler_column].fillna("none")

In [None]:
# Look up a NOx factor for every distinct prime mover / fuel / boiler combination
# found in the generator data.
nox_combination_columns = [
    "prime_mover_code",
    "energy_source_code",
    "boiler_firing_type",
    "wet_dry_bottom",
]
nox_lookup_keys = [
    "prime_mover_code",
    "boiler_firing_type",
    "wet_dry_bottom",
    "energy_source_code",
]
nox_generator_combinations = gen_nox_factors[nox_combination_columns].drop_duplicates()
test = nox_generator_combinations.merge(
    nox_factors_eia_combined[
        [
            "prime_mover_code",
            "energy_source_code",
            "wet_dry_bottom",
            "boiler_firing_type",
            "emission_factor",
        ]
    ],
    how="left",
    on=nox_lookup_keys,
)

# combinations that found no factor in the EIA-derived table
nox_has_no_factor = test["emission_factor"].isna()
test2 = (
    test.loc[nox_has_no_factor, nox_lookup_keys]
    .drop_duplicates()
    .sort_values(
        by=[
            "prime_mover_code",
            "energy_source_code",
            "boiler_firing_type",
        ]
    )
)
test2

In [None]:
# Attach the EIA-derived NOx factor to every generator record. The m:1 validation
# makes the merge fail if the factor table has more than one row per key.
nox_factor_join_keys = [
    "prime_mover_code",
    "energy_source_code",
    "wet_dry_bottom",
    "boiler_firing_type",
]
gen_nox_factors = pd.merge(
    gen_nox_factors,
    nox_factors_eia_combined,
    how="left",
    on=nox_factor_join_keys,
    validate="m:1",
)