In [None]:
# import packages
import pandas as pd
import numpy as np
import os

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys

sys.path.append("../../src/")


import oge.load_data as load_data
import oge.data_cleaning as data_cleaning
from oge.filepaths import *

# data year to process: selects which CEMS data is loaded and which
# subplant crosswalk output file (`{year}/subplant_crosswalk_{year}.csv`) is read
year = 2022

# About
The purpose of this notebook is to update the `reference_tables/steam_units_to_remove.csv` table.
This table identifies units in CEMS that do not exist in the EIA data, and which only report steam data.

In [None]:
# Load the CEMS data for `year` and filter plants in one step.
# Non-grid-connected plants and plants in the "PR" state are requested for removal;
# the steam-only and distribution-connected filters are switched off
# (presumably so those plants stay in the data for the checks in this notebook --
# confirm against data_cleaning.remove_plants).
cems = data_cleaning.remove_plants(
    load_data.load_cems_data(year),
    non_grid_connected=True,
    remove_states=["PR"],
    steam_only_plants=False,
    distribution_connected_plants=False,
)

In [None]:
# Total the operational data for each unit (one row per plant / unit combination)
unit_keys = ["plant_id_epa", "plant_id_eia", "emissions_unit_id_epa"]
operational_columns = [
    "gross_generation_mwh",
    "fuel_consumed_mmbtu",
    "steam_load_1000_lb",
    "co2_mass_lb",
]
cems_units = cems.groupby(unit_keys)[operational_columns].sum().reset_index()

In [None]:
# Read the unit-to-generator mapping out of the subplant crosswalk.
# NOTE: the crosswalk is read from the outputs folder, so the pipeline must have
# been run for `year` before this cell will work.
crosswalk_path = outputs_folder(f"{year}/subplant_crosswalk_{year}.csv")
mapping_columns = ["plant_id_epa", "generator_id", "emissions_unit_id_epa"]
generator_unit_map = pd.read_csv(crosswalk_path)[mapping_columns]

# Left merge keeps every CEMS unit; units with no match in the crosswalk end up
# with a missing generator_id.
cems_units = pd.merge(
    cems_units,
    generator_unit_map,
    how="left",
    on=["plant_id_epa", "emissions_unit_id_epa"],
)

In [None]:
# show units that are not mapped to any generator and report zero gross generation
is_unmapped = cems_units["generator_id"].isna()
has_zero_generation = cems_units["gross_generation_mwh"].eq(0)
cems_units[is_unmapped & has_zero_generation]

In [None]:
# for comparison: units that are mapped to a generator but report zero gross generation
is_mapped = cems_units["generator_id"].notna()
reports_no_generation = cems_units["gross_generation_mwh"].eq(0)
cems_units[is_mapped & reports_no_generation]

In [None]:
# Export the units that have no generator mapping and report zero gross generation
# to the `steam_units_to_remove.csv` reference table.
# Only the identifying columns are kept and de-duplicated (the left merge with the
# generator map can repeat a unit), and the DataFrame index is not written.
# NOTE: `index=False` is an argument of `to_csv`, not of the path helper
# `reference_table_folder`, so it must sit outside the helper call.
cems_units.loc[
    cems_units["generator_id"].isna() & (cems_units["gross_generation_mwh"] == 0),
    ["plant_id_eia", "emissions_unit_id_epa"],
].drop_duplicates().to_csv(
    reference_table_folder("steam_units_to_remove.csv"), index=False
)