In [1]:
# import packages
import pandas as pd
import os

%reload_ext autoreload
%autoreload 2

# Tell Python where to look for modules: add the `../../src/` directory (a path
# relative to the working directory) to the import path so that the `oge`
# package imported below can be found.
import sys

sys.path.append("../../src/")


import oge.load_data as load_data
import oge.data_cleaning as data_cleaning
from oge.filepaths import *  # NOTE(review): wildcard import; presumably the source of outputs_folder and reference_table_folder used below — confirm

# data year to process: used to load the CEMS data and to locate the subplant
# crosswalk file for that year
year = 2023



# About
The purpose of this notebook is to update the `reference_tables/steam_units_to_remove.csv` table.
This table identifies units in CEMS that are not mapped to a generator in the EIA data and that report no gross generation (i.e., units that only report steam data).

In [2]:
# Load the CEMS data for the selected year and pass it straight to the plant
# filter. Plants that are not grid-connected and plants located in Puerto Rico
# ("PR") are removed; the steam-only and distribution-connected filters are
# switched off for this notebook.
cems = data_cleaning.remove_plants(
    load_data.load_cems_data(year),
    non_grid_connected=True,
    remove_states=["PR"],
    steam_only_plants=False,
    distribution_connected_plants=False,
)

2024-11-20 15:16:35 [INFO] oge.oge.data_cleaning:677 Removing 41 plants that are not grid-connected
2024-11-20 15:16:41 [INFO] oge.oge.data_cleaning:641 Removing 1 plants located in the following states: ['PR']


In [3]:
# total the operational data for each CEMS unit across all of the loaded rows
# (the generator mapping is merged onto these totals in a later step)
unit_key_columns = ["plant_id_epa", "plant_id_eia", "emissions_unit_id_epa"]
operational_columns = [
    "gross_generation_mwh",
    "fuel_consumed_mmbtu",
    "steam_load_1000_lb",
    "co2_mass_lb",
]

unit_totals = cems.groupby(unit_key_columns)[operational_columns].sum()
# turn the grouping keys back into regular columns
cems_units = unit_totals.reset_index()

In [4]:
# load the unit to generator mapping from the subplant crosswalk
# NOTE: the data pipeline must already have been run for `year`, otherwise the
# crosswalk file read here will not exist
map_columns = ["plant_id_epa", "generator_id", "emissions_unit_id_epa"]
generator_unit_map = pd.read_csv(
    outputs_folder(f"{year}/subplant_crosswalk_{year}.csv.zip"),
    # only parse the columns that are needed for the mapping
    usecols=map_columns,
    # read the ID columns as text so that numeric-looking IDs are never inferred
    # as numbers, which would stop them from matching the string unit IDs in CEMS
    dtype={"generator_id": str, "emissions_unit_id_epa": str},
)[map_columns]

# Attach the generator mapping to each unit (a unit that maps to several
# generators yields one row per generator). Any `generator_id` column and
# duplicated unit rows left behind by a previous run of this cell are removed
# first, so re-running the cell does not create `generator_id_x` /
# `generator_id_y` columns. On a first run both clean-up steps are no-ops.
cems_units = (
    cems_units.drop(columns="generator_id", errors="ignore")
    .drop_duplicates()
    .merge(
        generator_unit_map, how="left", on=["plant_id_epa", "emissions_unit_id_epa"]
    )
)

In [5]:
# show the units that have no generator mapping and reported zero gross generation
has_no_generator = cems_units["generator_id"].isna()
has_no_generation = cems_units["gross_generation_mwh"].eq(0)
cems_units.loc[has_no_generator & has_no_generation]

Unnamed: 0,plant_id_epa,plant_id_eia,emissions_unit_id_epa,gross_generation_mwh,fuel_consumed_mmbtu,steam_load_1000_lb,co2_mass_lb,generator_id
745,1702,1702,A,0.0,3.825856e+04,34735.0,4.536492e+06,
746,1702,1702,B,0.0,5.631849e+04,33188.0,6.679372e+06,
1074,2503,2503,BLR114,0.0,1.797387e+06,1245005.0,0.000000e+00,
1075,2503,2503,BLR115,0.0,1.138404e+06,807743.0,0.000000e+00,
1076,2503,2503,BLR116,0.0,1.041446e+05,84140.0,0.000000e+00,
...,...,...,...,...,...,...,...,...
3973,55308,55308,A,0.0,2.856989e+05,339814.0,0.000000e+00,
3974,55308,55308,B,0.0,3.011653e+05,365662.0,0.000000e+00,
4113,55386,55386,B-1,0.0,3.275864e+05,227945.0,0.000000e+00,
4114,55386,55386,B-2,0.0,3.989732e+05,285374.0,0.000000e+00,


In [6]:
# for comparison, show the units that ARE mapped to a generator but still
# reported zero gross generation
has_generator = cems_units["generator_id"].notna()
has_no_generation = cems_units["gross_generation_mwh"].eq(0)
cems_units.loc[has_generator & has_no_generation]

Unnamed: 0,plant_id_epa,plant_id_eia,emissions_unit_id_epa,gross_generation_mwh,fuel_consumed_mmbtu,steam_load_1000_lb,co2_mass_lb,generator_id
94,165,165,1,0.0,0.000000e+00,0.0,0.000000e+00,1
101,202,202,1,0.0,0.000000e+00,0.0,0.000000e+00,1
102,203,203,1,0.0,4.887135e+05,305032.0,5.697775e+07,1
134,341,341,1,0.0,3.765627e+04,0.0,4.476154e+06,CT1
173,399,399,14,0.0,8.260000e+01,0.0,9.600000e+03,GT14
...,...,...,...,...,...,...,...,...
5044,58124,58124,1,0.0,4.581270e+06,3000874.0,9.551951e+08,1
5139,60340,60340,B0004,0.0,7.022665e+06,4294479.0,1.536436e+09,1
5183,60910,60910,1GT,0.0,0.000000e+00,0.0,0.000000e+00,NPLPS
5230,63628,63628,5A-1,0.0,0.000000e+00,0.0,0.000000e+00,5A


In [7]:
# select the units with no generator mapping and zero gross generation, keep one
# row per (plant, unit) pair, and write the pairs to the reference table
unmapped_without_generation = cems_units["generator_id"].isna() & cems_units[
    "gross_generation_mwh"
].eq(0)
steam_units_to_remove = cems_units.loc[
    unmapped_without_generation, ["plant_id_eia", "emissions_unit_id_epa"]
].drop_duplicates()
steam_units_to_remove.to_csv(
    reference_table_folder("steam_units_to_remove.csv"), index=False
)