In [1]:
# import packages
import pandas as pd
import numpy as np
import os

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys

sys.path.append("../../src/")


import oge.load_data as load_data
from oge.filepaths import *

year = 2022

# About
The purpose of this notebook is to update the `reference_tables/steam_units_to_remove.csv` table.
This table identifies units in CEMS that do not exist in the EIA data, and which only report steam data.

In [2]:
cems = load_data.load_cems_data(2022)
cems

Unnamed: 0,plant_id_eia,emissions_unit_id_epa,datetime_utc,operating_time_hours,gross_generation_mwh,steam_load_1000_lb,fuel_consumed_mmbtu,co2_mass_lb,nox_mass_lb,so2_mass_lb,plant_id_epa,co2_mass_measurement_code,nox_mass_measurement_code,so2_mass_measurement_code
0,6288,2,2022-01-01 09:00:00+00:00,1.0,52.0,0.0,,,,,6288,,,
1,6288,2,2022-01-01 10:00:00+00:00,1.0,52.0,0.0,,,,,6288,,,
2,6288,2,2022-01-01 11:00:00+00:00,1.0,52.0,0.0,,,,,6288,,,
3,6288,2,2022-01-01 12:00:00+00:00,1.0,52.0,0.0,,,,,6288,,,
4,6288,2,2022-01-01 13:00:00+00:00,1.0,52.0,0.0,,,,,6288,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35142667,57703,CT03,2023-01-01 02:00:00+00:00,0.0,,0.0,,,,,57703,,,
35142668,57703,CT03,2023-01-01 03:00:00+00:00,0.0,,0.0,,,,,57703,,,
35142669,57703,CT03,2023-01-01 04:00:00+00:00,0.0,,0.0,,,,,57703,,,
35142670,57703,CT03,2023-01-01 05:00:00+00:00,0.0,,0.0,,,,,57703,,,


In [4]:
generator_unit_map = pd.read_csv(
    outputs_folder(f"{year}/subplant_crosswalk_{year}.csv"),
)[["plant_id_eia", "generator_id", "emissions_unit_id_epa"]]

In [5]:
cems_units = (
    cems.groupby(["plant_id_epa", "plant_id_eia", "emissions_unit_id_epa"])[
        ["gross_generation_mwh", "steam_load_1000_lb", "co2_mass_lb"]
    ]
    .sum()
    .reset_index()
)
cems_units = cems_units.merge(
    generator_unit_map, how="left", on=["plant_id_eia", "emissions_unit_id_epa"]
)

In [10]:
cems_units[cems_units["plant_id_eia"] == 315]

Unnamed: 0,plant_id_epa,plant_id_eia,emissions_unit_id_epa,gross_generation_mwh,steam_load_1000_lb,co2_mass_lb,generator_id
113,315,315,3,283821.0,0.0,400017400.0,3.0
114,315,315,4,275972.0,0.0,426005000.0,4.0
115,315,315,5,110825.0,0.0,156102600.0,5.0
116,315,315,CT1,1763448.0,0.0,1415544000.0,
117,315,315,CT2,1833818.0,0.0,1473966000.0,


In [12]:
cems_units[cems_units["generator_id"].isna() & (cems_units["gross_generation_mwh"] > 0)]

Unnamed: 0,plant_id_epa,plant_id_eia,emissions_unit_id_epa,gross_generation_mwh,steam_load_1000_lb,co2_mass_lb,generator_id
81,141,141,7,3048.0,0.0,2999672.0,
82,141,141,8,2649.0,0.0,2587676.0,
116,315,315,CT1,1763448.0,0.0,1415544000.0,
117,315,315,CT2,1833818.0,0.0,1473966000.0,
123,335,335,CT1,2059491.0,0.0,1641396000.0,
124,335,335,CT2,2007010.0,0.0,1596175000.0,
302,613,613,PFL7A,1189258.0,0.0,1345747000.0,
303,613,613,PFL7B,1357601.0,0.0,1530212000.0,
1793,6061,6061,3,106269.0,0.0,85015550.0,
2680,10350,10350,CTGA,2229.0,0.0,2561142.0,


In [7]:
cems_units[
    cems_units["generator_id"].isna() & (cems_units["gross_generation_mwh"] == 0)
]

Unnamed: 0,plant_id_epa,plant_id_eia,emissions_unit_id_epa,gross_generation_mwh,steam_load_1000_lb,co2_mass_lb,generator_id
721,1594,1594,11,0.0,111048.0,0.000000e+00,
722,1594,1594,12,0.0,125561.0,0.000000e+00,
752,1702,1702,A,0.0,19443.0,2.688114e+06,
753,1702,1702,B,0.0,44874.0,5.821982e+06,
1090,2503,2503,BLR114,0.0,980786.0,0.000000e+00,
...,...,...,...,...,...,...,...
5404,880109,880109,B001,0.0,0.0,0.000000e+00,
5405,880110,880110,1,0.0,63168.0,0.000000e+00,
5406,880110,880110,2,0.0,72578.0,0.000000e+00,
5407,880110,880110,3,0.0,98377.0,0.000000e+00,


In [None]:
crosswalk = load_data.load_pudl_table("epacamd_eia")

In [None]:
crosswalk[crosswalk["plant_id_eia"] == 55212]

In [None]:
generator_unit_map

In [None]:
eia_