In [1]:
# import packages
import pandas as pd
import numpy as np
import os
import plotly.express as px

# Select the PUDL data store and build before the `oge` modules are imported below.
# NOTE(review): presumably `oge` reads these variables when loading PUDL tables — confirm.
os.environ["PUDL_DATA_STORE"] = "s3"
os.environ["PUDL_BUILD"] = "nightly"

# Automatically reload edited modules before each cell executes.
%reload_ext autoreload
%autoreload 2

# Tell python where to look for modules.
# The path is relative, so it resolves against the notebook's working directory.
import sys

sys.path.append("../../src/")

import oge.download_data as download_data
import oge.load_data as load_data
from oge.column_checks import get_dtypes, apply_dtypes, DATA_COLUMNS
from oge.filepaths import *
import oge.impute_hourly_profiles as impute_hourly_profiles
import oge.data_cleaning as data_cleaning
import oge.output_data as output_data
import oge.emissions as emissions
import oge.helpers as helpers
import oge.validation as validation
import oge.gross_to_net_generation as gross_to_net_generation
import oge.eia930 as eia930
from oge.logging_util import get_logger, configure_root_logger
import oge.constants as constants

# Data year under investigation, and the matching per-year folder prefix.
year = 2024
path_prefix = f"{year}/"

# Set up logging for this notebook session.
configure_root_logger()
logger = get_logger("test")

In [None]:
# Display only the PUDL-related settings. Echoing all of `os.environ` would
# write every environment variable (including any credentials or tokens) into
# the saved notebook output.
pudl_env = {key: value for key, value in os.environ.items() if key.startswith("PUDL_")}
pudl_env

In [None]:
def load_output_table(table_name, parse_dates=None):
    """Read one zipped CSV from the outputs folder for the notebook's `year`.

    Args:
        table_name: File stem without the year suffix; e.g. "cems_subplant"
            reads `{year}/cems_subplant_{year}.csv.zip`.
        parse_dates: Optional list of column names to parse as datetimes.
            When omitted, the pandas default is used.

    Returns:
        A DataFrame with the contents of the file.
    """
    read_kwargs = {"compression": "zip"}
    if parse_dates is not None:
        read_kwargs["parse_dates"] = parse_dates
    # `year` is read from the notebook namespace at call time.
    return pd.read_csv(
        outputs_folder(f"{year}/{table_name}_{year}.csv.zip"), **read_kwargs
    )


# load intermediate output data as needed
cems = load_output_table(
    "cems_subplant", parse_dates=["datetime_utc", "report_date"]
)
partial_cems_subplant = load_output_table(
    "partial_cems_subplant", parse_dates=["datetime_utc", "report_date"]
)
partial_cems_plant = load_output_table(
    "partial_cems_plant", parse_dates=["datetime_utc", "report_date"]
)
eia923_allocated = load_output_table("eia923_allocated", parse_dates=["report_date"])
plant_attributes = load_output_table("plant_static_attributes")
primary_fuel_table = load_output_table("primary_fuel_table")

# Keep only the allocated EIA-923 rows whose hourly data source is "eia".
monthly_eia_data_to_shape = eia923_allocated[
    eia923_allocated["hourly_data_source"] == "eia"
]

In [2]:
# Subplant crosswalk written to the outputs folder for this year.
spc_path = outputs_folder(f"{year}/subplant_crosswalk_{year}.csv.zip")
spc = pd.read_csv(spc_path, compression="zip")

In [3]:
# IDs to investigate. Despite the name, these are compared against
# `plant_id_eia` values in the cells that follow.
gens_to_check = [
    192, 66621, 68375, 68515, 68516, 68534, 68577,
    68644, 68745, 68770, 68771, 68772, 68776, 68777,
    68872, 68880, 68938, 68939, 69025, 69299, 69327,
]  # fmt: skip

In [4]:
# Which of the IDs under investigation never appear in the subplant crosswalk?
spc_plants = list(spc["plant_id_eia"].unique())
missing_from_spc = list(
    filter(lambda plant_id: plant_id not in spc_plants, gens_to_check)
)
missing_from_spc

[192, 69299, 69327]

Missing plants: [192, 68515, 68516, 68880, 69299, 69327]
- 192: retired in 2002, no gf data
- 68515: planned, status P
- 68516: planned, status P in 2024, but TS in 2025 (monthly update)
- 68880: planned, status P in 2024, but V in 2025 (monthly update)
- 69299: proposed, status P, only available from nightly build
- 69327: proposed, status U, only available from nightly build


In [None]:
# Load the PUDL table `out_eia__yearly_generators` (no year filter is passed).
# NOTE(review): presumably one row per generator per report year — confirm against the PUDL schema.
gens = load_data.load_pudl_table("out_eia__yearly_generators")

In [None]:
# Load the PUDL table `core_eia860__scd_plants` (no year filter is passed).
plants_core = load_data.load_pudl_table("core_eia860__scd_plants")

In [None]:
# Load the PUDL table `core_epa__assn_eia_epacamd_subplant_ids` (no year filter is passed).
si = load_data.load_pudl_table("core_epa__assn_eia_epacamd_subplant_ids")

In [None]:
# Rows of the yearly generators table for plant 68515.
gens.loc[gens["plant_id_eia"].eq(68515)]

In [None]:
# Subplant ID rows for plant 10682.
# NOTE(review): 10682 is not one of the IDs in `gens_to_check` — confirm this is the intended plant.
si.loc[si["plant_id_eia"].eq(10682)]

In [None]:
# Rows of the plants table for plant 69327.
plants_core.loc[plants_core["plant_id_eia"].eq(69327)]

In [None]:
# Rows of `gf` for plant 69299.
# FIXME(review): `gf` is not defined in any visible cell of this notebook, so this
# cell raises NameError after Restart Kernel -> Run All. Add a cell that loads
# `gf` before this one (presumably a generation-fuel table — confirm which).
gf[gf["plant_id_eia"] == 69299]