In [None]:
# import packages
import pandas as pd
import numpy as np
import os
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys

sys.path.append("../../src/")

import oge.download_data as download_data
import oge.load_data as load_data
from oge.column_checks import get_dtypes, apply_dtypes
from oge.filepaths import *
import oge.impute_hourly_profiles as impute_hourly_profiles
import oge.data_cleaning as data_cleaning
import oge.output_data as output_data
import oge.emissions as emissions
import oge.helpers as helpers
import oge.validation as validation
import oge.gross_to_net_generation as gross_to_net_generation
import oge.eia930 as eia930
from oge.logging_util import get_logger, configure_root_logger
import oge.constants as constants

# Set up logging through the project's `oge.logging_util` helpers and create
# the logger named "test" used by this notebook.
configure_root_logger()
logger = get_logger("test")


# Year setting and the matching "<year>/" path prefix.
# NOTE(review): neither value is referenced by the cells visible here, which
# load a fixed 2005-2023 range instead — confirm whether they are still needed.
year = 2022
path_prefix = f"{year}/"

Some generators (e.g., generator `GT1` at plant `1571`) get renamed over time. This notebook tries to find other examples of such renames.

In [None]:
# Load generator records for the range requested via `year`/`end_year`
# (2005 through 2023).
gens = load_data.load_pudl_table(
    "core_eia860__scd_generators",
    year=2005,
    end_year=2023,
    columns=[
        "plant_id_eia",
        "generator_id",
        "report_date",
        "capacity_mw",
        "operational_status",
        "operational_status_code",
        "planned_generator_retirement_date",
    ],
)

# Plant-level attributes to attach to each generator record: plant name,
# county, and state.
location = load_data.load_pudl_table(
    "core_eia__entity_plants",
    columns=[
        "plant_id_eia",
        "plant_name_eia",
        "county",
        "state",
    ],
)

# Attach the plant attributes to every generator record.
# `validate="m:1"` makes the merge raise if a plant_id_eia appears more than
# once in `location`. Without it, duplicated plant rows would silently
# duplicate generator rows and inflate any per-generator row counts computed
# from `gens`.
gens = gens.merge(location, how="left", on=["plant_id_eia"], validate="m:1")

In [None]:
# Show every record for the known renamed generator: plant 1571, generator GT1.
gens.loc[gens["plant_id_eia"].eq(1571) & gens["generator_id"].eq("GT1")]

In [None]:
# Flag generators whose reporting history has gaps: generators that appear in
# fewer report years than the span between their first and last report implies.
gen_keys = ["plant_id_eia", "generator_id"]

# The back-fill and "first" steps below depend on row order, so sort each
# generator's records chronologically instead of relying on whatever order the
# loader returned. `sort_values` returns a new frame, so `gens` is not modified.
switch_test = gens.sort_values(by=gen_keys + ["report_date"])

# Within each generator, fill a missing status with the next reported status.
switch_test["operational_status"] = switch_test.groupby(gen_keys)[
    "operational_status"
].bfill()

# Keep only generators whose earliest reported status is "existing".
# NOTE(review): presumably this excludes generators that first appear with
# another status (e.g. proposed) — confirm the intent.
earliest_status = switch_test.groupby(gen_keys)["operational_status"].transform(
    "first"
)
switch_test = switch_test[earliest_status == "existing"]

# Number of report years implied by the first and last report (inclusive)
# versus the number of records actually present for each generator.
report_dates = switch_test.groupby(gen_keys)["report_date"]
expected_num_years = report_dates.max().dt.year - report_dates.min().dt.year + 1
total_num_years = report_dates.count()

# Generators with fewer records than expected have at least one missing year.
total_num_years[total_num_years < expected_num_years]

In [None]:
# Drill into one of the flagged generators: plant 55160, generator UNT1.
gens.loc[gens["plant_id_eia"].eq(55160) & gens["generator_id"].eq("UNT1")]

In [None]:
# Look for generators with the same unit number in the same county. A matching
# capacity and a similar name would suggest the same unit under a different ID.
gens.loc[gens["county"].eq("Hidalgo") & gens["generator_id"].eq("GEN1")]