In [None]:
# import packages
import pandas as pd
import numpy as np
import os
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# Tell Python where to look for modules (adds the repo's src/ folder to sys.path).
import sys

sys.path.append("../../src/")

import oge.download_data as download_data
import oge.load_data as load_data
from oge.column_checks import get_dtypes, apply_dtypes
from oge.filepaths import *
import oge.impute_hourly_profiles as impute_hourly_profiles
import oge.data_cleaning as data_cleaning
import oge.output_data as output_data
import oge.emissions as emissions
import oge.helpers as helpers
import oge.validation as validation
import oge.gross_to_net_generation as gross_to_net_generation
import oge.eia930 as eia930
from oge.logging_util import get_logger, configure_root_logger

# Configure logging through the project's logging utilities and create a logger
# named "test" for use in this notebook.
configure_root_logger()
logger = get_logger("test")


# Data year explored in this notebook, and the matching "<year>/" prefix for
# year-specific file paths.
year = 2022
path_prefix = f"{year}/"

In [None]:
# Load the plant static attributes and primary fuel table outputs.
# The file names are built from `year` / `path_prefix` (set in the first cell)
# rather than a hard-coded "2022", so these inputs stay in sync with the year
# that is passed to the PUDL loaders further down.
plant_attributes = pd.read_csv(
    outputs_folder(f"{path_prefix}plant_static_attributes_{year}.csv.zip"),
    compression="zip",
)
primary_fuel_table = pd.read_csv(
    outputs_folder(f"{path_prefix}primary_fuel_table_{year}.csv.zip"),
    compression="zip",
)

In [None]:
# inspect the primary fuel records for plant 7277
primary_fuel_table.loc[primary_fuel_table["plant_id_eia"].eq(7277)]

In [None]:
# Load the CEMS subplant output for the configured data year; the path is built
# from `year` / `path_prefix` (set in the first cell) instead of a hard-coded "2022".
cems = pd.read_csv(
    outputs_folder(f"{path_prefix}cems_subplant_{year}.csv.zip"), compression="zip"
)

In [None]:
# inspect the primary fuel records for plant 1571
primary_fuel_table.loc[primary_fuel_table["plant_id_eia"].eq(1571)]

In [None]:
# inspect the static attributes recorded for plant 1571
plant_attributes.loc[plant_attributes["plant_id_eia"].eq(1571)]

In [None]:
# Pass the CEMS data, plant attributes and primary fuel table to the project's
# fleet-assignment helper (presumably adds fleet identifiers to each subplant
# record — confirm against oge.helpers).
# NOTE(review): `cems` is overwritten with the helper's return value, so re-running
# this cell on its own feeds the already-processed frame back into the helper.
cems = helpers.assign_fleet_to_subplant_data(cems, plant_attributes, primary_fuel_table)

In [None]:
# inspect the CEMS records for subplant 1 of plant 165
cems.loc[cems["plant_id_eia"].eq(165) & cems["subplant_id"].eq(1)]

# Try adding proposed generators

In [None]:
# Request the generator capacity, energy source and operational status columns
# from the "core_eia860__scd_generators" PUDL table for `year`.
# NOTE(review): the next cell repeats this identical call before filtering, so this
# cell looks redundant.
gen_capacity = load_data.load_pudl_table(
    "core_eia860__scd_generators",
    year,
    columns=[
        "plant_id_eia",
        "generator_id",
        "capacity_mw",
        "energy_source_code_1",
        "operational_status",
        "operational_status_code",
    ],
)

In [None]:
# reload the generator table so this cell can be re-run on its own
gen_capacity = load_data.load_pudl_table(
    "core_eia860__scd_generators",
    year,
    columns=[
        "plant_id_eia",
        "generator_id",
        "capacity_mw",
        "energy_source_code_1",
        "operational_status",
        "operational_status_code",
    ],
)

# retain generators that are existing, plus proposed generators whose status code
# shows that construction has already started
under_construction_status_codes = ["U", "V", "TS"]
gen_capacity = gen_capacity.loc[
    gen_capacity["operational_status"].eq("existing")
    | (
        gen_capacity["operational_status"].eq("proposed")
        & gen_capacity["operational_status_code"].isin(under_construction_status_codes)
    )
]

In [None]:
# inspect the retained generators at plant 56350
gen_capacity.loc[gen_capacity["plant_id_eia"].eq(56350)]

# Explore generators that disappear and are re-assigned a new plant ID

In [None]:
# Request the generator status history (2005 through 2023) from the
# "core_eia860__scd_generators" PUDL table.
gens = load_data.load_pudl_table(
    "core_eia860__scd_generators",
    year=2005,
    end_year=2023,
    columns=[
        "plant_id_eia",
        "generator_id",
        "report_date",
        "utility_id_eia",
        "operational_status",
        "operational_status_code",
        "planned_generator_retirement_date",
    ],
)

# add county and state
location = load_data.load_pudl_table(
    "core_eia__entity_plants",
    columns=[
        "plant_id_eia",
        "county",
        "state",
    ],
)

# validate="m:1" makes pandas raise if `location` ever holds more than one row per
# plant_id_eia; without it a duplicated plant would silently duplicate generator
# records and distort the per-generator checks in the cells below.
gens = gens.merge(location, how="left", on=["plant_id_eia"], validate="m:1")

In [None]:
# inspect the reporting history of generator GT1 at plant 1571
gens.loc[gens["plant_id_eia"].eq(1571) & gens["generator_id"].eq("GT1")]

In [None]:
# check for gens where the ownership changes
gens[
    gens.groupby(["plant_id_eia", "generator_id"])["utility_id_eia"].transform(
        "nunique"
    )
    > 1
]

In [None]:
# inspect the reporting history of generator 2 at plant 70
gens.loc[gens["plant_id_eia"].eq(70) & gens["generator_id"].eq("2")]

In [None]:
# check for generators where the most recent year is not >= 2023 and it is not retired
flags = gens[
    (
        gens.groupby(["plant_id_eia", "generator_id"])["report_date"]
        .transform("max")
        .dt.year
        < 2023
    )
    & (gens["operational_status"] == "existing")
]
flags

In [None]:
# pair each flagged generator with every plant that reports the same generator_id
# in the same county and state; the matched plant ID lands in `plant_id_eia_match`
# NOTE(review): `flags` is overwritten with the merged frame, so re-running this
# cell on its own merges the already-merged result again.
# NOTE(review): pandas joins null keys to each other, so rows missing county/state
# would match one another — confirm whether those should be excluded first.
flags = pd.merge(
    flags,
    gens[["plant_id_eia", "generator_id", "county", "state"]].drop_duplicates(),
    on=["county", "state", "generator_id"],
    how="left",
    suffixes=(None, "_match"),
)

In [None]:
# drop where the two plant ids match
flags[flags["plant_id_eia"] != flags["plant_id_eia_match"]].drop_duplicates(
    subset=["plant_id_eia", "generator_id", "plant_id_eia_match"]
)

In [None]:
# of these, look for generators located in the same county and with the same generator id

In [None]:
# inspect the reporting history of generator 246 at plant 63091
gens.loc[gens["plant_id_eia"].eq(63091) & gens["generator_id"].eq("246")]

In [None]:
# TODO: check for generators with missing years