# Check PUDL EIA-930 Tables

In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd

# Select the S3-backed PUDL data store.
# NOTE(review): presumably this has to be set before `oge` is imported so the
# loader picks it up -- confirm.
os.environ["PUDL_DATA_STORE"] = "s3"

In [None]:
from oge.load_data import load_pudl_table

## Generation Data

In [None]:
# Year and PUDL table used by the cells of the "Generation Data" section.
year = 2025
generation_table = "core_eia930__hourly_net_generation_by_energy_source"

In [None]:
# Load hourly generation for June. The window opens at 01:00, presumably
# because each record is stamped at the end of its hour -- TODO confirm.
one_hour = pd.Timedelta(hours=1)
generation = load_pudl_table(
    generation_table,
    dt=pd.Timestamp(year=year, month=6, day=1, hour=1),
    end_dt=pd.Timestamp(year=year, month=7, day=1),
)
# Re-label every record from hour-ending to hour-beginning.
generation["datetime_utc"] = generation["datetime_utc"].sub(one_hour)
display(generation)

In [None]:
# Keep only the generation records of a single balancing authority.
ba_code = "MISO"
is_selected_ba = generation["balancing_authority_code_eia"] == ba_code
generation_in_ba = generation.loc[is_selected_ba]

In [None]:
# One panel per energy source reported for the selected balancing authority.
# Sources are sorted (and missing values dropped) so the panel order is the
# same on every run; iterating a `set` gives an arbitrary order.
energy_sources = sorted(generation_in_ba["generation_energy_source"].dropna().unique())
if not energy_sources:
    raise ValueError(f"No generation records found for {ba_code}")

fig, axes = plt.subplots(
    len(energy_sources),
    1,
    figsize=(10, 25),
    sharex=True,
    layout="constrained",
    squeeze=False,  # always get a 2-D array, even when there is a single panel
)
axes = axes.ravel()

column_labels = {
    "net_generation_reported_mwh": "Reported (MWh)",
    "net_generation_adjusted_mwh": "Adjusted (MWh)",
    "net_generation_imputed_eia_mwh": "Imputed by EIA(MWh)",
}
for ax, source in zip(axes, energy_sources):
    (
        generation_in_ba.loc[generation_in_ba["generation_energy_source"] == source]
        .set_index("datetime_utc")
        .rename(columns=column_labels)
        .plot(
            ax=ax,
            legend=False,
            style=["x", "-", "--"],
        )
    )

    # Turn e.g. "some_source_w_storage" into "Some Source with Storage".
    ax.set_title(
        source.replace("_", " ")
        .title()
        .replace(" W ", " with ")
        .replace(" Wo ", " without "),
        fontsize=10,
    )
    # Per-panel axis labels are blanked; shared labels are set on the figure.
    ax.set_xlabel("")
    ax.tick_params(axis="x", labelsize=8)
    ax.set_ylabel("")
    ax.tick_params(axis="y", labelsize=8)

# Every panel draws the same series, so one panel's handles serve the figure.
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="upper right")
fig.suptitle(f"Generation in {ba_code}", fontsize=15)
# Figure-level labels are managed by the constrained layout, unlike free text
# placed at negative figure coordinates.
fig.supylabel("Net Generation (MWh)", fontsize=12)
fig.supxlabel("Timestamp (UTC)", fontsize=12)

plt.show()

## Total Generation vs Aggregated Fuel Generation

In [None]:
# Year and PUDL tables compared in the "Total Generation vs Aggregated Fuel
# Generation" section.
year = 2023
generation_table = "core_eia930__hourly_net_generation_by_energy_source"
operations_table = "core_eia930__hourly_operations"

In [None]:
# Load a full year of hourly operations. The window opens at 01:00, presumably
# because each record is stamped at the end of its hour -- TODO confirm.
one_hour = pd.Timedelta(hours=1)
operations = load_pudl_table(
    operations_table,
    dt=pd.Timestamp(year=year, month=1, day=1, hour=1),
    end_dt=pd.Timestamp(year=year + 1, month=1, day=1),
)
# Re-label every record from hour-ending to hour-beginning.
operations["datetime_utc"] = operations["datetime_utc"].sub(one_hour)
display(operations)

In [None]:
# Load a full year of hourly generation over the same window as the
# operations table loaded for this section.
one_hour = pd.Timedelta(hours=1)
generation = load_pudl_table(
    generation_table,
    dt=pd.Timestamp(year=year, month=1, day=1, hour=1),
    end_dt=pd.Timestamp(year=year + 1, month=1, day=1),
)
# Re-label every record from hour-ending to hour-beginning.
generation["datetime_utc"] = generation["datetime_utc"].sub(one_hour)
display(generation)

In [None]:
# Compare, for every (balancing authority, hour) pair, the total adjusted net
# generation from the operations table with the sum over energy sources from
# the generation table.
ba_hour_keys = ["balancing_authority_code_eia", "datetime_utc"]

total = operations.set_index(ba_hour_keys)[["net_generation_adjusted_mwh"]].sort_index()
aggregated = (
    generation.groupby(ba_hour_keys)[["net_generation_adjusted_mwh"]].sum().sort_index()
)

# Both frames hold a single column; relabel them and align side by side.
comparison = pd.concat(
    [
        total.set_axis(["Total (MWh)"], axis=1),
        aggregated.set_axis(["Aggregated Fuel (MWh)"], axis=1),
    ],
    axis=1,
)
comparison.plot(
    kind="scatter",
    x="Total (MWh)",
    y="Aggregated Fuel (MWh)",
    s=1,
    alpha=0.8,
    title=f"Total Generation vs Aggregated Fuel Generation in BA ({year})",
    figsize=(10, 10),
)
plt.show()

In [None]:
# Residual = total adjusted net generation (operations table) minus the sum of
# adjusted net generation over energy sources (generation table), per hour,
# with one column per balancing authority.
#
# The per-BA series are collected and aligned in a single `pd.concat` so the
# resulting index is the union of all timestamps. Assigning columns one by one
# into an empty frame would instead pin the index to whichever BA happened to
# come first and silently drop hours that BA lacks.
residual_by_ba = {}
for ba in sorted(generation["balancing_authority_code_eia"].dropna().unique()):
    total = operations.loc[
        operations["balancing_authority_code_eia"] == ba
    ].set_index("datetime_utc")["net_generation_adjusted_mwh"]
    aggregated = (
        generation.loc[generation["balancing_authority_code_eia"] == ba]
        .groupby("datetime_utc")["net_generation_adjusted_mwh"]
        .sum()
    )
    residual_by_ba[ba] = total - aggregated

# `pd.concat` refuses an empty collection, so fall back to an empty frame.
residual = (
    pd.concat(residual_by_ba, axis=1).sort_index()
    if residual_by_ba
    else pd.DataFrame()
)

In [None]:
# Show the residual table (one column per balancing authority).
display(residual)

In [None]:
# Summary statistics of the residuals, column by column.
residual.describe()

In [None]:
# Balancing authorities whose residual is exactly zero in every row.
is_always_zero = residual.eq(0).all()
ba_where_all_residual_are_zero = residual.columns[is_always_zero].tolist()
display(ba_where_all_residual_are_zero)

In [None]:
# Take the absolute value of each hourly residual *before* summing so that
# positive and negative residuals cannot cancel out; this is what the plot
# title ("Sum of Absolute Residuals") announces.
nonzero_residual = residual.drop(columns=ba_where_all_residual_are_zero)
nonzero_residual.abs().sum().plot(
    kind="bar",
    ylabel="Sum (MWh)",
    title="Sum of Absolute Residuals by BA",
    figsize=(20, 7),
)
plt.show()

In [None]:
# Share of rows (in %) whose residual differs from zero, per balancing
# authority, skipping the BAs whose residual is always zero.
nonzero_residual = residual.drop(columns=ba_where_all_residual_are_zero)
nonzero_count = nonzero_residual.ne(0).sum()
nonzero_share = nonzero_count.mul(100).div(len(nonzero_residual)).round(2)
nonzero_share.plot(
    kind="bar",
    ylabel="Frequency (%)",
    title="Non-zero Residual Frequency by BA",
    figsize=(20, 7),
)
plt.show()

In [None]:
# Mean absolute percentage error of the aggregated fuel generation relative to
# the total adjusted net generation, per balancing authority (in %).
nonzero_residual = residual.drop(columns=ba_where_all_residual_are_zero)

# Restrict the denominator to the BAs actually present in the residual table;
# otherwise column alignment adds BAs that only exist in the operations table
# and reports a spurious 0 % for them.
total_by_ba = operations.pivot(
    index="datetime_utc",
    columns="balancing_authority_code_eia",
    values="net_generation_adjusted_mwh",
).reindex(columns=nonzero_residual.columns)

# A percentage error is undefined when the total is zero: mask those hours so
# they yield NaN instead of +/-inf, which would blow up the average.
percentage_error = nonzero_residual.div(total_by_ba.where(total_by_ba != 0)).abs()

# `mean` skips NaN, so each BA is averaged over the hours where the error is
# defined rather than over every row of the table.
mape = percentage_error.mean().mul(100).round(2)

In [None]:
# Show the MAPE values (in %) computed in the previous cell.
mape

In [None]:
# Bar chart of the MAPE, one bar per balancing authority.
mape_bar_options = {
    "ylabel": "MAPE (%)",
    "title": "Mean Absolute Percentage Error by BA",
    "figsize": (20, 7),
}
mape.plot.bar(**mape_bar_options)
plt.show()

## Operations Data

In [None]:
# Year and PUDL table used by the cells of the "Operations Data" section.
year = 2024
operations_table = "out_eia930__hourly_operations"

In [None]:
# Load hourly operations from January up to June 1st. The window opens at
# 01:00, presumably because each record is stamped at the end of its hour --
# TODO confirm.
one_hour = pd.Timedelta(hours=1)
operations = load_pudl_table(
    operations_table,
    dt=pd.Timestamp(year=year, month=1, day=1, hour=1),
    end_dt=pd.Timestamp(year=year, month=6, day=1),
)
# Re-label every record from hour-ending to hour-beginning.
operations["datetime_utc"] = operations["datetime_utc"].sub(one_hour)
display(operations)

## Reported Demand vs PUDL Imputed Demand

In [None]:
# Keep only the operations records of a single balancing authority.
ba_code = "AZPS"
is_selected_ba = operations["balancing_authority_code_eia"] == ba_code
operations_in_ba = operations.loc[is_selected_ba]

In [None]:
# Plot adjusted vs PUDL-imputed demand for the selected balancing authority,
# from the start of the loaded data up to February 1st of the loaded year.
# The cutoff follows `year` instead of a hard-coded 2024 so the cell keeps
# working when the year above is changed.
plot_end = pd.Timestamp(year=year, month=2, day=1, tz="UTC")
ax = (
    operations_in_ba.set_index("datetime_utc")[
        ["demand_adjusted_mwh", "demand_imputed_pudl_mwh"]
    ]
    # Label-based slicing requires a monotonic index when the bound is not an
    # exact label of the index.
    .sort_index()
    .loc[:plot_end]
    .rename(
        columns={
            "demand_adjusted_mwh": "Adjusted (MWh)",
            "demand_imputed_pudl_mwh": "Imputed by PUDL (MWh)",
        }
    )
    .plot(
        figsize=(20, 8),
        style=["o", "+"],
        fillstyle="none",
        xlabel="Timestamp (UTC)",
        ylabel="Demand (MWh)",
        title=f"Adjusted and Imputed Demand in {ba_code}",
    )
)

In [None]:
# Imputation codes of the rows where reported and PUDL-imputed demand differ
# by more than 0.5 MWh.
demand_gap = (
    operations_in_ba["demand_reported_mwh"]
    - operations_in_ba["demand_imputed_pudl_mwh"]
).abs()
operations_in_ba.loc[demand_gap > 0.5, "demand_imputed_pudl_mwh_imputation_code"]

## Interchange

In [None]:
# Keep only the operations records of a single balancing authority.
ba_code = "CISO"
is_selected_ba = operations["balancing_authority_code_eia"] == ba_code
operations_in_ba = operations.loc[is_selected_ba]

In [None]:
# Plot the adjusted interchange of the selected balancing authority and flag
# the hours where it is missing with a red cross drawn at 0.
is_na_interchange = pd.to_numeric(operations_in_ba["interchange_adjusted_mwh"]).isna()

# Create the axes up front: pandas raises when asked to plot an empty frame,
# so each layer below is drawn only if it has data.
fig, ax = plt.subplots(figsize=(12, 8))

available = operations_in_ba.loc[~is_na_interchange]
if not available.empty:
    available.rename(columns={"interchange_adjusted_mwh": "Adjusted (MWh)"}).plot(
        x="datetime_utc",
        y="Adjusted (MWh)",
        ax=ax,
        style="o",
        fillstyle="none",
    )

if is_na_interchange.any():
    pd.DataFrame(
        {"Missing": 0},
        index=operations_in_ba.loc[is_na_interchange, "datetime_utc"],
    ).plot(
        ax=ax,
        color="red",
        style="x",
        fillstyle="none",
    )

ax.set_title(f"{ba_code} Adjusted Interchange")
ax.set_xlabel("Timestamp (UTC)", fontsize=12)
ax.set_ylabel("Interchange (MWh)", fontsize=12)
plt.show()

## Balance

In [None]:
# Year and PUDL tables used by the cells of the "Balance" section.
year = 2024
generation_table = "core_eia930__hourly_net_generation_by_energy_source"
operations_table = "out_eia930__hourly_operations"

In [None]:
# Load hourly operations for January. The window opens at 01:00, presumably
# because each record is stamped at the end of its hour -- TODO confirm.
one_hour = pd.Timedelta(hours=1)
operations = load_pudl_table(
    operations_table,
    dt=pd.Timestamp(year=year, month=1, day=1, hour=1),
    end_dt=pd.Timestamp(year=year, month=2, day=1),
)
# Re-label every record from hour-ending to hour-beginning.
operations["datetime_utc"] = operations["datetime_utc"].sub(one_hour)
display(operations)

In [None]:
# Load hourly generation for January, over the same window as the operations
# table loaded for this section.
one_hour = pd.Timedelta(hours=1)
generation = load_pudl_table(
    generation_table,
    dt=pd.Timestamp(year=year, month=1, day=1, hour=1),
    end_dt=pd.Timestamp(year=year, month=2, day=1),
)
# Re-label every record from hour-ending to hour-beginning.
generation["datetime_utc"] = generation["datetime_utc"].sub(one_hour)
display(generation)

In [None]:
# Build the hourly balance table, indexed by (balancing authority, hour).
# Each "*_and_*" column is: demand - net generation + interchange, for one
# combination of demand flavour (adjusted / imputed) and generation flavour
# (total from operations / aggregated over energy sources).
ba_hour_keys = ["balancing_authority_code_eia", "datetime_utc"]

# Missing interchange values are counted as 0 in every balance.
interchange = pd.to_numeric(operations["interchange_adjusted_mwh"]).fillna(0)
total_generation = operations["net_generation_adjusted_mwh"]
adjusted_demand = operations["demand_adjusted_mwh"]
imputed_demand = operations["demand_imputed_pudl_mwh"]

balance = pd.DataFrame(
    {
        "balancing_authority_code_eia": operations["balancing_authority_code_eia"],
        "datetime_utc": operations["datetime_utc"],
        "total_net_generation_mwh": total_generation,
        "imputed_demand_mwh": imputed_demand,
        "adjusted_demand_mwh": adjusted_demand,
        "adjusted_interchange_mwh": interchange,
        "adjusted_demand_and_total_net_generation_mwh": (
            adjusted_demand - total_generation + interchange
        ),
        "imputed_demand_and_total_net_generation_mwh": (
            imputed_demand - total_generation + interchange
        ),
    },
)
balance = balance.set_index(ba_hour_keys).sort_index()

# Sum of the adjusted net generation over energy sources, aligned on the
# (balancing authority, hour) index of the balance table.
balance["aggregated_net_generation_mwh"] = generation.groupby(ba_hour_keys)[
    "net_generation_adjusted_mwh"
].sum()

for demand_kind in ("imputed", "adjusted"):
    balance[f"{demand_kind}_demand_and_aggregated_net_generation_mwh"] = (
        balance[f"{demand_kind}_demand_mwh"]
        - balance["aggregated_net_generation_mwh"]
        + balance["adjusted_interchange_mwh"]
    )
display(balance)

In [None]:
# Four stacked panels for one balancing authority, sharing the time axis:
# the four balance variants, the interchange, the net generation (total vs
# aggregated) and the demand (adjusted vs imputed).
ba_code = "AZPS"
balance_in_ba = balance.reset_index().query("balancing_authority_code_eia == @ba_code")
hourly_balance = balance_in_ba.set_index("datetime_utc")

# The figure size is set once here; it is not repeated in the plot calls
# because they draw on these existing axes.
fig, axes = plt.subplots(4, 1, figsize=(20, 12), sharex=True, sharey=False)

hourly_balance[
    [
        "adjusted_demand_and_total_net_generation_mwh",
        "adjusted_demand_and_aggregated_net_generation_mwh",
        "imputed_demand_and_total_net_generation_mwh",
        "imputed_demand_and_aggregated_net_generation_mwh",
    ]
].rename(
    columns={
        "adjusted_demand_and_total_net_generation_mwh": "Adjusted Demand - Total Generation + Interchange",
        "adjusted_demand_and_aggregated_net_generation_mwh": "Adjusted Demand - Aggregated Generation + Interchange",
        "imputed_demand_and_total_net_generation_mwh": "Imputed Demand - Total Generation + Interchange",
        "imputed_demand_and_aggregated_net_generation_mwh": "Imputed Demand - Aggregated Generation + Interchange",
    }
).plot(
    style=["<", ">", "s", "x"],
    fillstyle="none",
    markersize=12,
    alpha=0.75,
    title=f"{ba_code} ({year})",
    ylabel="Balance (MWh)",
    ax=axes[0],
)

# The rename key must be the actual column name, otherwise the rename is a
# silent no-op and the legend shows the raw column name.
hourly_balance[["adjusted_interchange_mwh"]].rename(
    columns={"adjusted_interchange_mwh": "Interchange (MWh)"}
).plot(
    ax=axes[1],
    ylabel="Interchange (MWh)",
)

hourly_balance[["total_net_generation_mwh", "aggregated_net_generation_mwh"]].rename(
    columns={
        "total_net_generation_mwh": "Total Generation (MWh)",
        "aggregated_net_generation_mwh": "Aggregated Generation (MWh)",
    }
).plot(
    ax=axes[2],
    ylabel="Net Generation (MWh)",
    style=["-", "o"],
    fillstyle="none",
)

hourly_balance[["adjusted_demand_mwh", "imputed_demand_mwh"]].rename(
    columns={
        "adjusted_demand_mwh": "Adjusted Demand (MWh)",
        "imputed_demand_mwh": "Imputed Demand (MWh)",
    }
).plot(
    ax=axes[3],
    ylabel="Demand (MWh)",
    style=["-", "o"],
    fillstyle="none",
)

axes[3].set_xlabel("Timestamp (UTC)", fontsize=12)

plt.show()

## Compare Interchange Fields

In [None]:
# Overlay the reported, adjusted and EIA-imputed interchange series for MISO.
interchange_columns = [
    "interchange_reported_mwh",
    "interchange_adjusted_mwh",
    "interchange_imputed_eia_mwh",
]
is_miso = operations["balancing_authority_code_eia"] == "MISO"
miso_interchange = (
    operations.loc[is_miso]
    .set_index("datetime_utc")
    .loc[:, interchange_columns]
    # The columns are cast to float before plotting.
    .astype(float)
)
miso_interchange.plot(
    style=["o", "-", "+"],
    fillstyle="none",
    markersize=12,
    figsize=(20, 8),
    xlabel="Timestamp (UTC)",
    ylabel="Interchange (MWh)",
    title=f"MISO Interchange ({year})",
)

plt.show()