In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from oge.emissions import (
    return_monthly_plant_fuel_sulfur_content,
    return_monthly_national_fuel_sulfur_content,
    return_annual_national_fuel_sulfur_content,
)
from oge.load_data import load_pudl_table

# Get Sulfur Content for Years 2008 through 2022
Get the sulfur content (in percent) at three resolutions: plant/month, energy source code (ESC)/month, and ESC/year.

In [None]:
# Years covered by the analysis: 2008 through 2022 inclusive.
year = range(2008, 2023)

# One dictionary per resolution, each keyed by year.
plant, national, annual = {}, {}, {}

for y in year:
    print(f"Processing {y}")
    # Fetch the three tables for year `y`, in the same order every time.
    plant[y] = return_monthly_plant_fuel_sulfur_content(y)
    national[y] = return_monthly_national_fuel_sulfur_content(y)
    annual[y] = return_annual_national_fuel_sulfur_content(y)

## Annual Data
Here we consider the sulfur content for each energy source code and year combination.

In [None]:
# Build a wide table keyed on energy source code with one column per year.
all_annual = None
for y in year:
    # `rename` returns a new frame, so the cached `annual[y]` is left untouched.
    yearly = annual[y].rename(columns={"sulfur_content_pct": f"{y}"})
    if all_annual is None:
        # First year seeds the table.
        all_annual = yearly.copy()
    else:
        # Outer merge keeps codes that are missing in some years.
        all_annual = all_annual.merge(yearly, how="outer", on="energy_source_code")
display(all_annual)

### Plot Annual Average Across Years

In [None]:
# Row-wise mean over the numeric columns, skipping missing values.
all_annual["average"] = all_annual.mean(axis=1, numeric_only=True, skipna=True)

# Rename only for display so the x-axis label reads nicely.
average_by_code = all_annual.rename(
    columns={"energy_source_code": "Energy Source Code"}
)
average_by_code.plot.bar(
    x="Energy Source Code",
    y="average",
    title="Average Sulfur Content Percentage",
    legend=False,
    figsize=(10, 10),
)
plt.show()

### Plot Annual for All Years
We only consider energy source codes whose average sulfur content is non-zero.

In [None]:
# Keep only the energy source codes whose average sulfur content is positive.
reduced_annual = all_annual[all_annual["average"] > 0]

# Stacked bar chart with one segment per year column; the helper "average"
# column is dropped so it is not stacked together with the yearly values.
(
    reduced_annual.rename(columns={"energy_source_code": "Energy Source Code"})
    .drop(columns="average")
    .plot(
        kind="bar",
        x="Energy Source Code",
        stacked=True,
        title="Sulfur Content Percentage",
        # `legend` expects a bool (or "reverse"); the year labels are taken from
        # the column names automatically, so a plain True is what is needed.
        legend=True,
        figsize=(10, 10),
    )
)
plt.show()

Sulfur content appears to be constant across years for each energy source code. Note that `SC` (coal-derived syngas) is non-zero for 2008 only.

## National Data
Here we consider sulfur content for each energy source code and for each report date across all years

In [None]:
# Build a wide table keyed on (energy source code, month) with one column per year.
all_national = None
for y in year:
    # Work on a copy: inserting "month" into the cached frame itself would make
    # this cell fail on re-run ("cannot insert month, already exists") whenever
    # a previous run was interrupted before the column was removed again.
    monthly = national[y].copy()
    monthly.insert(1, "month", monthly["report_date"].dt.month)
    monthly = monthly.rename(columns={"sulfur_content_pct": f"{y}"}).drop(
        columns="report_date"
    )

    if all_national is None:
        # First year seeds the table.
        all_national = monthly
    else:
        # Outer merge keeps (code, month) pairs that are missing in some years.
        all_national = pd.merge(
            all_national,
            monthly,
            how="outer",
            on=["energy_source_code", "month"],
        )

display(all_national)

### Plot National Data
Compare monthly data across years for each energy source code 

In [None]:
# One month-indexed frame (columns = years) per energy source code kept in
# `reduced_annual`.
reduced_national = {}
for e in reduced_annual["energy_source_code"].to_list():
    rows_for_code = all_national[all_national["energy_source_code"] == e]
    reduced_national[e] = rows_for_code.drop(columns="energy_source_code").set_index(
        "month"
    )

In [None]:
# Size the grid from the number of energy source codes instead of assuming
# exactly twelve: a fixed 4x3 grid silently drops any code beyond the twelfth.
n_cols = 4
n_rows = max(1, -(-len(reduced_national) // n_cols))  # ceiling division
fig, axes = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=(20, 10), squeeze=False)
panels = axes.flatten()

for e, ax in zip(reduced_national, panels):
    reduced_national[e].plot(
        title=e,
        ylabel="Sulfur Content Percentage",
        ax=ax,
        legend=False,
    )

# Hide the trailing panels that did not receive any data.
for unused_ax in panels[len(reduced_national) :]:
    unused_ax.set_visible(False)

# Every panel plots the same year columns, so the first panel's handles describe
# all of them. Reading them from plt.gca() is fragile: the current axes is the
# last subplot created, which has no lines when the grid is not completely filled.
handles, labels = panels[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="center right")
fig.tight_layout()
plt.show()

With the exception of `KER` (Kerosene), monthly values vary across years, and the pattern of variation differs from year to year.

## Plant Data
Let's focus on some `BIT` (Bituminous Coal) and `PC` (Petroleum Coke) plants, as these fuel types have the highest sulfur content.

In [None]:
# Only bituminous coal and petroleum coke rows are kept.
fuel_filter = "energy_source_code == 'BIT' or energy_source_code == 'PC'"

# Build a wide table keyed on (plant, energy source code, month) with one
# column per year.
all_plant = None
for y in year:
    # Work on a copy: inserting "month" into the cached frame itself would make
    # this cell fail on re-run ("cannot insert month, already exists") whenever
    # a previous run was interrupted before the column was removed again.
    monthly = plant[y].copy()
    monthly.insert(1, "month", monthly["report_date"].dt.month)
    monthly = (
        monthly.query(fuel_filter)
        .rename(columns={"sulfur_content_pct": f"{y}"})
        .drop(columns="report_date")
    )

    if all_plant is None:
        # The first year seeds the table and is the only one that contributes
        # the "prime_mover_code" column.
        all_plant = monthly
    else:
        all_plant = pd.merge(
            all_plant,
            monthly.drop(columns="prime_mover_code"),
            how="outer",
            on=["plant_id_eia", "energy_source_code", "month"],
        )

display(all_plant)

### Select `BIT` and `PC` Plants with Data for All years and at least 10 Months

In [None]:
# Keep the rows in which every numeric column (including all year columns)
# holds a value.
numeric_columns = all_plant.select_dtypes(include=["number"])
row_is_complete = numeric_columns.notnull().all(axis=1)
all_plant_keep = all_plant[row_is_complete]
display(all_plant_keep)

In [None]:
# Number of complete rows available for each plant, broadcast back to every row.
# NOTE(review): this counts rows per plant across all fuels, not distinct months
# per (plant, fuel) -- confirm this matches the "at least 10 months" intent.
rows_per_plant = all_plant_keep.groupby("plant_id_eia")["month"].transform("size")
plants_with_enough_rows = all_plant_keep[rows_per_plant >= 10]

# For each energy source code, the array of plant ids that passed the filter.
all_plant_keep_idx = plants_with_enough_rows.groupby(["energy_source_code"])[
    "plant_id_eia"
].unique()

display(all_plant_keep_idx)

### Plot Plant Data

In [None]:
# Draw one plant per fuel with a seeded generator so that the tables and figures
# below are identical on every Restart & Run All.
rng = np.random.default_rng(42)
bit_idx = rng.choice(all_plant_keep_idx.loc["BIT"])
pc_idx = rng.choice(all_plant_keep_idx.loc["PC"])

In [None]:
# Show the id of the plant that was drawn for `BIT`.
bit_idx

In [None]:
# Rows of the selected plant, restricted to bituminous coal. Without the fuel
# filter a plant that also reports `PC` would contribute those rows as well,
# although the figure below is titled "BIT".
bit_plant = (
    all_plant_keep.query("energy_source_code == 'BIT'")
    .set_index("plant_id_eia")
    # A list key always returns a DataFrame, even when a single row matches.
    .loc[[bit_idx]]
    .round(2)
)
display(bit_plant)

In [None]:
# Rows of the selected plant, restricted to petroleum coke. Without the fuel
# filter a plant that also reports `BIT` would contribute those rows as well,
# although the figure below is titled "PC".
pc_plant = (
    all_plant_keep.query("energy_source_code == 'PC'")
    .set_index("plant_id_eia")
    # A list key always returns a DataFrame, even when a single row matches.
    .loc[[pc_idx]]
    .round(2)
)
display(pc_plant)

In [None]:
fig, axes = plt.subplots(2)

# One panel per selected plant: (fuel label, plant id, data, target axes).
panels = [
    ("BIT", bit_idx, bit_plant, axes[0]),
    ("PC", pc_idx, pc_plant, axes[1]),
]
for fuel, plant_id, frame, ax in panels:
    # Only numeric columns are plotted: "month" on the x-axis, the rest as lines.
    frame.select_dtypes(include="number").plot(
        x="month",
        title=f"{fuel}, plant #{plant_id}",
        ylabel="Sulfur Content Percentage",
        figsize=(20, 10),
        ax=ax,
        legend=False,
    )

# A single figure-level legend replaces the per-panel legends.
handles, labels = plt.gca().get_legend_handles_labels()
fig.legend(handles, labels, loc="center right")
fig.tight_layout()
plt.show()

### State Level Average

In [None]:
# Plant id and state columns from the PUDL plant entity table, used further
# down to attach a state to every plant.
plant_state_columns = ["plant_id_eia", "state"]
plant_state = load_pudl_table("core_eia__entity_plants", columns=plant_state_columns)

In [None]:
# Energy source codes with a non-zero annual average.
esc_keep = reduced_annual["energy_source_code"].to_list()

# Build a wide table keyed on (plant, energy source code, month) with one
# column per year.
plant_average = None
for y in year:
    # Work on a copy: inserting "month" into the cached frame itself would make
    # this cell fail on re-run ("cannot insert month, already exists") whenever
    # a previous run was interrupted before the column was removed again.
    monthly = plant[y].copy()
    monthly.insert(1, "month", monthly["report_date"].dt.month)
    monthly = (
        monthly.query("energy_source_code == @esc_keep")
        .rename(columns={"sulfur_content_pct": f"{y}"})
        .drop(columns="report_date")
    )

    if plant_average is None:
        # The first year seeds the table and is the only one that contributes
        # the "prime_mover_code" column.
        plant_average = monthly
    else:
        plant_average = pd.merge(
            plant_average,
            monthly.drop(columns="prime_mover_code"),
            how="outer",
            on=["plant_id_eia", "energy_source_code", "month"],
        )

# Collapse to one value per (plant, energy source code): first the mean of each
# year column over the plant's rows, then the mean across the year columns.
plant_average = (
    plant_average.drop(columns=["month", "prime_mover_code"])
    .groupby(["plant_id_eia", "energy_source_code"])
    .mean()
    .mean(axis=1)
)

display(plant_average)

In [None]:
# Count the (plant, energy source code) entries whose average is missing.
n_missing = plant_average.isna().sum()
display(f"Number of plant with no sulfur content: {n_missing}")

In [None]:
# Discard the entries that have no average sulfur content.
plant_average = plant_average[plant_average.notna()]

In [None]:
display("Breakdown by energy source code:")
# Number of remaining entries for each energy source code.
codes = plant_average.reset_index()["energy_source_code"]
display(codes.value_counts())

In [None]:
# Turn the series into a frame with a named value column, then attach the
# state of each plant; the left join keeps plants without a state entry.
plant_average = plant_average.reset_index(name="average_sulfur_content").merge(
    plant_state, on="plant_id_eia", how="left"
)

display(plant_average)

In [None]:
# Mean of the plant averages for every (energy source code, state) pair.
by_code_and_state = plant_average.groupby(["energy_source_code", "state"])
state_average = by_code_and_state["average_sulfur_content"].mean()

display(state_average)

In [None]:
# Reshape to one row per energy source code and one column per state.
state_table = (
    state_average.reset_index()
    .rename(columns={"energy_source_code": "Energy Source Code"})
    .pivot(columns="state", index="Energy Source Code", values="average_sulfur_content")
)

# Stacked bars: one bar per energy source code, one segment per state.
state_table.plot(
    kind="bar",
    stacked=True,
    title="Sulfur Content Percentage",
    figsize=(10, 20),
)
plt.show()