# Update `OTH` Energy Source Code

In [None]:
%reload_ext autoreload
%autoreload 2

# Useful high-level external modules.
import numpy as np
import pandas as pd
import plotly.express as px

import sys

sys.path.append("../../src")

import oge.load_data as load_data
import oge.data_cleaning as data_cleaning
import oge.filepaths as filepaths

## Load the PUDL Fuel Content Table

In [None]:
# Columns needed to compare heat content across energy source codes.
heat_content_columns = [
    "plant_id_eia",
    "report_date",
    "energy_source_code",
    "fuel_mmbtu_per_unit",
    "fuel_consumed_mmbtu",
]

# Load the PUDL table `out_eia923__generation_fuel_combined` for year=2010
# through end_year=2014 (range semantics are defined by `load_pudl_table`).
fuel_heat_content = load_data.load_pudl_table(
    "out_eia923__generation_fuel_combined",
    year=2010,
    end_year=2014,
    columns=heat_content_columns,
)

# Treat infinite values as missing, then discard every row with a missing value.
fuel_heat_content = fuel_heat_content.replace([np.inf, -np.inf], np.nan).dropna()

# Keep only records with non-zero fuel consumption and non-zero heat content.
has_consumption = fuel_heat_content["fuel_consumed_mmbtu"] != 0
has_heat_content = fuel_heat_content["fuel_mmbtu_per_unit"] != 0
fuel_heat_content = fuel_heat_content[has_consumption & has_heat_content]
fuel_heat_content

## Identify Plants Reporting `OTH`


In [None]:
# Select only the records reported under the generic `OTH` energy source code.
reports_oth = fuel_heat_content["energy_source_code"] == "OTH"
heat_columns = ["fuel_mmbtu_per_unit", "fuel_consumed_mmbtu"]

# Average the heat content and fuel consumption per plant (one row per plant).
plants_with_oth = (
    fuel_heat_content.loc[reports_oth]
    .groupby("plant_id_eia")[heat_columns]
    .mean()
    .reset_index()
)

plants_with_oth

In [None]:
# Load the existing manual fuel map table and attach it to the `OTH` plants to
# see which ones have already been matched by hand. After the left join, any
# plant whose `updated_energy_source_code` value is missing still needs to be
# manually matched.
manual_table_path = filepaths.reference_table_folder(
    "updated_oth_energy_source_codes.csv"
)
manual_table = pd.read_csv(manual_table_path)

# validate="1:1" makes pandas raise if `plant_id_eia` is duplicated on either side.
plants_with_oth = pd.merge(
    plants_with_oth,
    manual_table,
    how="left",
    on="plant_id_eia",
    validate="1:1",
)
plants_with_oth

## Look at Specific Plant

In [None]:
# EIA plant ID to inspect; referenced as `@plant_id` by the query cells below.
plant_id = 10673

In [None]:
# Show every cleaned fuel record (all energy source codes) for the selected plant.
# `@plant_id` is resolved by pandas from the notebook variable of that name.
fuel_heat_content.query("plant_id_eia == @plant_id")

In [None]:
# Show only the selected plant's records that were reported with the `OTH` code.
fuel_heat_content.query("plant_id_eia == @plant_id and energy_source_code == 'OTH'")

## Visualize Heat Content by Fuel

In [None]:
# To manually match a fuel type, compare a plant's average `fuel_mmbtu_per_unit`
# from `plants_with_oth` against these per-fuel distributions and identify which
# energy source code's heat content matches best.
# The data is sorted by heat content before plotting, presumably so the fuels
# appear on the x-axis in ascending order -- TODO confirm against plotly output.
px.box(
    fuel_heat_content.sort_values(by="fuel_mmbtu_per_unit"),
    x="energy_source_code",
    y="fuel_mmbtu_per_unit",
    # Plain string: the title has no placeholders, so no f-string is needed.
    title="Range of heat contents for each fuel",
    width=1000,
    height=600,
)

In [None]:
# Isolate the `OTH` records of a single plant for a closer look at its heat content.
# NOTE(review): the plant ID is hard-coded to 1745 here, while `plant_id` is set
# to a different plant earlier in the notebook -- confirm whether this cell is
# meant to follow `plant_id` instead.
is_selected_plant = fuel_heat_content["plant_id_eia"] == 1745
is_oth_fuel = fuel_heat_content["energy_source_code"] == "OTH"
plant_data = fuel_heat_content[is_selected_plant & is_oth_fuel]
plant_data

In [None]:
# Box plot of the heat content values in `plant_data`, for comparison against
# the per-fuel distributions of the full dataset.
px.box(
    plant_data,
    x="energy_source_code",
    y="fuel_mmbtu_per_unit",
    # Plain string: the title has no placeholders, so no f-string is needed.
    title="Range of heat contents for each fuel",
    width=1000,
    height=600,
)