### About
This notebook can be used to explore the processing steps in `pudl.analysis.allocate_gen_fuel`, one plant at a time.

In [None]:
import pandas as pd

import oge.load_data as load_data

from pudl.analysis import allocate_gen_fuel

# Year passed to every `load_data.load_pudl_table` call in this notebook.
year = 2022

In [None]:
# Pick a plant to investigate: this value is compared against the
# `plant_id_eia` column to filter each table displayed in the cells below.
plant_to_investigate = 47

In [None]:
# Load the five input tables for the selected year.
gf = load_data.load_pudl_table("denorm_generation_fuel_combined_eia923", year)
bf = load_data.load_pudl_table("denorm_boiler_fuel_eia923", year)
gen = load_data.load_pudl_table("denorm_generation_eia923", year)
gens = load_data.load_pudl_table("denorm_generators_eia", year)
bga = load_data.load_pudl_table("boiler_generator_assn_eia860", year)

# Pass all five tables through `select_input_data` and keep the returned
# versions under the same names for the rest of the notebook.
# NOTE: the result is unpacked as (gf, bf, gen, bga, gens), which is not the
# order the tables were loaded in above (`gens` comes last here).
gf, bf, gen, bga, gens = allocate_gen_fuel.select_input_data(
    gf=gf, bf=bf, gen=gen, bga=bga, gens=gens
)

In [None]:
# Display the rows of the boiler fuel table that belong to the selected plant.
bf.loc[bf["plant_id_eia"] == plant_to_investigate]

In [None]:
# Display the rows of the generation table that belong to the selected plant.
gen.loc[gen["plant_id_eia"] == plant_to_investigate]

In [None]:
# Total fuel consumed reported in the generation fuel table for the selected
# plant (row mask and column list applied in a single `.loc` selection).
gf.loc[
    gf["plant_id_eia"] == plant_to_investigate,
    ["fuel_consumed_mmbtu"],
].sum()

In [None]:
# Display the rows of the generation fuel table for the selected plant.
gf.loc[gf["plant_id_eia"] == plant_to_investigate]

In [None]:
# Frequency string handed to `standardize_input_frequency`
# (presumably the pandas month-start offset alias -- TODO confirm).
freq = "MS"
# Run bf, gens and gen through the frequency standardization step; the
# generators table comes back under a new name, `gens_at_freq`, while `bf`
# and `gen` are overwritten in place.
bf, gens_at_freq, gen = allocate_gen_fuel.standardize_input_frequency(
    bf, gens, gen, freq
)
# Add any startup energy source codes to the list of energy source codes
# Both calls below take (gens_at_freq, gf, bf) and replace `gens_at_freq`
# with their result, so they must run in this order.
gens_at_freq = allocate_gen_fuel.adjust_msw_energy_source_codes(gens_at_freq, gf, bf)
gens_at_freq = allocate_gen_fuel.add_missing_energy_source_codes_to_gens(
    gens_at_freq, gf, bf
)

In [None]:
# Display the frequency-standardized generator rows for the selected plant.
gens_at_freq.loc[gens_at_freq["plant_id_eia"] == plant_to_investigate]

In [None]:
# Associate the generator table (at the standardized frequency) with the
# generation fuel, generation, boiler fuel and boiler-generator association
# tables. The result, `gen_assoc`, is the input to the allocation-fraction step.
gen_assoc = allocate_gen_fuel.associate_generator_tables(
    gens=gens_at_freq, gf=gf, gen=gen, bf=bf, bga=bga
)

In [None]:
# Display the associated generator rows for the selected plant.
gen_assoc.loc[gen_assoc["plant_id_eia"] == plant_to_investigate]

In [None]:
# Generate a fraction to use to allocate net generation and fuel consumption by.
# These two methods create a column called `frac`, which will be a fraction
# to allocate net generation from the gf table for each `IDX_PM_ESC` group
# NOTE(review): the comment above refers to "two methods", but only a single
# call is made in this cell -- confirm whether the wording is out of date.
# The result, `gen_pm_fuel`, feeds both the net generation and the fuel
# allocation cells further down.
gen_pm_fuel = allocate_gen_fuel.prep_alloction_fraction(gen_assoc)

In [None]:
# Display the allocation-fraction rows for the selected plant.
gen_pm_fuel.loc[gen_pm_fuel["plant_id_eia"] == plant_to_investigate]

In [None]:
# Net gen allocation
# The allocated frame is piped through `_test_gen_pm_fuel_output` (with `gf`
# and `gen` as extra inputs) and whatever that returns is stored as
# `net_gen_alloc`.
net_gen_alloc = allocate_gen_fuel.allocate_gen_fuel_by_gen_esc(gen_pm_fuel).pipe(
    allocate_gen_fuel._test_gen_pm_fuel_output, gf=gf, gen=gen
)
# Second check: compare the allocation against the `gen` table.
allocate_gen_fuel.test_gen_fuel_allocation(gen, net_gen_alloc)

In [None]:
# Total allocated net generation for the selected plant.
net_gen_alloc.loc[
    net_gen_alloc["plant_id_eia"] == plant_to_investigate,
    ["net_generation_mwh"],
].sum()

In [None]:
# Total net generation in the generation fuel table for the selected plant,
# for comparison with the allocated total.
gf.loc[
    gf["plant_id_eia"] == plant_to_investigate,
    ["net_generation_mwh"],
].sum()

In [None]:
# Load the `generation_fuel_eia923` table for the same year. This is a
# different table from the `denorm_generation_fuel_combined_eia923` one held
# in `gf`.
gf_923 = load_data.load_pudl_table("generation_fuel_eia923", year)

In [None]:
# Display the `generation_fuel_eia923` rows for the selected plant.
gf_923.loc[gf_923["plant_id_eia"] == plant_to_investigate]

In [None]:
# Allocated net generation for the selected plant, summed per
# (report date, plant, prime mover, energy source) group.
net_gen_alloc.loc[net_gen_alloc["plant_id_eia"] == plant_to_investigate].groupby(
    by=["report_date", "plant_id_eia", "prime_mover_code", "energy_source_code"]
)["net_generation_mwh"].sum()

In [None]:
# Difference between allocated net generation and the net generation reported
# in `gf`, per (report date, plant, prime mover, energy source) group, for the
# selected plant. Only the first 50 groups are shown.
#
# `.sub(..., fill_value=0)` is used instead of the `-` operator: with `-`, a
# group that exists on only one side of the comparison aligns to NaN, which
# hides the size of exactly the discrepancies this cell is meant to reveal.
# With `fill_value=0` the missing side counts as zero, so the full amount shows
# up as the difference (the result is still NaN where both sides are missing).
(
    net_gen_alloc[net_gen_alloc["plant_id_eia"] == plant_to_investigate]
    .groupby(
        [
            "report_date",
            "plant_id_eia",
            "prime_mover_code",
            "energy_source_code",
        ]
    )["net_generation_mwh"]
    .sum()
    .sub(
        gf[gf["plant_id_eia"] == plant_to_investigate]
        .groupby(
            [
                "report_date",
                "plant_id_eia",
                "prime_mover_code",
                "energy_source_code",
            ]
        )["net_generation_mwh"]
        .sum(),
        fill_value=0,
    )
).head(50)

In [None]:
# fuel allocation
# Uses the same `gen_pm_fuel` input as the net generation allocation above.
fuel_alloc = allocate_gen_fuel.allocate_fuel_by_gen_esc(gen_pm_fuel)

In [None]:
# Total fuel consumed for the selected plant after allocation.
fuel_alloc.loc[
    fuel_alloc["plant_id_eia"] == plant_to_investigate,
    ["fuel_consumed_mmbtu"],
].sum()

In [None]:
# Display the allocated fuel rows for the selected plant.
fuel_alloc.loc[fuel_alloc["plant_id_eia"] == plant_to_investigate]

In [None]:
# Sanity check: within each (report date, plant, prime mover, energy source)
# group the allocation fractions should sum to 1.
fuel_alloc.loc[fuel_alloc["plant_id_eia"] == plant_to_investigate].groupby(
    by=["report_date", "plant_id_eia", "prime_mover_code", "energy_source_code"]
)["frac"].sum()

In [None]:
# See where there is a difference between inputs and outputs: allocated fuel
# minus the fuel reported in `gf`, per (report date, plant, prime mover,
# energy source) group, for the selected plant.
#
# `.sub(..., fill_value=0)` is used instead of the `-` operator: with `-`, a
# group that exists on only one side of the comparison aligns to NaN, which
# hides the size of the discrepancy. With `fill_value=0` the missing side
# counts as zero, so the full amount shows up as the difference (the result is
# still NaN where both sides are missing).
(
    fuel_alloc[fuel_alloc["plant_id_eia"] == plant_to_investigate]
    .groupby(
        [
            "report_date",
            "plant_id_eia",
            "prime_mover_code",
            "energy_source_code",
        ]
    )["fuel_consumed_mmbtu"]
    .sum()
    .sub(
        gf[gf["plant_id_eia"] == plant_to_investigate]
        .groupby(
            [
                "report_date",
                "plant_id_eia",
                "prime_mover_code",
                "energy_source_code",
            ]
        )["fuel_consumed_mmbtu"]
        .sum(),
        fill_value=0,
    )
)