# About this notebook
This notebook is used for testing code related to allocating the EIA-923 data before integrating it into PUDL and the data pipeline. 

I've also been using this to simply load EIA tables from the PUDL database to explore the EIA data as it exists in PUDL.

In [1]:
%reload_ext autoreload
%autoreload 2

import sys
sys.path.append('../../../hourly-egrid/')

# Useful high-level external modules.
import numpy as np
import pandas as pd
import sqlalchemy as sa
import warnings
import plotly.express as px

import pudl

import src.data_cleaning as data_cleaning
import src.load_data as load_data
import pudl.analysis.allocate_net_gen as allocate_gen_fuel


In [23]:
# Reporting year to analyze; it also sets the date window passed to PudlTabl below.
year = 2020

# SQLAlchemy engine for the local PUDL SQLite database. The path is relative,
# so it resolves against the notebook's working directory.
pudl_db = 'sqlite:///../data/downloads/pudl/pudl_data/sqlite/pudl.sqlite'
pudl_engine = sa.create_engine(pudl_db)

# PUDL output object that the later cells pull the EIA tables from. It is
# configured with freq='MS' and a Jan 1 - Dec 31 window for `year`.
pudl_out = pudl.output.pudltabl.PudlTabl(
    pudl_engine,
    freq='MS',
    start_date=f'{year}-01-01',
    end_date=f'{year}-12-31'
)

## Load EIA data

In [None]:
# Short alias for the allocation module whose functions are called throughout
# the cells below. The imports cell also binds this same module as
# `allocate_gen_fuel`. NOTE(review): the alias name suggests it is temporary.

remove_this = pudl.analysis.allocate_net_gen
# NOTE(review): not referenced by any of the visible cells - presumably a
# leftover flag; confirm before removing.
drop_interim_cols = False

# Column sets used as record identifiers by the cells below. Composite keys
# are assembled from the base keys so the shared columns are spelled only once.

# Id columns for generators.
IDX_GENS = ["report_date", "plant_id_eia", "generator_id"]

# Id columns for plant & fuel type records.
IDX_ESC = ["report_date", "plant_id_eia", "energy_source_code"]

# Id columns for generator, prime mover & fuel type records.
IDX_GENS_PM_ESC = IDX_GENS + ["prime_mover_code", "energy_source_code"]

# Id columns for plant, prime mover & fuel type records.
IDX_PM_ESC = IDX_ESC + ["prime_mover_code"]

# Id columns for plant, boiler, prime mover & fuel type records.
IDX_B_PM_ESC = (
    ["report_date", "plant_id_eia", "boiler_id"]
    + ["energy_source_code", "prime_mover_code"]
)

# Id columns for plant, fuel type & PUDL unit records.
IDX_U_ESC = IDX_ESC + ["unit_id_pudl"]

# Extract the generators table from pudl_out early and keep only the columns
# we need; this is for speed and clarity.
# The table is fetched once and reused, so the frame that is sliced and the
# frame used to discover the numbered energy/startup source-code columns are
# the same object (previously gens_eia860() was called three times).
gens_eia860 = pudl_out.gens_eia860()
gens_columns = (
    IDX_GENS
    + [
        "prime_mover_code",
        "unit_id_pudl",
        "capacity_mw",
        "fuel_type_count",
        "operational_status",
        "retirement_date",
    ]
    # every column whose name contains "energy_source_code"
    + list(gens_eia860.filter(like="energy_source_code"))
    # every column whose name contains "startup_source_code"
    + list(gens_eia860.filter(like="startup_source_code"))
)
gens = gens_eia860.loc[:, gens_columns]
# Generation & fuel table: keep the plant / prime mover / fuel keys plus the
# three reported quantities, then hand the slice to the allocation module's
# manually_fix_energy_source_codes step.
gf_columns = IDX_PM_ESC + [
    "net_generation_mwh",
    "fuel_consumed_mmbtu",
    "fuel_consumed_for_electricity_mmbtu",
]
gf = remove_this.manually_fix_energy_source_codes(
    pudl_out.gf_eia923().loc[:, gf_columns]
)
# Boiler fuel table. The boiler entity table is left-joined on plant + boiler
# id before the IDX_B_PM_ESC columns are selected.
bf_reported = pudl_out.bf_eia923()
boilers_entity = pd.read_sql("boilers_entity_eia", pudl_out.pudl_engine)
bf_keyed = bf_reported.merge(
    boilers_entity, how="left", on=["plant_id_eia", "boiler_id"]
).loc[:, IDX_B_PM_ESC + ["fuel_consumed_mmbtu"]]

# Spread annually reported fuel consumption across the months of the year.
bf_monthly = remove_this.distribute_annually_reported_data_to_months(
    bf_keyed,
    key_columns=["plant_id_eia", "boiler_id", "energy_source_code"],
    data_column_name="fuel_consumed_mmbtu",
)

# allocate the boiler fuel data to generators
bf = remove_this.allocate_bf_data_to_gens(bf_monthly, gens, pudl_out)

# Adjust the generators' energy source codes using the gf and bf tables
# (per the original note: adds startup energy source codes and fixes MSW codes).
gens = remove_this.adjust_energy_source_codes(gens, gf, bf)

# Check for generators without a prime mover code. Nothing is repaired here:
# the cell only warns, because a missing code breaks the later allocation.
missing_pm = gens.loc[gens["prime_mover_code"].isna()]
if len(missing_pm) > 0:
    warnings.warn(
        f"{len(missing_pm)} generators are missing prime mover codes in gens_eia860. "
        "This will result in incorrect allocation."
    )

# Expand gens to one record per generator per month (12 copies of each entry).
gens = remove_this.create_monthly_gens_records(gens)

# Generator-level net generation as originally reported, limited to the
# generator id columns plus net generation.
gen_reported = (
    pudl_out.gen_original_eia923()
    .loc[:, IDX_GENS + ["net_generation_mwh"]]
    # removes 4 records with NaN generator_id as of pudl v0.5
    .dropna(subset=IDX_GENS)
)

# Spread annually reported net generation across the months of the year.
gen_monthly = remove_this.distribute_annually_reported_data_to_months(
    gen_reported,
    key_columns=["plant_id_eia", "generator_id"],
    data_column_name="net_generation_mwh",
)

# The gen table is missing some generator ids. An outer merge against the
# monthly gens keys adds them, leaving net generation missing for those rows.
gen_keys = ["plant_id_eia", "generator_id", "report_date"]
gen = gen_monthly.merge(gens[gen_keys], how="outer", on=gen_keys)

In [None]:
# do the association!
gen_assoc = remove_this.associate_generator_tables(
    gf=gf, gen=gen, gens=gens, bf=bf, pudl_out=pudl_out
)

In [None]:
# Sanity check: show every gen_assoc row in which any of the fuel-consumption
# columns holds a negative value.
gen_assoc_fuel_cols = [
    "fuel_consumed_mmbtu_gf_tbl",
    "fuel_consumed_for_electricity_mmbtu_gf_tbl",
    "fuel_consumed_mmbtu_bf_tbl",
    "fuel_consumed_mmbtu_gf_tbl_fuel",
    "fuel_consumed_for_electricity_mmbtu_gf_tbl_fuel",
]
gen_assoc[gen_assoc[gen_assoc_fuel_cols].lt(0).any(axis=1)]

In [None]:
# Generate a fraction to use to allocate net generation and fuel consumption by.
# Per the original note, this step creates a column called `frac`, a fraction
# used to allocate net generation from the gf table for each `IDX_PM_ESC` group.
# NOTE(review): the original note spoke of "these two methods", but only one
# call is made here - confirm whether a second step is missing.
gen_pm_fuel = remove_this.prep_alloction_fraction(gen_assoc)

In [None]:
# Display the output of prep_alloction_fraction for inspection.
gen_pm_fuel

In [None]:
# Sanity check: show every gen_pm_fuel row in which any of the fuel-consumption
# columns holds a negative value.
gen_pm_fuel_fuel_cols = [
    "fuel_consumed_mmbtu_gf_tbl",
    "fuel_consumed_for_electricity_mmbtu_gf_tbl",
    "fuel_consumed_mmbtu_bf_tbl",
    "fuel_consumed_mmbtu_gf_tbl_fuel",
    "fuel_consumed_for_electricity_mmbtu_gf_tbl_fuel",
    "fuel_consumed_mmbtu_bf_tbl_pm_fuel",
    "fuel_consumed_mmbtu_bf_tbl_unit_fuel",
]
gen_pm_fuel[gen_pm_fuel[gen_pm_fuel_fuel_cols].lt(0).any(axis=1)]

In [None]:
# fuel allocation
# Runs allocate_fuel_by_gen_esc on the prepared gen_pm_fuel table and keeps
# the result in `fuel_alloc`.
fuel_alloc = remove_this.allocate_fuel_by_gen_esc(gen_pm_fuel)

In [None]:
# EIA plant id that the inspection cells below use to filter each table.
plant_id_to_investigate = 3399

In [None]:
# gen_assoc rows belonging to the plant under investigation.
gen_assoc.loc[gen_assoc['plant_id_eia'].eq(plant_id_to_investigate)]

In [None]:
# Monthly generator records belonging to the plant under investigation.
gens.loc[gens['plant_id_eia'].eq(plant_id_to_investigate)]

In [None]:
# Unique (plant, generator) pairs whose prime mover code is missing.
no_prime_mover = gens['prime_mover_code'].isna()
gens.loc[no_prime_mover, ['plant_id_eia', 'generator_id']].drop_duplicates()

In [None]:
# Show the generator records whose id contains the literal text 'ST'.
# na=False counts a missing generator_id as "no match", so the mask is always
# a plain boolean and the row selection cannot fail on NaN entries.
# regex=False states explicitly that 'ST' is a literal substring, not a pattern.
gens[gens['generator_id'].str.contains('ST', na=False, regex=False)]

In [None]:
# Generator-level net generation rows for the plant under investigation.
gen.loc[gen['plant_id_eia'].eq(plant_id_to_investigate)]

In [None]:
# Generation & fuel rows for the plant under investigation.
gf.loc[gf['plant_id_eia'].eq(plant_id_to_investigate)]

In [None]:
# January boiler-fuel rows for the plant under investigation.
# The date is built from `year` (set in the configuration cell) rather than
# hard-coded, so the filter still matches if the reporting year is changed.
january_report_date = f"{year}-01-01"
bf[(bf['plant_id_eia'] == plant_id_to_investigate) & (bf["report_date"] == january_report_date)]