In [None]:
# import packages
import numpy as np
import pandas as pd
import argparse
import os

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys

sys.path.append("../../hourly-egrid/")

# import local modules
import src.data_cleaning as data_cleaning
import src.load_data as load_data
import src.impute_hourly_profiles as impute_hourly_profiles
import src.eia930 as eia930
import src.output_data as output_data

from src.column_checks import get_dtypes


# Investigate CEMS plants that report steam load

In [None]:
# Settings used to build the paths of the csv files read below
year = 2020
path_prefix = ''

# Read the CEMS data and the allocated EIA-923 data from the outputs folder,
# using the dtypes returned by get_dtypes()
cems = pd.read_csv(
    f'../data/outputs/{path_prefix}{year}/cems_{year}.csv',
    dtype=get_dtypes(),
)
eia923_allocated = pd.read_csv(
    f'../data/outputs/{path_prefix}{year}/eia923_allocated_{year}.csv',
    dtype=get_dtypes(),
)

In [None]:
# Sum the CEMS and EIA-923 data for each plant / subplant / report_date.
# The columns of interest are selected BEFORE calling .sum() so that the other
# (possibly non-numeric) columns are never aggregated: summing them is wasted
# work and, with pandas >= 2.0, concatenates strings or raises on dtypes that
# cannot be summed.
subplant_keys = ["plant_id_eia", "subplant_id", "report_date"]

cems_agg = (
    cems.groupby(subplant_keys)[
        [
            "gross_generation_mwh",
            "net_generation_mwh",
            "steam_load_1000_lb",
            "fuel_consumed_mmbtu",
            "fuel_consumed_for_electricity_mmbtu",
        ]
    ]
    .sum()
    .reset_index()
)
eia_agg = (
    eia923_allocated.groupby(subplant_keys)[
        [
            "net_generation_mwh",
            "fuel_consumed_mmbtu",
            "fuel_consumed_for_electricity_mmbtu",
        ]
    ]
    .sum()
    .reset_index()
)

# keep only the CEMS rows that report a positive steam load, then attach the
# matching EIA-923 totals (columns present in both get _cems / _eia suffixes)
cems_steam = cems_agg[cems_agg["steam_load_1000_lb"] > 0]
cems_steam = cems_steam.merge(eia_agg, how="left", on=subplant_keys, suffixes=("_cems", "_eia"))
cems_steam

# Get a list of all CHP plants

In [None]:
pudl_out = load_data.initialize_pudl_out(year)

# Plants whose EIA-860 sector name is one of the CHP sectors
chp_sectors = ["IPP CHP", "Industrial CHP", "Commercial CHP"]
chp_plants = (
    pudl_out.plants_eia860()[["plant_id_eia", "sector_name_eia"]]
    .loc[lambda plants: plants.sector_name_eia.isin(chp_sectors)]
)

# Generators whose EIA-860 record carries the CHP flag ...
chp_gens = (
    pudl_out.gens_eia860()[["plant_id_eia", "generator_id", "associated_combined_heat_power"]]
    .loc[lambda gens: gens["associated_combined_heat_power"] == True]
)
# ... reduced to one row per plant that has at least one flagged generator
plants_with_chp_gens = chp_gens[["plant_id_eia", "associated_combined_heat_power"]].drop_duplicates()


In [None]:
# Outer join of the two CHP lists, so plants identified by only one of the two
# methods (generator flag vs. plant sector) are kept as well
pd.merge(plants_with_chp_gens, chp_plants, how="outer", on="plant_id_eia")

# Fix CHP Allocation

In [None]:
# Run the allocated EIA-923 data through the electric allocation factor
# calculation from data_cleaning and display the result
eia923_allocated = eia923_allocated.pipe(data_cleaning.calculate_electric_allocation_factor)
eia923_allocated

In [None]:
# Inspect the CEMS records for plant 126
cems.loc[cems["plant_id_eia"] == 126]

In [None]:
# Column sums of the EIA-923 records for plant 126 with prime mover code "IC"
# (an extra filter on hourly_data_source == "cems" was left disabled)
eia923_allocated.loc[
    (eia923_allocated["plant_id_eia"] == 126)
    & (eia923_allocated["prime_mover_code"] == "IC")
].sum()

In [None]:
# Plant-level totals for each report_date. The columns are selected BEFORE
# calling .sum() so the remaining (possibly non-numeric) columns of
# eia923_allocated are never aggregated; with pandas >= 2.0 summing them
# concatenates strings or raises on dtypes that cannot be summed.
plant_chp_allocation = (
    eia923_allocated.groupby(["plant_id_eia", "report_date"], dropna=False)[
        ["net_generation_mwh", "fuel_consumed_mmbtu", "fuel_consumed_for_electricity_mmbtu"]
    ]
    .sum()
    .reset_index()
)

# share of the total fuel that was consumed for electricity
plant_chp_allocation["fuel_ratio"] = (
    plant_chp_allocation["fuel_consumed_for_electricity_mmbtu"]
    / plant_chp_allocation["fuel_consumed_mmbtu"]
)
# 0 / 0 evaluates to NaN: where no fuel was consumed at all, use a ratio of 1
plant_chp_allocation.loc[
    (plant_chp_allocation["fuel_consumed_for_electricity_mmbtu"] == 0)
    & (plant_chp_allocation["fuel_consumed_mmbtu"] == 0),
    "fuel_ratio",
] = 1

plant_chp_allocation = data_cleaning.calculate_electric_allocation_factor(plant_chp_allocation)
plant_chp_allocation

In [None]:
# NOTE: the fuel columns are selected BEFORE calling .sum() so the remaining
# (possibly non-numeric) columns of eia923_allocated are never aggregated; with
# pandas >= 2.0 summing them concatenates strings or raises on dtypes that
# cannot be summed.

# calculate a subplant fuel ratio
subplant_fuel_ratio = (
    eia923_allocated.groupby(["plant_id_eia", "subplant_id", "report_date"], dropna=False)[
        ["fuel_consumed_mmbtu", "fuel_consumed_for_electricity_mmbtu"]
    ]
    .sum()
    .reset_index()
)
subplant_fuel_ratio["subplant_fuel_ratio"] = (
    subplant_fuel_ratio["fuel_consumed_for_electricity_mmbtu"]
    / subplant_fuel_ratio["fuel_consumed_mmbtu"]
)
# 0 / 0 evaluates to NaN: where no fuel was consumed at all, use a ratio of 1
subplant_fuel_ratio.loc[
    (subplant_fuel_ratio["fuel_consumed_for_electricity_mmbtu"] == 0)
    & (subplant_fuel_ratio["fuel_consumed_mmbtu"] == 0),
    "subplant_fuel_ratio",
] = 1

# calculate a plant fuel ratio to fill missing values where there is not a matching subplant in CEMS
plant_fuel_ratio = (
    eia923_allocated.groupby(["plant_id_eia", "report_date"], dropna=False)[
        ["fuel_consumed_mmbtu", "fuel_consumed_for_electricity_mmbtu"]
    ]
    .sum()
    .reset_index()
)
plant_fuel_ratio["plant_fuel_ratio"] = (
    plant_fuel_ratio["fuel_consumed_for_electricity_mmbtu"]
    / plant_fuel_ratio["fuel_consumed_mmbtu"]
)
plant_fuel_ratio.loc[
    (plant_fuel_ratio["fuel_consumed_for_electricity_mmbtu"] == 0)
    & (plant_fuel_ratio["fuel_consumed_mmbtu"] == 0),
    "plant_fuel_ratio",
] = 1

In [None]:
# Inspect the subplant-level fuel ratios for plant 7
subplant_fuel_ratio.loc[subplant_fuel_ratio["plant_id_eia"] == 7]

In [None]:
# Inspect the plant-level fuel ratios for plant 7
plant_fuel_ratio.loc[plant_fuel_ratio["plant_id_eia"] == 7]

In [None]:
def adjust_cems_for_chp(cems, eia923_allocated):
    """
    Adjusts CEMS fuel consumption and emissions data for CHP.

    Steps:
        1. Calculate the ratio between `fuel_consumed_for_electricity_mmbtu` and `fuel_consumed_mmbtu` in EIA-923,
           once per plant/subplant/report_date and once per plant/report_date
        2. Use this ratio to calculate a `fuel_consumed_for_electricity_mmbtu` from the `fuel_consumed_mmbtu` data reported in CEMS.
           The subplant ratio is used where one matches, then the plant ratio, and finally a ratio of 1.
        3. Calculate an electric allocation factor using the fuel and net generation data
        4. Use the allocation factor to adjust emissions
           (steps 3 and 4 are delegated to `data_cleaning.adjust_emissions_for_CHP`)
    Args:
        cems: dataframe of hourly cems data after cleaning and gross to net calculations.
            Must contain `plant_id_eia`, `subplant_id`, `report_date` and `fuel_consumed_mmbtu`, and must not
            already contain `subplant_fuel_ratio` or `plant_fuel_ratio` (the merges would suffix the duplicated
            names). An existing `fuel_consumed_for_electricity_mmbtu` column is overwritten.
        eia923_allocated: dataframe of EIA-923 data after allocation. Must contain `plant_id_eia`, `subplant_id`,
            `report_date`, `fuel_consumed_mmbtu` and `fuel_consumed_for_electricity_mmbtu`.
    Returns:
        The dataframe returned by `data_cleaning.adjust_emissions_for_CHP` when it is given a copy of `cems` with
        the added columns `subplant_fuel_ratio`, `plant_fuel_ratio` and `fuel_consumed_for_electricity_mmbtu`.
        The `cems` dataframe passed in is not modified by this function itself.
    """
    subplant_keys = ["plant_id_eia", "subplant_id", "report_date"]
    plant_keys = ["plant_id_eia", "report_date"]

    # calculate a subplant fuel ratio
    subplant_fuel_ratio = _calculate_fuel_ratio(eia923_allocated, subplant_keys, "subplant_fuel_ratio")
    # calculate a plant fuel ratio to fill missing values where there is not a matching subplant in CEMS
    plant_fuel_ratio = _calculate_fuel_ratio(eia923_allocated, plant_keys, "plant_fuel_ratio")

    # merge the fuel ratios into cems and fill missing subplant ratios with plant ratios
    # (merge returns a new dataframe, so the caller's `cems` is left untouched)
    cems = cems.merge(subplant_fuel_ratio, how="left", on=subplant_keys)
    cems = cems.merge(plant_fuel_ratio, how="left", on=plant_keys)
    cems["subplant_fuel_ratio"] = cems["subplant_fuel_ratio"].fillna(cems["plant_fuel_ratio"])

    # if there are any missing ratios, assume that the ratio is 1
    cems["subplant_fuel_ratio"] = cems["subplant_fuel_ratio"].fillna(1)

    # calculate fuel_consumed_for_electricity_mmbtu
    cems["fuel_consumed_for_electricity_mmbtu"] = cems["fuel_consumed_mmbtu"] * cems["subplant_fuel_ratio"]

    # add adjusted emissions columns
    # TODO: remove data_cleaning
    cems = data_cleaning.adjust_emissions_for_CHP(cems)

    return cems


def _calculate_fuel_ratio(eia923_allocated, key_columns, ratio_column):
    """Return `key_columns` plus `ratio_column`: fuel consumed for electricity / total fuel consumed per group."""
    fuel_columns = ["fuel_consumed_mmbtu", "fuel_consumed_for_electricity_mmbtu"]

    # select the fuel columns BEFORE summing so the other (possibly non-numeric)
    # columns are never aggregated; with pandas >= 2.0 summing them concatenates
    # strings or raises on dtypes that cannot be summed
    fuel_totals = (
        eia923_allocated.groupby(key_columns, dropna=False)[fuel_columns]
        .sum()
        .reset_index()
    )

    fuel_totals[ratio_column] = (
        fuel_totals["fuel_consumed_for_electricity_mmbtu"] / fuel_totals["fuel_consumed_mmbtu"]
    )

    # 0 / 0 evaluates to NaN: where no fuel was consumed at all, use a ratio of 1
    no_fuel_consumed = (fuel_totals["fuel_consumed_for_electricity_mmbtu"] == 0) & (
        fuel_totals["fuel_consumed_mmbtu"] == 0
    )
    fuel_totals.loc[no_fuel_consumed, ratio_column] = 1

    return fuel_totals[key_columns + [ratio_column]]
    


In [None]:
# Remove the existing electricity-fuel, adjusted-emissions and fuel_ratio
# columns from the loaded CEMS data
cems = cems.drop(
    columns=[
        "fuel_consumed_for_electricity_mmbtu",
        "co2_mass_lb_for_electricity",
        "ch4_mass_lb_for_electricity",
        "n2o_mass_lb_for_electricity",
        "nox_mass_lb_for_electricity",
        "so2_mass_lb_for_electricity",
        "fuel_ratio",
    ]
)

In [None]:
# Apply the CHP adjustment defined in this notebook to the CEMS data
cems = adjust_cems_for_chp(cems=cems, eia923_allocated=eia923_allocated)

In [None]:
# Column sums of the EIA-923 records for plant 2018
eia923_allocated.loc[eia923_allocated["plant_id_eia"] == 2018].sum()

In [None]:
# Inspect the CEMS records for plant 7
cems.loc[cems["plant_id_eia"] == 7]

In [None]:
# Plant-level fuel totals for each report_date. The fuel columns are selected
# BEFORE calling .sum() so the remaining (possibly non-numeric) columns of
# eia923_allocated are never aggregated; with pandas >= 2.0 summing them
# concatenates strings or raises on dtypes that cannot be summed.
plant_fuel_ratio = (
    eia923_allocated.groupby(["plant_id_eia", "report_date"], dropna=False)[
        ["fuel_consumed_mmbtu", "fuel_consumed_for_electricity_mmbtu"]
    ]
    .sum()
    .reset_index()
)
# share of the total fuel that was consumed for electricity
plant_fuel_ratio["fuel_ratio"] = (
    plant_fuel_ratio["fuel_consumed_for_electricity_mmbtu"]
    / plant_fuel_ratio["fuel_consumed_mmbtu"]
)
# 0 / 0 evaluates to NaN: where no fuel was consumed at all, use a ratio of 1
plant_fuel_ratio.loc[
    (plant_fuel_ratio["fuel_consumed_for_electricity_mmbtu"] == 0)
    & (plant_fuel_ratio["fuel_consumed_mmbtu"] == 0),
    "fuel_ratio",
] = 1