In [3]:
import pandas as pd

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys

sys.path.append("../../hourly-egrid/")
from src.column_checks import get_dtypes
import src.load_data as load_data

# NOX

When calculating NOx:
Apply emission rates in the following order based on availability:  
    1. boiler-specific ozone-season NOx emission rates reported in EIA-923 Schedule 8C  
    2. boiler-specific annual NOx emission rates reported in EIA-923 Schedule 8C  
    3. fuel, prime mover, and boiler firing type specific emission factors  


When calculating nox for gen fuel:
1. First calculate based on fuel, prime mover, and boiler firing type specific emission factors  
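2. Then, once we've aggregated by generator, ... (TODO: this step is unfinished — presumably replace the factor-based values with the generator-specific emission rates where available)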

Control-specific emission rates:
- [x] Load control-specific nox rates from EIA-923 8C
- [x] Load control-boiler associations
- [ ] Load control attributes
- [ ] Map controls to boilers
- [ ] Map boilers to generators 
- [ ] Calculate generator-specific emission rates

Boiler firing types

In [122]:
# Data year passed to the load_data loader calls in this notebook.
year = 2020

## Export current NOx and 

### Load boiler firing type data

In [147]:
# Load the boiler design parameters table for `year`; the displayed columns
# include firing_type_1..3 and boiler_bottom_type for each plant_id_eia / boiler_id.
boiler_design_parameters_eia860 = load_data.load_boiler_design_parameters_eia860(year)

# TODO: Apply dtypes to this dataframe (presumably via the imported get_dtypes — confirm)
boiler_design_parameters_eia860

Unnamed: 0,plant_id_eia,boiler_id,operational_status,firing_type_1,firing_type_2,firing_type_3,boiler_bottom_type
0,3,1,OP,TF,,,DRY
1,3,2,OP,TF,,,DRY
2,3,3,RE,TF,,,DRY
3,3,4,OP,TF,,,DRY
4,3,5,OP,TF,,,DRY
...,...,...,...,...,...,...,...
4630,62949,HRSG2,CO,DB,,,
4631,62949,HRSG3,CO,DB,,,
4632,64020,HRSG3,OP,OT,,,
4633,64020,HRSG4,OP,OT,,,


In [148]:
# Distinct values present in boiler_bottom_type (missing values show up as nan).
boiler_design_parameters_eia860.boiler_bottom_type.unique()

array(['DRY', nan, 'WET'], dtype=object)

In [None]:
# Human-readable labels for the two-letter firing type codes
# (codes such as TF, DB and OT appear in the firing_type_1 column).
firing_type_description = dict(
    (
        ("CB", "CELLBURNER"),
        ("CY", "CYCLONE"),
        ("DB", "DUCTBURNER"),
        ("FB", "FLUIDIZED"),
        ("SS", "STOKER"),
        ("TF", "TANGENTIAL"),
        ("VF", "VERTICAL"),
        ("WF", "WALL"),
        ("OT", "OTHER"),
    )
)

In [146]:
# Sanity check: show every row whose (plant_id_eia, boiler_id) pair occurs more than once.
# An empty result means the pair uniquely identifies a row of this table.
boiler_design_parameters_eia860[boiler_design_parameters_eia860.duplicated(subset=["plant_id_eia","boiler_id"], keep=False)]

Unnamed: 0,plant_id_eia,boiler_id,operational_status,firing_type_1,firing_type_2,firing_type_3,boiler_bottom_type


### Boiler and Generator-Specific NOx rates

In [139]:
def calculate_boiler_specific_nox_rates(year):
    """Return weighted-average NOx emission rates aggregated to the boiler level.

    Loads the control-level NOx rates for `year`, derives the non-ozone-season
    rate, and averages the rates for each plant_id_eia / boiler_id pair.
    """
    control_rates = calculate_non_ozone_season_nox_rate(load_nox_emission_rates(year))
    return calculate_weighted_nox_rates(year, control_rates, "boiler_id")

def calculate_generator_specific_nox_rates(year):
    """Return weighted-average NOx emission rates aggregated to the generator level.

    Loads the control-level NOx rates for `year`, derives the non-ozone-season
    rate, and averages the rates for each plant_id_eia / generator_id pair.
    """
    control_rates = calculate_non_ozone_season_nox_rate(load_nox_emission_rates(year))
    return calculate_weighted_nox_rates(year, control_rates, "generator_id")

def load_nox_emission_rates(year):
    """Load control-level NOx emission rates from the EIA-923 emissions control data.

    Keeps only records that have a nox_control_id, whose operational_status is
    "OP", and that report at least one of the annual or ozone-season NOx rates.

    Args:
        year: data year passed to load_data.load_emissions_controls_eia923.

    Returns:
        DataFrame with plant_id_eia, nox_control_id, hours_in_service and the
        annual and ozone-season NOx emission rate columns.
    """
    rate_columns = [
        "annual_nox_emission_rate_lb_per_mmbtu",
        "ozone_season_nox_emission_rate_lb_per_mmbtu",
    ]
    output_columns = [
        "plant_id_eia",
        "nox_control_id",
        "hours_in_service",
        *rate_columns,
    ]

    controls = load_data.load_emissions_controls_eia923(year)

    # keep operating records that are tied to a NOx control
    is_operating_nox_control = controls["nox_control_id"].notna() & (
        controls["operational_status"] == "OP"
    )
    nox_rates = controls.loc[is_operating_nox_control, output_columns]

    # a record is only usable if at least one of the two rates was reported
    return nox_rates.dropna(subset=rate_columns, how="all")


def calculate_weighted_nox_rates(year, nox_rates, aggregation_level):
    """Aggregate control-level NOx rates to boilers or generators.

    Each nox_control_id is mapped to its boiler(s) and, when
    `aggregation_level` is "generator_id", further to the generator(s) of those
    boilers. The rates are then averaged for every plant_id_eia /
    `aggregation_level` group, weighted by hours_in_service.

    Args:
        year: data year used to load the association tables.
        nox_rates: control-level rates containing plant_id_eia, nox_control_id,
            hours_in_service and the annual, ozone-season and non-ozone-season
            rate columns.
        aggregation_level: name of the id column to aggregate to; the callers
            in this notebook pass "boiler_id" or "generator_id".

    Returns:
        DataFrame with one row per plant_id_eia / aggregation_level group and
        the three weighted rate columns.
    """
    control_keys = ["plant_id_eia", "nox_control_id"]
    rate_columns = [
        "annual_nox_emission_rate_lb_per_mmbtu",
        "ozone_season_nox_emission_rate_lb_per_mmbtu",
        "non_ozone_season_nox_emission_rate_lb_per_mmbtu",
    ]

    # attach the boiler id(s) associated with each NOx control
    boiler_assn = load_data.load_boiler_nox_association_eia860(year)
    rates = nox_rates.merge(
        boiler_assn[control_keys + ["boiler_id"]],
        how="left",
        on=control_keys,
    )

    # controls without an association fall back to their own id, which is likely to match a boiler
    rates["boiler_id"] = rates["boiler_id"].fillna(rates["nox_control_id"])

    if aggregation_level == "generator_id":
        # attach the generator id(s) associated with each boiler
        bga = load_data.initialize_pudl_out(year).bga_eia860()
        rates = rates.merge(
            bga[["plant_id_eia", "boiler_id", "generator_id"]],
            how="left",
            on=["plant_id_eia", "boiler_id"],
        )

    # weighted average for each boiler or generator
    return calculate_weighted_averages(
        rates,
        groupby_columns=["plant_id_eia", aggregation_level],
        data_cols=rate_columns,
        weight_col="hours_in_service",
    )


def calculate_weighted_averages(df, groupby_columns, data_cols, weight_col):
    """Compute grouped weighted averages for one or more columns of a dataframe.

    For every column in `data_cols` the result is
    sum(value * weight) / sum(weight) within each group, where the weight of a
    row only counts toward the denominator when that row's value is not null.
    Rows whose group keys are null are kept as their own group.

    Args:
        df: input dataframe (not modified).
        groupby_columns: columns that define the groups.
        data_cols: columns to average.
        weight_col: column holding the weights.

    Returns:
        DataFrame with the group keys as regular columns followed by one
        weighted-average column per entry of `data_cols`.
    """
    weights = df[weight_col]

    # per-row numerator and denominator contributions for every data column
    helper_columns = {}
    for col in data_cols:
        helper_columns[f"{col}_data_times_weight"] = df[col] * weights
        helper_columns[f"{col}_weight_where_notnull"] = weights * df[col].notna()

    grouped = df.assign(**helper_columns).groupby(groupby_columns, dropna=False)

    averages = pd.DataFrame()
    for col in data_cols:
        numerator = grouped[f"{col}_data_times_weight"].sum()
        denominator = grouped[f"{col}_weight_where_notnull"].sum()
        averages[col] = numerator / denominator

    return averages.reset_index()

def calculate_non_ozone_season_nox_rate(weighted_nox_rates):
    """Derive the non-ozone-season NOx emission rate from the annual and ozone-season rates.

    The ozone season is May - Sept (5 months). Assuming similar operation
    across all months, the annual average satisfies
        annual_avg = [(5 * oz_avg) + (7 * non_oz_avg)] / 12
    which is solved here for non_oz_avg.

    Args:
        weighted_nox_rates: dataframe containing the columns
            annual_nox_emission_rate_lb_per_mmbtu and
            ozone_season_nox_emission_rate_lb_per_mmbtu. It is not modified.

    Returns:
        A new dataframe with the additional column
        non_ozone_season_nox_emission_rate_lb_per_mmbtu. Values that would be
        negative are replaced with zero; rows where either input rate is
        missing stay missing.
    """
    annual_col = "annual_nox_emission_rate_lb_per_mmbtu"
    oz_col = "ozone_season_nox_emission_rate_lb_per_mmbtu"
    non_oz_col = "non_ozone_season_nox_emission_rate_lb_per_mmbtu"

    months_in_year = 12
    ozone_season_months = 5  # May - Sept
    non_ozone_season_months = months_in_year - ozone_season_months

    non_oz_rate = (
        (months_in_year * weighted_nox_rates[annual_col])
        - (ozone_season_months * weighted_nox_rates[oz_col])
    ) / non_ozone_season_months

    # if there are any rates that we calculate as negative, replace with zero
    # (clip leaves missing values untouched)
    non_oz_rate = non_oz_rate.clip(lower=0)

    # assign() returns a new frame, so the caller's dataframe is never mutated;
    # this also avoids a SettingWithCopyWarning when the input is a filtered slice
    return weighted_nox_rates.assign(**{non_oz_col: non_oz_rate})
