In [1]:
import pandas as pd

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys

sys.path.append("../../hourly-egrid/")
from src.column_checks import get_dtypes
import src.load_data as load_data

# NOX

When calculating NOx:
Apply emission rates in the following order, based on availability:  
    1. boiler-specific ozone-season NOx emission rates reported in EIA-923 Schedule 8C  
    2. boiler-specific annual NOx emission rates reported in EIA-923 Schedule 8C  
    3. fuel, prime mover, and boiler firing type specific emission factors  


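When calculating NOx for the allocated generation and fuel data (gen fuel):
1. First, calculate emissions based on the fuel, prime mover, and boiler firing type specific emission factors  
2. Then, once we've aggregated by generator, ... (TODO: this step was left unfinished; presumably the boiler- or generator-specific NOx rates are applied here where available)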

Control-specific emission rates:
- [x] Load control-specific nox rates from EIA-923 8C
- [x] Load control-boiler associations
- [ ] Load control attributes
- [ ] Map controls to boilers
- [ ] Map boilers to generators 
- [ ] Calculate generator-specific emission rates

Boiler firing types

In [2]:
# data year passed to the loading and calculation functions in the cells below
year = 2020

## Export current NOx and ... (TODO: this heading was left unfinished)

In [28]:
import pudl.analysis.allocate_net_gen as allocate_gen_fuel
import src.data_cleaning as data_cleaning
import src.validation as validation

pudl_out = load_data.initialize_pudl_out(year=year)

# allocate net generation and heat input to each generator-fuel grouping
gen_fuel_allocated = allocate_gen_fuel.allocate_gen_fuel_by_generator_energy_source(
    pudl_out, drop_interim_cols=True
)

# manually update energy source code when OTH
gen_fuel_allocated = data_cleaning.update_energy_source_codes(gen_fuel_allocated)

# round the allocated generation and fuel values to the nearest tenth of a unit
allocated_value_columns = [
    "net_generation_mwh",
    "fuel_consumed_mmbtu",
    "fuel_consumed_for_electricity_mmbtu",
]
gen_fuel_allocated.loc[:, allocated_value_columns] = gen_fuel_allocated.loc[
    :, allocated_value_columns
].round(1)

# run the validation checks on the allocated data
validation.test_for_missing_energy_source_code(gen_fuel_allocated)
validation.test_for_negative_values(gen_fuel_allocated)

# create a table that identifies the primary fuel of each generator and plant
primary_fuel_table = data_cleaning.create_primary_fuel_table(
    gen_fuel_allocated, pudl_out
)

# calculate co2, ch4, and n2o emissions for each generator-fuel record
# based on the allocated fuel consumption
gen_fuel_allocated = data_cleaning.calculate_ghg_emissions_from_fuel_consumption(
    df=gen_fuel_allocated,
    year=year,
    include_co2=True,
    include_ch4=True,
    include_n2o=True,
)



     report_date  plant_id_eia generator_id prime_mover_code  unit_id_pudl operational_status energy_source_code_1
7276  2020-01-01         59012        STEAM             <NA>          <NA>               <NA>                     
    Checking that there are no missing energy source codes associated with non-zero fuel consumption...  OK
    Checking that fuel and emissions values are positive...  OK


In [40]:
# preview the allocated generator-fuel table, which now carries the co2/ch4/n2o mass columns
gen_fuel_allocated

Unnamed: 0,report_date,plant_id_eia,energy_source_code,prime_mover_code,generator_id,energy_source_code_num,net_generation_mwh,fuel_consumed_mmbtu,fuel_consumed_for_electricity_mmbtu,co2_mass_lb,ch4_mass_lb,n2o_mass_lb
0,2020-01-01,1,DFO,IC,1,energy_source_code_1,,,,,,
1,2020-01-01,1,DFO,IC,2,energy_source_code_1,,,,,,
2,2020-01-01,1,DFO,IC,3,energy_source_code_1,,,,,,
3,2020-01-01,1,DFO,IC,5,energy_source_code_1,,,,,,
4,2020-01-01,1,WND,WT,WT1,energy_source_code_1,15.2,133.0,133.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
354380,2020-12-01,64816,NG,IC,GEN1,energy_source_code_1,,,,,,
354381,2020-12-01,64816,NG,IC,GEN2,energy_source_code_1,,,,,,
354382,2020-12-01,64816,NG,IC,GEN3,energy_source_code_1,,,,,,
354383,2020-12-01,64836,SUN,PV,CATAL,energy_source_code_1,,,,,,


In [60]:
# show every record whose date / plant / generator / prime mover / fuel key appears more than once
gen_fuel_allocated.loc[
    gen_fuel_allocated.duplicated(
        subset=[
            "report_date",
            "plant_id_eia",
            "generator_id",
            "prime_mover_code",
            "energy_source_code",
        ],
        keep=False,
    )
]

Unnamed: 0,report_date,plant_id_eia,energy_source_code,prime_mover_code,generator_id,energy_source_code_num,net_generation_mwh,fuel_consumed_mmbtu,fuel_consumed_for_electricity_mmbtu,co2_mass_lb,ch4_mass_lb,n2o_mass_lb
12132,2020-01-01,10426,DFO,ST,GEN2,energy_source_code_5,0.0,0.0,0.0,0.000000e+00,0.000000,0.000000
12133,2020-01-01,10426,DFO,ST,GEN3,energy_source_code_5,0.0,0.0,0.0,0.000000e+00,0.000000,0.000000
12134,2020-01-01,10426,DFO,ST,GEN4,energy_source_code_5,0.0,0.0,0.0,0.000000e+00,0.000000,0.000000
12135,2020-01-01,10426,DFO,ST,GEN5,energy_source_code_5,0.0,0.0,0.0,0.000000e+00,0.000000,0.000000
12140,2020-01-01,10426,DFO,ST,GEN2,energy_source_code_4,8.4,266.6,27.7,4.354111e+04,1.762226,0.351912
...,...,...,...,...,...,...,...,...,...,...,...,...
336953,2020-12-01,10426,DFO,ST,GEN3,energy_source_code_4,7.7,259.4,29.3,4.236521e+04,1.714634,0.342408
336954,2020-12-01,10426,DFO,ST,GEN4,energy_source_code_4,30.6,1141.6,128.7,1.864461e+05,7.545976,1.506912
336955,2020-12-01,10426,DFO,ST,GEN5,energy_source_code_4,48.4,1992.5,224.7,3.254151e+05,13.170425,2.630100
343800,2020-12-01,56258,DFO,GT,CT1,energy_source_code_2,236.1,2172.0,2172.0,3.547310e+05,14.356920,2.867040


In [None]:
def load_boiler_firing_type(year):
    """Load the firing type and bottom type of each operating boiler.

    Reads the table returned by
    ``load_data.load_boiler_design_parameters_eia860(year)``, keeps the records
    whose ``operational_status`` is ``"OP"``, and translates the boiler codes
    into descriptive labels.

    Args:
        year: passed through to ``load_data.load_boiler_design_parameters_eia860``.

    Returns:
        A DataFrame with the columns ``plant_id_eia``, ``boiler_id``,
        ``boiler_bottom_type`` and ``boiler_firing_type``. Rows in which both
        the bottom type and the firing type are missing are dropped.
    """
    firing_type_description = {
        "CB": "CELLBURNER",
        "CY": "CYCLONE",
        "DB": "DUCTBURNER",
        "FB": "FLUIDIZED",
        "SS": "STOKER",
        "TF": "TANGENTIAL",
        "VF": "VERTICAL",
        "WF": "WALL",
        "OT": "OTHER",
    }
    bottom_type_description = {"D": "DRY", "W": "WET"}

    boiler_design_parameters_eia860 = load_data.load_boiler_design_parameters_eia860(
        year
    )

    # only keep boilers that are operational; filter first and copy the result
    # so the new columns are written to an independent frame (and the full
    # table is not copied just to be discarded)
    is_operational = boiler_design_parameters_eia860["operational_status"] == "OP"
    boiler_firing_type = boiler_design_parameters_eia860[is_operational].copy()

    # firing type codes that are not in the lookup become missing values
    boiler_firing_type["boiler_firing_type"] = boiler_firing_type[
        "firing_type_1"
    ].map(firing_type_description)

    # bottom type codes that are not in the lookup are left unchanged
    boiler_firing_type["boiler_bottom_type"] = boiler_firing_type[
        "boiler_bottom_type"
    ].replace(bottom_type_description)

    # keep a boiler if at least one of the two descriptors is known
    boiler_firing_type = boiler_firing_type[
        ["plant_id_eia", "boiler_id", "boiler_bottom_type", "boiler_firing_type"]
    ].dropna(subset=["boiler_bottom_type", "boiler_firing_type"], how="all")

    return boiler_firing_type

In [94]:
# inspect the records that report exactly zero total fuel consumption
gen_fuel_allocated.loc[gen_fuel_allocated["fuel_consumed_mmbtu"].eq(0)]

Unnamed: 0,report_date,plant_id_eia,energy_source_code,prime_mover_code,generator_id,energy_source_code_num,net_generation_mwh,fuel_consumed_mmbtu,fuel_consumed_for_electricity_mmbtu,co2_mass_lb,ch4_mass_lb,n2o_mass_lb
23,2020-01-01,7,NG,ST,2,energy_source_code_1,-335.0,0.0,0.0,0.0,0.0,0.0
24,2020-01-01,9,DFO,GT,1,energy_source_code_2,0.0,0.0,0.0,0.0,0.0,0.0
26,2020-01-01,10,DFO,GT,GT10,energy_source_code_2,0.0,0.0,0.0,0.0,0.0,0.0
27,2020-01-01,10,DFO,GT,GT2,energy_source_code_2,0.0,0.0,0.0,0.0,0.0,0.0
28,2020-01-01,10,DFO,GT,GT3,energy_source_code_2,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
354238,2020-12-01,64432,MWH,BA,BBA,energy_source_code_1,0.0,0.0,0.0,0.0,0.0,0.0
354241,2020-12-01,64436,MWH,BA,WILB,energy_source_code_1,0.0,0.0,0.0,0.0,0.0,0.0
354256,2020-12-01,64460,SUN,PV,FACJF,energy_source_code_1,0.0,0.0,0.0,0.0,0.0,0.0
354275,2020-12-01,64523,SUN,PV,ATASC,energy_source_code_1,0.0,0.0,0.0,0.0,0.0,0.0


In [116]:
# get a dataframe with all unique generator-pm-esc combinations for emitting energy source types with data reported
# energy source codes that are excluded from the NOx key table below
CLEAN_FUELS = ["SUN", "MWH", "WND", "WAT", "WH", "PUR", "NUC"]

# unique plant / generator / prime mover / energy source combinations
# among the records that have fuel consumption data reported
gen_keys_for_nox = gen_fuel_allocated.loc[
    gen_fuel_allocated["fuel_consumed_mmbtu"].notna(),
    ["plant_id_eia", "generator_id", "prime_mover_code", "energy_source_code"],
].drop_duplicates()

# drop the combinations whose energy source is one of the clean fuels
gen_keys_for_nox = gen_keys_for_nox.loc[
    ~gen_keys_for_nox["energy_source_code"].isin(CLEAN_FUELS)
]
gen_keys_for_nox

Unnamed: 0,plant_id_eia,generator_id,prime_mover_code,energy_source_code
22,7,1,ST,NG
23,7,2,ST,NG
24,9,1,GT,DFO
25,9,1,GT,NG
26,10,GT10,GT,DFO
...,...,...,...,...
335858,7947,1,GT,DFO
339685,52061,GEN3,CT,DFO
341502,54950,CTG1,CT,DFO
354327,64748,CR18A,FC,NG


In [117]:
# load the NOx emission factors, keeping a single factor for each
# prime mover / energy source / boiler bottom type / boiler firing type combination
nox_emission_factors = load_data.load_nox_emission_factors().drop_duplicates(
    subset=[
        "prime_mover_code",
        "energy_source_code",
        "boiler_bottom_type",
        "boiler_firing_type",
    ]
)

# (disabled) create a table of factors for pm-fuel records that don't have boiler-specific emission factors
#nox_emission_factors_pm_fuel = nox_emission_factors[nox_emission_factors["boiler_bottom_type"].isna() & nox_emission_factors["boiler_firing_type"].isna()].drop(columns=["boiler_bottom_type","boiler_firing_type"])

In [118]:
# load the boiler firing type info
# firing type / bottom type of each operating boiler
boiler_firing_type = load_boiler_firing_type(year)

# attach the generator_id(s) listed for each boiler in the boiler-generator association table
boiler_generator_assn = load_data.initialize_pudl_out(year).bga_eia860()
boiler_firing_type = pd.merge(
    boiler_firing_type,
    boiler_generator_assn[["plant_id_eia", "boiler_id", "generator_id"]],
    how="left",
    on=["plant_id_eia", "boiler_id"],
)

# add the boiler characteristics to each generator key
# (generators without a matching boiler keep missing boiler columns)
gen_nox_factors = pd.merge(
    gen_keys_for_nox,
    boiler_firing_type,
    how="left",
    on=["plant_id_eia", "generator_id"],
    validate="m:m",
)
gen_nox_factors

Unnamed: 0,plant_id_eia,generator_id,prime_mover_code,energy_source_code,boiler_id,boiler_bottom_type,boiler_firing_type
0,7,1,ST,NG,1,DRY,TANGENTIAL
1,7,2,ST,NG,2,DRY,TANGENTIAL
2,9,1,GT,DFO,,,
3,9,1,GT,NG,,,
4,10,GT10,GT,DFO,,,
...,...,...,...,...,...,...,...
22449,7947,1,GT,DFO,,,
22450,52061,GEN3,CT,DFO,,,
22451,54950,CTG1,CT,DFO,,,
22452,64748,CR18A,FC,NG,,,


In [119]:
# merge in the emission factors for specific boiler types
gen_nox_factors = pd.merge(
    gen_nox_factors,
    nox_emission_factors,
    how="left",
    on=[
        "prime_mover_code",
        "energy_source_code",
        "boiler_bottom_type",
        "boiler_firing_type",
    ],
    validate="m:1",
)
# (disabled) merge in pm-fuel specific emission factors and use to fill efs where boiler-specific factors are not available
#gen_nox_factors = gen_nox_factors.merge(nox_emission_factors_pm_fuel, how="left", on=["prime_mover_code","energy_source_code"], suffixes=(None,"_pm_fuel"), validate="m:1")
#for col in ["emission_factor","emission_factor_numerator","emission_factor_denominator"]:
#    gen_nox_factors[col] = gen_nox_factors[col].fillna(gen_nox_factors[f"{col}_pm_fuel"])
gen_nox_factors

Unnamed: 0,plant_id_eia,generator_id,prime_mover_code,energy_source_code,boiler_id,boiler_bottom_type,boiler_firing_type,emission_factor,emission_factor_numerator,emission_factor_denominator
0,7,1,ST,NG,1,DRY,TANGENTIAL,0.17,lb,mcf
1,7,2,ST,NG,2,DRY,TANGENTIAL,0.17,lb,mcf
2,9,1,GT,DFO,,,,5.124,lb,barrels
3,9,1,GT,NG,,,,0.32,lb,mmbtu
4,10,GT10,GT,DFO,,,,5.124,lb,barrels
...,...,...,...,...,...,...,...,...,...,...
22449,7947,1,GT,DFO,,,,5.124,lb,barrels
22450,52061,GEN3,CT,DFO,,,,5.124,lb,barrels
22451,54950,CTG1,CT,DFO,,,,5.124,lb,barrels
22452,64748,CR18A,FC,NG,,,,,lb,mcf


In [121]:
# list the distinct prime mover / fuel / boiler type combinations that did not match an emission factor
missing_nox_efs = (
    gen_nox_factors.loc[
        gen_nox_factors["emission_factor"].isna(),
        [
            "prime_mover_code",
            "energy_source_code",
            "boiler_bottom_type",
            "boiler_firing_type",
        ],
    ]
    .drop_duplicates()
    .sort_values(
        by=[
            "energy_source_code",
            "prime_mover_code",
            "boiler_firing_type",
            "boiler_bottom_type",
        ]
    )
)
if not missing_nox_efs.empty:
    print("Warning: NOx emission factors are missing for the following boiler types. An emission factor of zero will be used for these boilers.")
    print(missing_nox_efs)

      prime_mover_code energy_source_code boiler_bottom_type boiler_firing_type
20403               FC                LFG                NaN                NaN
6931                CE                 NG                NaN                NaN
13248               FC                 NG                NaN                NaN
19938               FC                OBG                NaN                NaN


In [87]:
# show the generator keys that ended up with more than one row after the boiler merge
gen_nox_factors.loc[
    gen_nox_factors.duplicated(
        subset=[
            "plant_id_eia",
            "generator_id",
            "prime_mover_code",
            "energy_source_code",
        ],
        keep=False,
    )
]

Unnamed: 0,plant_id_eia,generator_id,prime_mover_code,energy_source_code,boiler_id,boiler_bottom_type,boiler_firing_type,emission_factor,emission_factor_numerator,emission_factor_denominator,emission_factor_pm_fuel,emission_factor_numerator_pm_fuel,emission_factor_denominator_pm_fuel
6,3,A1ST,CA,NG,6A,,DUCTBURNER,0.328,lb,mcf,0.320,lb,mmbtu
7,3,A1ST,CA,NG,6B,,DUCTBURNER,0.328,lb,mcf,0.320,lb,mmbtu
8,3,A2ST,CA,NG,7A,,DUCTBURNER,0.328,lb,mcf,0.320,lb,mmbtu
9,3,A2ST,CA,NG,7B,,DUCTBURNER,0.328,lb,mcf,0.320,lb,mmbtu
10,3,A1CT,CT,NG,6A,,DUCTBURNER,0.328,lb,mcf,0.328,lb,mcf
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22917,64020,STG2,CA,NG,HRSG4,,OTHER,0.320,lb,mmbtu,0.320,lb,mmbtu
22918,64020,CTG3,CT,NG,HRSG3,,OTHER,0.328,lb,mcf,0.328,lb,mcf
22919,64020,CTG3,CT,NG,HRSG4,,OTHER,0.328,lb,mcf,0.328,lb,mcf
22920,64020,CTG4,CT,NG,HRSG3,,OTHER,0.328,lb,mcf,0.328,lb,mcf


In [44]:
# collapse rows that repeat the same boiler characteristics for a generator
boiler_firing_type = boiler_firing_type.drop_duplicates(
    subset=[
        "plant_id_eia",
        "generator_id",
        "boiler_bottom_type",
        "boiler_firing_type",
    ]
)

In [45]:
# show the generators that still have more than one boiler_firing_type row
boiler_firing_type.loc[
    boiler_firing_type.duplicated(
        subset=["plant_id_eia", "generator_id"],
        keep=False,
    )
]

Unnamed: 0,plant_id_eia,boiler_id,boiler_bottom_type,boiler_firing_type,generator_id
184,609,1,,CELLBURNER,3ST
185,609,1,,CELLBURNER,3A
186,609,1,,CELLBURNER,3B
187,609,1,,CELLBURNER,3C
188,609,2,,DUCTBURNER,3ST
...,...,...,...,...,...
3999,57664,HRSG3,,DUCTBURNER,CT3
4000,57664,HRSG3,,DUCTBURNER,CT4
4001,57664,HRSG4,DRY,DUCTBURNER,ST2
4002,57664,HRSG4,DRY,DUCTBURNER,CT3


### Boiler and Generator-Specific NOx rates

In [139]:
def calculate_boiler_specific_nox_rates(year):
    """Return weighted NOx emission rates aggregated to each plant and boiler_id.

    Loads the NOx emission rates for `year`, adds the non-ozone-season rate,
    and aggregates the result by "boiler_id".
    """
    rates_with_non_ozone_season = calculate_non_ozone_season_nox_rate(
        load_nox_emission_rates(year)
    )
    return calculate_weighted_nox_rates(year, rates_with_non_ozone_season, "boiler_id")

def calculate_generator_specific_nox_rates(year):
    """Return weighted NOx emission rates aggregated to each plant and generator_id.

    Loads the NOx emission rates for `year`, adds the non-ozone-season rate,
    and aggregates the result by "generator_id".
    """
    rates_with_non_ozone_season = calculate_non_ozone_season_nox_rate(
        load_nox_emission_rates(year)
    )
    return calculate_weighted_nox_rates(
        year, rates_with_non_ozone_season, "generator_id"
    )

def load_nox_emission_rates(year):
    """Load the NOx emission rates reported for operating NOx controls.

    Starts from ``load_data.load_emissions_controls_eia923(year)`` and keeps
    the records that have a ``nox_control_id`` and an ``operational_status``
    of "OP". Only the identifying columns, ``hours_in_service`` and the two
    rate columns are returned, and records with neither an annual nor an
    ozone-season rate are dropped.
    """
    rate_columns = [
        "annual_nox_emission_rate_lb_per_mmbtu",
        "ozone_season_nox_emission_rate_lb_per_mmbtu",
    ]
    output_columns = ["plant_id_eia", "nox_control_id", "hours_in_service"] + rate_columns

    emissions_controls_eia923 = load_data.load_emissions_controls_eia923(year)

    # a record qualifies when it identifies a NOx control that is operating
    has_nox_control = emissions_controls_eia923["nox_control_id"].notna()
    is_operating = emissions_controls_eia923["operational_status"] == "OP"
    operating_nox_controls = emissions_controls_eia923[has_nox_control & is_operating]

    # require at least one of the two reported rates to be present
    return operating_nox_controls[output_columns].dropna(subset=rate_columns, thresh=1)


def calculate_weighted_nox_rates(year, nox_rates, aggregation_level):
    """Aggregate nox rate data from nox_control_id to boiler_id or generator_id.

    Each control-level record is matched to its boiler_id(s), and optionally to
    the generator_id(s) of those boilers, before the rate columns are averaged
    within each (plant_id_eia, aggregation_level) group using
    hours_in_service as the weight.

    Args:
        year: passed to the association table loaders.
        nox_rates: control-level rates, including the non-ozone-season rate column.
        aggregation_level: name of the id column to group by. When it is
            "generator_id", generator ids are merged in from the
            boiler-generator association first.

    Returns:
        The dataframe produced by `calculate_weighted_averages`.
    """
    plant_control_key = ["plant_id_eia", "nox_control_id"]
    plant_boiler_key = ["plant_id_eia", "boiler_id"]

    # merge boiler ids associated with each nox_control_id
    control_boiler_assn = load_data.load_boiler_nox_association_eia860(year)
    rates = nox_rates.merge(
        control_boiler_assn[plant_control_key + ["boiler_id"]],
        how="left",
        on=plant_control_key,
    )

    # if there are any missing boiler_ids, fill using the nox_control_id, which is likely to match a boiler
    rates["boiler_id"] = rates["boiler_id"].fillna(rates["nox_control_id"])

    if aggregation_level == "generator_id":
        # associate a generator_id with each record
        boiler_generator_assn = load_data.initialize_pudl_out(year).bga_eia860()
        rates = rates.merge(
            boiler_generator_assn[plant_boiler_key + ["generator_id"]],
            how="left",
            on=plant_boiler_key,
        )

    # calculate a weighted average for each boiler or generator
    return calculate_weighted_averages(
        rates,
        groupby_columns=["plant_id_eia", aggregation_level],
        data_cols=[
            "annual_nox_emission_rate_lb_per_mmbtu",
            "ozone_season_nox_emission_rate_lb_per_mmbtu",
            "non_ozone_season_nox_emission_rate_lb_per_mmbtu",
        ],
        weight_col="hours_in_service",
    )


def calculate_weighted_averages(df, groupby_columns, data_cols, weight_col):
    """Calculate weighted averages of one or more columns within groups.

    For each column in `data_cols`, the sum of ``value * weight`` is divided by
    the sum of the weights of the rows where the value is not null, so missing
    values affect neither the numerator nor the denominator.

    Args:
        df: input dataframe; it is not modified.
        groupby_columns: list of columns that define the groups. Missing key
            values form their own group (``dropna=False``).
        data_cols: list of columns to average.
        weight_col: column holding the weights.

    Returns:
        A dataframe with one row per group, containing `groupby_columns`
        followed by `data_cols`. A zero total weight is not special-cased:
        the result of the division (typically NaN) is returned as-is.
    """
    weighted = df.copy()
    for data_col in data_cols:
        weighted[f"{data_col}_data_times_weight"] = (
            weighted[data_col] * weighted[weight_col]
        )
        # only count the weight of rows that actually have a value
        weighted[f"{data_col}_weight_where_notnull"] = weighted[
            weight_col
        ] * pd.notnull(weighted[data_col])

    grouped = weighted.groupby(groupby_columns, dropna=False)

    # build the result from the grouped series so that it always carries the
    # group index; assigning an empty series into an empty dataframe would
    # lose the group-by key columns when `df` has no rows
    result = pd.DataFrame(
        {
            data_col: grouped[f"{data_col}_data_times_weight"].sum()
            / grouped[f"{data_col}_weight_where_notnull"].sum()
            for data_col in data_cols
        }
    )

    return result.reset_index()

def calculate_non_ozone_season_nox_rate(weighted_nox_rates):
    """Derive the non-ozone-season NOx rate from the annual and ozone-season rates.

    The ozone season is May - Sept (5 months). Assuming similar operation
    across all months, the annual average is

        annual_avg = [(5 * oz_avg) + (7 * non_oz_avg)] / 12

    which is solved here for ``non_oz_avg``. Rates that come out negative are
    replaced with zero; a missing annual or ozone-season rate leaves the
    result missing.

    Args:
        weighted_nox_rates: dataframe with the columns
            ``annual_nox_emission_rate_lb_per_mmbtu`` and
            ``ozone_season_nox_emission_rate_lb_per_mmbtu``. It is not modified.

    Returns:
        A copy of the input with the additional column
        ``non_ozone_season_nox_emission_rate_lb_per_mmbtu``.
    """
    annual_col = "annual_nox_emission_rate_lb_per_mmbtu"
    oz_col = "ozone_season_nox_emission_rate_lb_per_mmbtu"
    non_oz_col = "non_ozone_season_nox_emission_rate_lb_per_mmbtu"

    months_in_year = 12
    ozone_season_months = 5
    non_ozone_season_months = months_in_year - ozone_season_months

    # work on a copy so the caller's dataframe is not changed as a side effect
    nox_rates = weighted_nox_rates.copy()

    non_ozone_rate = (
        (months_in_year * nox_rates[annual_col])
        - (ozone_season_months * nox_rates[oz_col])
    ) / non_ozone_season_months

    # if there are any rates that we calculate as negative, replace with zero
    nox_rates[non_oz_col] = non_ozone_rate.clip(lower=0)

    return nox_rates
