In [209]:
import pandas as pd
import numpy as np

%reload_ext autoreload
%autoreload 2

# Tell Python where to look for modules.
import sys

sys.path.append("../../open-grid-emissions/")
from src.column_checks import get_dtypes
import src.load_data as load_data
from src.data_cleaning import *
from src.emissions import *

# bind the emissions module explicitly: later cells call `emissions.<function>`,
# and a star import does not reliably provide the module name itself
import src.emissions as emissions

from pudl.analysis.allocate_net_gen import distribute_annually_reported_data_to_months

# Filling missing NOx and SO2 in CEMS

In [222]:
# data year to analyze; passed to the data loading and cleaning functions below
year = 2020

In [224]:
# load the CEMS data and pass it through each cleaning step in order
cems = (
    load_data.load_cems_data(year)
    # remove non-grid connected plants
    .pipe(
        remove_plants,
        non_grid_connected=True,
        remove_states=["PR"],
        steam_only_plants=False,
        distribution_connected_plants=False,
    )
    # manually remove steam-only units
    .pipe(manually_remove_steam_units)
    # add a report date
    .pipe(load_data.add_report_date)
    # remove data for any unit-months where there are incomplete data reported
    # this is generally when there is a single observation reported for an entire month
    .pipe(remove_incomplete_unit_months)
    # TODO: identify and remove any hourly values that appear to be outliers
    # add a fuel type to each observation
    .pipe(assign_fuel_type_to_cems, year)
)

    Removing 45 plants that are not grid-connected
    Removing 0 plants located in the following states: ['PR']
    Removing 3 units that only produce steam and do not report to EIA
    Removing 275 unit-months with incomplete hourly data


In [225]:
# replace all "missing" NOx values with zero so that missing and zero-reported
# values can be handled by the same check below
cems["nox_mass_lb"] = cems["nox_mass_lb"].fillna(0)

# replace 0 reported NOx values with missing values, if there was reported heat input
# (np.nan is used because the np.NaN alias was removed in NumPy 2.0)
cems.loc[
    (cems["nox_mass_lb"] == 0) & (cems["fuel_consumed_mmbtu"] > 0),
    "nox_mass_lb",
] = np.nan

# create a new df with all observations with missing NOx data
missing_nox = cems[cems["nox_mass_lb"].isnull()]
missing_nox

Unnamed: 0,plant_id_eia,unitid,datetime_utc,operating_time_hours,gross_generation_mwh,steam_load_1000_lb,fuel_consumed_mmbtu,co2_mass_lb,nox_mass_lb,so2_mass_lb,plant_id_epa,co2_mass_measurement_code,nox_mass_measurement_code,so2_mass_measurement_code,report_date,energy_source_code
828,3,1,2020-02-04 18:00:00+00:00,0.25,0.0,0.0,0.97500,112.500000,,0.00050,3,Measured,Calculated,Measured,2020-02-01,NG
19841,3,4,2020-04-04 23:00:00+00:00,0.75,0.0,0.0,6.80625,1349.999878,,4.10625,3,Measured,Measured,Measured,2020-04-01,BIT
20748,3,4,2020-05-12 18:00:00+00:00,0.50,0.0,0.0,6.40000,1300.000000,,1.92500,3,Measured,Measured,Measured,2020-05-01,BIT
20749,3,4,2020-05-12 19:00:00+00:00,1.00,0.0,0.0,24.90000,5200.000000,,7.40000,3,Measured,Measured,Measured,2020-05-01,BIT
23432,3,4,2020-09-01 14:00:00+00:00,0.50,0.0,0.0,2.70000,550.000000,,1.60000,3,Measured,Measured,Measured,2020-09-01,BIT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35173531,57703,CT02,2020-11-28 13:00:00+00:00,0.03,0.0,0.0,0.03699,4.320000,,0.00003,57703,Measured,Calculated,Measured,2020-11-01,NG
35173650,57703,CT02,2020-12-03 12:00:00+00:00,0.05,0.0,0.0,0.14075,16.500002,,0.00010,57703,Measured,Calculated,Measured,2020-12-01,NG
35175978,57703,CT03,2020-03-09 12:00:00+00:00,0.02,0.0,0.0,0.00552,0.640000,,0.00000,57703,Measured,Calculated,Measured,2020-03-01,NG
35176360,57703,CT03,2020-03-25 10:00:00+00:00,0.03,0.0,0.0,0.02673,3.240000,,0.00003,57703,Measured,Calculated,Measured,2020-03-01,NG


In [226]:
# replace all "missing" SO2 values with zero so that missing and zero-reported
# values can be handled by the same check below
cems["so2_mass_lb"] = cems["so2_mass_lb"].fillna(0)

# replace 0 reported SO2 values with missing values, if there was reported heat input
# (np.nan is used because the np.NaN alias was removed in NumPy 2.0)
cems.loc[
    (cems["so2_mass_lb"] == 0) & (cems["fuel_consumed_mmbtu"] > 0),
    "so2_mass_lb",
] = np.nan

# create a new df with all observations with missing SO2 data
missing_so2 = cems[cems["so2_mass_lb"].isnull()]
missing_so2

Unnamed: 0,plant_id_eia,unitid,datetime_utc,operating_time_hours,gross_generation_mwh,steam_load_1000_lb,fuel_consumed_mmbtu,co2_mass_lb,nox_mass_lb,so2_mass_lb,plant_id_epa,co2_mass_measurement_code,nox_mass_measurement_code,so2_mass_measurement_code,report_date,energy_source_code
9489,3,2,2020-01-30 15:00:00+00:00,0.25,0.00,0.0,0.03125,0.00,0.00125,,3,Measured,Calculated,Measured,2020-01-01,NG
91388,10,1,2020-05-28 02:00:00+00:00,0.25,0.25,0.0,0.02500,0.00,0.00025,,10,Measured,Calculated,Measured,2020-05-01,NG
93199,10,1,2020-08-11 13:00:00+00:00,1.00,75.00,0.0,0.60000,0.00,0.07200,,10,Measured,Calculated,Measured,2020-08-01,NG
228896,47,CCT1,2020-01-22 14:00:00+00:00,1.00,3.00,0.0,40.00000,4800.00,15.70000,,47,LME,LME,LME,2020-01-01,NG
229927,47,CCT1,2020-03-05 13:00:00+00:00,1.00,3.00,0.0,40.00000,4800.00,15.70000,,47,LME,LME,LME,2020-03-01,NG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35173467,57703,CT02,2020-11-25 21:00:00+00:00,0.02,0.00,0.0,0.01372,1.60,,,57703,Measured,Calculated,Measured,2020-11-01,NG
35174524,57703,CT03,2020-01-08 22:00:00+00:00,0.03,0.00,0.0,0.01683,1.98,0.00072,,57703,Measured,Calculated,Measured,2020-01-01,NG
35175978,57703,CT03,2020-03-09 12:00:00+00:00,0.02,0.00,0.0,0.00552,0.64,,,57703,Measured,Calculated,Measured,2020-03-01,NG
35176549,57703,CT03,2020-04-02 07:00:00+00:00,0.02,0.00,0.0,0.00616,0.72,0.00012,,57703,Measured,Calculated,Measured,2020-04-01,NG


In [230]:
# load the EPA-EIA crosswalk, then reduce it to the unique
# plant / unit / boiler combinations it contains
epa_eia_crosswalk = load_data.load_epa_eia_crosswalk(year)
boiler_to_unit_crosswalk = epa_eia_crosswalk.loc[
    :, ["plant_id_eia", "unitid", "boiler_id"]
].drop_duplicates()
boiler_to_unit_crosswalk

Unnamed: 0,plant_id_eia,unitid,boiler_id
0,3,1,1
1,3,2,2
2,3,3,3
3,3,4,4
4,3,5,5
...,...,...,...
31757,64876,,
31759,64877,,
31761,64878,,
31763,64879,,


In [237]:
# calculate unit-specific controlled NOx emission rates for the data year
# (the result has annual, ozone-season, and non-ozone-season rate columns)
unit_controlled_nox_rates = emissions.calculate_unit_specific_controlled_nox_rates(year)
unit_controlled_nox_rates

Unnamed: 0,plant_id_eia,unitid,controlled_annual_nox_ef_lb_per_mmbtu,controlled_ozone_season_nox_ef_lb_per_mmbtu,controlled_non_ozone_season_nox_ef_lb_per_mmbtu
0,3,1,0.052000,,
1,3,2,0.052000,,
2,3,4,0.250000,0.196000,0.288571
3,3,5,0.079000,0.073000,0.083286
4,3,6A,0.009000,0.009000,0.009000
...,...,...,...,...,...
1754,60903,2,0.010000,0.010000,0.010000
1755,60926,,0.007991,0.009477,0.006929
1756,60927,,0.014000,0.007521,0.018628
1757,61028,,0.007495,0.007495,0.007495


In [238]:
# merge the controlled nox rates into missing_nox
# any columns left over from a previous run of this cell are dropped first, so that
# re-running the cell does not create suffixed (_x / _y) duplicate rate columns
nox_rate_merge_keys = ["plant_id_eia", "unitid"]
nox_rate_columns = [
    col for col in unit_controlled_nox_rates.columns if col not in nox_rate_merge_keys
]
missing_nox = missing_nox.drop(
    columns=nox_rate_columns + ["controlled_nox_mass_lb"], errors="ignore"
).merge(
    unit_controlled_nox_rates,
    how="left",
    on=nox_rate_merge_keys,
    validate="m:1",
)

# pick the controlled nox rate based on the month:
# months 5 through 9 use the ozone season rate, all other months the non-ozone rate
in_ozone_season = missing_nox["report_date"].dt.month.between(5, 9)
controlled_nox_rate = missing_nox["controlled_ozone_season_nox_ef_lb_per_mmbtu"].where(
    in_ozone_season,
    missing_nox["controlled_non_ozone_season_nox_ef_lb_per_mmbtu"],
)
# some units only have an annual rate (the seasonal rates are missing);
# use the annual rate for those instead of leaving the result missing
controlled_nox_rate = controlled_nox_rate.fillna(
    missing_nox["controlled_annual_nox_ef_lb_per_mmbtu"]
)

# calculate the controlled nox emissions from the heat input and the selected rate
missing_nox = missing_nox.assign(
    controlled_nox_mass_lb=missing_nox["fuel_consumed_mmbtu"] * controlled_nox_rate
)

In [240]:
# load the NOx emission factors and remove duplicate factors, keeping the first
# factor listed for each prime mover / fuel / boiler design combination
nox_emission_factors = load_data.load_nox_emission_factors().drop_duplicates(
    subset=[
        "prime_mover_code",
        "energy_source_code",
        "boiler_bottom_type",
        "boiler_firing_type",
    ]
)



In [243]:
# load the boiler firing type for the data year
firing_type_by_boiler = emissions.load_boiler_firing_type(year)

# read the boiler entity table that is merged onto the firing types
# NOTE(review): `pudl_out` is not defined in any cell visible here — confirm it is
# created before this cell so the notebook runs from a fresh kernel
boilers_entity_eia = pd.read_sql("boilers_entity_eia", pudl_out.pudl_engine)

# merge in prime mover; each plant/boiler pair must appear only once in the entity table
boiler_firing_type = firing_type_by_boiler.merge(
    boilers_entity_eia,
    how="left",
    on=["plant_id_eia", "boiler_id"],
    validate="m:1",
)

In [None]:
# load the EPA-EIA crosswalk and keep the unique plant / unit / boiler combinations
# NOTE(review): this repeats the crosswalk cell earlier in the notebook — confirm
# whether this copy is still needed
epa_eia_crosswalk = load_data.load_epa_eia_crosswalk(year)
boiler_to_unit_crosswalk = epa_eia_crosswalk.loc[
    :, ["plant_id_eia", "unitid", "boiler_id"]
].drop_duplicates()

In [247]:
boiler_factors = (
    boiler_firing_type
    # merge in all possible fuel-specific emission factors for the pm boiler design parameters
    .merge(
        nox_emission_factors,
        how="left",
        on=["prime_mover_code", "boiler_bottom_type", "boiler_firing_type"],
    )
    # associate a unit
    .merge(
        boiler_to_unit_crosswalk,
        how="left",
        on=["plant_id_eia", "boiler_id"],
    )
    # aggregate by unit fuel: average the factors within each group,
    # keeping groups whose keys are missing
    .groupby(
        [
            "plant_id_eia",
            "unitid",
            "energy_source_code",
            "emission_factor_denominator",
        ],
        dropna=False,
    )["emission_factor"]
    .mean()
    .reset_index()
)

boiler_factors

Unnamed: 0,plant_id_eia,boiler_id,boiler_bottom_type,boiler_firing_type,prime_mover_code,energy_source_code,emission_factor,emission_factor_numerator,emission_factor_denominator,unitid
0,3,1,DRY,TANGENTIAL,ST,BIT,10.0000,lb,ton,1
1,3,1,DRY,TANGENTIAL,ST,BLQ,1.5000,lb,ton,1
2,3,1,DRY,TANGENTIAL,ST,DFO,1.0080,lb,barrels,1
3,3,1,DRY,TANGENTIAL,ST,LIG,7.1000,lb,ton,1
4,3,1,DRY,TANGENTIAL,ST,NG,0.1700,lb,mcf,1
...,...,...,...,...,...,...,...,...,...,...
21331,60927,1BHRSG,,DUCTBURNER,CA,WO,3.8724,lb,barrels,
21332,61028,HRSG,DRY,OTHER,CA,NG,0.3280,lb,mcf,
21333,61028,HRSG-2,DRY,OTHER,CA,NG,0.3280,lb,mcf,
21334,64020,HRSG3,,OTHER,,,,,,
