# Screen Anomalies in CEMS Data 

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from oge.load_data import load_cems_data
from oge.helpers import create_plant_ba_table
from oge.anomaly_screening import AnomalyScreeningFirstStep

## Load CEMS Data

In [None]:
# Reporting year shared by the data-loading calls in this notebook.
year = 2022
# CEMS records for that year, as returned by the project loader.
cems = load_cems_data(year)

## Map Plant to BA

In [None]:
# Dictionary mapping each plant_id_eia to its ba_code for the selected year.
plant2ba = create_plant_ba_table(year).set_index("plant_id_eia")["ba_code"].to_dict()


def get_ba(plant):
    """Return the BA code recorded for ``plant`` in ``plant2ba``.

    Args:
        plant: A ``plant_id_eia`` value.

    Returns:
        The ``ba_code`` mapped to ``plant``, or ``np.nan`` when the plant is
        absent from the module-level ``plant2ba`` dictionary.
    """
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0 and raises AttributeError there.
    return plant2ba.get(plant, np.nan)

## Flag Extreme Values

In [None]:
def screening(field, global_cut_multiplier=10):
    """Summarize, per emissions unit, the global extreme values found in ``field``.

    Every (plant_id_eia, emissions_unit_id_epa) group of the module-level
    ``cems`` frame is run through ``AnomalyScreeningFirstStep``. Each unit with
    at least one record categorized as ``GLOBAL_EXTREME`` contributes one row
    holding the number of records per category and the mean ratio of its
    extreme values to the median of its strictly positive values.

    Args:
        field: Name of the ``cems`` column to screen.
        global_cut_multiplier: Forwarded unchanged to
            ``AnomalyScreeningFirstStep``.

    Returns:
        DataFrame indexed by (plant_id_eia, emissions_unit_id_epa) with one
        integer count column per category (``MISSING``, ``OKAY``, ``ZERO`` and
        ``GLOBAL_EXTREME`` are always present) and a float
        ``MEAN_DEVIATION_OF_GLOBAL_EXTREME_FROM_MEDIAN`` column. The frame is
        empty when no unit has a global extreme value.
    """
    unit_keys = ["plant_id_eia", "emissions_unit_id_epa"]
    count_columns = ["MISSING", "OKAY", "ZERO", "GLOBAL_EXTREME"]
    deviation_column = "MEAN_DEVIATION_OF_GLOBAL_EXTREME_FROM_MEDIAN"

    summaries = []
    for i, (unit_id, unit_df) in enumerate(cems.groupby(unit_keys)):
        # Coarse progress indicator; the loop visits every unit in the data.
        if i % 500 == 0:
            print(i)

        AS = AnomalyScreeningFirstStep(
            unit_df[[*unit_keys, field]],
            field,
            global_cut_multiplier=global_cut_multiplier,
        )
        AS.flag_negative_values()
        AS.flag_zero_values()

        # Only look for extremes when something other than missing/zero
        # records is left for this unit.
        usable = AS.get_filtered_df().query(
            "category != 'MISSING' and category != 'ZERO'"
        )
        if usable.empty:
            continue

        AS.flag_global_extreme_values()
        df = AS.get_filtered_df()
        global_extreme_id = df.query("category == 'GLOBAL_EXTREME'")["index"]
        if global_extreme_id.empty:
            continue

        # NOTE(review): df["index"] is used as row labels of ``cems``, i.e. it
        # presumably carries the original index of the unit's records.
        median = cems.loc[df["index"]].query(f"{field} > 0")[field].median()
        mean_deviation = (
            (cems.loc[global_extreme_id][field] / median).mean().round(1)
        )

        # One row per unit: category counts followed by the deviation metric.
        row = df.groupby("category").size().to_dict()
        row[deviation_column] = mean_deviation
        summaries.append(
            pd.DataFrame(row, index=pd.MultiIndex.from_tuples([unit_id]))
        )

    if summaries:
        # Categories missing for a given unit come out of concat as NaN.
        global_extreme = pd.concat(summaries).fillna(0)
    else:
        # pd.concat raises ValueError on an empty list; return an empty,
        # correctly shaped frame instead.
        global_extreme = pd.DataFrame(
            columns=[*count_columns, deviation_column],
            index=pd.MultiIndex.from_arrays([[], []]),
        )

    # A category that never occurs in any flagged unit would make ``astype``
    # raise KeyError, so guarantee the expected count columns exist.
    for column in count_columns:
        if column not in global_extreme.columns:
            global_extreme[column] = 0

    # Every column except the deviation metric is a record count.
    dtypes = {
        column: "int"
        for column in global_extreme.columns
        if column != deviation_column
    }
    dtypes[deviation_column] = "float"
    global_extreme = global_extreme.astype(dtypes)

    global_extreme.index.set_names(unit_keys, inplace=True)
    return global_extreme

### Generation

In [None]:
# Screen gross generation, then display the summary with each plant's BA code.
generation_screening = screening("gross_generation_mwh")
generation_screening.assign(
    BA=lambda frame: [get_ba(plant) for plant in frame.index.get_level_values(0)]
)

In [None]:
# Gross generation time series of one unit, for visual inspection.
check_generation = (
    cems.query("plant_id_eia == 6824 and emissions_unit_id_epa == '2'")
    .set_index("datetime_utc")
    .loc[:, "gross_generation_mwh"]
)
ax = check_generation.plot()
ax.set(xlabel="", ylabel="Gross Generation (MWh)")
plt.show()

### Fuel Consumed

In [None]:
# Screen fuel consumption, then display the summary with each plant's BA code.
fuel_consumed_screening = screening("fuel_consumed_mmbtu")
fuel_consumed_screening.assign(
    BA=lambda frame: [get_ba(plant) for plant in frame.index.get_level_values(0)]
)

In [None]:
# Fuel consumption time series of one unit, for visual inspection.
check_fuel_consumed = (
    cems.query("plant_id_eia == 50732 and emissions_unit_id_epa == 'ETBLR2'")
    .set_index("datetime_utc")
    .loc[:, "fuel_consumed_mmbtu"]
)
ax = check_fuel_consumed.plot()
ax.set(xlabel="", ylabel="Fuel Consumed (MMBtu)")
plt.show()

### CO2 Emission

In [None]:
# Screen CO2 mass, then display the summary with each plant's BA code.
co2_emission_screening = screening("co2_mass_lb")
co2_emission_screening.assign(
    BA=lambda frame: [get_ba(plant) for plant in frame.index.get_level_values(0)]
)

In [None]:
# CO2 mass time series of one unit, plotted on a fixed y-range.
check_co2_emission = (
    cems.query("plant_id_eia == 55419 and emissions_unit_id_epa == '700'")
    .set_index("datetime_utc")
    .loc[:, "co2_mass_lb"]
)
ax = check_co2_emission.plot(ylim=(1000, 500000))
ax.set(xlabel="", ylabel="CO2 Emission (LB)")
plt.show()

### NOx Emission

In [None]:
# Screen NOx mass, then display the summary with each plant's BA code.
nox_emission_screening = screening("nox_mass_lb", global_cut_multiplier=10)
nox_emission_screening.assign(
    BA=lambda frame: [get_ba(plant) for plant in frame.index.get_level_values(0)]
)

In [None]:
# NOx mass time series of one unit, for visual inspection.
check_nox_emission = (
    cems.query("plant_id_eia == 880067 and emissions_unit_id_epa == 'BLR19'")
    .set_index("datetime_utc")
    .loc[:, "nox_mass_lb"]
)
ax = check_nox_emission.plot()
ax.set(xlabel="", ylabel="NOx Emission (LB)")
plt.show()

### SO2 Emission

In [None]:
# Screen SO2 mass, then display the summary with each plant's BA code.
so2_emission_screening = screening("so2_mass_lb", global_cut_multiplier=10)
so2_emission_screening.assign(
    BA=lambda frame: [get_ba(plant) for plant in frame.index.get_level_values(0)]
)

In [None]:
# SO2 mass time series of one unit, for visual inspection.
check_so2_emission = (
    cems.query("plant_id_eia == 60340 and emissions_unit_id_epa == 'B0004'")
    .set_index("datetime_utc")
    .loc[:, "so2_mass_lb"]
)
ax = check_so2_emission.plot()
ax.set(xlabel="", ylabel="SO2 Emission (LB)")
plt.show()