In [None]:
# import packages
import pandas as pd
import numpy as np
import os
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys

sys.path.append("../../src/")

import oge.download_data as download_data
import oge.load_data as load_data
from oge.column_checks import get_dtypes, apply_dtypes
from oge.filepaths import *
import oge.impute_hourly_profiles as impute_hourly_profiles
import oge.data_cleaning as data_cleaning
import oge.output_data as output_data
import oge.emissions as emissions
import oge.validation as validation
import oge.gross_to_net_generation as gross_to_net_generation
import oge.eia930 as eia930
from oge.logging_util import get_logger, configure_root_logger

# Data year used throughout the notebook, plus the matching "<year>/" prefix.
year = 2022
path_prefix = f"{year}/"

# Run the root-logger setup from oge.logging_util, then grab a logger
# named "test" for this notebook.
configure_root_logger()
logger = get_logger("test")

In [None]:
# Column labels passed to read_excel via `names=`; pandas assigns them
# positionally, replacing the labels found in the sheet's header row.
emissions_controls_eia923_names = [
    "report_date",
    "plant_id_eia",
    "equipment_tech_description",
    "particulate_control_id_eia",
    "so2_control_id_eia",
    "nox_control_id_eia",
    "mercury_control_id_eia",
    "operational_status",
    "hours_in_service",  # not yet in pudl
    "annual_nox_emission_rate_lb_per_mmbtu",
    "ozone_season_nox_emission_rate_lb_per_mmbtu",  # not yet in pudl
    "particulate_emission_rate_lb_per_mmbtu",
    "particulate_removal_efficiency_annual",
    "particulate_removal_efficiency_at_full_load",
    "particulate_test_date",
    "so2_removal_efficiency_annual",
    "so2_removal_efficiency_at_full_load",
    "so2_test_date",
    "fgd_sorbent_consumption_1000_tons",
    "fgd_electricity_consumption_mwh",
    "mercury_removal_efficiency",
    "mercury_emission_rate_lb_per_trillion_btu",
    "acid_gas_removal_efficiency",
]

# Location of the EIA-923 Schedule 8 workbook for `year` inside the downloads
# folder (resolved by the project's downloads_folder helper).
eia923_schedule_8_path = downloads_folder(
    f"eia923/f923_{year}/EIA923_Schedule_8_Annual_Environmental_Information_{year}_Final.xlsx"
)

# Read the "8C Air Emissions Control Info" sheet. header=4 marks the 0-indexed
# header row, and cells containing "." are read as missing values.
emissions_controls_eia923 = pd.read_excel(
    eia923_schedule_8_path,
    sheet_name="8C Air Emissions Control Info",
    header=4,
    names=emissions_controls_eia923_names,
    na_values=".",
)

In [None]:
# Preview the table read in the previous cell. `cems` is not loaded until a
# later cell, so it must not be referenced here: on a fresh
# Restart Kernel -> Run All that would raise NameError and halt the notebook.
# .head() also avoids rendering the whole frame.
emissions_controls_eia923.head()

In [None]:
from pudl.metadata.fields import apply_pudl_dtypes

In [None]:
# PUDL tables whose column dtypes are collected into `dtype_dict` below.
tables_we_load = [
    "out_eia923__monthly_generation_fuel_combined",
    "out_eia923__monthly_boiler_fuel",
    "out_eia923__monthly_generation",
    "out_eia__yearly_generators",
    "core_eia860__assn_boiler_generator",
    "core_eia860__scd_generators",
    "core_eia__codes_firing_types",
    "core_epa__assn_eia_epacamd",
    "out_eia__yearly_boilers",
    "core_eia__entity_plants",
    "out_eia__yearly_plants",
    "core_eia__entity_utilities",
    "core_eia860__scd_boilers",
    "core_eia860__assn_yearly_boiler_emissions_control_equipment",
]

# Column name -> dtype, merged across all tables above. If a column name occurs
# in several tables, the dtype from the table loaded last wins (dict.update).
dtype_dict = {}
for table in tables_we_load:
    try:
        t = load_data.load_pudl_table(table, year)
    except Exception as err:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still propagate, and log the failure
        # instead of hiding it.
        # NOTE(review): presumably this fails for tables that cannot be
        # filtered by year — confirm against load_data.load_pudl_table.
        logger.info(
            f"load_pudl_table({table!r}, {year}) failed ({err!r}); "
            "retrying without the year argument."
        )
        t = load_data.load_pudl_table(table)
    dtype_dict.update(t.dtypes.to_dict())

In [None]:
# Read the hourly EPA CEMS parquet from the downloads folder, keeping only the
# rows whose "year" column equals `year` (the filter is passed to the reader).
cems_parquet_path = downloads_folder("pudl/core_epacems__hourly_emissions.parquet")
cems = pd.read_parquet(
    cems_parquet_path,
    filters=[("year", "==", year)],
)

In [None]:
# Inspect the column dtypes of `cems`. In notebook order this cell sits after
# the read_parquet cell and before the apply_dtypes cell.
cems.dtypes

In [None]:
# Rebind `cems` to the result of oge.column_checks.apply_dtypes.
# NOTE(review): presumably this casts columns to the project's standard
# dtypes — confirm in oge.column_checks.
cems = apply_dtypes(cems)

In [None]:
# Inspect the dtypes again. In notebook order this follows the apply_dtypes
# cell, so the output can be compared with the earlier dtypes listing.
cems.dtypes

In [None]:
# Make operating_datetime_utc timezone-aware in UTC.
# tz_localize raises TypeError on a column that is already tz-aware, so the
# cell would fail when re-run or when the data arrives with a timezone
# attached. Localize only naive data; otherwise convert to UTC, which keeps
# the cell idempotent.
if cems["operating_datetime_utc"].dt.tz is None:
    cems["operating_datetime_utc"] = cems["operating_datetime_utc"].dt.tz_localize(
        "UTC"
    )
else:
    cems["operating_datetime_utc"] = cems["operating_datetime_utc"].dt.tz_convert(
        "UTC"
    )

In [None]:
# Display-only experiment: drop the timezone with tz_localize(None) and cast
# to second-resolution datetime64. The result is not assigned, so `cems`
# itself is left unchanged.
cems["operating_datetime_utc"].dt.tz_localize(None).astype("datetime64[s]")

In [None]:
# Display the operating_datetime_utc column as currently stored in `cems`.
cems["operating_datetime_utc"]

In [None]:
# Show the dtype of the operating_datetime_utc column.
cems["operating_datetime_utc"].dtype

In [None]:
# Rebuild dtype_dict with its keys in ascending sorted order so the mapping is
# easier to scan when displayed.
sorted_keys = sorted(dtype_dict)
sorted_dtype_dict = {column: dtype_dict[column] for column in sorted_keys}

In [None]:
# Display the key-sorted column -> dtype mapping built in the preceding cell.
sorted_dtype_dict