In [1]:
# import packages
import pandas as pd
import numpy as np
import os
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# Tell Python where to look for modules.
import sys

sys.path.append("../../src/")

import oge.download_data as download_data
import oge.load_data as load_data
from oge.column_checks import get_dtypes, apply_dtypes
from oge.filepaths import *
import oge.impute_hourly_profiles as impute_hourly_profiles
import oge.data_cleaning as data_cleaning
import oge.output_data as output_data
import oge.emissions as emissions
import oge.validation as validation
import oge.gross_to_net_generation as gross_to_net_generation
import oge.eia930 as eia930
from oge.logging_util import get_logger, configure_root_logger

# Data year under investigation; later cells read this variable.
year = 2022
# Year-named folder prefix derived from `year`.
path_prefix = f"{year}/"

# Configure the root logger through oge.logging_util, then grab a named
# logger for this notebook session.
configure_root_logger()
logger = get_logger("test")

In [3]:
# Names of the date/datetime columns being examined in this notebook.
datetime_cols = [
    "boiler_operating_date",
    "boiler_retirement_date",
    "current_planned_generator_operating_date",
    "datetime_utc",
    "datetime_local",
    "emission_control_operating_date",
    "emission_control_retirement_date",
    "generator_operating_date",
    "generator_retirement_date",
    "new_source_review_date",
    "operating_datetime_utc",
    "original_planned_generator_operating_date",
    "other_modifications_date",
    "planned_derate_date",
    "planned_generator_retirement_date",
    "planned_repower_date",
    "planned_uprate_date",
    "plant_operating_date",
    "plant_retirement_date",
    "report_date",
    "uprate_derate_completed_date",
]

# List only the columns whose name contains the "_utc" marker; everything
# else is skipped.
for col in datetime_cols:
    if "_utc" not in col:
        continue
    print(col)

datetime_utc
operating_datetime_utc


In [2]:
# Read the core_epacems__hourly_emissions parquet file from the downloads
# folder, using a read filter so only rows whose "year" equals `year` are
# loaded.
cems_path = downloads_folder("pudl/core_epacems__hourly_emissions.parquet")
cems = pd.read_parquet(cems_path, filters=[["year", "==", year]])

In [4]:
# Inspect the timestamp column exactly as it was loaded (check its dtype).
cems.loc[:, "operating_datetime_utc"]

0          2022-10-01 05:00:00
1          2022-10-01 06:00:00
2          2022-10-01 07:00:00
3          2022-10-01 08:00:00
4          2022-10-01 09:00:00
                   ...        
35166787   2022-10-01 00:00:00
35166788   2022-10-01 01:00:00
35166789   2022-10-01 02:00:00
35166790   2022-10-01 03:00:00
35166791   2022-10-01 04:00:00
Name: operating_datetime_utc, Length: 35166792, dtype: datetime64[ms]

In [7]:
# Trial conversion of the timestamp column: remove any timezone, cast to
# second resolution, then label the values as UTC. The result is only
# displayed; `cems` itself is not modified.
(
    cems["operating_datetime_utc"]
    .dt.tz_localize(None)  # drop timezone info, if any
    .astype("datetime64[s]")  # second resolution
    .dt.tz_localize("UTC")  # mark the values as UTC
)

0          2022-10-01 05:00:00+00:00
1          2022-10-01 06:00:00+00:00
2          2022-10-01 07:00:00+00:00
3          2022-10-01 08:00:00+00:00
4          2022-10-01 09:00:00+00:00
                      ...           
35166787   2022-10-01 00:00:00+00:00
35166788   2022-10-01 01:00:00+00:00
35166789   2022-10-01 02:00:00+00:00
35166790   2022-10-01 03:00:00+00:00
35166791   2022-10-01 04:00:00+00:00
Name: operating_datetime_utc, Length: 35166792, dtype: datetime64[s, UTC]

In [19]:
# Identifier columns to compare between the two subplant crosswalks.
cols = [
    "plant_id_eia",
    "generator_id",
    "plant_id_epa",
    "emissions_unit_id_epa",
    "unit_id_pudl",
    "subplant_id",
]

# Common sort key so both tables list their rows in the same order.
sort_order = [
    "plant_id_eia",
    "subplant_id",
    "unit_id_pudl",
    "generator_id",
    "emissions_unit_id_epa",
]

# scp: the subplant crosswalk loaded from the PUDL table.
scp = load_data.load_pudl_table("core_epa__assn_eia_epacamd_subplant_ids")[
    cols
].sort_values(by=sort_order)

# sco: the subplant crosswalk read from the outputs folder. The path is built
# from `year` / `path_prefix` (set in the configuration cell) instead of a
# hard-coded "2022", so changing `year` there keeps this comparison pointed
# at the matching output file.
sco = pd.read_csv(
    outputs_folder(f"{path_prefix}subplant_crosswalk_{year}.csv.zip"),
    compression="zip",
)[cols].sort_values(by=sort_order)

In [24]:
# Rows for plant 54 in `sco` (the crosswalk read from the outputs folder).
sco.loc[sco["plant_id_eia"].eq(54)]

Unnamed: 0,plant_id_eia,generator_id,plant_id_epa,emissions_unit_id_epa,unit_id_pudl,subplant_id
137,54,GT2,54.0,SCT2,,1
138,54,GT1,54.0,SCT1,,2
139,54,GT3,54.0,SCT3,,3
140,54,GT4,54.0,SCT4,,4
141,54,GT5,54.0,SCT5,,5
142,54,GT6,54.0,SCT6,,6
143,54,GT7,54.0,SCT7,,7
144,54,GT10,54.0,SCT10,,8
145,54,GT9,54.0,SCT9,,9


In [25]:
# Rows for plant 54 in `scp` (the crosswalk loaded from the PUDL table).
scp.loc[scp["plant_id_eia"].eq(54)]

Unnamed: 0,plant_id_eia,generator_id,plant_id_epa,emissions_unit_id_epa,unit_id_pudl,subplant_id
145,54,1,54,1,,0
146,54,2,54,2,,1
147,54,3,54,3,,2
148,54,4,54,4,,3
149,54,5,54,5,,4
150,54,6,54,6,,5
151,54,7,54,7,,6
152,54,GT1,54,SCT1,,7
153,54,GT10,54,SCT10,,8
154,54,GT11,54,GT11,,9


In [5]:
# Rows for plant 57788 in `scp` (the crosswalk loaded from the PUDL table).
# NOTE(review): the saved output below lists columns in a different order
# than `cols`, so it was probably produced before `scp` was last redefined;
# re-run this cell to refresh it.
scp.loc[scp["plant_id_eia"].eq(57788)]

Unnamed: 0,plant_id_eia,plant_id_epa,subplant_id,unit_id_pudl,emissions_unit_id_epa,generator_id
27317,57788,57788,0,,TG1,TG1
27318,57788,57788,1,,TG2,TG2
