In [1]:
# import packages
import pandas as pd
import numpy as np
import os
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# Tell Python where to look for modules.
import sys

sys.path.append("../../src/")

import oge.download_data as download_data
import oge.load_data as load_data
from oge.column_checks import get_dtypes, apply_dtypes, DATA_COLUMNS
from oge.filepaths import *
import oge.impute_hourly_profiles as impute_hourly_profiles
import oge.data_cleaning as data_cleaning
import oge.output_data as output_data
import oge.emissions as emissions
import oge.helpers as helpers
import oge.validation as validation
import oge.gross_to_net_generation as gross_to_net_generation
import oge.eia930 as eia930
from oge.logging_util import get_logger, configure_root_logger
import oge.constants as constants

configure_root_logger()
logger = get_logger("test")


year = 2023
path_prefix = f"{year}/"



In [3]:
gens = load_data.load_pudl_table(
    "core_eia860__scd_generators",
    year=2005,
    end_year=2023,
    columns=[
        "plant_id_eia",
        "generator_id",
        "report_date",
        "capacity_mw",
        "energy_source_code_1",
        "operational_status",
        "operational_status_code",
        "planned_generator_retirement_date",
    ],
)

In [4]:
gens[gens["plant_id_eia"] == 55303]

Unnamed: 0,plant_id_eia,generator_id,report_date,capacity_mw,energy_source_code_1,operational_status,operational_status_code,planned_generator_retirement_date
424153,55303,A004,2007-01-01,93.3,NG,existing,SB,NaT
424154,55303,A003,2007-01-01,93.3,NG,existing,SB,NaT
424155,55303,A002,2007-01-01,93.3,NG,existing,SB,NaT
424156,55303,A001,2007-01-01,93.3,NG,existing,SB,NaT
444570,55303,A004,2006-01-01,93.3,NG,existing,SB,NaT
444571,55303,A003,2006-01-01,93.3,NG,existing,SB,NaT
444572,55303,A002,2006-01-01,93.3,NG,existing,SB,NaT
444573,55303,A001,2006-01-01,93.3,NG,existing,SB,NaT
464473,55303,A004,2005-01-01,93.3,NG,existing,SB,NaT
464474,55303,A003,2005-01-01,93.3,NG,existing,SB,NaT


In [5]:
# load intermediate output data as needed
cems = pd.read_csv(
    outputs_folder(f"{year}/cems_subplant_{year}.csv.zip"),
    compression="zip",
    parse_dates=["datetime_utc", "report_date"],
)
partial_cems_subplant = pd.read_csv(
    outputs_folder(f"{year}/partial_cems_subplant_{year}.csv.zip"),
    compression="zip",
    parse_dates=["datetime_utc", "report_date"],
)
partial_cems_plant = pd.read_csv(
    outputs_folder(f"{year}/partial_cems_plant_{year}.csv.zip"),
    compression="zip",
    parse_dates=["datetime_utc", "report_date"],
)
eia923_allocated = pd.read_csv(
    outputs_folder(f"{year}/eia923_allocated_{year}.csv.zip"),
    compression="zip",
    parse_dates=["report_date"],
)
plant_attributes = pd.read_csv(
    outputs_folder(f"{year}/plant_static_attributes_{year}.csv.zip"), compression="zip"
)
primary_fuel_table = pd.read_csv(
    outputs_folder(f"{year}/primary_fuel_table_{year}.csv.zip"), compression="zip"
)
monthly_eia_data_to_shape = eia923_allocated[
    (eia923_allocated["hourly_data_source"] == "eia")
]