In [1]:
# import packages
import os
import sys

import pandas as pd

%reload_ext autoreload
%autoreload 2

# Tell python where to look for modules (must run before the oge imports below).
sys.path.append("../../src")

from oge.column_checks import get_dtypes
from oge.filepaths import *
import oge.data_cleaning as data_cleaning

# Year used to build the file names and function arguments in the loading cells.
year = 2020

# Loading intermediate outputs

In [None]:
# load data from csv
year = 2020
path_prefix = f"{year}/"


def load_intermediate_output(name, date_columns=()):
    """Read one intermediate output CSV from the outputs folder.

    The file read is ``<path_prefix><name>_<year>.csv``, using the current
    values of the notebook-level ``path_prefix`` and ``year``.

    Args:
        name: File name stem without the ``_<year>.csv`` suffix,
            e.g. ``"cems_subplant"``.
        date_columns: Names of columns to parse as dates. When empty, no
            ``parse_dates`` argument is passed to ``pd.read_csv``.

    Returns:
        The loaded ``pd.DataFrame`` with dtypes taken from ``get_dtypes()``.
    """
    read_options = {"dtype": get_dtypes()}
    if date_columns:
        read_options["parse_dates"] = list(date_columns)
    # path_prefix already ends with "/", so no extra separator is added here
    # (the previous f"{path_prefix}/..." produced a doubled "//").
    return pd.read_csv(
        outputs_folder(f"{path_prefix}{name}_{year}.csv"), **read_options
    )


# Tables that carry both a timestamp and a report date.
timestamped = ["datetime_utc", "report_date"]

cems = load_intermediate_output("cems_subplant", timestamped)
partial_cems_plant = load_intermediate_output("partial_cems_plant", timestamped)
partial_cems_subplant = load_intermediate_output("partial_cems_subplant", timestamped)
eia923_allocated = load_intermediate_output("eia923_allocated", ["report_date"])
plant_attributes = load_intermediate_output("plant_static_attributes")

In [None]:
# Build the tables by calling the data_cleaning functions directly.
# This rebinds `eia923_allocated` and `cems`, replacing any frames of the
# same name that are already in the kernel.
year = 2020
path_prefix = f"{year}/"

(eia923_allocated, primary_fuel_table) = data_cleaning.clean_eia923(
    year,
    False,
)
# clean_cems takes the primary fuel table produced by clean_eia923 above.
cems = data_cleaning.clean_cems(
    year,
    False,
    primary_fuel_table,
)

In [None]:
# Sum the CEMS rows of a single plant by unit and report_date.
plant_id = 673
plant_cems = cems.loc[cems["plant_id_eia"] == plant_id]

# numeric_only=True restricts the sum to numeric columns, so non-numeric ones
# (e.g. the datetime_utc column parsed as dates by the CSV loader) are neither
# summed nor able to raise.
plant_cems.groupby(["plant_id_eia", "unitid", "report_date"]).sum(numeric_only=True)

In [None]:
# Write the allocated EIA-923 rows of one plant to test.csv in the working directory.
is_target_plant = eia923_allocated["plant_id_eia"] == 50949
plant_rows = eia923_allocated.loc[is_target_plant]
plant_rows.to_csv("test.csv")

## Explore Results

In [None]:
# load all power sector results and concat together

resolution = "annual"

all_data = []
for ba in os.listdir(results_folder(f"2021/power_sector_data/{resolution}/us_units")):
    df = pd.read_csv(
        results_folder(f"2021/power_sector_data/{resolution}/us_units/{ba}")
    )
    df["ba_code"] = ba.split(".")[0]
    all_data.append(df)

all_data = pd.concat(all_data, axis=0)

all_data = all_data.groupby("fuel_category", dropna=False).sum()

In [None]:
# calculations for exploring aggregated data
all_data["co2_mass_lb_for_electricity"] / all_data["net_generation_mwh"]
all_data["nox_mass_lb_for_electricity"] / all_data["net_generation_mwh"]
all_data["so2_mass_lb_for_electricity"] / all_data["net_generation_mwh"]

for pol in ["co2", "nox", "so2"]:
    for fuel in ["biomass", "geothermal", "waste", "other"]:
        calc = (
            all_data.loc[fuel, f"{pol}_mass_lb_for_electricity"]
            / all_data.loc["total", f"{pol}_mass_lb_for_electricity"]
        )
        print(f"{pol} {fuel}: {calc}")

## Explore the difference between adjusted and unadjusted (for biomass) factors

In [None]:
all_data["pctdiff"] = (
    all_data.generated_co2_rate_lb_per_mwh_for_electricity_adjusted
    - all_data.generated_co2_rate_lb_per_mwh_for_electricity
) / all_data.generated_co2_rate_lb_per_mwh_for_electricity


all_data.loc[
    all_data["fuel_category"] == "total",
    [
        "ba_code",
        "pctdiff",
        "generated_co2_rate_lb_per_mwh_for_electricity_adjusted",
        "generated_co2_rate_lb_per_mwh_for_electricity",
    ],
].sort_values(by="pctdiff").head(25)