In [None]:
# import packages
import pandas as pd
import numpy as np
import os
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys
sys.path.append('../../../open-grid-emissions/src/')

import load_data
from column_checks import get_dtypes
from filepaths import *
import output_data
import emissions
import validation


In [None]:
# OGE version string; interpolated into the "OGE v..." emission_factor_source label below
oge_version_number = "0.2.0"

## Define Functions

In [None]:
def calculate_emissions_for_ba_year_fuel(
    region, ba_year_data, fuel_category_table, year
):
    """Aggregate hourly plant data to fuel category and add CO2e emissions.

    Args:
        region: Region label; only used in the missing-fuel-category warning.
        ba_year_data: Hourly plant-level dataframe. Must contain `plant_id_eia`,
            `datetime_utc`, `net_generation_mwh` and a
            `{pol}_mass_lb_for_electricity` column for ch4, n2o, nox and so2.
        fuel_category_table: Dataframe with `plant_id_eia` and `fuel_category`
            columns and at most one row per plant (enforced by `validate="m:1"`).
        year: Data year, passed through to `emissions.calculate_co2e_mass`.

    Returns:
        The dataframe returned by `emissions.calculate_co2e_mass` for the data
        summed by `fuel_category` and `datetime_utc`.
    """
    # assign a fuel category to each plant
    ba_year_data = ba_year_data.merge(
        fuel_category_table[["plant_id_eia", "fuel_category"]],
        how="left",
        on="plant_id_eia",
        validate="m:1",
    )

    # check that there are no missing fuel categories
    if ba_year_data["fuel_category"].isna().any():
        print(f"Warning: there are missing fuel categories in {region}")

    # replace negative generation values with zero
    ba_year_data.loc[ba_year_data["net_generation_mwh"] < 0, "net_generation_mwh"] = 0

    # aggregate by fuel category
    # numeric_only=True keeps the sum restricted to numeric columns; without it
    # pandas >= 2.0 tries to sum text/datetime columns as well
    ba_year_data = (
        ba_year_data.groupby(["fuel_category", "datetime_utc"], dropna=False)
        .sum(numeric_only=True)
        .reset_index()
    )

    # create columns for adjusted emissions for all pollutants
    # biomass adjustment does not affect these pollutants, only co2
    for pol in ["ch4", "n2o", "nox", "so2"]:
        ba_year_data[f"{pol}_mass_lb_for_electricity_adjusted"] = ba_year_data[
            f"{pol}_mass_lb_for_electricity"
        ]

    # calculate co2eq emissions
    ba_year_data = emissions.calculate_co2e_mass(
        ba_year_data,
        year,
        gwp_horizon=100,
        ar5_climate_carbon_feedback=True,
    )

    return ba_year_data


def calculate_month_hour_emission_factors(
    region, ba, ba_data, local_tz_to_use, datetime_to_use, ba_reference
):
    """Calculate year-month-hour emission rates by fuel category for one region.

    Args:
        region: Value written to the output `ba_code` column.
        ba: Balancing authority code used to look up the timezone in
            `ba_reference`.
        ba_data: Dataframe with `fuel_category`, `datetime_utc`,
            `net_generation_mwh` and `{pol}_mass_lb_for_electricity` /
            `{pol}_mass_lb_for_electricity_adjusted` columns for co2, co2e, nox
            and so2.
        local_tz_to_use: Name of the `ba_reference` column holding the timezone
            used to build `datetime_local`.
        datetime_to_use: Name of the datetime column (`datetime_utc` or
            `datetime_local`) from which year, month and hour are derived.
        ba_reference: Dataframe with a `ba_code` column and the
            `local_tz_to_use` column.

    Returns:
        Dataframe with `ba_code`, `fuel_category`, `year`, `month`, `hour` and
        eight `generated_{pol}_rate_lb_per_mwh_{type}` columns. Rates for
        zero-carbon fuel categories are set to 0; rates that would be infinite
        because generation is zero are returned as NaN.
    """
    # groupby to make sure we don't have duplicate timestamps
    ba_data = (
        ba_data.groupby(["fuel_category", "datetime_utc"], dropna=False)
        .sum(numeric_only=True)
        .reset_index()
    )

    # convert the datetime column to a tz-aware UTC datetime dtype
    # utc=True also localizes timestamps that carry no offset, so that
    # tz_convert below does not fail on tz-naive data
    ba_data["datetime_utc"] = pd.to_datetime(ba_data["datetime_utc"], utc=True)

    # load the local timezone to which each ba reports data to EIA-930
    ba_local_tz = ba_reference.loc[
        ba_reference["ba_code"] == ba, local_tz_to_use
    ].values[0]

    # create a local datetime column
    ba_data["datetime_local"] = ba_data["datetime_utc"].dt.tz_convert(ba_local_tz)

    # create columns for month and hour
    ba_data["year"] = ba_data[datetime_to_use].dt.year
    ba_data["month"] = ba_data[datetime_to_use].dt.month
    ba_data["hour"] = ba_data[datetime_to_use].dt.hour

    # groupby month-hour
    # numeric_only=True excludes the datetime columns, which cannot be summed
    ba_data = (
        ba_data.groupby(["fuel_category", "year", "month", "hour"])
        .sum(numeric_only=True)
        .reset_index()
    )

    # calculate emission factors
    factor_columns = []
    for pol_type in ["for_electricity", "for_electricity_adjusted"]:
        for pol in ["co2", "co2e", "nox", "so2"]:
            factor_column = f"generated_{pol}_rate_lb_per_mwh_{pol_type}"
            ba_data[factor_column] = (
                ba_data[f"{pol}_mass_lb_{pol_type}"] / ba_data["net_generation_mwh"]
            )
            factor_columns.append(factor_column)

    # add a column for ba code
    ba_data["ba_code"] = region

    # only keep relevant columns
    key_columns = [
        "ba_code",
        "fuel_category",
        "year",
        "month",
        "hour",
    ]
    ba_data = ba_data[key_columns + factor_columns].copy()

    # set all emissions for zero carbon fuels to zero
    zero_carbon_fuels = [
        "solar",
        "wind",
        "hydro",
        "nuclear",
        "variable_renewables",
        "batteries",
        "power_storage",
        "storage",
    ]
    ba_data.loc[ba_data["fuel_category"].isin(zero_carbon_fuels), factor_columns] = 0

    # replace inf values (emissions divided by zero generation) with na, matching
    # the eGRID factors, so they cannot distort averages taken over these rates
    ba_data[factor_columns] = ba_data[factor_columns].replace(
        [np.inf, -np.inf], np.nan
    )

    return ba_data


def load_plant_fuel_category_for_iso(region, year):
    """Return the ISO-specific fuel category of each plant in a single BA.

    Plant attributes are read from the `plant_static_attributes` output of
    `year`; if that file does not exist, the file for `year + 1` is read
    instead. The result has the columns `plant_id_eia` and `fuel_category`.
    """
    # the "ERCO-H" region uses the plants that belong to the ERCO balancing authority
    ba = "ERCO" if region == "ERCO-H" else region

    attribute_columns = ["plant_id_eia", "plant_primary_fuel", "ba_code"]

    def read_plant_attributes(data_year):
        # read the static plant attributes written for a single data year
        return pd.read_csv(
            outputs_folder(f"{data_year}/plant_static_attributes_{data_year}.csv"),
            dtype=get_dtypes(),
        )[attribute_columns]

    # load plant attribute data, falling back to the following year's file
    try:
        all_plants = read_plant_attributes(year)
    except FileNotFoundError:
        all_plants = read_plant_attributes(year + 1)
    plants_in_ba = all_plants[all_plants["ba_code"] == ba]

    # attach the multi-fuel / combined-cycle flags to each plant
    plants_in_ba = plants_in_ba.merge(
        load_special_category_flags(year),
        how="left",
        on="plant_id_eia",
        validate="1:1",
    )

    # attach the region-specific fuel category of each plant's primary fuel
    fuel_groups = pd.read_csv(
        manual_folder("energy_source_groups.csv"), dtype=get_dtypes()
    )
    fuel_groups = fuel_groups[["energy_source_code", f"fuel_category_{region}"]]
    plants_in_ba = plants_in_ba.merge(
        fuel_groups,
        how="left",
        left_on="plant_primary_fuel",
        right_on="energy_source_code",
        validate="m:1",
    )
    plants_in_ba = plants_in_ba.drop(columns="energy_source_code")

    # apply the region-specific special categories
    if region == "PJM":
        # multi-fuel plants get their own category in PJM
        is_multi_fuel = plants_in_ba["multiple_fuels"] == 1
        plants_in_ba.loc[is_multi_fuel, "fuel_category_PJM"] = "multi_fuel"
    elif region == "NYIS":
        # multi-fuel plants are labelled dual fuel in NYIS
        is_multi_fuel = plants_in_ba["multiple_fuels"] == 1
        plants_in_ba.loc[is_multi_fuel, "fuel_category_NYIS"] = "dual_fuel"
    elif region == "ERCO-H":
        # combined cycle gas plants get their own category
        is_gas_cc = (plants_in_ba["combined_cycle"] == 1) & (
            plants_in_ba["fuel_category_ERCO-H"] == "gas"
        )
        plants_in_ba.loc[is_gas_cc, "fuel_category_ERCO-H"] = "gas_cc"

    # expose the region-specific column under a common name
    plants_in_ba = plants_in_ba.rename(
        columns={f"fuel_category_{region}": "fuel_category"}
    )

    return plants_in_ba[["plant_id_eia", "fuel_category"]]


def load_special_category_flags(year):
    """Flag, per plant, whether any generator is multi-fuel or combined cycle.

    Returns a dataframe with one row per `plant_id_eia` and the 0/1 columns
    `multiple_fuels` and `combined_cycle`.
    """
    flag_columns = ["multiple_fuels", "combined_cycle"]

    generators = load_data.load_pudl_table("generators_eia860", year=year)[
        ["plant_id_eia", "generator_id", "multiple_fuels", "prime_mover_code"]
    ]

    # a generator counts as combined cycle if its prime mover is one of these codes
    combined_cycle_codes = ["CA", "CC", "CS", "CT"]
    generators["combined_cycle"] = (
        generators["prime_mover_code"].isin(combined_cycle_codes).astype("int64")
    )

    # a missing multiple fuel flag is treated as "does not burn multiple fuels"
    generators["multiple_fuels"] = generators["multiple_fuels"].fillna(0).astype(int)

    # count the flagged generators at each plant
    plant_flags = (
        generators.groupby(["plant_id_eia"], dropna=False)[flag_columns]
        .sum()
        .reset_index()
    )

    # collapse the counts back to a 0/1 flag
    for flag in flag_columns:
        plant_flags[flag] = plant_flags[flag].clip(upper=1)

    return plant_flags


def load_egrid_plant_data_for_year(year, ipcc_version, gwp_horizon):
    """Load the eGRID plant file for `year` and add `_for_electricity` emissions.

    Each pollutant mass is scaled by the CHP electric allocation factor (missing
    factors are treated as 1), and `co2e_mass_lb_for_electricity` is computed
    from the co2, ch4 and n2o values using the GWPs of the requested IPCC
    version and horizon.
    """
    egrid_plant = validation.load_egrid_plant_file(year)

    # a missing electric allocation factor is filled with 100%
    allocation_column = "chp_electric_allocation_factor"
    egrid_plant[allocation_column] = egrid_plant[allocation_column].fillna(1)

    # scale each pollutant by the share of emissions allocated to electricity
    for pol in ["co2", "ch4", "n2o", "nox", "so2"]:
        allocated_mass = egrid_plant[f"{pol}_mass_lb"] * egrid_plant[allocation_column]
        egrid_plant[f"{pol}_mass_lb_for_electricity"] = allocated_mass

    # egrid 2018-2020 uses the AR4 GWP
    # code adapted from `emissions.calculate_co2e_mass()`
    gwp_table = load_data.load_ipcc_gwp()
    gwp_table = gwp_table[gwp_table.ipcc_version == ipcc_version]

    def lookup_gwp(gas):
        # .item() requires exactly one matching row for this gas and horizon
        matches = (gwp_table.gwp_horizon == gwp_horizon) & (gwp_table.gas == gas)
        return gwp_table.loc[matches, "gwp"].item()

    ch4_gwp = lookup_gwp("ch4")
    n2o_gwp = lookup_gwp("n2o")

    # missing ch4 / n2o values contribute nothing to the co2e total
    ch4_co2e = ch4_gwp * egrid_plant["ch4_mass_lb_for_electricity"].fillna(0)
    n2o_co2e = n2o_gwp * egrid_plant["n2o_mass_lb_for_electricity"].fillna(0)
    egrid_plant["co2e_mass_lb_for_electricity"] = (
        egrid_plant["co2_mass_lb_for_electricity"] + ch4_co2e + n2o_co2e
    )

    return egrid_plant


def calculate_egrid_emission_factors(egrid_plant, year):
    """Aggregate eGRID plant data to BA and fuel category and compute rates.

    Args:
        egrid_plant: Plant-level dataframe with `ba_code`, `fuel_category`,
            `net_generation_mwh` and the `*_mass_lb_for_electricity` /
            `*_mass_lb_for_electricity_adjusted` columns listed in
            `data_columns` below.
        year: eGRID data year; used to build the `emission_factor_source` label
            (`eGRID{year}`, or `eGRID2018v2` for 2018).

    Returns:
        Dataframe with `emission_factor_source`, `ba_code`, `fuel_category` and
        eight `generated_{pol}_rate_lb_per_mwh_{type}` columns. Rows with a
        missing BA or fuel category are dropped, zero-carbon fuel categories
        get a rate of 0, and infinite rates are replaced with NaN.
    """
    data_columns = [
        "net_generation_mwh",
        "co2_mass_lb_for_electricity",
        "ch4_mass_lb_for_electricity",
        "n2o_mass_lb_for_electricity",
        "co2e_mass_lb_for_electricity",
        "nox_mass_lb_for_electricity",
        "so2_mass_lb_for_electricity",
        "co2_mass_lb_for_electricity_adjusted",
        "co2e_mass_lb_for_electricity_adjusted",
        "nox_mass_lb_for_electricity_adjusted",
        "so2_mass_lb_for_electricity_adjusted",
    ]

    # aggregate to region and fuel category
    # explicitly drop na ba and fuel values
    egrid_plant = (
        egrid_plant.groupby(["ba_code", "fuel_category"], dropna=True)[data_columns]
        .sum()
        .reset_index()
    )

    # if there are any negative generation values, replace with zero
    egrid_plant.loc[egrid_plant["net_generation_mwh"] < 0, "net_generation_mwh"] = 0

    # calculate emission factors
    factor_columns = []
    for pol_type in ["for_electricity", "for_electricity_adjusted"]:
        for pol in ["co2", "co2e", "nox", "so2"]:
            factor_column = f"generated_{pol}_rate_lb_per_mwh_{pol_type}"
            egrid_plant[factor_column] = (
                egrid_plant[f"{pol}_mass_lb_{pol_type}"]
                / egrid_plant["net_generation_mwh"]
            )
            factor_columns.append(factor_column)

    if year == 2018:
        egrid_plant["emission_factor_source"] = f"eGRID{year}v2"
    else:
        egrid_plant["emission_factor_source"] = f"eGRID{year}"

    egrid_plant = egrid_plant[
        [
            "emission_factor_source",
            "ba_code",
            "fuel_category",
        ]
        + factor_columns
    ].copy()

    # set all emissions for zero carbon fuels to zero
    zero_carbon_fuels = [
        "solar",
        "wind",
        "hydro",
        "nuclear",
        "variable_renewables",
        "batteries",
        "power_storage",
        "storage",
    ]
    egrid_plant.loc[
        egrid_plant["fuel_category"].isin(zero_carbon_fuels), factor_columns
    ] = 0

    # replace inf values with na
    # (np.nan rather than np.NaN: the capitalized alias was removed in NumPy 2.0)
    egrid_plant = egrid_plant.replace([np.inf, -np.inf], np.nan)

    return egrid_plant


def fill_missing_egrid_region_fuels(fuel_categories, egrid_factors, year):
    """Add a row for every BA / fuel category combination missing from the factors.

    Args:
        fuel_categories: Dataframe whose `fuel_category` column lists every
            fuel category that should be represented.
        egrid_factors: Dataframe of emission factors with the key columns
            `emission_factor_source`, `ba_code` and `fuel_category`.
        year: eGRID data year; determines the `emission_factor_source` label of
            the added rows (`eGRID{year}`, or `eGRID2018v2` for 2018).

    Returns:
        `egrid_factors` outer-merged with all expected BA / fuel combinations,
        so combinations that were absent appear with missing factor values. If
        there are no BAs or no fuel categories, `egrid_factors` is returned
        unchanged.
    """
    key_columns = ["emission_factor_source", "ba_code", "fuel_category"]

    # make sure that all fuel-regions are represented
    expected_fuel_categories = list(fuel_categories.fuel_category.unique())

    if year == 2018:
        ef_source = f"eGRID{year}v2"
    else:
        ef_source = f"eGRID{year}"

    # identify a list of all BAs that exist in that year
    all_egrid_bas = list(egrid_factors["ba_code"].unique())

    # create a dataframe with all expected region-fuels in a single construction
    complete_categories = pd.DataFrame(
        [
            (ef_source, region, fuel)
            for region in all_egrid_bas
            for fuel in expected_fuel_categories
        ],
        columns=key_columns,
    )

    # nothing to fill in (e.g. no data for this BA in this year)
    if complete_categories.empty:
        return egrid_factors

    egrid_factors = egrid_factors.merge(
        complete_categories,
        how="outer",
        on=key_columns,
    )

    return egrid_factors


def add_national_average_egrid_values(egrid_factors):
    """Append "national" rows holding the mean factor of each source and fuel.

    Rows containing any missing value are excluded from the average. The mean
    is an unweighted mean over the remaining rows of each
    `emission_factor_source` / `fuel_category` group, and the new rows are
    labelled with `ba_code == "national"`.
    """
    # calculate national average values for each year
    # numeric_only=True keeps the text ba_code column out of the mean, which
    # would otherwise raise a TypeError in pandas >= 2.0
    national_factors = (
        egrid_factors.dropna()
        .groupby(["emission_factor_source", "fuel_category"], dropna=False)
        .mean(numeric_only=True)
        .reset_index()
    )
    national_factors["ba_code"] = "national"

    # add the national factors
    egrid_factors = pd.concat([egrid_factors, national_factors], axis=0)

    return egrid_factors


def add_national_average_oge_values(oge_factors):
    """Append "national" rows holding the mean factor of each fuel and month-hour.

    Rows containing any missing value are excluded from the average. The mean
    is an unweighted mean over the remaining rows of each
    `emission_factor_source` / `fuel_category` / `year` / `month` / `hour`
    group, and the new rows are labelled with `ba_code == "national"`.
    """
    # calculate national average values for each year
    # numeric_only=True keeps the text ba_code column out of the mean, which
    # would otherwise raise a TypeError in pandas >= 2.0
    national_factors = (
        oge_factors.dropna()
        .groupby(
            ["emission_factor_source", "fuel_category", "year", "month", "hour"],
            dropna=False,
        )
        .mean(numeric_only=True)
        .reset_index()
    )
    national_factors["ba_code"] = "national"

    # add the national factors
    oge_factors = pd.concat([oge_factors, national_factors], axis=0)

    return oge_factors


def format_df_for_output(df, fuel_mix_source, id_columns):
    """Reshape a wide emission factor table into the long output format.

    Args:
        df: Wide dataframe with a `ba_code` column, the remaining identifier
            columns, and one `generated_{pollutant}_rate_lb_per_mwh_{type}`
            column per emission factor. The dataframe is not modified.
        fuel_mix_source: Value written to the `fuel_mix_source` column.
        id_columns: Identifier columns to keep as-is. `ba_code` must be listed
            under its output name `region`; every column not listed here is
            melted into a value row.

    Returns:
        Long dataframe with `id_columns` followed by `pollutant`,
        `emission_factor_adjustment` and `emission_factor_lb_per_mwh`.
    """
    id_columns = list(id_columns)

    # work on a new frame (assign / rename / melt all return copies) so that the
    # caller's dataframe does not gain a fuel_mix_source column as a side effect
    long_df = (
        df.assign(fuel_mix_source=fuel_mix_source)
        # change "ba_code" to region
        .rename(columns={"ba_code": "region"})
        # change to long format
        .melt(
            id_vars=id_columns,
            var_name="column_name",
            value_name="emission_factor_lb_per_mwh",
        )
    )

    # the pollutant is the second "_"-separated token, e.g. generated_co2_rate_...
    long_df["pollutant"] = long_df["column_name"].str.split("_", expand=True)[1]
    long_df["emission_factor_adjustment"] = np.where(
        long_df["column_name"].str.contains("_adjusted"),
        "for_electricity_adjusted",
        "for_electricity",
    )

    # re order columns
    return long_df[
        id_columns
        + ["pollutant", "emission_factor_adjustment", "emission_factor_lb_per_mwh"]
    ]


# Calculate eGRID Emission Factors

### Calculate factors for EIA regions

In [None]:
# eGRID data years to process; also reused by the ISO-region cell further down
egrid_years = [2018, 2019, 2020]

# load energy source group mapping and merge with egrid
# only the EIA-930 fuel category column is kept, renamed to the generic "fuel_category"
fuel_categories = pd.read_csv(
    manual_folder("energy_source_groups.csv"), dtype=get_dtypes()
)[["energy_source_code", "fuel_category_eia930"]]
fuel_categories = fuel_categories.rename(
    columns={"fuel_category_eia930": "fuel_category"}
)

# load the egrid data and calculate all relevant emissions
# one dataframe of factors per year is collected here and concatenated below
egrid_factors = []
for year in egrid_years:
    # load the egrid data
    egrid_plant = load_egrid_plant_data_for_year(
        year, ipcc_version="AR4", gwp_horizon=100
    )

    # merge fuel categories into egrid data
    # validate="m:1" requires each energy_source_code to appear once in fuel_categories
    egrid_plant = egrid_plant.merge(
        fuel_categories,
        how="left",
        left_on="plant_primary_fuel",
        right_on="energy_source_code",
        validate="m:1",
    )

    # aggregate the data to regions and calculate emission factors
    egrid_plant = calculate_egrid_emission_factors(egrid_plant, year)

    # add rows for BA / fuel category combinations that have no eGRID data
    egrid_plant = fill_missing_egrid_region_fuels(fuel_categories, egrid_plant, year)

    egrid_plant["year"] = year

    egrid_factors.append(egrid_plant)

# concat all of the years together
egrid_factors = pd.concat(egrid_factors, axis=0)

# append rows with ba_code "national" holding the mean factor of each source and fuel
egrid_factors = add_national_average_egrid_values(egrid_factors)

# reshape to long format; from here on egrid_factors has one row per pollutant and adjustment
egrid_factors = format_df_for_output(egrid_factors, fuel_mix_source="EIA", id_columns=["fuel_mix_source", "emission_factor_source", "region", "fuel_category", "year"])

egrid_factors


In [None]:
# export the eGRID factors for the EIA regions as csv, without the dataframe index
egrid_factors.to_csv(
    outputs_folder("egrid_emission_factors_for_eia_regions.csv"), index=False
)

In [None]:
# graph the data to spot check
# egrid_factors is in long format at this point (one row per pollutant and
# adjustment), so select the unadjusted co2 rate before plotting
egrid_co2_to_plot = egrid_factors[
    (egrid_factors["pollutant"] == "co2")
    & (egrid_factors["emission_factor_adjustment"] == "for_electricity")
]
px.box(
    egrid_co2_to_plot,
    x="fuel_category",
    y="emission_factor_lb_per_mwh",
    color="emission_factor_source",
    hover_data=["region"],
)

### Calculate eGRID Factors for ISO regions

In [None]:
iso_list = ["BPAT", "CISO", "ERCO", "ERCO-H", "ISNE", "MISO", "NYIS", "PJM", "SWPP"]

# create a map of eia plant ids to egrid plant ids
egrid_crosswalk = pd.read_csv(
    manual_folder("eGRID2020_crosswalk_of_EIA_ID_to_EPA_ID.csv")
)
eia_to_egrid_id = dict(
    zip(
        list(egrid_crosswalk["plant_id_eia"]),
        list(egrid_crosswalk["plant_id_egrid"]),
    )
)

# load the egrid data once per year instead of once per region-year; the frames
# are only filtered and merged below (both return new frames), never modified
egrid_plant_by_year = {
    egrid_year: load_egrid_plant_data_for_year(
        egrid_year, ipcc_version="AR4", gwp_horizon=100
    )
    for egrid_year in egrid_years
}

iso_egrid_factors = []
for region in iso_list:
    print(region)
    # the "ERCO-H" region uses the data of the ERCO balancing authority
    if region == "ERCO-H":
        ba = "ERCO"
    else:
        ba = region
    # load and concat multiple years of data
    ba_year_data = []
    for year in egrid_years:

        # load iso-specific fuel categories and map ids
        fuel_category_table = load_plant_fuel_category_for_iso(region, year)
        # start from the eia id and overwrite it wherever the crosswalk has an
        # egrid id; the update is applied to a standalone series and assigned
        # back, because updating `df["col"]` in place is a chained assignment
        # that does not reach the dataframe under pandas copy-on-write
        plant_id_egrid = fuel_category_table["plant_id_eia"].copy()
        plant_id_egrid.update(plant_id_egrid.map(eia_to_egrid_id))
        fuel_category_table["plant_id_egrid"] = plant_id_egrid
        # keep one row per egrid plant id so that the 1:1 merge below is valid
        fuel_category_table = fuel_category_table.drop_duplicates(
            subset=["plant_id_egrid"]
        )

        # filter the egrid data to the ba
        egrid_plant = egrid_plant_by_year[year]
        egrid_plant = egrid_plant[egrid_plant["ba_code"] == ba]

        # merge fuel categories into egrid data
        egrid_plant = egrid_plant.merge(
            fuel_category_table,
            how="left",
            on="plant_id_egrid",
            validate="1:1",
        )

        # aggregate the data to regions and calculate emission factors
        egrid_plant = calculate_egrid_emission_factors(egrid_plant, year)

        egrid_plant = fill_missing_egrid_region_fuels(
            fuel_category_table, egrid_plant, year
        )

        # add a year column
        egrid_plant["year"] = year

        # replace the ba code with the region name
        egrid_plant["ba_code"] = egrid_plant["ba_code"].replace(ba, region)

        ba_year_data.append(egrid_plant)

    # concat all of the years together
    ba_year_data = pd.concat(ba_year_data, axis=0)

    # ba_year_data = add_national_average_egrid_values(ba_year_data)

    # append the data for each region-year to the larger dataframe
    iso_egrid_factors.append(ba_year_data)


iso_egrid_factors = pd.concat(iso_egrid_factors, axis=0)

iso_egrid_factors = format_df_for_output(
    iso_egrid_factors,
    fuel_mix_source="ISO",
    id_columns=[
        "fuel_mix_source",
        "emission_factor_source",
        "region",
        "fuel_category",
        "year",
    ],
)

iso_egrid_factors


In [None]:
# stack the EIA-region and ISO-region eGRID factors, then normalize the year
# dtype, round the factors to three decimals and sort the rows for output
egrid_factors_combined = (
    pd.concat([egrid_factors, iso_egrid_factors], axis=0)
    .assign(
        year=lambda factors: factors["year"].astype(int),
        emission_factor_lb_per_mwh=lambda factors: factors[
            "emission_factor_lb_per_mwh"
        ].round(3),
    )
    .sort_values(
        by=[
            "fuel_mix_source",
            "year",
            "region",
            "fuel_category",
            "pollutant",
            "emission_factor_adjustment",
            "emission_factor_source",
        ]
    )
)

egrid_factors_combined

In [None]:
# export the combined EIA-region and ISO-region eGRID factors as csv, without the dataframe index
egrid_factors_combined.to_csv(
    outputs_folder("egrid_emission_factors.csv"), index=False
)

# Calculate Time-varying emission factors from OGE

### Create Emission factors for EIA-930 regions

In [None]:
years_to_load = [2019, 2020, 2021]

# get a list of all balancing areas for which there is data in EIA-930
ba_reference = load_data.load_ba_reference()
bas_in_eia930 = ba_reference[
    (~ba_reference["timezone_reporting_eia930"].isna())
    & (ba_reference["us_ba"] == "Yes")
    & (~(ba_reference["ba_category"] == "miscellaneous"))
    & (~(ba_reference["retirement_date"].dt.year < min(years_to_load)))
]
ba_list = list(bas_in_eia930.ba_code.unique())


local_tz_to_use = "timezone_local"
datetime_to_use = "datetime_utc"

# the plant fuel category table only depends on the year, so read it once per
# year instead of once for every BA-year
fuel_category_tables = {
    data_year: pd.read_csv(
        outputs_folder(f"{data_year}/plant_static_attributes_{data_year}.csv"),
        dtype=get_dtypes(),
    )[["plant_id_eia", "fuel_category_eia930"]].rename(
        columns={"fuel_category_eia930": "fuel_category"}
    )
    for data_year in years_to_load
}

oge_factors_eia = []
for ba in ba_list:
    print(ba)
    # load and concat multiple years of data
    ba_data = []
    for year in years_to_load:
        fuel_category_table = fuel_category_tables[year]

        # start by loading data for a single BA
        try:
            ba_year_data = pd.read_csv(
                results_folder(f"{year}/plant_data/hourly/us_units/{ba}.csv")
            )
        except FileNotFoundError:
            # there is no hourly plant data file for this BA in this year
            continue

        # make sure the data frame is not empty
        if len(ba_year_data) > 0:
            ba_data.append(
                calculate_emissions_for_ba_year_fuel(
                    ba, ba_year_data, fuel_category_table, year
                )
            )

    # let us know if there was not any data to concatenate
    # checking the list explicitly (instead of catching the ValueError raised
    # by pd.concat) keeps unrelated errors in the calculation below visible
    if not ba_data:
        print(f"No data available for {ba}")
        continue

    # concat data for all years together
    ba_data = pd.concat(ba_data, axis=0)

    region = ba

    ba_data = calculate_month_hour_emission_factors(
        region, ba, ba_data, local_tz_to_use, datetime_to_use, ba_reference
    )

    oge_factors_eia.append(ba_data)

oge_factors_eia = pd.concat(oge_factors_eia, axis=0)

# only keep data that are in the data years we want
oge_factors_eia = oge_factors_eia[oge_factors_eia["year"].isin(years_to_load)]

oge_factors_eia["emission_factor_source"] = f"OGE v{oge_version_number}"

oge_factors_eia = add_national_average_oge_values(oge_factors_eia)

oge_factors_eia = format_df_for_output(
    oge_factors_eia,
    fuel_mix_source="EIA",
    id_columns=[
        "fuel_mix_source",
        "emission_factor_source",
        "region",
        "fuel_category",
        "year",
        "month",
        "hour",
    ],
)


### Create Emission Factors for ISO Regions

In [None]:
years_to_load = [2019, 2020, 2021]
iso_list = ["BPAT", "CISO", "ERCO", "ERCO-H", "ISNE", "MISO", "NYIS", "PJM", "SWPP"]

ba_reference = load_data.load_ba_reference()

local_tz_to_use = "timezone_local"
datetime_to_use = "datetime_utc"

oge_factors_iso = []
for region in iso_list:
    print(region)

    # the "ERCO-H" region uses the data of the ERCO balancing authority
    if region == "ERCO-H":
        ba = "ERCO"
    else:
        ba = region

    # load and concat multiple years of data
    ba_data = []
    for year in years_to_load:

        fuel_category_table = load_plant_fuel_category_for_iso(region, year)

        # start by loading data for a single BA
        try:
            ba_year_data = pd.read_csv(
                results_folder(f"{year}/plant_data/hourly/us_units/{ba}.csv")
            )
        except FileNotFoundError:
            # there is no hourly plant data file for this BA in this year
            continue

        # make sure the data frame is not empty
        if len(ba_year_data) > 0:
            ba_data.append(
                calculate_emissions_for_ba_year_fuel(
                    region, ba_year_data, fuel_category_table, year
                )
            )

    # let us know if there was not any data to concatenate
    # checking the list explicitly (instead of catching the ValueError raised
    # by pd.concat) keeps unrelated errors in the calculation below visible
    if not ba_data:
        print(f"No data available for {ba}")
        continue

    # concat data for all years together
    ba_data = pd.concat(ba_data, axis=0)

    ba_data = calculate_month_hour_emission_factors(
        region, ba, ba_data, local_tz_to_use, datetime_to_use, ba_reference
    )

    oge_factors_iso.append(ba_data)

oge_factors_iso = pd.concat(oge_factors_iso, axis=0)

# only keep data that are in the data years we want
oge_factors_iso = oge_factors_iso[oge_factors_iso["year"].isin(years_to_load)]

oge_factors_iso["emission_factor_source"] = f"OGE v{oge_version_number}"

oge_factors_iso = format_df_for_output(
    oge_factors_iso,
    fuel_mix_source="ISO",
    id_columns=[
        "fuel_mix_source",
        "emission_factor_source",
        "region",
        "fuel_category",
        "year",
        "month",
        "hour",
    ],
)


In [None]:
# stack the EIA-region and ISO-region OGE factors, then normalize the time
# column dtypes, round the factors to three decimals and sort the rows for output
oge_factors_combined = (
    pd.concat([oge_factors_eia, oge_factors_iso], axis=0)
    .assign(
        year=lambda factors: factors["year"].astype(int),
        month=lambda factors: factors["month"].astype(int),
        hour=lambda factors: factors["hour"].astype(int),
        emission_factor_lb_per_mwh=lambda factors: factors[
            "emission_factor_lb_per_mwh"
        ].round(3),
    )
    .sort_values(
        by=[
            "fuel_mix_source",
            "year",
            "region",
            "fuel_category",
            "pollutant",
            "emission_factor_adjustment",
            "emission_factor_source",
            "month",
            "hour",
        ]
    )
)

oge_factors_combined

In [None]:
# export the combined EIA-region and ISO-region OGE factors as csv, without the dataframe index
oge_factors_combined.to_csv(
    outputs_folder("oge_emission_factors.csv"), index=False
)

## Plot Data

In [None]:
# compare month-hour averages across years
region = "CISO"
fuel_source = "EIA"
pollutant = "co2"
adjustment = "for_electricity"

# fuel categories whose factors are fixed at zero; left out of the plots
zero_carbon_fuels = [
        "solar",
        "wind",
        "hydro",
        "nuclear",
        "variable_renewables",
        "batteries",
        "power_storage",
        "storage"
    ]

# .copy() makes the selection an independent frame, so the label column added
# below is not an assignment on a slice of oge_factors_combined
data_to_plot = oge_factors_combined[
    (oge_factors_combined["region"] == region)
    & (oge_factors_combined["fuel_mix_source"] == fuel_source)
    & (oge_factors_combined["pollutant"] == pollutant)
    & (oge_factors_combined["emission_factor_adjustment"] == adjustment)
    & (~oge_factors_combined["fuel_category"].isin(zero_carbon_fuels))
].copy()

# x-axis label of the form "<month> <hour>:00"
data_to_plot["month-hour"] = (
    data_to_plot["month"].astype(str) + " " + data_to_plot["hour"].astype(str) + ":00"
)

px.line(
    data_to_plot,
    x="month-hour",
    y="emission_factor_lb_per_mwh",
    color="year",
    facet_col="fuel_category",
    facet_col_wrap=1,
    height=800,
).update_yaxes(matches=None)


In [None]:
# compare ISO and EIA categories
region = "SWPP"
pollutant = "co2"
adjustment = "for_electricity"

# .copy() makes the selection an independent frame, so the label column added
# below is not an assignment on a slice of oge_factors_combined
data_to_plot = oge_factors_combined[
    (oge_factors_combined["region"] == region)
    & (oge_factors_combined["pollutant"] == pollutant)
    & (oge_factors_combined["emission_factor_adjustment"] == adjustment)
    & (~oge_factors_combined["fuel_category"].isin(zero_carbon_fuels))
].copy()

# x-axis label of the form "<year>-<month> <hour>:00"
data_to_plot["date"] = (
    data_to_plot["year"].astype(str)
    + "-"
    + data_to_plot["month"].astype(str)
    + " "
    + data_to_plot["hour"].astype(str)
    + ":00"
)

px.line(
    data_to_plot,
    x="date",
    y="emission_factor_lb_per_mwh",
    color="fuel_mix_source",
    facet_col="fuel_category",
    facet_col_wrap=1,
    height=800,
).update_yaxes(matches=None)
