In [None]:
import os
import pandas as pd

In [None]:
import sys

# NOTE(milo): Need to add 'src' to the path to fix import errors.
sys.path.append("../../src")

import src.data_cleaning as data_cleaning
import src.load_data as load_data
import src.filepaths as filepaths

In [None]:
def load_so2_uncontrolled_efs(path_to_xlsx):
    """
    Read the "epa_a_01" sheet (uncontrolled SO2 emission factors) into a DataFrame.

    The four leading descriptive columns are read as strings and every
    remaining combustion-system column as float64. Cells containing "." are
    treated as missing and the last row of the sheet is skipped.

    Reference:
    https://catalystcoop-pudl.readthedocs.io/en/latest/data_dictionaries/codes_and_labels.html
    """
    text_columns = [
        "readable_fuel_name",
        "eia_fuel_code",
        "source_and_tables",
        "emissions_units",
    ]
    # One column per combustion system; each holds a numeric emission factor.
    factor_columns = [
        "cyclone_firing_boiler",
        "fluidized_bed_firing_boiler",
        "stoker_boiler",
        "tangential_firing_boiler",
        "all_other_boiler_types",
        "combustion_turbine",
        "internal_combustion_engine",
    ]

    column_dtypes = {name: "str" for name in text_columns}
    column_dtypes.update({name: "float64" for name in factor_columns})

    # NOTE(milo): Header starts on 2 for this one!
    read_options = {
        "sheet_name": "epa_a_01",
        "header": 2,
        "names": text_columns + factor_columns,
        "dtype": column_dtypes,
        "na_values": ".",
        "skipfooter": 1,
    }
    return pd.read_excel(io=path_to_xlsx, **read_options)

In [None]:
def load_nox_uncontrolled_efs(path_to_xlsx):
    """
    Read the "epa_a_02" sheet (uncontrolled NOx emission factors) into a DataFrame.

    The four leading descriptive columns are read as strings and every
    remaining combustion-system column as float64. Cells containing "." are
    treated as missing and the last row of the sheet is skipped.

    Reference:
    https://catalystcoop-pudl.readthedocs.io/en/latest/data_dictionaries/codes_and_labels.html
    """
    text_columns = [
        "readable_fuel_name",
        "eia_fuel_code",
        "source_and_tables",
        "emissions_units",
    ]
    # One column per combustion system; the tangential and "all other" boilers
    # are split into dry-bottom and wet-bottom variants on this sheet.
    factor_columns = [
        "cyclone_firing_boiler",
        "fluidized_bed_firing_boiler",
        "stoker_boiler",
        "tangential_firing_boiler_dry_bottom",
        "tangential_firing_boiler_wet_bottom",
        "all_other_boiler_types_dry_bottom",
        "all_other_boiler_types_wet_bottom",
        "combustion_turbine",
        "internal_combustion_engine",
    ]

    column_dtypes = {name: "str" for name in text_columns}
    column_dtypes.update({name: "float64" for name in factor_columns})

    # NOTE(milo): Header starts on 3 for this one!
    read_options = {
        "sheet_name": "epa_a_02",
        "header": 3,
        "names": text_columns + factor_columns,
        "dtype": column_dtypes,
        "na_values": ".",
        "skipfooter": 1,
    }
    return pd.read_excel(io=path_to_xlsx, **read_options)

In [None]:
# Both emission factor workbooks sit in the same folder, so resolve it once
# and join each workbook's filename onto it.
base_folder = filepaths.manual_folder("eia_electric_power_annual")
so2_uncontrolled_efs_path, nox_uncontrolled_efs_path = (
    os.path.join(base_folder, workbook_name)
    for workbook_name in (
        "epa_a_01_so2_uncontrolled_efs.xlsx",
        "epa_a_02_nox_uncontrolled_efs.xlsx",
    )
)

In [None]:
# Read the SO2 and NOx uncontrolled emission factor sheets, one DataFrame each.
df_so2_uncontrolled_efs = load_so2_uncontrolled_efs(so2_uncontrolled_efs_path)
df_nox_uncontrolled_efs = load_nox_uncontrolled_efs(nox_uncontrolled_efs_path)

In [None]:
def make_so2_emission_factor_rows(input_df):
    """
    Print the SO2 emission factors of the EIA excel layout, one line per combustion system.

    This is a work-in-progress step towards converting the EIA emission factors
    excel format into the one we store in emission_factors_for_so2.csv. It
    currently validates the units of every row and prints the factors; it does
    not build or return the converted rows yet.

    Target columns of emission_factors_for_so2.csv:
        prime_mover_code,
        energy_source_code,
        boiler_firing_type,
        emission_factor,
        emission_factor_numerator,
        emission_factor_denominator,
        multiply_by_sulfur_content

    Args:
        input_df: DataFrame with an "emissions_units" column. Every column from
            position 4 onwards is treated as an emission factor column.

    Returns:
        None. For each row, prints the cleaned units followed by one
        "<column name> : <emission factor>" line per emission factor column.

    Raises:
        ValueError: If a row's "emissions_units" value is not a string
            (e.g. a missing cell).
        KeyError: If a row's cleaned units are not one of the known EIA units.
    """
    # The first four columns are descriptive (fuel name, fuel code, source,
    # units); emission factor columns start after them.
    min_column_idx = 4

    map_eia_units_to_ours = {
        "Lbs per MG": {
            "emission_factor_numerator": "lb",
            "emission_factor_denominator": "thousand gallons",
        },
        # NOTE(review): "MMCF" usually denotes million cubic feet while "Mcf"
        # usually denotes thousand cubic feet -- confirm that the factor gets
        # rescaled before it is stored with this denominator.
        "Lbs per MMCF": {
            "emission_factor_numerator": "lb",
            "emission_factor_denominator": "Mcf",
        },
        "Lbs per ton": {
            "emission_factor_numerator": "lb",
            "emission_factor_denominator": "short ton",
        },
    }

    # NOTE: This mapping is not consumed yet; it is kept here so the row
    # conversion can be finished later. The "TODO" prime mover codes still
    # need to be filled in.
    map_eia_combustion_system_to_ours = {
        "cyclone_firing_boiler": {
            "prime_mover_code": "TODO",
            "boiler_bottom_type": "N/A",
            # Was "STOKER", which looks like a copy-paste from the stoker
            # entry below -- confirm against the values used in the csv.
            "boiler_firing_type": "CYCLONE",
            "multiply_by_sulfur_content": 0,
        },
        "fluidized_bed_firing_boiler": {
            "prime_mover_code": "TODO",
            "boiler_bottom_type": "N/A",
            "boiler_firing_type": "FLUIDIZED",
            "multiply_by_sulfur_content": 0,
        },
        "stoker_boiler": {
            "prime_mover_code": "ST",  # Steam
            "boiler_bottom_type": "N/A",
            "boiler_firing_type": "STOKER",
            "multiply_by_sulfur_content": 0,
        },
        "tangential_firing_boiler": {
            "prime_mover_code": "ST",  # Steam
            "boiler_bottom_type": "N/A",
            "boiler_firing_type": "TANGENTIAL",
            "multiply_by_sulfur_content": 0,
        },
        "all_other_boiler_types": {
            "prime_mover_code": "UNK",
            "boiler_firing_type": "N/A",
            "multiply_by_sulfur_content": 0,
        },
        "combustion_turbine": {
            "prime_mover_code": "GT",  # Gas combustion turbine.
            "boiler_firing_type": "N/A",
            "multiply_by_sulfur_content": 0,
        },
        "internal_combustion_engine": {
            "prime_mover_code": "IC",
            "boiler_firing_type": "N/A",
            "multiply_by_sulfur_content": 0,
        },
    }

    emission_factor_columns = input_df.columns[min_column_idx:]

    for row_label, row in input_df.iterrows():
        raw_units = row["emissions_units"]
        if not isinstance(raw_units, str):
            raise ValueError(
                f"Row {row_label!r} has no usable emissions units: {raw_units!r}"
            )

        # Remove footnote asterisks (with or without a leading space) and any
        # surrounding whitespace before looking the units up.
        units = raw_units.replace("*", "").strip()
        if units not in map_eia_units_to_ours:
            raise KeyError(
                f"Unrecognized emissions units {raw_units!r} in row {row_label!r}; "
                f"expected one of {sorted(map_eia_units_to_ours)}"
            )
        print(units)

        for colname in emission_factor_columns:
            emission_factor = row[colname]
            print(colname, ":", emission_factor)

In [None]:
# Print the units and every combustion-system emission factor for each SO2 row.
make_so2_emission_factor_rows(df_so2_uncontrolled_efs)