In [11]:
import os
import pandas as pd

In [9]:
import sys

# NOTE(milo): Need to add 'src' to the path to fix import errors.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path. The path is relative, so it resolves against the
# kernel's current working directory.
if "../../src" not in sys.path:
    sys.path.append("../../src")

import src.data_cleaning as data_cleaning
import src.load_data as load_data
import src.filepaths as filepaths

In [27]:
def load_so2_uncontrolled_efs(path_to_xlsx):
    """
    Read the "epa_a_01" sheet of an Excel workbook into a DataFrame.

    Judging by the function name, the sheet holds SO2 uncontrolled emission
    factors. The sheet's own column labels are replaced with fixed
    snake_case names: four descriptive text columns followed by one numeric
    column per boiler / prime-mover category.

    Reference kept from the original author (presumably for the EIA fuel
    codes):
    https://catalystcoop-pudl.readthedocs.io/en/latest/data_dictionaries/codes_and_labels.html

    Args:
        path_to_xlsx: Location of the workbook, passed straight to
            `pd.read_excel` as `io`.

    Returns:
        Whatever `pd.read_excel` returns for the sheet (a DataFrame), with
        the text columns read as 'str' and all other columns as 'float64'.
    """
    # Columns that describe the row; read as strings.
    text_columns = [
        'readable_fuel_name',
        'eia_fuel_code',
        'source_and_tables',
        'emissions_units',
    ]

    # Every remaining column is read as a float.
    numeric_columns = [
        'cyclone_firing_boiler',
        'fluidized_bed_firing_boiler',
        'stoker_boiler',
        'tangential_firing_boiler',
        'all_other_boiler_types',
        'combustion_turbine',
        'internal_combustion_engine',
    ]

    dtype_by_column = {name: 'str' for name in text_columns}
    dtype_by_column.update({name: 'float64' for name in numeric_columns})

    # NOTE(milo): Header starts on 2 for this one!
    # "." cells are read as missing values, and the final row of the sheet
    # is skipped.
    return pd.read_excel(
        io=path_to_xlsx,
        sheet_name="epa_a_01",
        header=2,
        names=text_columns + numeric_columns,
        dtype=dtype_by_column,
        na_values=".",
        skipfooter=1,
    )

In [28]:
def load_nox_uncontrolled_efs(path_to_xlsx):
    """
    Read the "epa_a_02" sheet of an Excel workbook into a DataFrame.

    Judging by the function name, the sheet holds NOx uncontrolled emission
    factors. The sheet's own column labels are replaced with fixed
    snake_case names. Unlike the "epa_a_01" loader, this column list has
    separate dry-bottom and wet-bottom columns for the tangential and
    "all other" boiler categories.

    Reference kept from the original author (presumably for the EIA fuel
    codes):
    https://catalystcoop-pudl.readthedocs.io/en/latest/data_dictionaries/codes_and_labels.html

    Args:
        path_to_xlsx: Location of the workbook, passed straight to
            `pd.read_excel` as `io`.

    Returns:
        Whatever `pd.read_excel` returns for the sheet (a DataFrame), with
        the four descriptive columns read as 'str' and all other columns as
        'float64'.
    """
    descriptive_columns = (
        'readable_fuel_name',
        'eia_fuel_code',
        'source_and_tables',
        'emissions_units',
    )
    factor_columns = (
        'cyclone_firing_boiler',
        'fluidized_bed_firing_boiler',
        'stoker_boiler',
        'tangential_firing_boiler_dry_bottom',
        'tangential_firing_boiler_wet_bottom',
        'all_other_boiler_types_dry_bottom',
        'all_other_boiler_types_wet_bottom',
        'combustion_turbine',
        'internal_combustion_engine',
    )

    # Descriptive columns are strings; every other column is a float.
    dtypes = {
        **dict.fromkeys(descriptive_columns, 'str'),
        **dict.fromkeys(factor_columns, 'float64'),
    }

    # NOTE(milo): Header starts on 3 for this one!
    # "." cells are read as missing values, and the final row of the sheet
    # is skipped.
    nox_efs = pd.read_excel(
        io=path_to_xlsx,
        sheet_name="epa_a_02",
        header=3,
        names=[*descriptive_columns, *factor_columns],
        dtype=dtypes,
        na_values=".",
        skipfooter=1,
    )
    return nox_efs

In [29]:
# Folder returned by the project's `filepaths.manual_folder` helper for the
# 'eia_electric_power_annual' dataset (presumably manually downloaded files;
# verify against `src/filepaths.py`).
base_folder = filepaths.manual_folder('eia_electric_power_annual')

# Build the full path of each emission-factor workbook inside that folder.
so2_uncontrolled_efs_path, nox_uncontrolled_efs_path = (
    os.path.join(base_folder, workbook_name)
    for workbook_name in (
        'epa_a_01_so2_uncontrolled_efs.xlsx',
        'epa_a_02_nox_uncontrolled_efs.xlsx',
    )
)

In [30]:
# Read both emission-factor sheets into DataFrames, using the loader
# functions and workbook paths defined in the cells above.
so2_uncontrolled_efs = load_so2_uncontrolled_efs(so2_uncontrolled_efs_path)
nox_uncontrolled_efs = load_nox_uncontrolled_efs(nox_uncontrolled_efs_path)