In [2]:
# import packages
import pandas as pd
import numpy as np
import os
# import plotly.express as px

%reload_ext autoreload
%autoreload 2

# Tell Python where to look for the project modules.
import sys

sys.path.append("../../src")

import oge.load_data as load_data
from oge.column_checks import get_dtypes
from oge.filepaths import *

In [6]:
# Load the three PUDL tables used to build the plant -> balancing authority frame:
# the yearly plant records (requested with year=2005, end_year=2022), the utility
# entity table (for utility names) and the plant entity table (for plant state).
plant_ba = load_data.load_pudl_table(
    "core_eia860__scd_plants",
    year=2005,
    end_year=2022,
    columns=[
        "plant_id_eia",
        "balancing_authority_code_eia",
        "balancing_authority_name_eia",
        "utility_id_eia",
        "transmission_distribution_owner_name",
    ],
)
utilities_eia = load_data.load_pudl_table(
    "core_eia__entity_utilities",
    columns=["utility_id_eia", "utility_name_eia"],
)
plant_states = load_data.load_pudl_table(
    "core_eia__entity_plants",
    columns=["plant_id_eia", "state"],
)

# Attach the utility name, then the plant state. Both are left joins so every plant
# record is kept; validate="m:1" makes pandas raise if a key is duplicated on the
# right-hand (entity) side.
plant_ba = plant_ba.merge(
    utilities_eia, on="utility_id_eia", how="left", validate="m:1"
).merge(plant_states, on="plant_id_eia", how="left", validate="m:1")

# Convert the balancing authority code column from string to object dtype so that
# missing values can be filled, and normalise every missing value to NaN.
# Note: `np.nan` is used instead of the `np.NaN` alias, which was removed in
# NumPy 2.0 and raises AttributeError there.
plant_ba["balancing_authority_code_eia"] = (
    plant_ba["balancing_authority_code_eia"].astype(object).fillna(value=np.nan)
)

# Lookup from balancing authority name to balancing authority code, built from
# the "ba_name" and "ba_code" columns of the BA reference table.
ba_name_to_ba_code = pd.read_csv(reference_table_folder("ba_reference.csv")).pipe(
    lambda ba_ref: dict(zip(ba_ref["ba_name"], ba_ref["ba_code"]))
)

# Lookup that assigns a BA code to certain utilities, keyed by the "name" column
# of the utility-name reference table.
utility_as_ba_code = pd.read_csv(
    reference_table_folder("utility_name_ba_code_map.csv")
).pipe(lambda utility_ref: dict(zip(utility_ref["name"], utility_ref["ba_code"])))

# Fill missing BA codes in priority order. Each fillna only touches rows that are
# still missing after the previous step:
#   1. BA name looked up in the BA reference table
#   2. BA name looked up in the utility-name map
#   3. utility name looked up in the utility-name map
#   4. transmission/distribution owner name looked up in the utility-name map
plant_ba["balancing_authority_code_eia"] = (
    plant_ba["balancing_authority_code_eia"]
    .fillna(plant_ba["balancing_authority_name_eia"].map(ba_name_to_ba_code))
    .fillna(plant_ba["balancing_authority_name_eia"].map(utility_as_ba_code))
    .fillna(plant_ba["utility_name_eia"].map(utility_as_ba_code))
    .fillna(plant_ba["transmission_distribution_owner_name"].map(utility_as_ba_code))
)

In [10]:
# Rows that still have no balancing authority code.
missing_ba_code = plant_ba["balancing_authority_code_eia"].isna()

# Sorted, de-duplicated transmission/distribution owner names and utility names
# that appear on those rows (missing names are dropped).
name_list = sorted(
    pd.concat(
        [
            plant_ba.loc[missing_ba_code, "transmission_distribution_owner_name"],
            plant_ba.loc[missing_ba_code, "utility_name_eia"],
        ]
    )
    .dropna()
    .unique()
)

# Dump the names one per line for manual review. The encoding is pinned to UTF-8
# so the file content does not depend on the platform's default encoding.
with open("test.txt", "w", encoding="utf-8") as f:
    for line in name_list:
        f.write(f"{line}\n")

In [9]:
# What are all the utility names and T&D owner names not mapped to a BA?
# Look through the list and make sure none overlap with balancing authority names.
# `name_list` (built in the cell above) already holds exactly this sorted list, so
# it is displayed directly instead of repeating the same expression.
name_list

[' Placid Refining  Co LLC',
 ' Willmar Municipal Utilities',
 '12710',
 '158th Fighter Wing',
 '180 Raritan Energy Solutions, LLC',
 '3 Phases Renewables',
 '4-County Electric Power Assn',
 '500 Virginia Solar, LP',
 '5045 Wind Partners LLC',
 '510 REPP One LLC',
 '8309 Tujunga Avenue Corp',
 'A & N Electric Coop',
 'A B Energy Inc',
 'A E Staley Manufacturing Co',
 'ABC Coke - Drummond Co, Inc',
 'AC Landfill Energy LLC',
 'ACE Cogeneration Co',
 'ACI Energy Partners LLC',
 'ADA Carbon Solutions LLC',
 'AE Operations LLC',
 'AE Power Services LLC',
 'AE Staley Manufacturing Co',
 'AEP Generating Co',
 'AEP Generating Company',
 'AEP Onsite Partners',
 'AEP Texas Central Company',
 'AEP Texas Com & Ind Retail LP',
 'AEP Texas North Company',
 'AER NY Gen LLC',
 'AERA Energy LLC-Lost Hills',
 'AERA Energy LLC-Oxford',
 'AES Alamitos LLC',
 'AES Alternative Energy',
 'AES Beaver Valley',
 'AES Cayuga LLC',
 'AES Cypress LLC',
 'AES Deepwater Inc',
 'AES Distributed Energy',
 'AES ES Wes