In [2]:
# import packages
import pandas as pd
import numpy as np
import os
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# Tell Python where to look for the project modules.
import sys

sys.path.append("../../src")

import oge.load_data as load_data
from oge.column_checks import get_dtypes
from oge.filepaths import *


# Data year used throughout the notebook (passed to `initialize_pudl_out`).
year = 2021
# Year-specific path prefix, e.g. "2021/".
path_prefix = f"{year}/"

In [None]:
# Build `plant_ba`: one row per EIA plant with its balancing authority (BA) code,
# filling missing codes from a manually maintained name -> BA code lookup table.
pudl_out = load_data.initialize_pudl_out(year=year)

# keep only the plant attributes needed to assign a BA code
plant_ba = pudl_out.plants_eia860().loc[
    :,
    [
        "plant_id_eia",
        "balancing_authority_code_eia",
        "balancing_authority_name_eia",
        "utility_name_eia",
        "transmission_distribution_owner_name",
        "state",
    ],
]

# convert the dtype of the balancing authority code column from string to object
# this will allow for missing values to be filled
# NOTE: `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0
plant_ba["balancing_authority_code_eia"] = (
    plant_ba["balancing_authority_code_eia"].astype(object).fillna(value=np.nan)
)

# specify a ba code for certain utilities
# (the lookup table is read into its own name so `utility_as_ba_code` is only
# ever a dict of name -> ba_code)
utility_ba_code_table = pd.read_csv(manual_folder("utility_name_ba_code_map.csv"))
utility_as_ba_code = dict(
    zip(
        utility_ba_code_table["name"],
        utility_ba_code_table["ba_code"],
    )
)

# fill missing BA codes first based on the BA name, then utility name, then on the
# transmission owner name; earlier columns take priority because `fillna` only
# touches values that are still missing
for name_column in (
    "balancing_authority_name_eia",
    "utility_name_eia",
    "transmission_distribution_owner_name",
):
    plant_ba["balancing_authority_code_eia"] = plant_ba[
        "balancing_authority_code_eia"
    ].fillna(plant_ba[name_column].map(utility_as_ba_code))

In [None]:
# list every distinct utility name among plants that still lack a BA code
missing_ba_mask = plant_ba["balancing_authority_code_eia"].isna()
list(plant_ba.loc[missing_ba_mask, "utility_name_eia"].unique())

In [None]:
# what are all the transmission/distribution owner names not mapped to a BA?
list(
    plant_ba.loc[
        plant_ba["balancing_authority_code_eia"].isna(),
        "transmission_distribution_owner_name",
    ].unique()
)

In [None]:
# show the Pacific Gas & Electric plants that still have no BA code assigned
ba_code_missing = plant_ba["balancing_authority_code_eia"].isna()
owned_by_pge = plant_ba["utility_name_eia"].eq("Pacific Gas & Electric Co")
plant_ba.loc[ba_code_missing & owned_by_pge]