In [1]:
# import packages
import pandas as pd
import numpy as np
import os

%reload_ext autoreload
%autoreload 2

# Tell Python where to look for the OGE modules.
import sys

sys.path.append("../../../open-grid-emissions/src/oge")


import load_data
import helpers
from filepaths import *

# Data year requested from the year-specific tables and files below.
year = 2023
# Year-based relative path prefix.
path_prefix = "{}/".format(year)

# Set the PUDL_BUILD environment variable for this process.
# NOTE(review): this runs after `load_data`, `helpers` and `filepaths` have
# been imported; if any of them reads PUDL_BUILD at import time the value set
# here would not be picked up - confirm.
os.environ.update({"PUDL_BUILD": "nightly"})

In [2]:
# Load the generator- and plant-level PUDL tables that are combined into `gens`.

# generator attributes, requested for `year`
generator_columns = [
    "report_date",
    "plant_id_eia",
    "generator_id",
    "prime_mover_code",
    "capacity_mw",
    "generator_retirement_date",
    "operational_status",
    "operational_status_code",
    "current_planned_generator_operating_date",
    "planned_generator_retirement_date",
]
gens = load_data.load_pudl_table(
    "core_eia860__scd_generators", year, columns=generator_columns
)

# generator entity table (loaded without a year argument)
generator_entity_columns = [
    "plant_id_eia",
    "generator_id",
    "generator_operating_date",
    "original_planned_generator_operating_date",
]
gens_entity = load_data.load_pudl_table(
    "core_eia__entity_generators", columns=generator_entity_columns
)

# plant entity table: coordinates and place names
plant_entity_columns = [
    "plant_id_eia",
    "latitude",
    "longitude",
    "city",
    "county",
    "state",
]
plants_entity = load_data.load_pudl_table(
    "core_eia__entity_plants", columns=plant_entity_columns
)

# balancing authority code of each plant, requested for `year`
plant_ba = load_data.load_pudl_table(
    "core_eia860__scd_plants",
    year,
    columns=["plant_id_eia", "balancing_authority_code_eia"],
)
plant_ba = plant_ba.rename(columns={"balancing_authority_code_eia": "ba_code"})

# Left-join the entity dates, the BA code and the plant location onto the
# generator rows. validate="m:1" makes pandas raise if a lookup table has
# more than one row per join key.
gens_merged = gens
for lookup_table, join_keys in (
    (gens_entity, ["plant_id_eia", "generator_id"]),
    (plant_ba, "plant_id_eia"),
    (plants_entity, "plant_id_eia"),
):
    gens_merged = gens_merged.merge(
        lookup_table, how="left", on=join_keys, validate="m:1"
    )
gens = gens_merged

# drop data for generators outside of continental US
# .copy() gives `gens` its own data, so the .loc assignments below modify an
# independent frame instead of a filtered slice (avoids SettingWithCopyWarning)
gens = gens[~gens["state"].isin(["AK", "HI"])].copy()

# if there are longitudes in the wrong hemisphere, try reversing the sign
# (rows with a missing longitude compare False and are left untouched)
wrong_hemisphere = gens["longitude"] > 0
gens.loc[wrong_hemisphere, "longitude"] = -1 * gens.loc[wrong_hemisphere, "longitude"]

# fix coordinates
# NOTE: This is the bounding box for the continental US and the southern canadian provinces
LONGITUDE_MIN = -125.25
LONGITUDE_MAX = -67
LATITUDE_MIN = 24.5
LATITUDE_MAX = 49.25

# Treat any coordinate pair that falls outside the bounding box as missing:
# both latitude and longitude are blanked so the row is picked up by any
# later step that fills in missing locations.
outside_bounding_box = (
    (gens["longitude"] < LONGITUDE_MIN)
    | (gens["longitude"] > LONGITUDE_MAX)
    | (gens["latitude"] < LATITUDE_MIN)
    | (gens["latitude"] > LATITUDE_MAX)
)
# np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
gens.loc[outside_bounding_box, ["latitude", "longitude"]] = np.nan

# fill in missing locations
def _rows_missing_coordinates(frame):
    """Return the rows of `frame` whose latitude or longitude is NaN."""
    return frame[frame[["longitude", "latitude"]].isna().any(axis=1)]


# Look up lat/lon from state, county and city for every row that is missing
# a coordinate. Up to two passes are made to try and address any geocoder
# errors; the loop stops early once nothing is missing.
# NOTE(review): nothing here checks that state, county and city are populated
# before calling the helper - presumably it copes with missing values; confirm.
loop_count = 1
missing_coord = _rows_missing_coordinates(gens)
while loop_count <= 2 and not missing_coord.empty:
    print(f"Finding coordinates for {len(missing_coord)} missing locations")
    place_columns = missing_coord[["state", "county", "city"]]
    for i, state, county, city in place_columns.itertuples(name=None):
        lat, lon = helpers.get_coordinates_of_location(state, county, city)
        gens.loc[i, "latitude"] = lat
        gens.loc[i, "longitude"] = lon
    # re-evaluate what is still missing before deciding on another pass
    missing_coord = _rows_missing_coordinates(gens)
    loop_count += 1

# load wind data
# Reads the "Operable" sheet of the eia860 wind workbook for `year`;
# header=1 takes the column names from the second row of the sheet.
wind_workbook = downloads_folder(f"eia860/eia860{year}/3_2_Wind_Y{year}.xlsx")
wind = pd.read_excel(wind_workbook, sheet_name="Operable", header=1)
wind = wind.rename(
    columns={
        "Plant Code": "plant_id_eia",
        "Generator ID": "generator_id",
        "Design Wind Speed (mph)": "rated_speed_mph",
        "Turbine Hub Height (Feet)": "hub_height_ft",
    }
)

# convert to metric units
# metric column -> (imperial source column, multiplier, decimals to round to)
metric_conversions = {
    "rated_speed_m_per_s": ("rated_speed_mph", 0.44704, 1),  # mph -> m/s
    "hub_height_m": ("hub_height_ft", 0.3048, 0),  # feet -> m
}
for metric_column, (imperial_column, factor, decimals) in metric_conversions.items():
    wind[metric_column] = (wind[imperial_column] * factor).round(decimals)

# keep only the join keys and the converted columns
wind = wind[["plant_id_eia", "generator_id", *metric_conversions]]

# merge the wind data into the gens data
# (left join, so generators without a row in the wind sheet get NaN here)
gens = gens.merge(wind, how="left", on=["plant_id_eia", "generator_id"])

In [6]:
# Write the assembled generator table to a year-stamped CSV, omitting the
# DataFrame index from the file.
gens_csv_path = outputs_folder(f"gens_{year}.csv")
gens.to_csv(gens_csv_path, index=False)