# Compare Plant Coordinates from PUDL and EIA-860 

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from math import acos, asin, cos, degrees, radians, sin, sqrt

from oge.load_data import load_pudl_table, load_raw_eia860_plant_geographical_info

In [None]:
# Location-related columns of the PUDL plant entity table.
plant_entity_pudl = load_pudl_table(
    "core_eia__entity_plants",
    columns=[
        "plant_id_eia",
        "timezone",
        "latitude",
        "longitude",
        "state",
        "county",
        "city",
    ],
)
# Key the table by EIA plant ID so rows can be looked up per plant.
plant_entity_pudl = plant_entity_pudl.set_index("plant_id_eia")

# Plant geographical information from the raw EIA-860 data, keyed the same way.
# NOTE(review): 2022 is presumably the EIA-860 report year -- confirm against
# the loader's signature.
plant_entity_eia860 = load_raw_eia860_plant_geographical_info(2022)
plant_entity_eia860 = plant_entity_eia860.set_index("plant_id_eia")

Take the intersection of the plant IDs present in both datasets

In [None]:
# Plant IDs found in the index of both tables; only these can be compared.
plant_in_common = plant_entity_pudl.index.intersection(
    plant_entity_eia860.index
)
print(f"Number of plants in both dataset: {len(plant_in_common)}")

Define helper functions to calculate angular distance

In [None]:
def ll2uv(lon: float, lat: float) -> list[float]:
    """Map a (longitude, latitude) pair onto the unit sphere.

    Args:
        lon (float): longitude of the site (in deg.) measured eastward from
            Greenwich, UK.
        lat (float): latitude of the site (in deg.). Equator is the zero point.

    Returns:
        list[float]: the unit vector ``[x, y, z]`` with
            ``x = cos(lat) * cos(lon)``, ``y = cos(lat) * sin(lon)`` and
            ``z = sin(lat)``.
    """
    # Convert to radians once; the trigonometric functions expect radians.
    lat_rad = radians(lat)
    lon_rad = radians(lon)

    # cos(lat) is the length of the vector's projection onto the equatorial
    # plane, shared by the x and y components.
    planar = cos(lat_rad)

    return [planar * cos(lon_rad), planar * sin(lon_rad), sin(lat_rad)]


def angular_distance(uv1: list[float], uv2: list[float]) -> float:
    """Return the angle separating two 3-component vectors.

    The angle is the arc cosine of the dot product, so the result is only a
    true angle when both inputs are unit vectors.

    Args:
        uv1 (list[float]): 3-components vector as returned by the `ll2uv` function.
        uv2 (list[float]): 3-components vector as returned by the `ll2uv` function.

    Returns:
        float -- angle (in degrees).
    """
    dot = uv1[0] * uv2[0] + uv1[1] * uv2[1] + uv1[2] * uv2[2]

    # Round-off can push the dot product marginally outside [-1, 1], where
    # acos is undefined; clamp it back onto the valid domain. Explicit
    # comparisons (rather than min/max) leave a NaN dot product untouched.
    if dot >= 1:
        clamped = 1
    elif dot <= -1:
        clamped = -1
    else:
        clamped = dot

    return degrees(acos(clamped))

Calculate the angular distance between the PUDL and EIA-860 coordinates of each plant in common

In [None]:
# Angular separation (in deg.) between the PUDL and EIA-860 coordinates of each
# plant, plus the subset of plants whose separation exceeds 1 deg.
plant_to_distance = {}
plant_to_distance_gt_1deg = {}
for i in plant_in_common:  # i is a plant ID present in both tables
    try:
        uv_pudl = ll2uv(
            plant_entity_pudl.loc[i, "longitude"], plant_entity_pudl.loc[i, "latitude"]
        )
        uv_eia860 = ll2uv(
            plant_entity_eia860.loc[i, "longitude"],
            plant_entity_eia860.loc[i, "latitude"],
        )
        distance = angular_distance(uv_pudl, uv_eia860)
    except TypeError:
        # The math functions raise TypeError when a coordinate is not a real
        # number (such as `None` or `pd.NA`); skip those plants.
        continue

    # A float NaN coordinate does not raise: it propagates through the
    # trigonometric functions and yields a NaN distance. Skip it as well so
    # that only plants with usable coordinates in both tables are recorded.
    if np.isnan(distance):
        continue

    plant_to_distance[i] = distance
    if distance > 1:
        plant_to_distance_gt_1deg[i] = distance

In [None]:
# Histogram of the separations that fall between 0 and 1 deg.; values outside
# the requested range are not binned.
plt.subplots(figsize=(12, 6))
plt.hist(list(plant_to_distance.values()), range=(0, 1), bins=20)
plt.ylabel("Count", fontsize=12)
plt.xlabel("Angular Distance between plant in PUDL and EIA-860 (in deg.)", fontsize=12)
plt.show()

In [None]:
# Report how many plants have coordinates that disagree by more than 1 deg.
print(
    "Number of plants with an angular distance greater than 1 deg.: "
    f"{len(plant_to_distance_gt_1deg)}"
)

In [None]:
# Histogram restricted to the plants separated by more than 1 deg., binned
# over the 1 to 180 deg. range.
plt.subplots(figsize=(12, 6))
plt.hist(list(plant_to_distance_gt_1deg.values()), range=(1, 180), bins=20)
plt.ylabel("Count", fontsize=12)
plt.xlabel(
    "Angular Distance between plant in PUDL and EIA-860 separated by more than 1 deg.",
    fontsize=12,
)
plt.show()

In [None]:
# Display the mapping of plant ID to angular distance (in deg.) for the plants
# separated by more than 1 deg.
plant_to_distance_gt_1deg