Skip to content

Commit

Permalink
fix: prevent multi counting of capacity
Browse files (browse the repository at this point in the history)
  • Loading branch information
rouille committed May 23, 2024
1 parent d74c923 commit a763976
Showing 1 changed file with 19 additions and 7 deletions.
26 changes: 19 additions & 7 deletions src/oge/helpers.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -394,21 +394,33 @@ def add_plant_nameplate_capacity(df: pd.DataFrame) -> pd.DataFrame:
     Returns:
         pd.DataFrame: original data frame with additional 'capacity_mw' column.
     """
-    generators_capacity = load_data.load_pudl_table(
+    generator_capacity = load_data.load_pudl_table(
         "generators_eia860",
         year=earliest_data_year,
         end_year=latest_validated_year,
+        columns=["plant_id_eia", "generator_id", "report_date", "capacity_mw"],
+    ).sort_values(by=["plant_id_eia", "generator_id", "report_date"], ascending=True)
+
+    generator_capacity["capacity_mw"] = generator_capacity.groupby(
+        ["plant_id_eia", "generator_id"]
+    )["capacity_mw"].bfill()
+    generator_capacity["capacity_mw"] = generator_capacity.groupby(
+        ["plant_id_eia", "generator_id"]
+    )["capacity_mw"].ffill()
+
+    # keep only the most recent year of data
+    generator_capacity = generator_capacity.drop_duplicates(
+        subset=["plant_id_eia", "generator_id"], keep="last"
     )
-    generators_capacity[
-        generators_capacity["report_date"] == generators_capacity["report_date"].max()
-    ]
-    plants_capacity = (
-        generators_capacity.groupby(["plant_id_eia"])["capacity_mw"]
+
+    plant_capacity = (
+        generator_capacity.groupby(["plant_id_eia"])["capacity_mw"]
         .sum()
         .round(2)
         .reset_index()
     )

-    df = df.merge(plants_capacity, how="left", on=["plant_id_eia"], validate="1:1")
+    df = df.merge(plant_capacity, how="left", on=["plant_id_eia"], validate="1:1")

     return df

Expand Down

0 comments on commit a763976

Please sign in to comment.