Skip to content

Commit

Permalink
Merge pull request #364 from singularity-energy/ben/plant
Browse files Browse the repository at this point in the history
Add geographical information to the plant static attributes data frame
  • Loading branch information
rouille committed May 22, 2024
2 parents 7623dd6 + 48ee36c commit 43ff1df
Show file tree
Hide file tree
Showing 4 changed files with 272 additions and 68 deletions.
44 changes: 37 additions & 7 deletions src/oge/column_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

from oge.logging_util import get_logger

import pandas as pd

logger = get_logger(__name__)


Expand Down Expand Up @@ -190,11 +192,17 @@
"fuel_category_eia930",
"ba_code",
"ba_code_physical",
"state",
"distribution_flag",
"timezone",
"data_availability",
"shaped_plant_id",
"latitude",
"longitude",
"state",
"county",
"city",
"plant_name_eia",
"capacity_mw",
},
"plant_metadata": {
"plant_id_eia",
Expand Down Expand Up @@ -364,9 +372,17 @@
}


def check_columns(df, file_name):
"""
Given a file name and a dataframe to export, check that its columns are as expected.
def check_columns(df: pd.DataFrame, file_name: str):
"""Given a file name and a data frame to export, check that its columns are as
expected.
Args:
df (pd.DataFrame): table to check.
file_name (str): key name of file in `COLUMNS`.
Raises:
ValueError: if `file_name` cannot be found in list of files.
ValueError: if columns are missing in `df`.
"""

cols = set(list(df.columns))
Expand Down Expand Up @@ -491,13 +507,26 @@ def get_dtypes():
"subplant_primary_fuel_from_net_generation_mwh": "str",
"timezone": "str",
"wet_dry_bottom": "str",
"latitude": "float64",
"longitude": "float64",
"county": "str",
"city": "str",
"plant_name_eia": "str",
}

return dtypes_to_use


def apply_dtypes(df):
"""Applies specified dtypes to a dataframe and identifies if a dtype is not specified for a column."""
def apply_dtypes(df: pd.DataFrame) -> pd.DataFrame:
"""Applies specified types to a data frame and identifies if a type is not
specified for a column.
Args:
df (pd.DataFrame): table whose columns will be converted.
Returns:
pd.DataFrame: original data frame with type converted columns.
"""
dtypes = get_dtypes()
datetime_columns = ["datetime_utc", "datetime_local", "report_date"]
cols_missing_dtypes = [
Expand All @@ -507,7 +536,8 @@ def apply_dtypes(df):
]
if len(cols_missing_dtypes) > 0:
logger.warning(
"The following columns do not have dtypes assigned in `column_checks.get_dtypes()`"
"The following columns do not have dtypes assigned in "
"`column_checks.get_dtypes()`"
)
logger.warning(cols_missing_dtypes)
return df.astype({col: dtypes[col] for col in df.columns if col in dtypes})
37 changes: 30 additions & 7 deletions src/oge/data_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import os
import shutil

import pandas as pd

# import local modules
import oge.download_data as download_data
import oge.data_cleaning as data_cleaning
Expand Down Expand Up @@ -41,13 +43,19 @@ def get_args() -> argparse.Namespace:
parser.add_argument("--year", help="Year for analysis", default=2022, type=int)
parser.add_argument(
"--shape_individual_plants",
help="Assign an hourly profile to each individual plant with EIA-only data, instead of aggregating to the fleet level before shaping.",
help=(
"Assign an hourly profile to each individual plant with EIA-only data, "
"instead of aggregating to the fleet level before shaping."
),
default=True,
action=argparse.BooleanOptionalAction,
)
parser.add_argument(
"--small",
help="Run on subset of data for quicker testing, outputs to outputs/small and results to results/small.",
help=(
"Run on subset of data for quicker testing, outputs to outputs/small and "
"results to results/small."
),
default=False,
action=argparse.BooleanOptionalAction,
)
Expand Down Expand Up @@ -93,11 +101,14 @@ def main(args):
os.makedirs(outputs_folder(f"{path_prefix}"), exist_ok=True)
os.makedirs(outputs_folder(f"{path_prefix}/eia930"), exist_ok=True)
if not args.skip_outputs:
# If we are outputting, wipe results dir so we can be confident there are no old result files (e.g. because of a file name change)
# If we are outputting, wipe results dir so we can be confident there are no old
# result files (e.g. because of a file name change)
if os.path.exists(results_folder(f"{path_prefix}")):
shutil.rmtree(results_folder(f"{path_prefix}"))
os.makedirs(results_folder(f"{path_prefix}"), exist_ok=False)
else: # still make sure results dir exists, but exist is ok and we won't be writing to it
else:
# still make sure results dir exists, but exist is ok and we won't be writing
# to it
os.makedirs(results_folder(f"{path_prefix}"), exist_ok=True)
os.makedirs(
results_folder(f"{path_prefix}data_quality_metrics"),
Expand Down Expand Up @@ -438,7 +449,8 @@ def main(args):
eia930.clean_930(year, small=args.small, path_prefix=path_prefix)
else:
logger.info(
f"Not re-running 930 cleaning. If you'd like to re-run, please delete data/outputs/{path_prefix}/eia930/"
"Not re-running 930 cleaning. If you'd like to re-run, "
f"please delete data/outputs/{path_prefix}/eia930/"
)

# If running small, we didn't clean the whole year, so need to use the
Expand Down Expand Up @@ -517,10 +529,12 @@ def main(args):
)
else:
logger.info(
"Not shaping and exporting individual plant data since `shape_individual_plants` is False."
"Not shaping and exporting individual plant data since "
"`shape_individual_plants` is False."
)
logger.info(
"Plants that only report to EIA will be aggregated to the fleet level before shaping."
"Plants that only report to EIA will be aggregated to the fleet level "
"before shaping."
)

# 15. Shape fleet-level data
Expand Down Expand Up @@ -678,6 +692,15 @@ def main(args):
ba_fuel_data, year, path_prefix, args.skip_outputs, include_hourly=False
)

# export plant static attributes to csv
output_data.output_intermediate_data(
plant_attributes.assign(shaped_plant_id=pd.NA),
"plant_static_attributes",
path_prefix,
year,
args.skip_outputs,
)


if __name__ == "__main__":
import sys
Expand Down
Loading

0 comments on commit 43ff1df

Please sign in to comment.