Filter out any candidates that are co-located with a substation, as well as coal candidates that overlap with their feedstock sources (we ignore any overlap between natural gas plants and their feedstock sources because pipelines are typically located underground). Additionally, standardize the name of the column that holds the distance to the feedstock source.

In [None]:
import geopandas as gpd
from pathlib import Path
import pandas as pd 

# Directory the notebook is run from; all pre-processing inputs are resolved relative to it
electricity_processing_dir = Path.cwd() # pre-processing path

input_candidates_path = electricity_processing_dir / 'candidates'

output_candidates_path = electricity_processing_dir.parent / 'inputs' / 'final_candidates'
# parents=True so the cell also works when the intermediate 'inputs' directory does not exist yet
output_candidates_path.mkdir(parents=True, exist_ok=True)

# Match on the real '.gpkg' extension and sort so files are processed in a deterministic order
for generator_file in sorted(input_candidates_path.glob('*.gpkg')):
    # The file stem doubles as the technology name (e.g. 'coal_igcc')
    gen_tech = generator_file.stem

    candidates_gdf = gpd.read_file(generator_file)

    # Drop candidates whose distance to a substation is exactly zero (co-located)
    candidates_gdf = candidates_gdf[candidates_gdf['dist_to_substation_meters'] != 0]

    # Rename the technology-specific distance column to the shared 'dist_to_feedstock_meters'.
    # rename() returns a new frame; renaming a filtered slice in place can trigger
    # pandas' SettingWithCopyWarning.
    if 'coal' in gen_tech:
        candidates_gdf = candidates_gdf.rename(columns={'dist_to_coal_meters': 'dist_to_feedstock_meters'})
        # Coal candidates that overlap their feedstock source (distance zero) are removed too
        candidates_gdf = candidates_gdf[candidates_gdf['dist_to_feedstock_meters'] != 0]

    elif 'gas' in gen_tech:
        # Gas candidates keep zero-distance rows; only the column name is standardized
        candidates_gdf = candidates_gdf.rename(columns={'dist_to_pipeline_meters': 'dist_to_feedstock_meters'})

    candidates_gdf.to_file(output_candidates_path / f'{gen_tech}.gpkg', driver='GPKG')
    print(f'Saved {gen_tech}.gpkg to {output_candidates_path}')

Saved gas_cc.gpkg to /Users/nicholaskong/Desktop/REAM_lab/hydrogen_siting/electricity_siting/final_candidates
Saved gas_cc_ccs.gpkg to /Users/nicholaskong/Desktop/REAM_lab/hydrogen_siting/electricity_siting/final_candidates
Saved coal_igcc.gpkg to /Users/nicholaskong/Desktop/REAM_lab/hydrogen_siting/electricity_siting/final_candidates
Saved coal_igcc_ccs.gpkg to /Users/nicholaskong/Desktop/REAM_lab/hydrogen_siting/electricity_siting/final_candidates


Calculate the potential for each technology in each load zone.

In [9]:
# Reference nameplate capacity (MW) of a single candidate site, keyed by
# electricity generating technology name
ref_capacity = dict(
    gas_cc=1009,
    gas_cc_ccs=943.5,
    coal_igcc=764.3,
    coal_igcc_ccs=707.7,
)

In [13]:
# Create helper function that calculates the potential capacity per load zone for a given tech
def get_potential_MW(candidates_gdf, tech_name, ref_capacity_MW):
    """
    Compute the potential capacity of one generating technology in each load zone.

    Inputs:
    - candidates_gdf: the (geo)dataframe of candidate sites for the given tech; must
      contain a "LOAD_AREA" column, with one row per candidate site
    - tech_name: the electricity generating technology that the layer is for
    - ref_capacity_MW: the reference nameplate capacity of a single candidate (MW)

    Outputs:
    - df: a df with one row per load zone that has at least one candidate, with the
      following columns (in this order):
        - LOAD_AREA, site_count, gen_tech, potential_MW
      potential_MW is site_count * ref_capacity_MW with the fractional part dropped.
    """

    # Count the number of candidate sites in each load zone
    count_by_load_area = candidates_gdf.groupby("LOAD_AREA").size().reset_index(name="site_count")

    # Add a gen_tech column
    count_by_load_area["gen_tech"] = tech_name

    # Calculate total potential capacity in each load zone.
    # Binary floating point can land just below the intended value
    # (e.g. 0.29 * 100 == 28.999999999999996), which a bare integer cast would
    # truncate to one MW too few. Rounding to 6 decimals first removes that
    # representation error while still dropping genuine fractional MW.
    total_capacity = count_by_load_area["site_count"] * ref_capacity_MW
    count_by_load_area["potential_MW"] = total_capacity.round(6).astype(int)

    return count_by_load_area

In [None]:
# Collect one potential-capacity DataFrame per technology; they are concatenated once
# after the loop instead of growing a DataFrame on every iteration
potential_frames = []

# Import the .shp file of load zones
load_zones_gdf = gpd.read_file(electricity_processing_dir / 'load_zones' / 'load_zones.shp')

# Load final suitable candidate sites for each technology and calculate potential capacity by load zone
for tech_file in sorted(output_candidates_path.glob("*.gpkg")):
    # The file stem is the technology name used as the key into ref_capacity
    tech_name = tech_file.stem

    if tech_name not in ref_capacity:
        raise KeyError(
            f"No reference capacity defined for '{tech_name}' (from {tech_file}); "
            "add it to ref_capacity"
        )
    nameplate_capacity = ref_capacity[tech_name]

    gdf = gpd.read_file(tech_file)

    # Call the helper function to get a df containing the potential of the current tech in each load zone
    potential_frames.append(get_potential_MW(gdf, tech_name, nameplate_capacity))

# Fail with a clear message rather than an opaque KeyError on 'gen_tech' further down
if not potential_frames:
    raise FileNotFoundError(f"No candidate .gpkg files found in {output_candidates_path}")

output_df = pd.concat(potential_frames, ignore_index=True)

# Fill in missing (LOAD_AREA, gen_tech) with 0
load_areas = pd.Series(load_zones_gdf["LOAD_AREA"].unique(), name="LOAD_AREA")
gen_techs = pd.Series(output_df["gen_tech"].unique(), name="gen_tech")

# Cartesian product of every load zone with every technology
all_combos = load_areas.to_frame().merge(gen_techs.to_frame(), how="cross")

# Merge with actual data. The left merge introduces NaN for combinations without
# candidates, which turns the count columns into floats; after filling with 0 they
# are cast back to int so the CSV holds whole numbers (3, not 3.0).
output_df = (
    all_combos
    .merge(output_df, on=["LOAD_AREA", "gen_tech"], how="left")
    .fillna(0)
    .astype({"site_count": int, "potential_MW": int})
)

# Sort
output_df = output_df.sort_values(by=["LOAD_AREA", "gen_tech"]).reset_index(drop=True)


# Save
output_csv_path = electricity_processing_dir.parent / 'inputs' / "gen_tech_potentials.csv"
output_df.to_csv(output_csv_path, index=False)
print(f"Saved technology capacity by load zone to {output_csv_path}")


Saved technology capacity by load zone to /Users/nicholaskong/Desktop/REAM_lab/hydrogen_siting/electricity_siting/gen_tech_potentials.csv
