In [3]:
import os
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree

# ================================================================
# USER CONFIG — CHANGE THESE THREE PATHS
# ================================================================
# Root folder of the NSHM hazard output; each IMT grid is read from
# <NSHM_BASE_DIR>/<IMT folder>/map.csv.
NSHM_BASE_DIR = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\hazard_output_CONUS\hazard_output_CONUS\vs30-760"
# Input CSV of structures: needs STRUCTURE_ID plus either a COORDINATES
# column or LAT/LON columns.
COORD_FILE    = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data\structure_coordinates.csv"
# Output CSV that receives the per-structure hazard values.
OUT_FILE      = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data_fixed\nshm_hazard_grid.csv"


# ================================================================
# LOAD STRUCTURE COORDINATE FILE
# ================================================================
print("Loading structure coordinate file...")


def _parse_coordinates(value):
    """Parse one "(lat, lon)" cell into a (lat, lon) tuple of floats.

    Raises:
        ValueError: if the cell does not hold two comma-separated numbers;
            the message quotes the offending value.
    """
    try:
        # Trim outer whitespace before the parentheses so padded cells such
        # as "(34.1, -118.2) " still parse.
        lat_text, lon_text = str(value).strip().strip("()").split(",")[:2]
        return float(lat_text), float(lon_text)
    except ValueError as err:
        raise ValueError(
            f"Cannot parse COORDINATES value {value!r}; expected '(lat, lon)'."
        ) from err


coords_df = pd.read_csv(COORD_FILE)

# STRUCTURE_ID is read for every row during enrichment, so report a missing
# column here rather than after the hazard grids have been loaded.
if "STRUCTURE_ID" not in coords_df.columns:
    raise ValueError("Coordinate file must contain a STRUCTURE_ID column.")

# Expect STRUCTURE_ID, COORDINATES = "(lat, lon)" or two columns LAT, LON
if "COORDINATES" in coords_df.columns:
    # Parse each cell once, then split the pairs into the two float columns.
    lat_lon_pairs = [_parse_coordinates(cell) for cell in coords_df["COORDINATES"]]
    coords_df["lat"] = [pair[0] for pair in lat_lon_pairs]
    coords_df["lon"] = [pair[1] for pair in lat_lon_pairs]

elif "LAT" in coords_df.columns and "LON" in coords_df.columns:
    coords_df["lat"] = coords_df["LAT"]
    coords_df["lon"] = coords_df["LON"]

else:
    raise ValueError("Coordinate file must contain COORDINATES or LAT/LON columns.")


# ================================================================
# HELPER — LOAD A SINGLE IMT GRID
# ================================================================
def load_map(imt):
    """
    Read the hazard grid stored at <NSHM_BASE_DIR>/<imt>/map.csv.

    Parameters:
        imt — name of the IMT sub-folder (e.g. PGA, SA0P2, SA1P0)

    Returns a (grid, tree) pair:
        grid — pandas DataFrame holding the CSV contents as read
        tree — cKDTree built from the grid's "lat" and "lon" columns

    Raises FileNotFoundError when the map.csv file does not exist.
    """
    map_path = os.path.join(NSHM_BASE_DIR, imt, "map.csv")
    if not os.path.exists(map_path):
        raise FileNotFoundError(f"Missing file: {map_path}")

    grid = pd.read_csv(map_path)

    # Points are indexed in (lat, lon) order, so queries must use that order too.
    node_coords = np.column_stack((grid["lat"].values, grid["lon"].values))

    return grid, cKDTree(node_coords)


# ================================================================
# LOAD ALL THREE IMTs
# ================================================================
print("Loading NSHM grids...")

# Output key → NSHM folder name.
#   SA02 is SA at 0.2 sec (folder SA0P2); SA10 is SA at 1.0 sec (folder SA1P0).
IMTS = {
    key: load_map(folder)
    for key, folder in (("PGA", "PGA"), ("SA02", "SA0P2"), ("SA10", "SA1P0"))
}

print("Loaded:", list(IMTS))


# ================================================================
# HELPER — PERFORM NSHM LOOKUP
# ================================================================
def nshm_lookup(lat, lon, imt_key):
    """
    Given a lat, lon and IMT key ('PGA', 'SA02', 'SA10'),
    returns the hazard values of the nearest hazard grid node.

    "Nearest" is whatever the IMT's KD-tree reports for the point [lat, lon];
    no maximum distance is enforced, so a point far from the grid still gets
    the values of its closest node.

    Returns:
        pandas Series indexed by "475", "975", "2475", "10000". All four
        values are NaN when lat or lon is NaN/inf, because such a point
        cannot be matched to a grid node.

    Raises:
        KeyError — if imt_key is not a key of IMTS.
    """
    periods = ["475", "975", "2475", "10000"]
    df, tree = IMTS[imt_key]

    # A non-finite query point would make the KD-tree lookup fail; report
    # "no data" for that structure instead of aborting the caller.
    if not (np.isfinite(lat) and np.isfinite(lon)):
        return pd.Series(np.nan, index=periods)

    # Only the node index is needed; the distance is discarded.
    _, idx = tree.query([lat, lon], k=1)
    return df.iloc[idx][periods]


# ================================================================
# ENRICH EACH STRUCTURE
# ================================================================
print("\nProcessing structures...")

# Return-period columns copied from every NSHM grid, in output order.
RETURN_PERIODS = ["475", "975", "2475", "10000"]

# Read the columns directly instead of looping with iterrows(): iterrows()
# packs each row into a single Series, which does not preserve dtypes (an
# integer STRUCTURE_ID becomes a float when the other columns are numeric).
structure_ids = coords_df["STRUCTURE_ID"].to_numpy()
lat_values = coords_df["lat"].to_numpy(dtype=float)
lon_values = coords_df["lon"].to_numpy(dtype=float)

# Query points use (lat, lon) order to match the order the KD-trees were built in.
query_points = np.column_stack([lat_values, lon_values])

# Structures with NaN/inf coordinates cannot be matched to a grid node; they
# keep NaN hazard values instead of aborting the whole run.
has_coords = np.isfinite(query_points).all(axis=1)

enriched = {
    "STRUCTURE_ID": structure_ids,
    # .tolist() yields plain Python floats, so each tuple is written to the
    # CSV as "(lat, lon)" rather than with NumPy scalar reprs.
    "COORDINATES": list(zip(lat_values.tolist(), lon_values.tolist())),
}

# Key order fixes the output column order: PGA, SA(0.2), SA(1.0).
for imt_key in ("PGA", "SA02", "SA10"):
    grid_df, grid_tree = IMTS[imt_key]
    grid_values = grid_df[RETURN_PERIODS].to_numpy(dtype=float)

    hazard = np.full((len(query_points), len(RETURN_PERIODS)), np.nan)
    if has_coords.any():
        # One batched nearest-node query per IMT replaces a query per structure.
        _, nearest = grid_tree.query(query_points[has_coords], k=1)
        hazard[has_coords] = grid_values[nearest]

    for col, period in enumerate(RETURN_PERIODS):
        enriched[f"{imt_key}_{period}"] = hazard[:, col]

# Keep `rows` as a list of per-structure dicts, the shape the save step expects.
rows = pd.DataFrame(enriched).to_dict("records")


# ================================================================
# SAVE OUTPUT
# ================================================================
out_df = pd.DataFrame(rows)

# to_csv does not create missing folders; make the destination directory
# first so a finished enrichment run is not lost at write time.
out_dir = os.path.dirname(OUT_FILE)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

out_df.to_csv(OUT_FILE, index=False)

print("\nNSHM grid enrichment complete!")
print("Saved to:", OUT_FILE)


Loading structure coordinate file...
Loading NSHM grids...
Loaded: ['PGA', 'SA02', 'SA10']

Processing structures...

NSHM grid enrichment complete!
Saved to: C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data_fixed\nshm_hazard_grid.csv
