In [21]:
import os
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree

# ================================================================
# USER CONFIG — CHANGE THESE THREE PATHS
# ================================================================
# All three locations live under the same project checkout, so only the root
# needs editing when the notebook moves to another machine.
_PROJECT_ROOT = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture"

# Folder holding one sub-folder per IMT, each containing a map.csv hazard grid.
NSHM_BASE_DIR = _PROJECT_ROOT + r"\hazard_output_CONUS\hazard_output_CONUS\vs30-760"
# Input: one row per structure with its ID and coordinates.
COORD_FILE = _PROJECT_ROOT + r"\enriched_data\structure_coordinates.csv"
# Output: one row per structure with the looked-up hazard values.
OUT_FILE = _PROJECT_ROOT + r"\enriched_data\nshm_grid_enriched.csv"


# ================================================================
# LOAD STRUCTURE COORDINATE FILE
# ================================================================
print("Loading structure coordinate file...")

coords_df = pd.read_csv(COORD_FILE)

# A structure's position may arrive in one of two layouts:
#   * a single COORDINATES column holding text of the form "(lat, lon)", or
#   * two separate columns named LAT and LON.
# COORDINATES wins when both layouts are present.
has_pair_column = "COORDINATES" in coords_df.columns
has_split_columns = {"LAT", "LON"}.issubset(coords_df.columns)

if not (has_pair_column or has_split_columns):
    raise ValueError("Coordinate file must contain COORDINATES or LAT/LON columns.")

if has_pair_column:
    # "(lat, lon)" → ["lat", " lon"]; float() ignores the leftover whitespace.
    pieces = [text.strip("()").split(",") for text in coords_df["COORDINATES"]]
    coords_df["lat"] = [float(piece[0]) for piece in pieces]
    coords_df["lon"] = [float(piece[1]) for piece in pieces]
else:
    coords_df["lat"] = coords_df["LAT"]
    coords_df["lon"] = coords_df["LON"]


# ================================================================
# HELPER — LOAD A SINGLE IMT GRID
# ================================================================
def load_map(imt):
    """
    Load the NSHM hazard grid for one intensity-measure type and index it.

    Parameters
    ----------
    imt : str
        Name of the sub-folder of NSHM_BASE_DIR that contains ``map.csv``
        (the notebook passes "PGA", "SA0P2" and "SA1P0").

    Returns
    -------
    df : pandas.DataFrame
        Contents of ``map.csv``; checked to contain the columns
        lat, lon, 475, 975, 2475 and 10000.
    tree : scipy.spatial.cKDTree
        KD-tree built on the (lat, lon) pairs, in the same row order as
        ``df``, so an index returned by ``tree.query`` can be used with
        ``df.iloc``.

    Raises
    ------
    FileNotFoundError
        If ``map.csv`` does not exist for this IMT.
    ValueError
        If ``map.csv`` lacks any of the required columns.
    """
    fname = os.path.join(NSHM_BASE_DIR, imt, "map.csv")
    if not os.path.exists(fname):
        raise FileNotFoundError(f"Missing file: {fname}")

    df = pd.read_csv(fname)

    # Fail here, naming the file, rather than with a bare KeyError much later
    # when the first structure is looked up.
    required = ["lat", "lon", "475", "975", "2475", "10000"]
    absent = [col for col in required if col not in df.columns]
    if absent:
        raise ValueError(f"{fname} is missing required column(s): {absent}")

    # KD-tree uses (lat, lon) — queries must use the same order.
    pts = df[["lat", "lon"]].to_numpy(dtype=float)
    tree = cKDTree(pts)

    return df, tree


# ================================================================
# LOAD ALL THREE IMTs
# ================================================================
print("Loading NSHM grids...")

# Short IMT key used in the rest of the notebook → sub-folder that holds its grid.
#   SA02 = SA at 0.2 sec, SA10 = SA at 1.0 sec
_IMT_FOLDERS = (
    ("PGA", "PGA"),
    ("SA02", "SA0P2"),
    ("SA10", "SA1P0"),
)

# Register every key first (value None), then replace each placeholder with
# the (DataFrame, KD-tree) pair returned by load_map, one IMT at a time.
IMTS = dict.fromkeys(key for key, _ in _IMT_FOLDERS)
IMTS.update((key, load_map(folder)) for key, folder in _IMT_FOLDERS)

print("Loaded:", list(IMTS.keys()))


# ================================================================
# HELPER — PERFORM NSHM LOOKUP
# ================================================================
def nshm_lookup(lat, lon, imt_key, max_dist=None):
    """
    Return the mapped hazard values at the grid node nearest to (lat, lon).

    Parameters
    ----------
    lat, lon : float
        Query location, in the same convention as the grid's lat/lon columns.
    imt_key : str
        Key into the module-level IMTS dict ('PGA', 'SA02', 'SA10').
    max_dist : float, optional
        Largest acceptable distance to the nearest node, measured in the
        KD-tree's own coordinate space (plain Euclidean distance on
        (lat, lon) values, not kilometres). ``None`` (the default) accepts the
        nearest node however far away it is.

    Returns
    -------
    pandas.Series
        Indexed by "475", "975", "2475", "10000". Every value is NaN when no
        node can be matched: a coordinate is missing/non-finite, or the
        nearest node lies farther than ``max_dist``.
    """
    cols = ["475", "975", "2475", "10000"]
    df, tree = IMTS[imt_key]

    # None becomes NaN here; a non-finite point has no meaningful nearest
    # neighbour, so it is answered directly instead of reaching the KD-tree.
    point = np.asarray([lat, lon], dtype=float)
    if not np.isfinite(point).all():
        return pd.Series(np.nan, index=cols)

    dist, idx = tree.query(point, k=1)

    # Optional guard against snapping a far-away structure onto an edge node.
    if max_dist is not None and dist > max_dist:
        return pd.Series(np.nan, index=cols)

    return df.iloc[idx][cols]


# ================================================================
# ENRICH EACH STRUCTURE
# ================================================================
print("\nProcessing structures...")

# Hazard columns copied from each grid, in output order.
return_periods = ["475", "975", "2475", "10000"]

# One (lat, lon) row per structure — same axis order the KD-trees were built with.
query_pts = coords_df[["lat", "lon"]].to_numpy(dtype=float)

# Structures whose coordinates are missing cannot be matched to a grid node;
# they keep NaN hazard values instead of aborting the whole run.
has_coords = np.isfinite(query_pts).all(axis=1)
if not has_coords.all():
    print(
        f"Warning: {int((~has_coords).sum())} structure(s) have no usable "
        "coordinates; their hazard values are left blank."
    )

# STRUCTURE_ID is copied as a whole column so its dtype is kept as read from
# the file (row-wise iteration can turn integer IDs into floats).
out_df = pd.DataFrame({"STRUCTURE_ID": coords_df["STRUCTURE_ID"].to_numpy()})

for imt_key in ("PGA", "SA02", "SA10"):
    grid_df, grid_tree = IMTS[imt_key]
    grid_vals = grid_df[return_periods].to_numpy(dtype=float)

    hazard = np.full((len(coords_df), len(return_periods)), np.nan)
    if has_coords.any():
        # One batched nearest-node query for all locatable structures.
        _, nearest = grid_tree.query(query_pts[has_coords], k=1)
        hazard[has_coords] = grid_vals[nearest]

    # Output columns are named <IMT>_<return period>, e.g. PGA_475.
    for col_pos, rp in enumerate(return_periods):
        out_df[f"{imt_key}_{rp}"] = hazard[:, col_pos]


# ================================================================
# SAVE OUTPUT
# ================================================================
out_df.to_csv(OUT_FILE, index=False)

print("\nNSHM grid enrichment complete!")
print("Saved to:", OUT_FILE)


Loading structure coordinate file...
Loading NSHM grids...
Loaded: ['PGA', 'SA02', 'SA10']

Processing structures...

NSHM grid enrichment complete!
Saved to: C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data\nshm_grid_enriched.csv

NSHM grid enrichment complete!
Saved to: C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data\nshm_grid_enriched.csv


In [None]:
# Return periods (years) covered for every IMT, in schema order.
_NSHM_RETURN_PERIODS = ("475", "975", "2475", "10000")

# Per-IMT wording and bin thresholds. "{rp}" is replaced by the return period.
# Each bin is (category, min, max, label); max=None marks the open-ended top bin.
_NSHM_IMT_SPECS = {
    "PGA": {
        "numeric_category": "Mapped peak ground acceleration ({rp}-year return period)",
        "semantic_category": "Mapped peak ground acceleration ({rp}-year)",
        "missing_label": "Ground shaking ({rp}-year) not available",
        "bins": (
            ("very_low", 0.0, 0.10, "very low shaking (PGA < 0.10g)"),
            ("low", 0.10, 0.20, "low shaking (0.10–0.20g)"),
            ("moderate", 0.20, 0.35, "moderate shaking (0.20–0.35g)"),
            ("high", 0.35, 0.55, "high shaking (0.35–0.55g)"),
            ("very_high", 0.55, None, "very high shaking (> 0.55g)"),
        ),
    },
    "SA02": {
        "numeric_category": "Mapped 0.2-second spectral acceleration ({rp}-year)",
        "semantic_category": "Mapped 0.2-second spectral acceleration ({rp}-year)",
        "missing_label": "0.2-second spectral acceleration not available",
        "bins": (
            ("low", 0.0, 0.5, "low short-period shaking (SA02 < 0.5g)"),
            ("moderate", 0.5, 1.0, "moderate short-period shaking (0.5–1.0g)"),
            ("high", 1.0, 1.5, "high short-period shaking (1.0–1.5g)"),
            ("very_high", 1.5, 2.0, "very high short-period shaking (1.5–2.0g)"),
            ("extreme", 2.0, None, "extreme short-period shaking (>2.0g)"),
        ),
    },
    "SA10": {
        "numeric_category": "Mapped 1.0-second spectral acceleration ({rp}-year)",
        "semantic_category": "Mapped 1.0-second spectral acceleration ({rp}-year)",
        "missing_label": "1-second spectral acceleration not available",
        "bins": (
            ("low", 0.0, 0.2, "low long-period shaking (SA10 < 0.2g)"),
            ("moderate", 0.2, 0.4, "moderate long-period shaking (0.2–0.4g)"),
            ("high", 0.4, 0.6, "high long-period shaking (0.4–0.6g)"),
            ("very_high", 0.6, 0.8, "very high long-period shaking (0.6–0.8g)"),
            ("extreme", 0.8, None, "extreme long-period shaking (>0.8g)"),
        ),
    },
}


def _nshm_field_pair(imt, rp, spec):
    """
    Build the two schema entries for one IMT / return-period combination.

    Returns a dict with, in order, the numerical field ``<imt>_<rp>`` and its
    derived categorical companion ``<imt>_<rp>_SEMANTIC``. Fresh dicts and
    lists are created on every call so no two schema entries share state.
    """
    numeric_name = f"{imt}_{rp}"

    # The "none" label comes first in code_map, followed by one label per bin.
    code_map = {"none": spec["missing_label"].format(rp=rp)}
    bin_rules = []
    for category, lower, upper, label in spec["bins"]:
        bin_rules.append({"category": category, "min": lower, "max": upper})
        code_map[category] = label

    return {
        numeric_name: {
            "category": spec["numeric_category"].format(rp=rp),
            "type": "numerical",
            "datatype": "float",
            "unit": "g",
            "code_map": None,
        },
        numeric_name + "_SEMANTIC": {
            "category": spec["semantic_category"].format(rp=rp),
            "type": "nominal_derived",
            "datatype": "string",
            "unit": None,
            "source": numeric_name,
            "bin_rules": bin_rules,
            "code_map": code_map,
        },
    }


# Field name → schema entry, ordered by IMT, then return period, with each
# numerical field immediately followed by its *_SEMANTIC companion.
nshm_schema = {
    field_name: field_entry
    for imt, spec in _NSHM_IMT_SPECS.items()
    for rp in _NSHM_RETURN_PERIODS
    for field_name, field_entry in _nshm_field_pair(imt, rp, spec).items()
}