In [None]:
import os
import requests
import pandas as pd
from tqdm import tqdm
import time
import ast

# --------------------------------------------------------------------
# File paths
# --------------------------------------------------------------------
# Input CSV; it is loaded into `df` below and its COORDINATES column is
# parsed by the statements that follow this block.
coord_path = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data\structure_coordinates.csv"
# Directory that receives this cell's output file.
out_dir = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data"
# Create the output directory up front; exist_ok=True makes re-running the cell harmless.
os.makedirs(out_dir, exist_ok=True)

# --------------------------------------------------------------------
# Load coordinates
# --------------------------------------------------------------------
# Read the whole coordinate table into memory with pandas' default dtypes.
df = pd.read_csv(coord_path)

def parse_coord(x):
    """Turn one raw COORDINATES cell into a Python object.

    Args:
        x: The cell value. Strings are evaluated as Python literals
            (e.g. ``"(47.1, -122.5)"`` becomes a tuple); any other value
            is passed through untouched.

    Returns:
        The parsed literal, the original non-string value, or ``None``
        when the string is not a valid Python literal.
    """
    if not isinstance(x, str):
        return x
    try:
        return ast.literal_eval(x)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Only the failures literal_eval is documented to raise are treated
        # as "unparseable"; KeyboardInterrupt/SystemExit still propagate.
        return None

# Decode every COORDINATES cell with parse_coord.
parsed_coordinates = df["COORDINATES"].apply(parse_coord)
df["COORDINATES"] = parsed_coordinates

# Keep only the rows whose coordinate cell is still non-null after parsing.
has_coordinates = df["COORDINATES"].notna()
df = df.loc[has_coordinates].copy()

# --------------------------------------------------------------------
# Quiet safe request wrapper
# --------------------------------------------------------------------
def safe_get(url, params=None):
    """GET ``url`` and return its decoded JSON body, or ``None`` on failure.

    A non-200 status or any raised exception is reported with ``print`` and
    converted to ``None`` so the calling loop can continue.

    Args:
        url: Endpoint to query.
        params: Optional query-string parameters passed to ``requests.get``.

    Returns:
        The parsed JSON payload, or ``None`` if the request did not succeed.
    """
    try:
        response = requests.get(url, params=params, timeout=10)
        if response.status_code != 200:
            print(f"\n❌ HTTP {response.status_code}, params={params}")
            return None
        return response.json()
    except Exception as err:
        print(f"\n❌ REQUEST ERROR: {err}, params={params}")
        return None

# --------------------------------------------------------------------
# Prepare output list
# --------------------------------------------------------------------
rows = []

print("\nStarting USGS Design Maps queries...\n")

# --------------------------------------------------------------------
# Query the endpoint once per structure, with a tqdm progress bar
# --------------------------------------------------------------------
url = "https://earthquake.usgs.gov/ws/designmaps/asce7-16.json"

# Output columns filled from the response; each one is read from the
# lower-cased key of the same name in the response's "data" object.
value_columns = ("PGA", "SS", "S1", "SMS", "SDS", "SDCS", "PGAM", "FPGA")

for _, row in tqdm(df.iterrows(), total=len(df), desc="Design Maps API"):
    sid = row["STRUCTURE_ID"]
    lat, lon = row["COORDINATES"]

    data = safe_get(
        url,
        {
            "latitude": lat,
            "longitude": lon,
            "riskCategory": "II",
            "siteClass": "D",
            "title": "Earthquake",
        },
    )

    # Every structure gets a row, even when the lookup yields nothing.
    entry = {"STRUCTURE_ID": sid, "COORDINATES": (lat, lon)}

    if not (data and "response" in data and "data" in data["response"]):
        # Only failures are reported, to keep the progress bar readable.
        print(f"\n⚠ No usable data for STRUCTURE_ID={sid}, lat={lat}, lon={lon}")
        entry.update(dict.fromkeys(value_columns))
    else:
        d = data["response"]["data"]
        entry.update({column: d.get(column.lower()) for column in value_columns})

    rows.append(entry)

    time.sleep(0.1)  # polite rate limit

# --------------------------------------------------------------------
# Write the collected rows to design_maps.csv inside out_dir
# --------------------------------------------------------------------
out_path = os.path.join(out_dir, "design_maps.csv")
df_out = pd.DataFrame(rows)
df_out.to_csv(out_path, index=False)

print(f"\nSaved Design Maps results:\n{out_path}\n")


In [2]:
import os
import requests
import pandas as pd
from tqdm import tqdm
import ast
from concurrent.futures import ThreadPoolExecutor, as_completed

# --------------------------------------------------------------------
# IMPROVED PARALLEL VERSION - USGS Design Maps
# --------------------------------------------------------------------

def safe_get(url, params=None, timeout=10):
    """Silently GET ``url`` and return its JSON body, or ``None`` on failure.

    Args:
        url: Endpoint to query.
        params: Optional query-string parameters passed to ``requests.get``.
        timeout: Timeout forwarded to ``requests.get`` (seconds).

    Returns:
        The decoded JSON payload when the response status is 200 and the
        body parses as JSON; ``None`` otherwise.
    """
    try:
        r = requests.get(url, params=params, timeout=timeout)
        if r.status_code != 200:
            return None
        return r.json()
    except (requests.RequestException, ValueError):
        # RequestException covers connection/timeout/HTTP-layer failures;
        # ValueError covers a body that is not valid JSON. Anything else
        # (e.g. KeyboardInterrupt) is deliberately allowed to propagate.
        return None

def fetch_design_map(lat, lon, risk_category="II", site_class="D"):
    """Fetch USGS Design Maps (ASCE 7-16 endpoint) values for one coordinate.

    Args:
        lat: Latitude sent as the ``latitude`` query parameter.
        lon: Longitude sent as the ``longitude`` query parameter.
        risk_category: Value for the ``riskCategory`` query parameter.
            Defaults to ``"II"``, the value previously hard-coded.
        site_class: Value for the ``siteClass`` query parameter.
            Defaults to ``"D"``, the value previously hard-coded.

    Returns:
        A dict with keys ``PGA``, ``SS``, ``S1``, ``SMS``, ``SDS``, ``SDCS``,
        ``PGAM`` and ``FPGA`` (missing fields are ``None``), or ``None`` when
        the request failed or the payload has no ``response.data`` object.
    """
    url = "https://earthquake.usgs.gov/ws/designmaps/asce7-16.json"
    params = {
        "latitude": lat,
        "longitude": lon,
        "riskCategory": risk_category,
        "siteClass": site_class,
        "title": "Earthquake"
    }

    data = safe_get(url, params)

    # Walk the payload defensively: an unexpected shape (list, string,
    # null "data") is treated as "no result" rather than raising.
    response = data.get("response") if isinstance(data, dict) else None
    d = response.get("data") if isinstance(response, dict) else None
    if not isinstance(d, dict):
        return None

    return {
        "PGA": d.get("pga"),
        "SS": d.get("ss"),
        "S1": d.get("s1"),
        "SMS": d.get("sms"),
        "SDS": d.get("sds"),
        "SDCS": d.get("sdcs"),
        "PGAM": d.get("pgam"),
        "FPGA": d.get("fpga"),
    }

def process_single_design_map(row_data):
    """Build the output record for one structure (worker-thread entry point).

    Args:
        row_data: A ``(structure_id, lat, lon)`` triple.

    Returns:
        A dict with ``STRUCTURE_ID``, ``COORDINATES`` and the eight value
        columns; the value columns stay ``None`` when the lookup returns
        nothing.
    """
    structure_id, latitude, longitude = row_data

    # Start from an all-None record so every row has the same columns.
    record = {"STRUCTURE_ID": structure_id, "COORDINATES": (latitude, longitude)}
    record.update(
        dict.fromkeys(("PGA", "SS", "S1", "SMS", "SDS", "SDCS", "PGAM", "FPGA"))
    )

    fetched = fetch_design_map(latitude, longitude)
    if fetched:
        record.update(fetched)

    return record

def enrich_design_maps_parallel(
    coord_csv_path,
    output_csv_path,
    sample_n=None,
    max_workers=8
):
    """
    Parallel USGS Design Maps enrichment using ThreadPoolExecutor.

    Reads a CSV with STRUCTURE_ID and COORDINATES columns, looks each
    coordinate up via ``process_single_design_map`` on a thread pool, and
    writes one output row per structure in the same order as the input.

    Args:
        coord_csv_path: Path to coordinates CSV
        output_csv_path: Path for output CSV; its parent directory is
            created if it does not exist
        sample_n: Number of leading rows to process (None = all)
        max_workers: Number of parallel threads (default 8). The original
            author notes 10-15 may also work; not verified here.

    Returns:
        The DataFrame that was written to ``output_csv_path``. It is empty
        (but has the full set of columns) when there was nothing to process.
    """
    columns = [
        "STRUCTURE_ID", "COORDINATES",
        "PGA", "SS", "S1", "SMS", "SDS", "SDCS", "PGAM", "FPGA",
    ]

    def parse_coord(x):
        """Return a 2-element coordinate pair from a CSV cell, else None."""
        if isinstance(x, str):
            try:
                x = ast.literal_eval(x)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                return None
        # Anything that is not a pair (NaN, a bare number, a 3-tuple, ...)
        # would break the lat/lon split below, so drop it here.
        if isinstance(x, (tuple, list)) and len(x) == 2:
            return x
        return None

    # Load coordinates and discard rows without a usable pair
    df = pd.read_csv(coord_csv_path)
    df["COORDINATES"] = df["COORDINATES"].apply(parse_coord)
    df = df[df["COORDINATES"].notna()].copy()

    # Apply sampling
    if sample_n is not None:
        df = df.head(sample_n)

    # Prepare data for parallel processing
    tasks = [
        (sid, coord[0], coord[1])
        for sid, coord in zip(df["STRUCTURE_ID"], df["COORDINATES"])
    ]

    print(f"Processing {len(tasks)} structures with {max_workers} parallel workers...")

    # Pre-sized so each result lands at its input position regardless of
    # the order in which the threads finish.
    rows = [None] * len(tasks)

    if tasks:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_index = {
                executor.submit(process_single_design_map, task): index
                for index, task in enumerate(tasks)
            }

            # Progress advances as tasks complete, not in submission order
            for future in tqdm(as_completed(future_to_index), total=len(tasks), desc="USGS Design Maps Parallel"):
                index = future_to_index[future]
                try:
                    rows[index] = future.result()
                except Exception:
                    # A failed task still yields a row, with empty values
                    sid, lat, lon = tasks[index]
                    rows[index] = {
                        **dict.fromkeys(columns),
                        "STRUCTURE_ID": sid,
                        "COORDINATES": (lat, lon),
                    }

    # Make sure the destination directory exists before writing, so a long
    # run is not lost to a missing folder.
    output_dir = os.path.dirname(output_csv_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save output; an empty run still gets the full header
    df_out = pd.DataFrame(rows) if rows else pd.DataFrame(columns=columns)
    df_out.to_csv(output_csv_path, index=False)

    # Show summary
    total = len(rows)
    non_null_count = df_out['PGA'].notna().sum()
    print(f"Completed: {non_null_count}/{total} structures enriched")
    if total:
        print(f"Success rate: {non_null_count/total*100:.1f}%")
    else:
        print("Success rate: n/a (no structures processed)")
    print(f"Saved to: {output_csv_path}")

    return df_out

# Run the parallel version (MUCH FASTER!)
# Threaded run over the full coordinate file (sample_n=None means all rows).
coord_csv = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data\structure_coordinates.csv"
output_csv = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data_fixed\design_maps.csv"

# Left as a bare expression so the notebook still displays the returned frame.
enrich_design_maps_parallel(
    coord_csv_path=coord_csv,
    output_csv_path=output_csv,
    sample_n=None,
    max_workers=8,  # original author's note: can be raised to 10-15 if needed
)

Processing 4914 structures with 8 parallel workers...


USGS Design Maps Parallel: 100%|██████████| 4914/4914 [10:35<00:00,  7.74it/s]

Completed: 4914/4914 structures enriched
Success rate: 100.0%
Saved to: C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\enriched_data_fixed\design_maps.csv





Unnamed: 0,STRUCTURE_ID,COORDINATES,PGA,SS,S1,SMS,SDS,SDCS,PGAM,FPGA
0,1W,"(48.29745556, -122.6078139)",0.591,1.359,0.487,1.359,0.906,D,0.650,1.1
1,,"(47.769275, -122.707925)",0.503,1.368,0.487,1.368,0.912,D,0.553,1.1
2,,"(47.56759167, -122.5517028)",0.689,1.609,0.560,1.609,1.073,D,0.757,1.1
3,00000000,"(47.25279444, -124.178075)",0.771,1.568,0.745,1.568,1.045,D,0.848,1.1
4,,"(47.98571667, -122.2271222)",0.542,1.260,0.447,1.260,0.840,D,0.596,1.1
...,...,...,...,...,...,...,...,...,...,...
4909,DAPFORLE,"(47.02893056, -122.4655)",0.500,1.284,0.452,1.284,0.856,D,0.550,1.1
4910,DAPFORLE,"(47.02788056, -122.5136)",0.500,1.299,0.459,1.299,0.866,D,0.550,1.1
4911,DAPFORLE,"(47.11668889, -122.5003)",0.500,1.327,0.464,1.327,0.885,D,0.550,1.1
4912,DAPFORLE,"(47.10798889, -122.5898)",0.503,1.354,0.477,1.354,0.902,D,0.553,1.1
