In [2]:
%pip install openai dotenv

Defaulting to user installation because normal site-packages is not writeable
Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Downloading dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)
Installing collected packages: dotenv
Successfully installed dotenv-0.9.9
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


# Enhance via LLM Numerical Bins


In [36]:
import json
import os
from openai import OpenAI
from dotenv import load_dotenv
import time
import sys

# Load environment variables
load_dotenv()

# Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Configuration
SCHEMA_PATH = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\schemas\macrostrat_schema.json"
OUTPUT_DIR = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\bin_schemas"
TEST_MODE = False # Set to True to test on a few entries first
TEST_COUNT = 3    # Number of entries to test

# Load schema
with open(SCHEMA_PATH, "r", encoding="utf-8") as f:
    schema = json.load(f)

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

def create_binning_prompt(field_name, description, range_values):
    """Create prompt for LLM to generate semantic bins"""
    
    # Determine lower bound: use 0 if range starts at or near 0, otherwise use observed min
    lower_bound = 0 if range_values[0] >= 0 else range_values[0]
    
    prompt = f"""You are an expert in structural engineering, civil engineering, and earthquake engineering, specializing in bridge infrastructure and seismic vulnerability assessment.

**Task**: Create semantic categorical bins for the following bridge metric:

**Field Name**: {field_name}
**Description**: {description}
**Observed Range**: {range_values[0]} to {range_values[1]}

**CRITICAL BINNING STRUCTURE**:
Your bins will be bounded to handle ALL possible values including outliers:

- **Lower bound**: Will be set to {lower_bound} (practical minimum for this metric)
- **Upper bound**: Will be set to "inf" (covers all extreme values and outliers)
- **Your job**: Define MEANINGFUL THRESHOLDS between categories (typically 3-5 thresholds)

**Example for "Bridge Age in years"** (observed range 0 to 120):
Your response should define thresholds like [20, 50, 80]:
```json
{{
  "labels": ["very young", "young", "mature", "old", "very old"],
  "edges": [20, 50, 80],
  "sentences": [
    "Very young bridges under 20 years old require minimal maintenance and operate at peak structural integrity with full original design capacity.",
    "Young bridges between 20-50 years show early signs of wear but generally maintain good structural performance with routine maintenance.",
    "Mature bridges between 50-80 years need increased inspection frequency and often require rehabilitation to maintain safety standards.",
    "Old bridges between 80-120 years face significant deterioration requiring major reconstruction or strengthening interventions.",
    "Very old bridges exceeding 120 years and extreme outliers represent historic structures requiring specialized preservation and continuous monitoring."
  ]
}}
```
This will be automatically converted to: edges = [0, 20, 50, 80, "inf"]

**Your Output Format** (JSON only, no markdown, no extra text):
{{
  "labels": ["label1", "label2", "label3", "label4", ...],
  "edges": [threshold1, threshold2, threshold3, ...],
  "sentences": ["Sentence for category 1 (from {lower_bound} to threshold1)", "Sentence for category 2 (from threshold1 to threshold2)", ...]
}}

**Critical Requirements**:
- LABELS: Must have exactly (number of edges + 1) labels. First label covers ({lower_bound} to edge1), last label covers (last_edge to infinity)
- EDGES: Only your meaningful thresholds (typically 3-5 values). Do NOT include {lower_bound} or "inf" - these will be added automatically
- SENTENCES: Exactly as many sentences as labels. Each sentence describes the engineering significance of values in that range
- Focus on MEANINGFUL semantic categories, not data errors or invalid values
- First category starts at {lower_bound} (the practical minimum)
- Last category describes extreme/outlier values beyond normal observations
- Return ONLY valid JSON, no markdown code blocks, no explanatory text

**Observed range is {range_values[0]} to {range_values[1]} - use this to inform your thresholds**

Now create bins for {field_name}:"""

    return prompt

def parse_llm_response(response_text, range_values):
    """Parse LLM response and extract binning information, enforce bounds"""
    try:
        # Try to extract JSON from response
        response_text = response_text.strip()
        
        # Remove markdown code blocks if present
        if response_text.startswith("```"):
            lines = response_text.split("\n")
            response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
            if response_text.startswith("json"):
                response_text = response_text[4:].strip()
        
        data = json.loads(response_text)
        
        # Validate required fields
        required = ["labels", "edges", "sentences"]
        for field in required:
            if field not in data:
                raise ValueError(f"Missing required field: {field}")
        
        # Check that sentences are not empty
        for i, sentence in enumerate(data["sentences"]):
            if not sentence or not sentence.strip():
                raise ValueError(f"Sentence {i+1} is empty or blank")
        
        # Keep edges as-is but convert inf strings to lowercase for consistency
        # Do NOT convert to float("inf") - keep as strings for valid JSON
        data["edges"] = [
            "inf" if str(e).lower() == "inf" else
            "-inf" if str(e).lower() == "-inf" else
            float(e) if not isinstance(e, (int, float)) else e
            for e in data["edges"]
        ]
        
        # Get counts
        n_labels = len(data["labels"])
        n_edges = len(data["edges"])
        n_sentences = len(data["sentences"])
        
        # Validation: labels should equal edges + 1 (since we'll add lower bound and inf)
        expected_labels = n_edges + 1
        if n_labels != expected_labels:
            raise ValueError(f"Labels count ({n_labels}) should be edges count + 1 ({expected_labels})")
        
        if n_sentences != n_labels:
            raise ValueError(f"Sentences count ({n_sentences}) should match labels count ({n_labels})")
        
        # ENFORCE BOUNDS: Add lower bound (0 or observed min) at start and "inf" at end
        lower_bound = 0 if range_values[0] >= 0 else range_values[0]
        original_edges = data["edges"].copy()
        data["edges"] = [lower_bound] + data["edges"] + ["inf"]
        
        print(f"    üîß Enforced bounds: {original_edges} -> [{lower_bound}, ..., 'inf']")
        
        return data
    
    except json.JSONDecodeError as e:
        print(f"‚ùå JSON parsing error: {e}")
        print(f"Response text: {response_text[:200]}...")
        return None
    except Exception as e:
        print(f"‚ùå Validation error: {e}")
        return None

def get_llm_bins(field_name, description, range_values):
    """Call OpenAI API to generate semantic bins. Returns (bins, response_text)"""
    
    prompt = create_binning_prompt(field_name, description, range_values)
    
    try:
        response = client.chat.completions.create(
            model="gpt-4o",  # or "gpt-3.5-turbo" for faster/cheaper
            messages=[
                {"role": "system", "content": "You are an expert structural and earthquake engineer specializing in bridge infrastructure analysis. You always return valid JSON output exactly as requested."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,# Lower temperature for more consistent output
            max_tokens=1500
        )
        
        response_text = response.choices[0].message.content
        bins = parse_llm_response(response_text, range_values)
        
        return bins, response_text
    
    except Exception as e:
        print(f"‚ùå API error for {field_name}: {e}")
        return None, str(e)

# Process schema
bin_schema = {}
skipped = []
errors = []

entries_to_process = list(schema.items())
if TEST_MODE:
    entries_to_process = entries_to_process[:TEST_COUNT]
    print(f"üß™ TEST MODE: Processing first {TEST_COUNT} entries\n")

for idx, (field_name, metadata) in enumerate(entries_to_process, 1):
    
    # Only process numerical or numerical_coded fields
    field_type = metadata.get("type")
    if field_type not in ["numerical", "numerical_coded"]:
        bin_schema[field_name] = metadata
        print(f"[{idx}/{len(entries_to_process)}] ‚è≠Ô∏è  Skipping {field_name} (type: {field_type})")
        continue
    
    # Check for required fields
    description = metadata.get("description")
    range_values = metadata.get("range")
    
    if not description:
        print(f"[{idx}/{len(entries_to_process)}] ‚ö†Ô∏è  {field_name}: Missing description")
        skipped.append((field_name, "missing description"))
        continue
    
    if not range_values or len(range_values) != 2:
        print(f"[{idx}/{len(entries_to_process)}] ‚ö†Ô∏è  {field_name}: Invalid or missing range")
        skipped.append((field_name, "invalid range"))
        continue
    
    # Call LLM to generate bins
    print(f"[{idx}/{len(entries_to_process)}] ü§ñ Processing {field_name}...")
    bins, response_text = get_llm_bins(field_name, description, range_values)
    
    if bins:
        # Create new schema entry with bins
        new_entry = metadata.copy()
        new_entry["semantic_bins"] = bins
        bin_schema[field_name] = new_entry
        print(f"    ‚úÖ Generated {len(bins['labels'])} bins: {bins['labels']}")
        
        # Print detailed results in TEST_MODE
        if TEST_MODE:
            print(f"    üìä Detailed Output:")
            print(f"       Labels: {bins['labels']}")
            print(f"       Edges:  {bins['edges']}")
            print(f"       Sentences:")
            for i, sentence in enumerate(bins['sentences'], 1):
                print(f"         {i}. {sentence}")
    else:
        # STOP EXECUTION AND PRINT DIAGNOSIS
        print(f"    ‚ùå FAILED to generate bins for {field_name}")
        print(f"\n" + "="*80)
        print(f"üõë STOPPING EXECUTION FOR DIAGNOSIS")
        print(f"="*80)
        print(f"\nüìã Field: {field_name}")
        print(f"üìù Description: {description}")
        print(f"üìä Range: {range_values}")
        
        # Save full response to file for inspection
        error_log_path = os.path.join(OUTPUT_DIR, f"error_response_{field_name}.txt")
        with open(error_log_path, "w", encoding="utf-8") as error_file:
            error_file.write(f"Field: {field_name}\n")
            error_file.write(f"Description: {description}\n")
            error_file.write(f"Range: {range_values}\n")
            error_file.write(f"\n{'='*80}\n")
            error_file.write(f"RAW API RESPONSE:\n")
            error_file.write(f"{'='*80}\n\n")
            error_file.write(response_text)
        
        print(f"\nüîç RAW API RESPONSE (length: {len(response_text)} characters):")
        print("-"*80)
        
        # Display full response without truncation
        # Split into chunks if needed to avoid any output limitations
        chunk_size = 5000
        for i in range(0, len(response_text), chunk_size):
            chunk = response_text[i:i+chunk_size]
            print(chunk, end='')
        
        print("\n" + "-"*80)
        print(f"\nüíæ Full response saved to: {error_log_path}")
        print(f"üí° Fix the issue and re-run the script.")
        sys.exit(1)  # Exit with error code
    
    # Rate limiting (optional)
    time.sleep(0.5)
    print()

# Save results (only in production mode)
if not TEST_MODE:
    # Generate output filename based on input schema name
    schema_base_name = os.path.splitext(os.path.basename(SCHEMA_PATH))[0]
    output_filename = f"{schema_base_name}_bins.json"
    output_path = os.path.join(OUTPUT_DIR, output_filename)
    
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(bin_schema, f, indent=2, ensure_ascii=False)

# Summary
print("\n" + "="*60)
print("üìä SUMMARY")
print("="*60)
print(f"‚úÖ Successfully processed: {len(bin_schema) - 1}")  # -1 for STRUCTURE_ID
print(f"‚ö†Ô∏è  Skipped (missing data): {len(skipped)}")
print(f"‚ùå Failed (errors): {len(errors)}")

if not TEST_MODE:
    print(f"\nüíæ Saved to: {output_path}")
else:
    print(f"\nüß™ TEST MODE: Results displayed above, no file saved")

if skipped:
    print("\n‚ö†Ô∏è  Skipped fields:")
    for field, reason in skipped:
        print(f"  - {field}: {reason}")

if errors:
    print("\n‚ùå Error fields:")
    for field in errors:
        print(f"  - {field}")

[1/11] ‚è≠Ô∏è  Skipping STRUCTURE_ID (type: reference)
[2/11] ‚è≠Ô∏è  Skipping COORDINATES (type: reference)
[3/11] ‚è≠Ô∏è  Skipping MACRO_UNIT_NAME (type: nl)
[4/11] ‚è≠Ô∏è  Skipping MACRO_STRAT_NAME (type: nl)
[5/11] ‚è≠Ô∏è  Skipping MACRO_LITHOLOGY (type: nl)
[6/11] ü§ñ Processing MACRO_AGE_MIN...
    üîß Enforced bounds: [2.6, 23.03, 66, 252.17, 541] -> [0, ..., 'inf']
    ‚úÖ Generated 6 bins: ['recent', 'quaternary', 'tertiary', 'mesozoic', 'paleozoic', 'precambrian']

[7/11] ü§ñ Processing MACRO_AGE_MAX...
    üîß Enforced bounds: [50, 200, 1000, 2500] -> [0, ..., 'inf']
    ‚úÖ Generated 5 bins: ['very recent', 'recent', 'intermediate', 'ancient', 'prehistoric']

[8/11] ‚è≠Ô∏è  Skipping MACRO_DESCRIPTION (type: nl)
[9/11] ‚è≠Ô∏è  Skipping MACRO_COLOR (type: reference)
[10/11] ‚è≠Ô∏è  Skipping MACRO_SOURCE_ID (type: reference)
[11/11] ‚è≠Ô∏è  Skipping MACRO_MAP_ID (type: reference)

üìä SUMMARY
‚úÖ Successfully processed: 10
‚ö†Ô∏è  Skipped (missing data): 0
‚ùå Failed (err

# Enhance via LLM Codemaps


In [40]:
import json
import os
from openai import OpenAI
from dotenv import load_dotenv
import time
import sys

# Load environment variables
load_dotenv()

# Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Configuration
SCHEMA_PATH = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\schemas\usgs_design_maps_schema.json"
OUTPUT_DIR = r"C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\codemap_schemas"
TEST_MODE = False  # Set to True to test on a few entries first
TEST_COUNT = 3     # Number of entries to test

# Load schema
with open(SCHEMA_PATH, "r", encoding="utf-8") as f:
    schema = json.load(f)

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

def create_code_enhancement_prompt(field_name, description, code_map):
    """Create prompt for LLM to enhance code_map descriptions"""
    
    # Format code map entries for the prompt
    code_entries = "\n".join([f"  Code '{code}': {desc}" for code, desc in code_map.items()])
    
    prompt = f"""You are an expert in structural engineering, civil engineering, and earthquake engineering, specializing in bridge infrastructure and seismic vulnerability assessment.

**Task**: Enhance the categorical code descriptions for the following bridge metric to provide richer semantic content for embedding models.

**Field Name**: {field_name}
**Field Description**: {description}

**Current Code Map**:
{code_entries}

**Requirements**:
- Expand each brief description into a comprehensive 20-30 word sentence
- Explain what the code represents in practical bridge engineering terms
- Provide context for why this distinction matters for structural analysis or assessment
- Use complete sentences with proper technical terminology
- Maintain technical accuracy and engineering relevance
- Keep the code values EXACTLY as they are (do not modify codes)

**Example Enhancement**:
For a field with codes "H", "R", "N":

Original Code Map:
  "H": "Highway beneath structure"
  "R": "Railroad beneath structure"
  "N": "Feature not a highway or railroad"

Your Enhanced Output (use the EXACT codes "H", "R", "N"):
{{
  "H": "The reported lateral clearance is measured relative to a highway roadway passing beneath the bridge structure, representing the horizontal distance available for vehicle passage and potential collision zones.",
  "R": "The reported lateral clearance references a railroad track alignment beneath the bridge, defining safe operational clearances for rail vehicles and ensuring adequate structural separation.",
  "N": "The lateral clearance is measured relative to a non-transportation feature beneath the bridge such as a waterway or utility corridor where clearance standards differ from vehicular requirements."
}}

**Your Output Format** (JSON only, no markdown, no extra text):
Return a JSON object where EACH KEY is the EXACT code from the Current Code Map above (e.g., "1", "H", "01", etc.), and each value is the enhanced 20-30 word description.

**Critical Requirements**:
- Return ONLY valid JSON, no markdown code blocks, no explanatory text
- Each description should be 20-30 words
- Preserve ALL original codes exactly as given and in the same order
- Focus on engineering significance and practical implications
- Use complete, grammatically correct sentences

Now enhance the code map for {field_name}:"""

    return prompt

def parse_code_enhancement_response(response_text, original_code_map):
    """Parse LLM response and validate enhanced code map"""
    try:
        # Try to extract JSON from response
        response_text = response_text.strip()
        
        # Remove markdown code blocks if present
        if response_text.startswith("```"):
            lines = response_text.split("\n")
            response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
            if response_text.startswith("json"):
                response_text = response_text[4:].strip()
        
        enhanced_map = json.loads(response_text)
        
        # Validate that all original codes are present
        original_codes = set(original_code_map.keys())
        enhanced_codes = set(enhanced_map.keys())
        
        if original_codes != enhanced_codes:
            missing = original_codes - enhanced_codes
            extra = enhanced_codes - original_codes
            error_msg = []
            if missing:
                error_msg.append(f"Missing codes: {missing}")
            if extra:
                error_msg.append(f"Extra codes: {extra}")
            raise ValueError(f"Code mismatch. {' '.join(error_msg)}")
        
        # Check that descriptions are not empty
        for code, desc in enhanced_map.items():
            if not desc or not desc.strip():
                raise ValueError(f"Description for code '{code}' is empty or blank")
            
            # Check word count (aim for 20-30 words)
            word_count = len(desc.split())
            if word_count < 15:
                print(f"    ‚ö†Ô∏è  Warning: Code '{code}' has only {word_count} words (target: 20-30)")
            elif word_count > 40:
                print(f"    ‚ö†Ô∏è  Warning: Code '{code}' has {word_count} words (target: 20-30)")
        
        return enhanced_map
    
    except json.JSONDecodeError as e:
        print(f"‚ùå JSON parsing error: {e}")
        print(f"Response text: {response_text[:200]}...")
        return None
    except Exception as e:
        print(f"‚ùå Validation error: {e}")
        return None

def get_llm_enhanced_codes(field_name, description, code_map):
    """Call OpenAI API to enhance code map descriptions. Returns (enhanced_map, response_text)"""
    
    prompt = create_code_enhancement_prompt(field_name, description, code_map)
    
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are an expert structural and earthquake engineer specializing in bridge infrastructure analysis. You always return valid JSON output exactly as requested."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,  # Lower temperature for more consistent output
            max_tokens=2000
        )
        
        response_text = response.choices[0].message.content
        enhanced_map = parse_code_enhancement_response(response_text, code_map)
        
        return enhanced_map, response_text
    
    except Exception as e:
        print(f"‚ùå API error for {field_name}: {e}")
        return None, str(e)

# Process schema
enhanced_schema = {}
skipped = []
errors = []

entries_to_process = list(schema.items())
if TEST_MODE:
    entries_to_process = entries_to_process[:TEST_COUNT]
    print(f"üß™ TEST MODE: Processing first {TEST_COUNT} entries\n")

for idx, (field_name, metadata) in enumerate(entries_to_process, 1):
    
    # Only process nominal fields with code_map
    field_type = metadata.get("type")
    code_map = metadata.get("code_map")
    
    if field_type != "nominal":
        enhanced_schema[field_name] = metadata
        print(f"[{idx}/{len(entries_to_process)}] ‚è≠Ô∏è  Skipping {field_name} (type: {field_type})")
        continue
    
    if not code_map or not isinstance(code_map, dict) or len(code_map) == 0:
        enhanced_schema[field_name] = metadata
        print(f"[{idx}/{len(entries_to_process)}] ‚è≠Ô∏è  Skipping {field_name} (no code_map)")
        skipped.append((field_name, "no code_map"))
        continue
    
    # Check for required fields
    description = metadata.get("description")
    
    if not description:
        print(f"[{idx}/{len(entries_to_process)}] ‚ö†Ô∏è  {field_name}: Missing description")
        skipped.append((field_name, "missing description"))
        enhanced_schema[field_name] = metadata
        continue
    
    # Call LLM to enhance code map
    print(f"[{idx}/{len(entries_to_process)}] ü§ñ Processing {field_name}...")
    enhanced_map, response_text = get_llm_enhanced_codes(field_name, description, code_map)
    
    if enhanced_map:
        # Create new schema entry with enhanced code_map
        new_entry = metadata.copy()
        new_entry["code_map"] = enhanced_map
        enhanced_schema[field_name] = new_entry
        print(f"    ‚úÖ Enhanced {len(enhanced_map)} code descriptions")
        
        # Print detailed results in TEST_MODE
        if TEST_MODE:
            print(f"    üìä Detailed Output:")
            for code, desc in enhanced_map.items():
                word_count = len(desc.split())
                print(f"       '{code}' ({word_count} words): {desc}")
    else:
        # STOP EXECUTION AND PRINT DIAGNOSIS
        print(f"    ‚ùå FAILED to enhance code map for {field_name}")
        print(f"\n" + "="*80)
        print(f"üõë STOPPING EXECUTION FOR DIAGNOSIS")
        print(f"="*80)
        print(f"\nüìã Field: {field_name}")
        print(f"üìù Description: {description}")
        print(f"üìä Original Code Map: {code_map}")
        
        # Save full response to file for inspection
        error_log_path = os.path.join(OUTPUT_DIR, f"error_response_codemap_{field_name}.txt")
        with open(error_log_path, "w", encoding="utf-8") as error_file:
            error_file.write(f"Field: {field_name}\n")
            error_file.write(f"Description: {description}\n")
            error_file.write(f"Original Code Map: {code_map}\n")
            error_file.write(f"\n{'='*80}\n")
            error_file.write(f"RAW API RESPONSE:\n")
            error_file.write(f"{'='*80}\n\n")
            error_file.write(response_text)
        
        print(f"\nüîç RAW API RESPONSE (length: {len(response_text)} characters):")
        print("-"*80)
        
        # Display full response without truncation
        chunk_size = 5000
        for i in range(0, len(response_text), chunk_size):
            chunk = response_text[i:i+chunk_size]
            print(chunk, end='')
        
        print("\n" + "-"*80)
        print(f"\nüíæ Full response saved to: {error_log_path}")
        print(f"üí° Fix the issue and re-run the script.")
        sys.exit(1)  # Exit with error code
    
    # Rate limiting
    time.sleep(0.5)
    print()

# Save results (only in production mode)
if not TEST_MODE:
    # Generate output filename based on input schema name
    schema_base_name = os.path.splitext(os.path.basename(SCHEMA_PATH))[0]
    output_filename = f"{schema_base_name}_enhanced.json"
    output_path = os.path.join(OUTPUT_DIR, output_filename)
    
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(enhanced_schema, f, indent=2, ensure_ascii=False)

# Summary
print("\n" + "="*60)
print("üìä SUMMARY")
print("="*60)
print(f"‚úÖ Successfully processed: {len([f for f, m in enhanced_schema.items() if m.get('type') == 'nominal' and m.get('code_map')])}")
print(f"‚è≠Ô∏è  Skipped (not nominal or no code_map): {len(skipped)}")
print(f"‚ùå Failed (errors): {len(errors)}")

if not TEST_MODE:
    print(f"\nüíæ Saved to: {output_path}")
else:
    print(f"\nüß™ TEST MODE: Results displayed above, no file saved")

if skipped:
    print(f"\n‚ö†Ô∏è  Skipped {len(skipped)} fields (not nominal or no code_map)")


[1/10] ‚è≠Ô∏è  Skipping PGA (type: numerical)
[2/10] ‚è≠Ô∏è  Skipping SS (type: numerical)
[3/10] ‚è≠Ô∏è  Skipping S1 (type: numerical)
[4/10] ‚è≠Ô∏è  Skipping SMS (type: numerical)
[5/10] ‚è≠Ô∏è  Skipping SDS (type: numerical)
[6/10] ü§ñ Processing SDCS...
    ‚úÖ Enhanced 6 code descriptions

[7/10] ‚è≠Ô∏è  Skipping PGAM (type: numerical)
[8/10] ‚è≠Ô∏è  Skipping FPGA (type: numerical)
[9/10] ‚è≠Ô∏è  Skipping STRUCTURE_ID (type: string)
[10/10] ‚è≠Ô∏è  Skipping COORDINATES (type: string)

üìä SUMMARY
‚úÖ Successfully processed: 1
‚è≠Ô∏è  Skipped (not nominal or no code_map): 0
‚ùå Failed (errors): 0

üíæ Saved to: C:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\codemap_schemas\usgs_design_maps_schema_enhanced.json
