In [2]:
import json
import os

# Merge descriptions into the coded numerical NBI schema, rewriting the schema
# file in place.
#
# Steps:
#   1. Load the schema (a JSON object) and the descriptions (indexed by
#      position, so presumably a JSON array - verify against the file).
#   2. Set STRUCTURE_ID aside so it does not consume a description slot.
#   3. For every remaining entry, drop 'category' and attach the description
#      found at the same position in the descriptions array.
#   4. Put STRUCTURE_ID back as the first key and overwrite the schema file.

# Both inputs are resolved relative to the working directory (expected to be
# the project root) so the cell does not depend on one user's absolute path.
project_root = os.getcwd()
schema_path = os.path.join(project_root, "schemas", "nbi_numerical_coded_schema.json")
descriptions_path = os.path.join(
    project_root, "schema descriptions", "nbi_numerical_coded_descriptions.json"
)

# "utf-8-sig" decodes UTF-8 with or without a leading BOM, so no retry is
# needed. Reading the schema as UTF-8 also matches how it is written below;
# relying on the platform default (cp1252 on Windows) would corrupt or reject
# the non-ASCII characters this cell saves with ensure_ascii=False.
with open(schema_path, "r", encoding="utf-8-sig") as f:
    numerical_schema = json.load(f)

with open(descriptions_path, "r", encoding="utf-8-sig") as f:
    descriptions_array = json.load(f)

# Extract STRUCTURE_ID for later reinsertion. Presence is tracked separately
# because the stored value itself may be falsy (e.g. {} or null) and must
# still be written back.
has_structure_id = "STRUCTURE_ID" in numerical_schema
structure_id_entry = numerical_schema.pop("STRUCTURE_ID", None)

# Snapshot the key order; descriptions are matched to entries by position.
schema_keys_in_order = list(numerical_schema)

# Positional matching silently misaligns if the two files drift apart, so
# surface a count mismatch instead of hiding it.
if len(descriptions_array) != len(schema_keys_in_order):
    print(
        f"⚠️ Description count ({len(descriptions_array)}) does not match "
        f"schema entry count ({len(schema_keys_in_order)}); "
        "entries without a matching description get None"
    )

for idx, key in enumerate(schema_keys_in_order):
    entry = numerical_schema[key]

    # Remove category (no error if it is already absent, e.g. on a re-run)
    entry.pop("category", None)

    # Add description from array (matching by index); None when the array is
    # shorter than the schema
    entry["description"] = (
        descriptions_array[idx] if idx < len(descriptions_array) else None
    )

# Reinsert STRUCTURE_ID at the beginning (dicts preserve insertion order)
if has_structure_id:
    numerical_schema = {"STRUCTURE_ID": structure_id_entry, **numerical_schema}

# Save updated schema back to file with special characters preserved
with open(schema_path, "w", encoding="utf-8") as f:
    json.dump(numerical_schema, f, indent=2, ensure_ascii=False)

print(f"✅ Updated {len(numerical_schema)} entries (including STRUCTURE_ID)")
print("Removed 'category' attributes and added 'description' from nbi_numerical_coded_descriptions.json")
print(f"Saved to {schema_path}")

✅ Updated 11 entries (including STRUCTURE_ID)
Removed 'category' attributes and added 'description' from nbi_numerical_coded_descriptions.json
Saved to c:\Users\wongb\Bridge-ML\Bridge-ML-LLM-Embedding-Architecture\schemas\nbi_numerical_coded_schema.json
