# 📓 Catalist → MAP Schema Migration Notebook (Bundle Format)

This notebook converts the **MAP Types Airtable export (CSV)** into a set of **flat JSON descriptor files** plus a **manifest** (`schema.json`), matching our new loader design.

**Overall Workflow**
1. Imports & configuration
2. Load CSV and normalize
3. Helper functions: `$ref` and headers
4. Row → descriptor conversion (Holon, Property, and Relationship are implemented; Value, Enum, and EnumVariant are still TODO)
5. Group descriptors by `type_kind` and write flat files
6. Build and write `schema.json` manifest
7. (Optional) JSON-Schema validation
---
# 1. Imports & Config


In [9]:
import pandas as pd
import json, itertools
from datetime import datetime
from pathlib import Path

# Paths (edit as needed)
CSV_PATH   = Path("data/MAP Types-Grid view.csv")        # private CSV
# A fresh timestamp per run gives every export its own bundle directory.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
OUT_DIR    = Path(f"output/map_types_{timestamp}")        # bundle output dir
# parents=True: the enclosing "output/" directory may not exist yet on a fresh
# checkout; without it mkdir raises FileNotFoundError.
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Which TypeKinds to export (rows with any other TypeKind are skipped)
TYPEKINDS = [
    "HolonType", "PropertyType", "ValueType",
    "EnumType", "EnumVariantType", "RelationshipType"
]

# Bundle manifest info (written into schema.json)
default_schema = {
    "type_name":   "CatalistSchema",
    "label":       "Catalist 2.5 Schema",
    "description": "Types generated from Catalist Airtable export"
}

---
# 2. Load CSV + Normalize

In [10]:
# Load the Airtable CSV export and report its dimensions.
# pandas represents empty CSV cells as NaN, so later cells must treat NaN as "missing".
df = pd.read_csv(CSV_PATH)
n_rows, n_cols = df.shape
print(f"Loaded {n_rows} rows × {n_cols} cols")
# NOTE(review): this expression is not the last statement of the cell, so the
# notebook never renders the preview; move it to the end of a cell (or wrap it
# in display(...)) if the preview is wanted.
df.head(3)

# Normalizer for Airtable checkboxes

def bool_norm(val):
    """Normalize an Airtable checkbox cell to a Python ``bool``.

    Args:
        val: The raw cell value. Real booleans are returned unchanged; anything
            else is compared by its stripped, lower-cased string form.

    Returns:
        True for a real ``True`` and for text equal to "checked" or "true"
        (case-insensitive, surrounding whitespace ignored); False otherwise,
        including for NaN / empty cells.
    """
    if isinstance(val, bool):
        return val
    # "true" is accepted next to Airtable's "checked" so that boolean-like
    # values whose string form is "True" (e.g. NumPy booleans) are not
    # silently turned into False.
    return str(val).strip().lower() in {"checked", "true"}

Loaded 181 rows × 45 cols


---
# 3. Helpers: `$ref` & Common Header

In [11]:
# Build a JSON-$ref object (with optional schema/space)
def ref(type_name: str, schema: str = None, space: str = None):
    """Build a ``{"$ref": {...}}`` object pointing at ``type_name``.

    ``schema`` and ``space`` are added to the reference only when they are
    truthy, in that order, after ``type_name``.
    """
    target = {"type_name": type_name}
    for key, value in (("schema", schema), ("space", space)):
        if value:
            target[key] = value
    return {"$ref": target}

# Common header fields for all descriptors
def common_header(row, descriptor_name):
    """
    Return the fields common to every descriptor type.

    NOTE(review): the cell in section 4 defines ``common_header`` again; the
    later definition is the one in effect once that cell has run.

    Args:
        row: One CSV row; must support ``row.get(column, default)`` and
            ``row["TypeKind"]`` (e.g. a pandas Series).
        descriptor_name: Value stored under ``"descriptor_name"``.

    Returns:
        dict with the keys ``descriptor_name``, ``label``, ``description``,
        ``is_dependent``, ``is_value_type``, ``described_by`` and
        ``is_subtype_of`` (currently always ``None``).

    Raises:
        KeyError: if the row has no ``"TypeKind"`` entry.
    """
    def _text(column):
        # An empty CSV cell arrives as NaN, which is truthy: a bare
        # `value or ""` would pass NaN through and json.dump would then write
        # the non-standard token `NaN`. Map NaN/None/empty to "" explicitly.
        value = row.get(column, "")
        return "" if pd.isna(value) or not value else value

    return {
        "descriptor_name": descriptor_name,
        "label":           _text("Label (Human Readable)"),
        "description":     _text("Description"),
        "is_dependent":    bool_norm(row.get("Is Dependent", False)),
        "is_value_type":   bool_norm(row.get("Is ValueType", False)),
        "described_by":    ref(row["TypeKind"]),
        "is_subtype_of":   None  # TODO: populate when CSV supports
    }

---
# 4. Row → Descriptor Conversion

In [12]:
def common_header(row, descriptor_name):
    """Return the pieces common to every TypeKind.

    NOTE(review): this redefines the ``common_header`` helper already defined
    in the section 3 cell; keep a single definition to avoid the two drifting
    apart.

    Args:
        row: One CSV row; must support ``row.get(column, default)`` and
            ``row["TypeKind"]`` (e.g. a pandas Series).
        descriptor_name: Value stored under ``"descriptor_name"``.

    Returns:
        dict with the keys ``descriptor_name``, ``label``, ``description``,
        ``is_dependent``, ``is_value_type``, ``described_by`` and
        ``is_subtype_of`` (currently always ``None``).

    Raises:
        KeyError: if the row has no ``"TypeKind"`` entry.
    """
    def _text(column):
        # An empty CSV cell arrives as NaN, which is truthy: a bare
        # `value or ""` would pass NaN through and json.dump would then write
        # the non-standard token `NaN`. Map NaN/None/empty to "" explicitly.
        value = row.get(column, "")
        return "" if pd.isna(value) or not value else value

    return {
        "descriptor_name" : descriptor_name,
        "label"           : _text("Label (Human Readable)"),
        "description"     : _text("Description"),
        "is_dependent"    : bool_norm(row.get("Is Dependent", False)),
        "is_value_type"   : bool_norm(row.get("Is ValueType", False)),
        "described_by"    : ref(row["TypeKind"]),      # all core meta-types are addressable
        "is_subtype_of"   : None                       # TODO: populate when CSV adds this
    }

# HolonType

def row_to_holontype(row):
    """Convert one HolonType CSV row into a descriptor dict.

    The spec starts from ``common_header`` (descriptor name
    ``"<Type Name>Descriptor"``) and adds ``properties``, ``key_properties``
    and ``type_name``. The two property columns are comma-separated lists; a
    NaN cell yields an empty list.
    """
    def _split_names(column):
        # Comma-separated cell -> list of trimmed, non-empty names.
        cell = row.get(column, "")
        if pd.isna(cell):
            return []
        return [part.strip() for part in str(cell).split(",") if part.strip()]

    type_name = row["Type Name"]
    spec = common_header(row, f"{type_name}Descriptor")
    spec["properties"] = _split_names("MAP PROPERTIES PropertyTypes")
    spec["key_properties"] = _split_names("MAP KEY_PROPERTIES PropertyTypes")
    spec["type_name"] = type_name
    return {
        "type_kind": "HolonType",
        "type_name": type_name,
        "described_by": ref("HolonType"),
        "spec": spec,
    }

# PropertyType

def row_to_propertytype(row):
    """Convert one PropertyType CSV row into a descriptor dict.

    The spec starts from ``common_header`` (descriptor name
    ``"<Type Name>_descriptor"``) and adds ``property_name`` and
    ``value_type``. ``value_type`` is a ``$ref`` to the name found in the
    "ValueType (VALUE_TYPE_FOR)" column, or ``None`` when that cell is
    missing, NaN or blank.
    """
    name = row["Type Name"]
    spec = common_header(row, f"{name}_descriptor")
    spec["property_name"] = name
    raw = row.get("ValueType (VALUE_TYPE_FOR)", "")
    # An empty CSV cell is NaN (a float); calling .strip() on it directly
    # raises AttributeError, so check for NaN before treating it as text.
    ref_val = "" if pd.isna(raw) else str(raw).strip()
    spec["value_type"]    = ref(ref_val) if ref_val else None
    return {"type_kind": "PropertyType", "type_name": name,
            "described_by": ref("PropertyType"), "spec": spec}

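# TODO: ValueType, EnumType and EnumVariantType converters are not implemented yet;
# rows of those kinds are skipped when the descriptors are built.
# RelationshipType (flattened, with collection fields): one descriptor per source × target pair.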

def row_to_relationshiptype(row):
    """Convert one RelationshipType CSV row into a list of descriptor dicts.

    The "Relationship From" and "Relationship To" cells are comma-separated
    lists; one descriptor named ``"<src>-<rel>-><tgt>"`` is produced for every
    source × target combination. A row whose source or target cell is empty
    produces no descriptors.

    Missing, NaN and blank cells are treated alike: booleans fall back to
    False, ``deletion_semantic`` and ``has_inverse`` to ``None``, the target
    cardinalities to 0 (min) and 1 (max), and ``target_semantic`` to "Set".

    Returns:
        list of dicts with the keys ``type_kind``, ``type_name``,
        ``described_by`` and ``spec``.

    Raises:
        KeyError: if the row has no "Type Name" (or, via ``common_header``,
            no "TypeKind") entry.
        ValueError: if a non-empty cardinality cell cannot be converted by ``int``.
    """
    def _present(column):
        # Cell value, or None when the column is absent or the cell is
        # NaN / blank. NaN is truthy, so `value or default` alone is not enough.
        value = row.get(column)
        if pd.isna(value) or not str(value).strip():
            return None
        return value

    def _names(column):
        # Comma-separated cell -> list of trimmed, non-empty names. Going
        # through _present avoids str(NaN) == "nan" becoming a bogus name.
        value = _present(column)
        if value is None:
            return []
        return [part.strip() for part in str(value).split(",") if part.strip()]

    rel = row["Type Name"]
    # normalize fields
    source_owns  = bool_norm(row.get("Source Owns Relationship", False))
    deletion_sem = _present("Deletion Semantic")
    load_links   = bool_norm(row.get("Load Links Immediate", False))
    load_holons  = bool_norm(row.get("Load Holons Immediate", False))
    has_inv      = _present("Has Inverse") or None
    # cardinality (int(NaN) would raise, hence _present before the fallback)
    tmin = int(_present("Target Min Cardinality") or 0)
    tmax = int(_present("Target Max Cardinality") or 1)
    tsem = _present("Target Semantic")
    if tsem is None:
        tsem = "Set"
    # iterate sources × targets
    outs = []
    froms = _names("Relationship From")
    tos   = _names("Relationship To")
    for src, tgt in itertools.product(froms, tos):
        tname = f"{src}-{rel}->{tgt}"
        spec = common_header(row, f"{tname}Descriptor")
        spec.update({
            "relationship_name":        rel,
            "source_owns_relationship": source_owns,
            "deletion_semantic":        deletion_sem,
            "load_links_immediate":     load_links,
            "load_holons_immediate":    load_holons,
            "has_inverse":              ref(has_inv) if has_inv else None,
            "target_holon_type":        ref(tgt, schema=default_schema["type_name"]),
            "target_semantic":          tsem,
            "target_min_cardinality":   tmin,
            "target_max_cardinality":   tmax
        })
        outs.append({
            "type_kind":    "RelationshipType",
            "type_name":    tname,
            "described_by": ref("RelationshipType"),
            "spec":         spec
        })
    return outs


---
# 5. Group and Write Flat JSON Files

In [13]:
# Convert all rows→descriptors.
# Each converter returns a list so single- and multi-descriptor kinds are
# handled uniformly. Kinds listed in TYPEKINDS but without a converter
# (ValueType, EnumType, EnumVariantType — still TODO) are skipped silently.
converters = {
    "HolonType":        lambda r: [row_to_holontype(r)],
    "PropertyType":     lambda r: [row_to_propertytype(r)],
    "RelationshipType": row_to_relationshiptype,
}
all_desc = []
for _, row in df.iterrows():
    row_kind = row["TypeKind"]
    if row_kind not in TYPEKINDS or row_kind not in converters:
        continue
    all_desc.extend(converters[row_kind](row))

# Bucket by type_kind (insertion order = order of first appearance)
descriptor_buckets = {}
for descriptor in all_desc:
    bucket_key = descriptor["type_kind"]
    if bucket_key not in descriptor_buckets:
        descriptor_buckets[bucket_key] = []
    descriptor_buckets[bucket_key].append(descriptor)

# Write each bucket to its own JSON file, named "<type_kind lower-cased>s.json"
for bucket_kind, bucket_items in descriptor_buckets.items():
    bucket_path = OUT_DIR / f"{bucket_kind.lower()}s.json"
    with bucket_path.open('w') as out_file:
        json.dump(bucket_items, out_file, indent=2)
    print(f"Wrote {len(bucket_items)} descriptors → {bucket_path}")

Wrote 81 descriptors → output/map_types_20250701_130731/holontypes.json
Wrote 136 descriptors → output/map_types_20250701_130731/relationshiptypes.json
Wrote 31 descriptors → output/map_types_20250701_130731/propertytypes.json


---
# 6. Manifest: Build and Write `schema.json`

In [14]:
# List every descriptor file already written to the bundle directory.
# Path.glob yields entries in arbitrary, filesystem-dependent order, so sort
# the names to keep the manifest identical from run to run. schema.json itself
# is excluded so re-running this cell does not make the manifest list itself.
type_files = sorted(
    path.name for path in OUT_DIR.glob("*.json") if path.name != "schema.json"
)

manifest = {
    "schema": {
        "type_name":   default_schema["type_name"],
        "described_by": ref("SchemaType", schema="CoreSchema"),
        "properties": {
            "name":        default_schema["label"],
            "description": default_schema["description"]
        }
    },
    "type_files": type_files
}
with open(OUT_DIR / "schema.json", 'w') as f:
    json.dump(manifest, f, indent=2)
print("Wrote manifest → schema.json")

Wrote manifest → schema.json


---
# 7. Validate against JSON Schema (optional)
When you have a JSON Schema for the canonical MAP import JSON, you can validate the output against it.

In [16]:
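# Uncomment the code below once a JSON Schema is available.
# NOTE: `payload` is not defined by any earlier cell — load the JSON document you
# want to validate (e.g. one of the files written to OUT_DIR) into `payload` first.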

# import jsonschema, pathlib, json
# schema_doc = json.load(open("map_schema_import.schema.json"))
# jsonschema.validate(payload, schema_doc)
# print("Payload validated against canonical JSON-Schema")