# MAP Airtable CSV → Canonical Holon JSON (Row-wise SchemaType Detection)

Created: 2025-08-16T20:43:45

**Change:** Rows are classified **per row** using the CSV `type` column.
- Each holon's JSON `type` is written as `"#<type>"`, where `<type>` is the value of that row's CSV `type` column


Also preserved:
- `ComponentOf`, `extends`, `uses_key_rule` → emitted as **relationships**
- No `key` inside `properties`
- Two export modes: `"single"` or `"by-file"`


# 1) Configuration

In [1]:

from pathlib import Path
import datetime

# Working directories. Both are created here (parents included) so later cells
# can read from / write to them without further checks.
INPUT_DIR = Path("./inputs"); INPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR = Path("./outputs"); OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Choose exactly one export mode:
#   "by-file" -> one JSON per input CSV
#   "single"  -> one JSON containing the holons from every CSV
EXPORT_MODE = "by-file"

# Optional future schema path (unused here)
JSON_SCHEMA_PATH = None

# Metadata template; the export cell copies it into each output's "meta" object.
# "generated_at" is a naive local timestamp (no timezone), truncated to seconds.
EXPORT_META = {
    "generator": "MAP CSV→JSON Notebook (row-wise SchemaType detection)",
    "generated_at": datetime.datetime.now().isoformat(timespec="seconds"),
    "export_mode": EXPORT_MODE,
}

# Echo the effective configuration with absolute paths.
print("INPUT_DIR:", INPUT_DIR.resolve())
print("OUTPUT_DIR:", OUTPUT_DIR.resolve())
print("EXPORT_MODE:", EXPORT_MODE)


INPUT_DIR: /Users/matrix/Documents/coding-projects/memetic-activation-platform/catalist-type-conversion-notebook/inputs
OUTPUT_DIR: /Users/matrix/Documents/coding-projects/memetic-activation-platform/catalist-type-conversion-notebook/outputs
EXPORT_MODE: by-file


# 2) Upload or Discover CSVs

In [2]:

import pandas as pd
try:
    import ipywidgets as widgets
    from IPython.display import display
    HAS_WIDGETS = True
except Exception:
    HAS_WIDGETS = False

if HAS_WIDGETS:
    upload = widgets.FileUpload(accept='.csv', multiple=True)
    display(upload)

    def _iter_uploaded(value):
        """Yield (filename, content) pairs for either FileUpload value layout."""
        if isinstance(value, dict):
            # ipywidgets 7.x: {filename: {"content": bytes, ...}}
            for name, item in value.items():
                yield name, item["content"]
        else:
            # ipywidgets 8.x: tuple of {"name": ..., "content": memoryview, ...}
            for item in value:
                yield item["name"], item["content"]

    def _save_upload(change):
        """Observer callback: write every currently uploaded file into INPUT_DIR."""
        saved = 0
        for name, content in _iter_uploaded(upload.value):
            # Keep only the final path component so an uploaded name cannot
            # point outside INPUT_DIR.
            (INPUT_DIR / Path(name).name).write_bytes(content)
            saved += 1
        print(f"Saved {saved} file(s) to {INPUT_DIR.resolve()}")
    upload.observe(_save_upload, names='value')
else:
    print("ipywidgets not available; place CSVs directly in:", INPUT_DIR.resolve())

# Auto-copy previously attached CSVs if present
# CSVs that may have been attached to the session; any that exist under
# /mnt/data are copied into INPUT_DIR unless a file of that name is already there.
ATTACHED = [
    "MAP Meta-Schema 0.0.3-metaschema-abstract-value-types.csv",
    "MAP Meta-Schema 0.0.3-metaschema-concrete-value-types.csv",
    "MAP Meta-Schema 0.0.3-metaschema-keyrules-schema.csv",
    "MAP Meta-Schema 0.0.3-metaschema-property-types.csv",
    "MAP Meta-Schema 0.0.3-metaschema-relationship-types.csv",
    "MAP Meta-Schema 0.0.3-metaschema-root.csv",
]
from pathlib import Path as _Path
src_dir = _Path("/mnt/data")
for fname in ATTACHED:
    src = src_dir / fname
    if not src.exists():
        continue  # nothing attached under this name
    dst = INPUT_DIR / fname
    if dst.exists():
        continue  # never overwrite a file the user already placed
    dst.write_bytes(src.read_bytes())
    print("Copied attached:", fname)

# Every *.csv directly inside INPUT_DIR, in sorted order.
CSV_FILES = sorted(INPUT_DIR.glob("*.csv"))
print("Discovered", len(CSV_FILES), "CSV file(s):")
for csv_path in CSV_FILES:
    print(f" - {csv_path.name}")


FileUpload(value=(), accept='.csv', description='Upload', multiple=True)

Discovered 6 CSV file(s):
 - MAP Meta-Schema 0.0.3-metaschema-abstract-value-types.csv
 - MAP Meta-Schema 0.0.3-metaschema-concrete-value-types.csv
 - MAP Meta-Schema 0.0.3-metaschema-keyrules-schema.csv
 - MAP Meta-Schema 0.0.3-metaschema-property-types.csv
 - MAP Meta-Schema 0.0.3-metaschema-relationship-types.csv
 - MAP Meta-Schema 0.0.3-metaschema-root.csv


# 3) Helpers

In [3]:

import re, json
import pandas as pd
from typing import Any, Dict, List, Optional

def list_norm(v) -> List[str]:
    """Normalize a cell value into a list of non-empty, stripped strings.

    - list/tuple input: each element is stringified and stripped; blanks dropped.
    - None, NaN or "": returns [].
    - anything else: stringified and split on ';' or ','.
    """
    # Sequences must be handled before pd.isna(): on a list, pd.isna() returns
    # an array whose truth value is ambiguous and raises ValueError.
    if isinstance(v, (list, tuple)):
        stripped = (str(x).strip() for x in v)
        return [s for s in stripped if s]
    if pd.isna(v) or v == "":
        return []
    parts = re.split(r"[;,]", str(v))
    return [p.strip() for p in parts if p.strip()]

def ensure_hash_key(key: str) -> str:
    """Return *key* with a leading '#' unless it is falsy or already prefixed.

    Keys starting with '#', 'id:', '@' or 'ext:' are returned unchanged, as are
    falsy keys (empty string, None).
    """
    already_prefixed = bool(key) and key.startswith(("#", "id:", "@", "ext:"))
    if not key or already_prefixed:
        return key
    return "#" + key

def ref(key: str) -> Dict[str, str]:
    """Build a reference object {"$ref": ...} from *key*.

    The key is stringified and passed through ensure_hash_key first.
    """
    target = ensure_hash_key(str(key))
    return {"$ref": target}

def scalarize(v: Any):
    """Convert *v* into a JSON-friendly value.

    - None and Python bool/int/float/str are returned unchanged.
    - Lists are converted element by element.
    - NumPy bool/integer/floating scalars are unwrapped to the matching Python
      scalar, so they are not exported as strings such as "3" or "True".
    - Anything else is stringified.
    """
    if v is None:
        return None
    if isinstance(v, (bool, int, float, str)):
        return v
    if isinstance(v, list):
        return [scalarize(x) for x in v]
    # Local import: the file imports pandas (which depends on NumPy) but never
    # binds the name `np` itself.
    import numpy as np
    if isinstance(v, (np.bool_, np.integer, np.floating)):
        return v.item()
    return str(v)

def col(df: pd.DataFrame, *candidates) -> Optional[str]:
    """Find the first candidate that names a column of *df*, ignoring case.

    Returns the column name exactly as it is spelled in *df*, or None when no
    candidate matches.
    """
    by_lower = {}
    for name in df.columns:
        by_lower[name.lower()] = name
    for wanted in candidates:
        lowered = wanted.lower()
        if lowered in by_lower:
            return by_lower[lowered]
    return None

# Lower-cased CSV column name -> relationship name emitted in the JSON.
REL_COL_TO_NAME = dict(
    componentof="ComponentOf",
    extends="Extends",
    uses_key_rule="UsesKeyRule",
    inverseof="InverseOf",
    inverse_of="InverseOf",
    sourcetype="SourceType",
    targettype="TargetType",
)

# Lower-cased columns that are never copied into a holon's properties:
# the identity columns plus every relationship column above.
RESERVED_COLS = {"key", "type", "metaschema_partition"} | set(REL_COL_TO_NAME)


# 4) Holon model

In [4]:

class Holon:
    """In-memory holon: optional key, type reference, properties, relationships."""

    def __init__(self, type_ref: str, key: Optional[str] = None):
        self.key = key
        # The type reference is normalized through ensure_hash_key.
        self.type = ensure_hash_key(type_ref)
        self.properties: Dict[str, Any] = {}
        self.relationships: List[Dict[str, Any]] = []

    def add_property(self, name: str, value: Any):
        """Store *value* (scalarized) under *name*; None is ignored. Returns self."""
        if value is not None:
            self.properties[name] = scalarize(value)
        return self

    def add_relationship(self, name: str, targets: Any):
        """Append a relationship entry {"name", "target"}. Returns self."""
        entry = {"name": name, "target": targets}
        self.relationships.append(entry)
        return self

    def to_json(self) -> Dict[str, Any]:
        """Serialize with field order: key, type, properties, relationships.

        "key" is omitted when it is None, "properties" is always present, and
        "relationships" is emitted only when at least one exists.
        """
        doc: Dict[str, Any] = {} if self.key is None else {"key": self.key}
        doc["type"] = self.type
        doc["properties"] = self.properties
        if self.relationships:
            doc["relationships"] = self.relationships
        return doc


# 5) Optional indexes: key rules & inverse mapping

In [5]:

from typing import Tuple

class KeyRuleEngine:
    """Registry of per-type key rules, used to derive and validate holon keys.

    ``rules`` maps a '#'-prefixed type key to either ``(format, property_names)``
    or ``None`` (the type is keyless). A format contains positional placeholders
    ``$0``, ``$1``, ... that are filled from the named properties in order.
    """

    def __init__(self):
        self.rules: Dict[str, Optional[Tuple[str, List[str]]]] = {}

    def register(self, holon_type_key: str, fmt: Optional[str], prop_names: Optional[List[str]]):
        """Register the rule for a type; a falsy *fmt* marks the type as keyless."""
        self.rules[ensure_hash_key(holon_type_key)] = (fmt, prop_names) if fmt else None

    def is_keyed(self, holon_type_key: str) -> bool:
        """Return True unless the type was explicitly registered as keyless.

        Types with no registered rule count as keyed (the lookup default is a
        non-None placeholder rule).
        """
        return self.rules.get(ensure_hash_key(holon_type_key), ("$0", ["type_name"])) is not None

    def derive(self, holon_type_key: str, properties: Dict[str, Any]) -> Optional[str]:
        """Fill the type's key format from *properties*.

        Returns None when the type is keyless or has no registered rule.
        Missing properties are substituted as empty strings.
        """
        rule = self.rules.get(ensure_hash_key(holon_type_key))
        if rule is None:
            return None
        fmt, names = rule
        names = names or []
        key = fmt
        # Substitute the highest index first: replacing "$1" before "$10" would
        # rewrite the "$1" prefix of "$10" and corrupt the result.
        for i in range(len(names) - 1, -1, -1):
            key = key.replace(f"${i}", str(properties.get(names[i], "")))
        return key

    def validate(self, h: Holon) -> List[str]:
        """Return the key-rule violations for *h* (an empty list when it is valid).

        Checks that keyed types carry a key, keyless types do not, and that a
        provided key equals the derived key whenever one can be derived.
        """
        errs: List[str] = []
        keyed = self.is_keyed(h.type)
        if keyed and not h.key:
            errs.append(f"{h.type} requires a key but none provided.")
        if (not keyed) and h.key is not None:
            errs.append(f"{h.type} is keyless but a key was provided.")
        if keyed and h.key:
            # derive() returns None for unregistered types, so their keys are
            # not cross-checked.
            derived = self.derive(h.type, h.properties) or ""
            if derived and h.key != derived:
                errs.append(f"Key '{h.key}' does not match derived '{derived}' for {h.type}.")
        return errs

class InverseIndex:
    """Maps inverse relationship names back to their declared names."""

    def __init__(self):
        # inverse name -> declared name
        self.inverse_of: Dict[str, str] = {}

    def declare_pair(self, declared: str, inverse: str):
        """Record that *inverse* is the inverse of *declared*; falsy names are ignored."""
        if not (declared and inverse):
            return
        inverse_name = str(inverse).strip()
        self.inverse_of[inverse_name] = str(declared).strip()

    def rewrite(self, name: str) -> str:
        """Return the declared name for a known inverse, otherwise *name* unchanged."""
        if name in self.inverse_of:
            return self.inverse_of[name]
        return name

def rewrite_relationship_names(holon: Holon, inv: InverseIndex):
    """Replace each relationship name on *holon*, in place, with inv.rewrite(name)."""
    for rel in holon.relationships:
        current = rel["name"]
        rel["name"] = inv.rewrite(current)


# 6) Load CSVs

In [6]:

def load_tables(files: List[Path]):
    """Read each CSV into a DataFrame and return one record per file.

    Each record is {"path", "df", "partition"}; "partition" is the first
    non-null value of the metaschema_partition column (as a string), or None
    when the column is absent or empty. It is kept for debugging/visibility.
    """
    loaded = []
    for p in files:
        df = pd.read_csv(p)
        part_col = col(df, "metaschema_partition")
        if part_col:
            distinct = df[part_col].dropna().astype(str).unique().tolist()
            partition = distinct[0] if distinct else None
        else:
            partition = None
        loaded.append({"path": p, "df": df, "partition": partition})
    return loaded

# Load every discovered CSV and print a one-line summary per table.
tables = load_tables(CSV_FILES)
for t in tables:
    print("{} :: partition={} :: rows={}".format(t['path'].name, t['partition'], len(t['df'])))


MAP Meta-Schema 0.0.3-metaschema-abstract-value-types.csv :: partition=abstract_value_types :: rows=9
MAP Meta-Schema 0.0.3-metaschema-concrete-value-types.csv :: partition=concrete_value_types :: rows=27
MAP Meta-Schema 0.0.3-metaschema-keyrules-schema.csv :: partition=keyrules :: rows=8
MAP Meta-Schema 0.0.3-metaschema-property-types.csv :: partition=property_types :: rows=21
MAP Meta-Schema 0.0.3-metaschema-relationship-types.csv :: partition=relationship_types :: rows=38
MAP Meta-Schema 0.0.3-metaschema-root.csv :: partition=root :: rows=14


# 7) Build indexes from any suitable tables

In [7]:

# Module-level registries: the detect_* helpers in this cell fill them, and
# enforce_rules() reads them when each holon is validated.
key_rules = KeyRuleEngine()
inverses = InverseIndex()

def detect_keyrules(df: pd.DataFrame):
    """Register key rules from *df* when it looks like a key-rule table.

    A table qualifies when it has a type column (holon_type / type / type_name)
    and a `format` column; `property_names` is optional. Rows with a blank type
    are skipped. A blank format, "none" or "none.keyruletype" registers the
    type as keyless.
    """
    c_type = col(df, "holon_type", "type", "type_name")
    c_fmt = col(df, "format")
    c_props = col(df, "property_names")
    if not (c_type and c_fmt):
        return

    def _text(value) -> str:
        # Blank CSV cells arrive as NaN; str(NaN) would produce the literal "nan".
        return "" if pd.isna(value) else str(value).strip()

    for _, row in df.iterrows():
        ht = _text(row.get(c_type))
        if not ht:
            continue
        fmt = _text(row.get(c_fmt)) or None
        if fmt and fmt.lower() in {"none", "none.keyruletype"}:
            fmt = None
        props = list_norm(row.get(c_props)) if c_props else []
        key_rules.register(ht, fmt, props)

def detect_inverses(df: pd.DataFrame):
    """Register declared/inverse relationship name pairs found in *df*.

    Requires a `relationship_name` column and an `inverse_name` (or
    `inverse_of`) column; rows where either cell is blank are skipped.
    """
    c_rel = col(df, "relationship_name")
    c_inv = col(df, "inverse_name", "inverse_of")
    if not (c_rel and c_inv):
        return
    for _, row in df.iterrows():
        raw_declared, raw_inverse = row.get(c_rel), row.get(c_inv)
        # Blank CSV cells arrive as NaN; stringifying them would register an
        # inverse literally named "nan".
        if pd.isna(raw_declared) or pd.isna(raw_inverse):
            continue
        declared = str(raw_declared).strip()
        inv = str(raw_inverse).strip()
        if declared and inv:
            inverses.declare_pair(declared, inv)

# Offer every table to both detectors; each one ignores tables that lack
# the columns it needs.
for df in (t["df"] for t in tables):
    detect_keyrules(df)
    detect_inverses(df)

print("KeyRule entries:", len(key_rules.rules))
print("Inverse pairs:", len(inverses.inverse_of))


KeyRule entries: 0
Inverse pairs: 0


# 8) Row mappers (use CSV 'type' to pick JSON type)

In [8]:

def relationship_from_column(h: Holon, df_row: pd.Series, csv_col_name: str, rel_name: str):
    """Add relationship *rel_name* to *h* from the row's *csv_col_name* cell.

    The cell is split with list_norm and every entry becomes a $ref. A single
    entry is stored as one object, several as a list. Blank cells add nothing.
    """
    cell = df_row.get(csv_col_name)
    is_blank = pd.isna(cell) or cell == ""
    if is_blank:
        return
    refs = list(map(ref, list_norm(cell)))
    if refs:
        target = refs[0] if len(refs) == 1 else refs
        h.add_relationship(rel_name, target)

def copy_properties_excluding(h: Holon, df_row: pd.Series, exclude: set):
    """Copy every non-null cell of *df_row* onto *h* as a property.

    Columns whose lower-cased name appears in *exclude* (compared
    case-insensitively) are skipped.
    """
    skipped = {name.lower() for name in exclude}
    for column in df_row.index:
        if column.lower() not in skipped:
            cell = df_row.get(column)
            if not pd.isna(cell):
                h.add_property(column, cell)

def build_row(df_row: pd.Series) -> Holon:
    """Build one Holon from a CSV row.

    - JSON type: "#<value of the row's `type` column>" (row-wise detection).
    - Key: taken from the first of `key`, `type_name`, `schema_name` that exists
      as a column; a blank cell yields no key.
    - Relationship columns (REL_COL_TO_NAME) become relationships.
    - All remaining non-reserved, non-null columns become properties.
    """
    # One case-insensitive column lookup per row, instead of rebuilding a
    # one-row DataFrame for every column probe.
    by_lower = {str(c).lower(): c for c in df_row.index}

    def cell_text(lower_name: str) -> str:
        """Return the cell as stripped text; absent column or NaN gives ''."""
        column = by_lower.get(lower_name)
        if column is None:
            return ""
        value = df_row.get(column)
        # Blank CSV cells arrive as NaN; str(NaN) would produce the literal "nan".
        return "" if pd.isna(value) else str(value).strip()

    # Determine JSON 'type' from CSV 'type' column (row-wise); do not double
    # the '#' when the CSV value already carries one.
    csv_type_val = cell_text("type")
    json_type = csv_type_val if csv_type_val.startswith("#") else f"#{csv_type_val}"

    # Determine key column: prefer 'key', else 'type_name', else 'schema_name'
    key_name = next((n for n in ("key", "type_name", "schema_name") if n in by_lower), None)
    key_val = cell_text(key_name) if key_name else ""
    h = Holon(type_ref=json_type, key=(key_val or None))

    # Map relationship columns -> relationships
    for csv_col, rel_name in REL_COL_TO_NAME.items():
        c = by_lower.get(csv_col)
        if c is not None:
            relationship_from_column(h, df_row, c, rel_name)

    # Copy other scalars except reserved/relationship columns
    copy_properties_excluding(h, df_row, RESERVED_COLS)
    return h


# 9) Assemble holons and enforce rules

In [9]:

# Accumulators filled while the holons are assembled below.
errors: List[str] = []                      # messages returned by key_rules.validate
file_holons: Dict[str, List[Holon]] = {}    # CSV file name -> holons built from that file
all_holons: List[Holon] = []                # every holon, in table order then row order

def enforce_rules(h: Holon):
    """Normalize *h* in place and record any key-rule violations.

    Relationship names are rewritten through the inverse index, validation
    messages are appended to the module-level `errors`, and the properties
    "key" and "type" are removed if present.
    """
    rewrite_relationship_names(h, inverses)
    errors.extend(key_rules.validate(h))
    h.properties.pop("key", None)
    h.properties.pop("type", None)

# Build one holon per CSV row, grouped per source file and also collected globally.
for t in tables:
    hs = []
    for _, row in t["df"].iterrows():
        h = build_row(row)
        enforce_rules(h)
        hs.append(h)
    file_holons[t["path"].name] = hs
    all_holons += hs

print(f"Built {len(all_holons)} holon(s) from {len(tables)} CSV file(s).")
print("Preview first 3 holons:")
for h in all_holons[:3]:
    rendered = json.dumps(h.to_json(), indent=2)
    # Cap each preview at 600 characters to keep the cell output short.
    print(rendered[:600], "...")


Built 117 holon(s) from 6 CSV file(s).
Preview first 3 holons:
{
  "key": "MetaValueType",
  "type": "#TypeDescriptor",
  "properties": {
    "is_abstract_type": true,
    "type_name": "MetaValueType",
    "type_name_plural": "MetaValueTypes",
    "display_name": "Meta Value Type",
    "display_name_plural": "Meta Value Types",
    "instance_type_kind": "Holon"
  },
  "relationships": [
    {
      "name": "ComponentOf",
      "target": {
        "$ref": "#MAP Metaschema-v0.0.2"
      }
    },
    {
      "name": "Extends",
      "target": {
        "$ref": "#MetaHolonType"
      }
    },
    {
      "name": "UsesKeyRule",
      "target": {
        "$ref" ...
{
  "key": "ValueType",
  "type": "#TypeDescriptor",
  "properties": {
    "is_abstract_type": true,
    "type_name": "ValueType",
    "type_name_plural": "ValueTypes",
    "display_name": "Value Type",
    "display_name_plural": "Value Types",
    "instance_type_kind": "Holon"
  },
  "relationships": [
    {
      "name": "Compon

# 10) Export JSON

In [10]:
# --- Export JSON (with cross-file #ref resolution for by-file mode) ---

def keys_defined_by(holons: List[Holon]) -> set:
    """Collect the truthy keys of *holons* into a set."""
    defined = set()
    for holon in holons:
        if holon.key:
            defined.add(holon.key)
    return defined

def refs_from_holons(holons: List[Holon]) -> set:
    """
    Gather the local '#...' references used anywhere in these holons' JSON.

    The leading '#' is stripped from each collected reference. id:/@/ext:
    references are skipped on purpose: they cannot be resolved via local files.
    """
    found = set()
    # Explicit stack instead of recursion; visiting order does not matter
    # because the result is a set.
    pending = [h.to_json() for h in holons]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            target = node.get("$ref")
            if isinstance(target, str) and target.startswith("#"):
                found.add(target[1:])
            pending.extend(node.values())
        elif isinstance(node, list):
            pending.extend(node)
    return found

def greedy_load_with(unresolved: set, providers: dict) -> (list, set):
    """
    Greedy set cover over provider files.

    Repeatedly picks the file that supplies the most still-missing keys (the
    earliest such file on ties) until nothing is missing or no file helps.
      unresolved: set[str] of keys needed
      providers:  {filename(str) -> set[str] keys_defined_in_that_file}
    Returns (selected_filenames_list, still_unresolved_set). Neither argument
    is modified.
    """
    still_needed = set(unresolved)
    candidates = dict(providers)  # shallow copy: chosen files are popped from it
    chosen = []
    while still_needed and candidates:
        # max() returns the first maximal entry, which gives the earliest-file tie-break.
        winner = max(candidates, key=lambda name: len(still_needed & candidates[name]))
        if not (still_needed & candidates[winner]):
            break  # no remaining file covers anything
        chosen.append(winner)
        still_needed -= candidates.pop(winner)
    return chosen, still_needed

# Export driver: writes one combined JSON ("single") or one JSON per CSV
# ("by-file") into OUTPUT_DIR, reports unresolved local '#...' references,
# and finishes with a summary of the files written.
paths = []
errors_found = False  # track whether we printed any unresolved-ref errors

if EXPORT_MODE == "single":
    # All holons in one export: unresolved #refs should be zero if all data is local.
    all_defined = keys_defined_by(all_holons)
    all_used = refs_from_holons(all_holons)
    leftover = sorted(all_used - all_defined)

    # Copy the template so the per-export fields never leak into EXPORT_META.
    meta = dict(EXPORT_META)
    meta["source_files"] = [t["path"].name for t in tables]
    meta["load_with"] = []  # No need to add anything in single-file mode

    if leftover:
        errors_found = True
        print("ERROR: unresolved #refs in single export:", leftover)

    # The export is written even when unresolved refs were reported above.
    export_obj = {"meta": meta, "holons": [h.to_json() for h in all_holons]}
    out = OUTPUT_DIR / "export.holons.json"
    out.write_text(json.dumps(export_obj, indent=2), encoding="utf-8")
    paths.append(out)

elif EXPORT_MODE == "by-file":
    # Precompute providers: which file defines which keys?
    providers_by_file = {t["path"].name: keys_defined_by(file_holons.get(t["path"].name, [])) for t in tables}

    for t in tables:
        fname = t["path"].name
        hs = file_holons.get(fname, [])

        # Local needs vs. local supply
        used = refs_from_holons(hs)
        own_keys = providers_by_file.get(fname, set())
        unresolved = used - own_keys

        # Build provider map excluding self
        others = {other_name: kset for other_name, kset in providers_by_file.items() if other_name != fname}

        # Greedy cover: pick other files to resolve unresolved refs (only #refs)
        selected_files, remaining = greedy_load_with(unresolved, others)

        # Copy the template so the per-export fields never leak into EXPORT_META.
        meta = dict(EXPORT_META)
        meta["source_files"] = [fname]
        # Only include files that actually resolve refs; convert input CSV names -> output JSON names
        meta["load_with"] = [Path(sf).stem + ".json" for sf in selected_files]

        if remaining:
            errors_found = True
            print(f"ERROR: unresolved #refs in '{fname}': {sorted(remaining)}")

        # Output name mirrors the CSV name with a .json extension; the file is
        # written even when unresolved refs were reported above.
        export_obj = {"meta": meta, "holons": [h.to_json() for h in hs]}
        out = OUTPUT_DIR / (t["path"].stem + ".json")
        out.write_text(json.dumps(export_obj, indent=2), encoding="utf-8")
        paths.append(out)

else:
    raise ValueError("EXPORT_MODE must be 'single' or 'by-file'")

# Summary of what was written.
print("Wrote", len(paths), "file(s):")
for p in paths:
    print(" -", p.name)

if errors_found:
    print("\n✖ One or more exports still contain unresolved local (#...) references. See ERROR lines above.")
else:
    print("\n✓ All local (#...) references were resolved within the chosen export mode.")


Wrote 6 file(s):
 - MAP Meta-Schema 0.0.3-metaschema-abstract-value-types.json
 - MAP Meta-Schema 0.0.3-metaschema-concrete-value-types.json
 - MAP Meta-Schema 0.0.3-metaschema-keyrules-schema.json
 - MAP Meta-Schema 0.0.3-metaschema-property-types.json
 - MAP Meta-Schema 0.0.3-metaschema-relationship-types.json
 - MAP Meta-Schema 0.0.3-metaschema-root.json

✓ All local (#...) references were resolved within the chosen export mode.


# 11) Diagnostics

In [11]:

# Show the error count and at most the first 25 messages.
print("Errors:", len(errors))
for e in errors[:25]:
    print("-", e)
if len(errors) > 25:
    hidden = len(errors)-25
    print("... (+", hidden, "more)")


Errors: 0
