miqdigital · pull · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -79,9 +79,9 @@ jobs:
           uv run --frozen graphify install
 
   security-scan:
-    # The dev deps already include bandit, pip-audit, and safety. Run them in
-    # CI so a new HIGH-severity finding or vulnerable dependency is caught on
-    # the PR that introduces it, rather than at the next manual audit.
+    # The dev deps include bandit and pip-audit. Run them in CI so a new
+    # HIGH-severity finding or vulnerable dependency is caught on the PR that
+    # introduces it, rather than at the next manual audit.
     # Non-blocking for now (continue-on-error) to avoid breaking CI on
     # pre-existing findings; remove continue-on-error after the initial
     # cleanup pass.

diff --git a/docs/.DS_Store b/docs/.DS_Store
diff --git a/docs/translations/README.ja-JP.md b/docs/translations/README.ja-JP.md
@@ -114,7 +114,7 @@ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v3/graphify/ski
 
 ```
 - **graphify** (`~/.claude/skills/graphify/SKILL.md`) - any input to knowledge graph. Trigger: `/graphify`
-When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"` before doing anything else.
+When the user types `/graphify`, use the installed graphify skill or instructions before doing anything else.
 ```
 
 </details>

diff --git a/docs/translations/README.ko-KR.md b/docs/translations/README.ko-KR.md
@@ -150,7 +150,7 @@ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v3/graphify/ski
 
 ```
 - **graphify** (`~/.claude/skills/graphify/SKILL.md`) - any input to knowledge graph. Trigger: `/graphify`
-When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"` before doing anything else.
+When the user types `/graphify`, use the installed graphify skill or instructions before doing anything else.
 ```
 
 </details>

diff --git a/docs/translations/README.zh-CN.md b/docs/translations/README.zh-CN.md
@@ -110,7 +110,7 @@ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v3/graphify/ski
 
 ```
 - **graphify** (`~/.claude/skills/graphify/SKILL.md`) - any input to knowledge graph. Trigger: `/graphify`
-When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"` before doing anything else.
+When the user types `/graphify`, use the installed graphify skill or instructions before doing anything else.
 ```
 
 </details>

diff --git a/graphify/.DS_Store b/graphify/.DS_Store
diff --git a/graphify/__main__.py b/graphify/__main__.py
@@ -462,8 +462,8 @@ def _skill_registration(skill_path: str = "~/.claude/skills/graphify/SKILL.md")
         "\n# graphify\n"
         f"- **graphify** (`{skill_path}`) "
         "- any input to knowledge graph. Trigger: `/graphify`\n"
-        "When the user types `/graphify`, invoke the Skill tool "
-        "with `skill: \"graphify\"` before doing anything else.\n"
+        "When the user types `/graphify`, use the installed graphify skill "
+        "or instructions before doing anything else.\n"
     )
 
 
@@ -2970,13 +2970,18 @@ def main() -> None:
 
         p = _ap.ArgumentParser(prog="graphify save-result")
         p.add_argument("--question", required=True)
-        p.add_argument("--answer", required=True)
+        p.add_argument("--answer", default=None)
+        p.add_argument("--answer-file", dest="answer_file", default=None)
         p.add_argument("--type", dest="query_type", default="query")
         p.add_argument("--nodes", nargs="*", default=[])
         p.add_argument("--outcome", choices=("useful", "dead_end", "corrected"), default=None)
         p.add_argument("--correction", default=None)
         p.add_argument("--memory-dir", default=str(Path(_GRAPHIFY_OUT) / "memory"))
         opts = p.parse_args(sys.argv[2:])
+        if opts.answer_file:
+            opts.answer = Path(opts.answer_file).read_text(encoding="utf-8").strip()
+        elif not opts.answer:
+            p.error("--answer or --answer-file is required")
         from graphify.ingest import save_query_result as _sqr
 
         out = _sqr(
@@ -4546,6 +4551,7 @@ def _parse_float(name: str, raw: str) -> float:
         # Semantic extraction on docs/papers/images. Check cache first.
         from graphify.cache import (
             check_semantic_cache as _check_semantic_cache,
+            prune_semantic_cache as _prune_semantic_cache,
             save_semantic_cache as _save_semantic_cache,
         )
         sem_result: dict = {
@@ -4636,6 +4642,32 @@ def _progress(idx: int, total: int, _result: dict) -> None:
                 sem_result["hyperedges"].extend(fresh.get("hyperedges", []))
                 sem_result["input_tokens"] += fresh.get("input_tokens", 0)
                 sem_result["output_tokens"] += fresh.get("output_tokens", 0)
+
+        # Prune orphaned semantic cache entries. The semantic cache is
+        # content-hash-keyed and unversioned, so it is never swept by the AST
+        # version-cleanup: every content change or file deletion leaves a
+        # permanent orphan that accumulates unbounded (#1527). Sweep it against
+        # the FULL live document set (``files_by_type`` — present in both the
+        # incremental and full branches), NOT the incremental ``semantic_files``
+        # changed-subset, which would delete every unchanged doc's valid entry.
+        # Best-effort: a prune failure must never break extraction.
+        try:
+            from graphify.cache import file_hash as _file_hash
+            _live_hashes: set[str] = set()
+            for _kind in ("document", "paper", "image"):
+                for _fp in files_by_type.get(_kind, []):
+                    _abs = Path(_fp)
+                    if not _abs.is_absolute():
+                        _abs = Path(out_root) / _abs
+                    if not _abs.is_file():
+                        continue  # deleted/missing — leave out so its entry is pruned
+                    try:
+                        _live_hashes.add(_file_hash(_abs, out_root))
+                    except OSError:
+                        pass
+            _prune_semantic_cache(out_root, _live_hashes)
+        except Exception as exc:
+            print(f"[graphify extract] warning: could not prune semantic cache: {exc}", file=sys.stderr)
         stages.mark("semantic extract")
 
         pg_result: dict = {"nodes": [], "edges": []}

diff --git a/graphify/always_on/agents-md.md b/graphify/always_on/agents-md.md
@@ -2,7 +2,7 @@
 
 This project has a knowledge graph at graphify-out/ with god nodes, community structure, and cross-file relationships.
 
-When the user types `/graphify`, invoke the `skill` tool with `skill: "graphify"` before doing anything else.
+When the user types `/graphify`, use the installed graphify skill or instructions before doing anything else.
 
 Rules:
 - For codebase questions, first run `graphify query "<question>"` when graphify-out/graph.json exists. Use `graphify path "<A>" "<B>"` for relationships and `graphify explain "<concept>"` for focused concepts. These return a scoped subgraph, usually much smaller than GRAPH_REPORT.md or raw grep output.

diff --git a/graphify/cache.py b/graphify/cache.py
@@ -407,6 +407,44 @@ def clear_cache(root: Path = Path(".")) -> None:
                 f.unlink()
 
 
+def prune_semantic_cache(root: Path, live_hashes: set[str]) -> int:
+    """Remove orphaned semantic cache entries, returning the count pruned.
+
+    The semantic cache is content-hash-keyed (``{file_hash}.json`` under
+    ``cache/semantic/``) and deliberately UNVERSIONED — entries are produced by
+    the LLM from file contents, so invalidating them on every release would
+    re-bill extraction. Because it is unversioned it is also never swept by the
+    AST version-cleanup, so every content change or file deletion leaves a
+    permanent orphan entry that accumulates unbounded.
+
+    This sweeps ``cache/semantic/*.json`` and deletes any entry whose stem (the
+    content hash) is not in ``live_hashes`` — the hashes of the current live
+    document set. ``*.tmp`` atomic-write temporaries are skipped, and only this
+    directory is touched (never ``cache/ast/**`` or anything else). The
+    unversioned design is preserved: we prune by liveness, not by version.
+
+    Best-effort, mirroring :func:`_cleanup_stale_ast_entries`: each unlink is
+    wrapped in ``try/except OSError`` and a failure is ignored. Failure modes
+    are benign — a surviving orphan only occupies disk, and a wrongly pruned
+    entry costs one re-extraction of one doc, never incorrect output.
+    """
+    _out = Path(_GRAPHIFY_OUT)
+    base = _out if _out.is_absolute() else Path(root).resolve() / _out
+    semantic_dir = base / "cache" / "semantic"
+    if not semantic_dir.is_dir():
+        return 0
+    pruned = 0
+    for entry in semantic_dir.glob("*.json"):
+        if entry.stem in live_hashes:
+            continue
+        try:
+            entry.unlink()
+            pruned += 1
+        except OSError:
+            pass
+    return pruned
+
+
 def check_semantic_cache(
     files: list[str],
     root: Path = Path("."),

diff --git a/graphify/export.py b/graphify/export.py
@@ -1535,6 +1535,15 @@ def to_graphml(
     for _, _, attrs in H.edges(data=True):
         for k in [k for k in attrs if k.startswith("_")]:
             del attrs[k]
+    # nx.write_graphml cannot serialize None attribute values (it raises TypeError); replace with "".
+    for node_id in H.nodes():
+        for key, val in list(H.nodes[node_id].items()):
+            if val is None:
+                H.nodes[node_id][key] = ""
+    for u, v in H.edges():
+        for key, val in list(H.edges[u, v].items()):
+            if val is None:
+                H.edges[u, v][key] = ""
     nx.write_graphml(H, output_path)