Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed .DS_Store
Binary file not shown.
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ jobs:
uv run --frozen graphify install

security-scan:
# The dev deps already include bandit, pip-audit, and safety. Run them in
# CI so a new HIGH-severity finding or vulnerable dependency is caught on
# the PR that introduces it, rather than at the next manual audit.
# The dev deps include bandit and pip-audit. Run them in CI so a new
# HIGH-severity finding or vulnerable dependency is caught on the PR that
# introduces it, rather than at the next manual audit.
# Non-blocking for now (continue-on-error) to avoid breaking CI on
# pre-existing findings; remove continue-on-error after the initial
# cleanup pass.
Expand Down
Binary file removed docs/.DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/translations/README.ja-JP.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v3/graphify/ski

```
- **graphify** (`~/.claude/skills/graphify/SKILL.md`) - any input to knowledge graph. Trigger: `/graphify`
When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"` before doing anything else.
When the user types `/graphify`, use the installed graphify skill or instructions before doing anything else.
```

</details>
Expand Down
2 changes: 1 addition & 1 deletion docs/translations/README.ko-KR.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v3/graphify/ski

```
- **graphify** (`~/.claude/skills/graphify/SKILL.md`) - any input to knowledge graph. Trigger: `/graphify`
When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"` before doing anything else.
When the user types `/graphify`, use the installed graphify skill or instructions before doing anything else.
```

</details>
Expand Down
2 changes: 1 addition & 1 deletion docs/translations/README.zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v3/graphify/ski

```
- **graphify** (`~/.claude/skills/graphify/SKILL.md`) - any input to knowledge graph. Trigger: `/graphify`
When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"` before doing anything else.
When the user types `/graphify`, use the installed graphify skill or instructions before doing anything else.
```

</details>
Expand Down
Binary file removed graphify/.DS_Store
Binary file not shown.
38 changes: 35 additions & 3 deletions graphify/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ def _skill_registration(skill_path: str = "~/.claude/skills/graphify/SKILL.md")
"\n# graphify\n"
f"- **graphify** (`{skill_path}`) "
"- any input to knowledge graph. Trigger: `/graphify`\n"
"When the user types `/graphify`, invoke the Skill tool "
"with `skill: \"graphify\"` before doing anything else.\n"
"When the user types `/graphify`, use the installed graphify skill "
"or instructions before doing anything else.\n"
)


Expand Down Expand Up @@ -2970,13 +2970,18 @@ def main() -> None:

p = _ap.ArgumentParser(prog="graphify save-result")
p.add_argument("--question", required=True)
p.add_argument("--answer", required=True)
p.add_argument("--answer", default=None)
p.add_argument("--answer-file", dest="answer_file", default=None)
p.add_argument("--type", dest="query_type", default="query")
p.add_argument("--nodes", nargs="*", default=[])
p.add_argument("--outcome", choices=("useful", "dead_end", "corrected"), default=None)
p.add_argument("--correction", default=None)
p.add_argument("--memory-dir", default=str(Path(_GRAPHIFY_OUT) / "memory"))
opts = p.parse_args(sys.argv[2:])
if opts.answer_file:
opts.answer = Path(opts.answer_file).read_text(encoding="utf-8").strip()
elif not opts.answer:
p.error("--answer or --answer-file is required")
from graphify.ingest import save_query_result as _sqr

out = _sqr(
Expand Down Expand Up @@ -4546,6 +4551,7 @@ def _parse_float(name: str, raw: str) -> float:
# Semantic extraction on docs/papers/images. Check cache first.
from graphify.cache import (
check_semantic_cache as _check_semantic_cache,
prune_semantic_cache as _prune_semantic_cache,
save_semantic_cache as _save_semantic_cache,
)
sem_result: dict = {
Expand Down Expand Up @@ -4636,6 +4642,32 @@ def _progress(idx: int, total: int, _result: dict) -> None:
sem_result["hyperedges"].extend(fresh.get("hyperedges", []))
sem_result["input_tokens"] += fresh.get("input_tokens", 0)
sem_result["output_tokens"] += fresh.get("output_tokens", 0)

# Prune orphaned semantic cache entries. The semantic cache is
# content-hash-keyed and unversioned, so it is never swept by the AST
# version-cleanup: every content change or file deletion leaves a
# permanent orphan that accumulates unbounded (#1527). Sweep it against
# the FULL live document set (``files_by_type`` — present in both the
# incremental and full branches), NOT the incremental ``semantic_files``
# changed-subset, which would delete every unchanged doc's valid entry.
# Best-effort: a prune failure must never break extraction.
try:
from graphify.cache import file_hash as _file_hash
_live_hashes: set[str] = set()
for _kind in ("document", "paper", "image"):
for _fp in files_by_type.get(_kind, []):
_abs = Path(_fp)
if not _abs.is_absolute():
_abs = Path(out_root) / _abs
if not _abs.is_file():
continue # deleted/missing — leave out so its entry is pruned
try:
_live_hashes.add(_file_hash(_abs, out_root))
except OSError:
pass
_prune_semantic_cache(out_root, _live_hashes)
except Exception as exc:
print(f"[graphify extract] warning: could not prune semantic cache: {exc}", file=sys.stderr)
stages.mark("semantic extract")

pg_result: dict = {"nodes": [], "edges": []}
Expand Down
2 changes: 1 addition & 1 deletion graphify/always_on/agents-md.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

This project has a knowledge graph at graphify-out/ with god nodes, community structure, and cross-file relationships.

When the user types `/graphify`, invoke the `skill` tool with `skill: "graphify"` before doing anything else.
When the user types `/graphify`, use the installed graphify skill or instructions before doing anything else.

Rules:
- For codebase questions, first run `graphify query "<question>"` when graphify-out/graph.json exists. Use `graphify path "<A>" "<B>"` for relationships and `graphify explain "<concept>"` for focused concepts. These return a scoped subgraph, usually much smaller than GRAPH_REPORT.md or raw grep output.
Expand Down
38 changes: 38 additions & 0 deletions graphify/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,44 @@ def clear_cache(root: Path = Path(".")) -> None:
f.unlink()


def prune_semantic_cache(root: Path, live_hashes: set[str]) -> int:
    """Delete semantic cache entries whose content hash is no longer live.

    Semantic cache files live at ``cache/semantic/{file_hash}.json`` beneath
    the graphify output directory. The cache is intentionally UNVERSIONED
    (entries come from LLM extraction of file contents, so dropping them on
    every release would re-bill that extraction), which also means the AST
    version-cleanup never sweeps it. Without this sweep, each content change
    or file deletion would leave an orphan entry behind forever.

    Args:
        root: Project root; used to anchor the output directory when
            ``_GRAPHIFY_OUT`` is a relative path.
        live_hashes: Content hashes of the documents that currently exist.
            Any ``*.json`` entry whose stem is not in this set is removed.

    Returns:
        The number of entries that were actually unlinked.

    Only ``*.json`` files directly inside ``cache/semantic/`` are considered,
    so ``*.tmp`` atomic-write temporaries and every other cache directory
    (for example ``cache/ast/**``) are left untouched. Pruning is by
    liveness, never by version.

    Best-effort, in the same spirit as :func:`_cleanup_stale_ast_entries`:
    an ``OSError`` from an individual unlink is swallowed. A surviving orphan
    is harmless — at worst one document is re-extracted on a later run.
    """
    out_dir = Path(_GRAPHIFY_OUT)
    if not out_dir.is_absolute():
        # Relative output dirs are resolved against the (absolute) project root.
        out_dir = Path(root).resolve() / out_dir

    cache_dir = out_dir / "cache" / "semantic"
    if not cache_dir.is_dir():
        return 0

    removed = 0
    orphans = (
        candidate
        for candidate in cache_dir.glob("*.json")
        if candidate.stem not in live_hashes
    )
    for orphan in orphans:
        try:
            orphan.unlink()
        except OSError:
            # Deliberately ignored: a leftover orphan is benign.
            continue
        removed += 1
    return removed


def check_semantic_cache(
files: list[str],
root: Path = Path("."),
Expand Down
9 changes: 9 additions & 0 deletions graphify/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,6 +1535,15 @@ def to_graphml(
for _, _, attrs in H.edges(data=True):
for k in [k for k in attrs if k.startswith("_")]:
del attrs[k]
# nx.write_graphml raises ValueError on None attribute values; replace with "".
for node_id in H.nodes():
for key, val in list(H.nodes[node_id].items()):
if val is None:
H.nodes[node_id][key] = ""
for u, v in H.edges():
for key, val in list(H.edges[u, v].items()):
if val is None:
H.edges[u, v][key] = ""
nx.write_graphml(H, output_path)


Expand Down
Loading
Loading