Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/python/src/synapt_extract/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)
from synapt_extract.validate import validate_extraction, ValidationResult, ValidationError
from synapt_extract.finalize import finalize_extraction, FinalizeContext, FinalizeResult
from synapt_extract.prompt import build_extraction_prompt, resolve_capabilities

__all__ = [
"SynaptExtraction",
Expand All @@ -30,4 +31,6 @@
"finalize_extraction",
"FinalizeContext",
"FinalizeResult",
"build_extraction_prompt",
"resolve_capabilities",
]
188 changes: 188 additions & 0 deletions packages/python/src/synapt_extract/prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""Composable prompt system for SynaptExtraction IL v1."""

from __future__ import annotations

import json
import re
from pathlib import Path
from typing import Any

from synapt_extract.schema import EXTRACTION_CAPABILITIES

# Prompt fragments are looked up in the installed package first ("prompts/"
# sitting next to this module); if that directory is absent we fall back to
# a repo-root "prompts/" directory — parents[4] walks up from
# packages/python/src/synapt_extract/prompt.py to the repository root
# (assumes that package layout — TODO confirm if the tree is reorganized).
_INSTALLED_PROMPTS = Path(__file__).resolve().parent / "prompts"
_REPO_PROMPTS = Path(__file__).resolve().parents[4] / "prompts"
PROMPTS_DIR = _INSTALLED_PROMPTS if _INSTALLED_PROMPTS.is_dir() else _REPO_PROMPTS

# Capability -> capabilities it requires. resolve_capabilities() takes the
# transitive closure over this map, so requesting e.g. "goal_entity_refs"
# also pulls in "goals", "entity_ids", and (via entity_ids) "entities".
CAPABILITY_DEPS: dict[str, list[str]] = {
    "entity_state": ["entities"],
    "entity_context": ["entities"],
    "entity_ids": ["entities"],
    "goal_timing": ["goals"],
    "goal_entity_refs": ["goals", "entity_ids"],
    "temporal_classes": ["temporal_refs"],
    "relations": ["entities", "entity_ids"],
    "relation_origin": ["relations"],
}

# Emission order for prompt fragments and the final sorted capability list:
# base extraction targets first, then refinements, then cross-cutting
# modifiers. Names not listed here sort after everything listed.
CANONICAL_ORDER = [
    "entities", "goals", "themes", "summary", "sentiment", "facts", "temporal_refs",
    "entity_state", "entity_context", "entity_ids",
    "goal_timing", "goal_entity_refs",
    "temporal_classes",
    "relations", "relation_origin",
    "assertion_signals", "evidence_anchoring",
]

# Extra instruction lines injected into the postamble's rules section when
# the corresponding capability is active (see build_extraction_prompt).
# Capabilities without an entry contribute no extra rule.
CAPABILITY_RULES: dict[str, str] = {
    "entity_ids": 'Assign each entity a short local ID ("e1", "e2", etc.). Goals and relations reference entities by ID.',
    "temporal_refs": "Resolve all relative dates to absolute dates.",
    "relation_origin": 'Mark relation origin: "explicit" if stated in text, "inferred" if deduced from context, "dependent" if derived from another relation.',
    "assertion_signals": 'Preserve negation, hedging, and conditions in signals. "I might move" → hedged=true. "No longer using Redis" → negated=true. "If we get funding" → condition="we get funding".',
}


def _load_profile(name: str) -> list[str]:
    """Load the capability list of a named profile from disk.

    Args:
        name: Profile name, resolved to ``profiles/<name>.json`` under
            ``PROMPTS_DIR``.

    Returns:
        The profile's capability names, in file order.

    Raises:
        ValueError: If no profile file exists for *name*.
    """
    path = PROMPTS_DIR / "profiles" / f"{name}.json"
    if not path.exists():
        raise ValueError(f"Unknown profile: {name}")
    # Explicit encoding: read_text() otherwise uses the platform default,
    # which can misread UTF-8 profile files on non-UTF-8 locales.
    data = json.loads(path.read_text(encoding="utf-8"))
    return data["capabilities"]


def _load_fragment(name: str) -> str:
    """Return the raw text of prompt fragment ``v1/<name>.txt``.

    Raises:
        FileNotFoundError: If the fragment file does not exist.
    """
    path = PROMPTS_DIR / "v1" / f"{name}.txt"
    # Explicit encoding: fragment files are UTF-8; the platform-default
    # encoding used by bare read_text() is not reliable cross-platform.
    return path.read_text(encoding="utf-8")


def _render_template(template: str, context: dict[str, Any]) -> str:
    """Render *template*: expand ``{{#if var}}...{{/if}}`` blocks, then
    substitute ``{{var}}`` placeholders via :func:`_render_vars`.

    A conditional block's body is emitted verbatim (markers stripped) when
    the named context value is truthy, and dropped entirely otherwise.
    Blocks do not nest.
    """
    def expand_conditional(m: re.Match) -> str:
        name, body = m.group(1), m.group(2)
        return body if context.get(name) else ""

    without_conditionals = re.sub(
        r"\{\{#if (\w+)\}\}(.*?)\{\{/if\}\}",
        expand_conditional,
        template,
        flags=re.DOTALL,
    )
    return _render_vars(without_conditionals, context)


def _render_vars(template: str, context: dict[str, Any]) -> str:
def replace_var(match: re.Match) -> str:
var = match.group(1)
val = context.get(var, "")
if isinstance(val, list):
return ", ".join(str(v) for v in val)
return str(val)

return re.sub(r"\{\{(\w+)\}\}", replace_var, template)


# Capabilities that extract content on their own; at least one must be
# present whenever a modifier-only capability is requested.
BASE_CAPABILITIES = frozenset(["entities", "goals", "facts"])
# Capabilities that only annotate output produced by base capabilities and
# are meaningless in isolation (enforced in resolve_capabilities).
MODIFIER_ONLY_CAPABILITIES = frozenset(["assertion_signals", "evidence_anchoring"])


def _validate_capability_names(caps: set[str], source: str) -> None:
    """Raise ``ValueError`` listing any members of *caps* that are not
    known extraction capabilities; *source* labels the offending argument
    in the error message."""
    invalid = caps.difference(EXTRACTION_CAPABILITIES)
    if not invalid:
        return
    raise ValueError(f"Unknown {source}: {', '.join(sorted(invalid))}")


def resolve_capabilities(
    *,
    capabilities: list[str] | None = None,
    profile: str | None = None,
    add: list[str] | None = None,
    remove: list[str] | None = None,
) -> list[str]:
    """Resolve the final, dependency-closed capability set.

    Exactly one of *capabilities* (explicit list) or *profile* (named preset
    loaded from disk) seeds the set; *add* and *remove* then adjust it, and
    dependencies from ``CAPABILITY_DEPS`` are pulled in transitively.

    Note: removing a capability that a retained capability depends on has no
    net effect — the dependency closure re-adds it.

    Args:
        capabilities: Explicit capability names (validated).
        profile: Name of a profile JSON file under ``PROMPTS_DIR/profiles``.
        add: Capabilities to add on top of the seed set (validated).
        remove: Capabilities to drop from the seed set (validated).

    Returns:
        Capabilities sorted by ``CANONICAL_ORDER`` (unknown names sort last).

    Raises:
        ValueError: If neither seed argument is given, any supplied name is
            unknown, the resolved set is empty, or modifier-only
            capabilities are present without any base capability.
    """
    if capabilities is None and profile is None:
        raise ValueError("Either capabilities or profile must be provided")

    if capabilities is not None:
        _validate_capability_names(set(capabilities), "capabilities")
        caps = set(capabilities)
    else:
        caps = set(_load_profile(profile))

    if add:
        _validate_capability_names(set(add), "capabilities in add")
        caps.update(add)
    if remove:
        # Validate remove names too — previously unchecked (unlike `add`),
        # so a typo here was silently ignored instead of surfacing.
        _validate_capability_names(set(remove), "capabilities in remove")
        caps -= set(remove)

    # Transitive dependency closure: keep adding declared deps until stable.
    changed = True
    while changed:
        changed = False
        for cap in list(caps):
            for dep in CAPABILITY_DEPS.get(cap, []):
                if dep not in caps:
                    caps.add(dep)
                    changed = True

    if not caps:
        raise ValueError("Resolved capability set is empty")

    modifiers_present = caps & MODIFIER_ONLY_CAPABILITIES
    if modifiers_present and not (caps & BASE_CAPABILITIES):
        raise ValueError(
            f"Modifier capabilities {sorted(modifiers_present)} require at least one "
            f"base capability ({', '.join(sorted(BASE_CAPABILITIES))})"
        )

    # Precompute ranks once instead of an O(n) CANONICAL_ORDER.index() call
    # per sort-key evaluation.
    rank = {name: i for i, name in enumerate(CANONICAL_ORDER)}
    return sorted(caps, key=lambda c: rank.get(c, len(CANONICAL_ORDER)))


def build_extraction_prompt(
    text: str,
    *,
    capabilities: list[str] | None = None,
    profile: str | None = None,
    add: list[str] | None = None,
    remove: list[str] | None = None,
    categories: list[str] | None = None,
    source_type: str | None = None,
    date: str | None = None,
) -> str:
    """Assemble the full extraction prompt for *text*.

    The prompt is composed from disk fragments: a preamble, one fragment per
    resolved capability (in canonical order), and a postamble. Capability
    rules from ``CAPABILITY_RULES`` are spliced into the postamble just
    before its "\\nText:" marker when that marker is present; if the marker
    is absent the rules are omitted.

    Raises:
        ValueError: If both *capabilities* and *profile* are given, or if
            capability resolution fails (see :func:`resolve_capabilities`).
    """
    if capabilities is not None and profile is not None:
        raise ValueError("Cannot specify both capabilities and profile")

    resolved = resolve_capabilities(
        capabilities=capabilities,
        profile=profile,
        add=add,
        remove=remove,
    )

    ctx: dict[str, Any] = {
        "text": text,
        "categories": categories,
        "source_type": source_type,
        "date": date,
    }

    # Preamble first, then one rendered fragment per capability.
    sections: list[str] = [_render_template(_load_fragment("preamble"), ctx).strip()]
    sections.extend(
        _render_template(_load_fragment(cap), ctx).rstrip() for cap in resolved
    )

    # Collect per-capability rules in resolved (canonical) order.
    rules = [CAPABILITY_RULES[cap] for cap in resolved if cap in CAPABILITY_RULES]

    postamble = _render_template(_load_fragment("postamble"), ctx).rstrip()
    if rules:
        bullet_list = "\n".join(f"- {rule}" for rule in rules)
        anchor = postamble.find("\nText:")
        if anchor >= 0:
            postamble = postamble[:anchor] + "\n" + bullet_list + postamble[anchor:]
    sections.append(postamble)

    return "\n".join(sections) + "\n"
18 changes: 18 additions & 0 deletions packages/ts/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/ts/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"node": ">=18"
},
"devDependencies": {
"@types/node": "^25.6.0",
"typescript": "^5.5.0"
}
}
3 changes: 3 additions & 0 deletions packages/ts/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ export type { ValidationResult, ValidationError } from "./validate.js";

export { finalizeExtraction } from "./finalize.js";
export type { FinalizeContext, FinalizeResult } from "./finalize.js";

export { buildExtractionPrompt, resolveCapabilities } from "./prompt.js";
export type { PromptOptions } from "./prompt.js";
Loading