Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ jobs:
- name: Install dependencies
run: pip install -e ".[dev]" pytest-timeout

# Non-blocking: reports drift between architecture.json and prompt <include> tags.
# Never fails the job; emits a warning annotation when the check exits non-zero.
- name: Validate architecture vs prompt includes (warning only)
run: |
if ! pdd validate-arch-includes; then
echo "::warning::validate-arch-includes reported mismatches (warning only; fix in a follow-up)"
fi

- name: Run unit tests
run: >
pytest tests/
Expand Down
58 changes: 22 additions & 36 deletions pdd/agentic_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,15 @@
_find_pdd_executable,
build_dep_graph_from_architecture,
)
from .construct_paths import _detect_context_from_basename, _extract_prefix_from_prompts_dir, _find_pddrc_file, _load_pddrc_config
from .architecture_include_validation import collect_architecture_include_validation_warnings
from .sync_graph_order_consistency import warnings_for_arch_vs_include_sync_order
from .architecture_registry import find_project_root as _find_project_root
from .construct_paths import (
_detect_context_from_basename,
_extract_prefix_from_prompts_dir,
_find_pddrc_file,
_load_pddrc_config,
)
from .load_prompt_template import load_prompt_template
from .sync_determine_operation import sync_determine_operation
from .sync_main import _detect_languages_with_context
Expand Down Expand Up @@ -252,19 +260,6 @@ def _filter_invalid_basenames(
return valid, invalid


def _find_project_root(start: Path) -> Path:
    """Locate the project root by climbing the directory tree from ``start``.

    A directory counts as the project root when it contains a ``.pddrc`` or
    ``.git`` entry. The search inspects the resolved ``start`` and then its
    ancestors, nearest first, looking at no more than 20 directories in total.

    Args:
        start: Path at which the upward search begins.

    Returns:
        The nearest directory holding a marker, or the resolved ``start`` when
        no marker is found within the search limit.
    """
    origin = start.resolve()
    # The resolved start followed by every ancestor up to the filesystem root,
    # truncated so that at most 20 directories are ever probed.
    search_path = (origin, *origin.parents)[:20]
    for directory in search_path:
        if any((directory / marker).exists() for marker in (".pddrc", ".git")):
            return directory
    return origin


def _load_architecture_json(
project_root: Path,
issue_number: Optional[int] = None,
Expand All @@ -276,33 +271,15 @@ def _load_architecture_json(

Args:
project_root: Root directory of the project.
issue_number: Optional issue number for logging origin info.
issue_number: Optional issue number for logging origin info (reserved).

Returns:
Tuple of (parsed data or None, path to primary architecture.json).
"""
from .architecture_registry import find_architecture_for_project
from .architecture_registry import load_combined_architecture_data

arch_files = find_architecture_for_project(project_root)
if not arch_files:
return None, project_root / "architecture.json"

primary_path = arch_files[0]
combined: List[Dict[str, Any]] = []

for arch_path in arch_files:
try:
with open(arch_path, "r", encoding="utf-8") as f:
data = json.load(f)
if isinstance(data, list):
combined.extend(data)
except (json.JSONDecodeError, OSError):
continue

if not combined:
return None, primary_path

return combined, primary_path
_ = issue_number # reserved for future origin-aware loading
return load_combined_architecture_data(project_root)


def _is_catchall_match(basename: str, config: Dict[str, Any]) -> bool:
Expand Down Expand Up @@ -977,6 +954,15 @@ def run_agentic_sync(
if dep_graph_result.warnings and not quiet:
for w in dep_graph_result.warnings:
console.print(f"[yellow]Warning: {w}[/yellow]")
if not quiet and verbose:
for w in collect_architecture_include_validation_warnings(project_root):
console.print(f"[yellow]Warning: {w}[/yellow]")
for w in warnings_for_arch_vs_include_sync_order(
dep_graph_from_architecture=dep_graph,
modules_to_sync=modules_to_sync,
project_root=project_root,
):
console.print(f"[yellow]Warning: {w}[/yellow]")
else:
# Fallback: scan prompt files for <include> tags
prompts_dir = project_root / "prompts"
Expand Down
173 changes: 173 additions & 0 deletions pdd/architecture_include_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
"""
Cross-validate architecture.json dependency entries against <include> tags in prompts.

Phase 5: surface drift between declarative architecture dependencies and the module
prompts the LLM actually pulls in via <include>.
"""
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Dict, FrozenSet, List

from .sync_order import extract_includes_from_file, extract_module_from_include

# Top-level directories in the PDD repo that ship sample architecture (not app code).
# An architecture.json located below one of these directory names (taken as the
# first path component relative to the project root) is skipped by
# collect_architecture_include_validation_warnings when its
# ``skip_bundled_sample_arch`` flag is true.
_BUNDLED_SAMPLE_TOPLEVEL_DIRS: FrozenSet[str] = frozenset(
    {"examples", "example_project", "example_workspace", "staging"}
)


def _arch_path_under_skipped_sample_tree(
    arch_path: Path,
    project_root: Path,
    skip_roots: FrozenSet[str],
) -> bool:
    """Tell whether ``arch_path`` sits inside a skipped top-level directory.

    Args:
        arch_path: Path of an architecture file.
        project_root: Directory that ``arch_path`` is measured against.
        skip_roots: Names of top-level directories whose contents are skipped.

    Returns:
        ``True`` when ``arch_path``, taken relative to ``project_root``, has at
        least two components and the first one is in ``skip_roots``. ``False``
        when ``skip_roots`` is empty, when ``arch_path`` is not below
        ``project_root``, or when the file sits directly in ``project_root``.
    """
    if not skip_roots:
        return False
    try:
        relative_parts = arch_path.resolve().relative_to(project_root.resolve()).parts
    except ValueError:
        # relative_to() rejects paths that are not below project_root.
        return False
    if len(relative_parts) < 2:
        # A file directly in project_root is not inside any sub-tree.
        return False
    top_level_dir = relative_parts[0]
    return top_level_dir in skip_roots


def collect_architecture_include_validation_warnings(
    project_root: Path,
    *,
    skip_bundled_sample_arch: bool = True,
) -> List[str]:
    """
    Run cross-validation for every ``architecture.json`` under ``project_root``.

    Each file is validated with its **parent directory** as the project root for
    resolving ``prompts/…`` paths, so nested packages (e.g. ``services/api/``) work.

    When ``skip_bundled_sample_arch`` is true (default), skips trees like
    ``examples/`` used in the PDD repository so routine sync stays focused on app code.

    Collection is best effort: a file that cannot be opened, is not valid UTF-8,
    is not valid JSON, or whose top-level value is not a non-empty list is
    skipped without producing a warning.

    Args:
        project_root: Directory whose architecture files are discovered through
            ``find_architecture_for_project``.
        skip_bundled_sample_arch: When true, ignore architecture files located
            below one of the ``_BUNDLED_SAMPLE_TOPLEVEL_DIRS`` directories.

    Returns:
        Warning strings, each prefixed with the path of the architecture file
        that produced it. Empty when nothing was checked or nothing mismatched.
    """
    from .architecture_registry import find_architecture_for_project

    root = project_root.resolve()
    skip = _BUNDLED_SAMPLE_TOPLEVEL_DIRS if skip_bundled_sample_arch else frozenset()
    warnings: List[str] = []
    for arch_path in find_architecture_for_project(project_root):
        if _arch_path_under_skipped_sample_tree(arch_path, root, skip):
            continue
        try:
            with open(arch_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, json.JSONDecodeError, UnicodeDecodeError):
            # UnicodeDecodeError is neither an OSError nor a JSONDecodeError, so
            # it must be listed explicitly or one mis-encoded file would abort
            # the whole pass.
            continue
        if not isinstance(data, list) or not data:
            continue
        # Prompt paths are resolved relative to the directory holding this file.
        base = arch_path.parent
        warnings.extend(
            f"{arch_path}: {w}"
            for w in cross_validate_architecture_with_prompt_includes(data, base)
        )
    return warnings


def print_architecture_include_validation_warnings(*, quiet: bool, verbose: bool = False) -> None:
    """Emit include-validation warnings for the current project in yellow.

    Nothing is printed unless ``verbose`` is set and ``quiet`` is not. The
    project root is obtained from ``find_project_root()`` called without
    arguments.

    Args:
        quiet: Suppresses all output when true.
        verbose: Must be true for any warning to be printed.
    """
    if verbose and not quiet:
        from rich import print as rprint

        from .architecture_registry import find_project_root

        project_root = find_project_root()
        for message in collect_architecture_include_validation_warnings(project_root):
            rprint(f"[yellow]Warning: {message}[/yellow]")


def _resolve_architecture_prompt_path(filename: str, project_root: Path) -> Path:
    """Map an architecture ``filename`` to a resolved path inside the project.

    Backslashes are converted to forward slashes and leading slashes are
    dropped. A name that already starts with ``prompts/`` is joined to
    ``project_root`` directly; any other name is placed under
    ``project_root / "prompts"``.

    Args:
        filename: The ``filename`` value of an architecture entry.
        project_root: Directory against which the name is resolved.

    Returns:
        The resolved path; the file is not required to exist.
    """
    normalized = filename.replace("\\", "/").lstrip("/")
    if normalized.startswith("prompts/"):
        base_dir = project_root
    else:
        base_dir = project_root / "prompts"
    return (base_dir / normalized).resolve()


def resolve_architecture_prompt_path(filename: str, project_root: Path) -> Path:
    """Resolve an architecture ``filename`` to an on-disk path (public entry point).

    Delegates to ``_resolve_architecture_prompt_path`` without altering its
    arguments or its result.

    Args:
        filename: The ``filename`` value of an architecture entry.
        project_root: Directory against which the name is resolved.

    Returns:
        The resolved path produced by the private helper.
    """
    resolved_path = _resolve_architecture_prompt_path(filename, project_root)
    return resolved_path


def cross_validate_architecture_with_prompt_includes(
    arch_data: List[Dict[str, Any]],
    project_root: Path,
) -> List[str]:
    """
    Compare each architecture entry's ``dependencies`` (as module basenames) with
    module targets of ``<include>`` tags in the corresponding prompt file.

    Non-module includes (docs, preambles, etc.) are ignored via
    ``extract_module_from_include``.

    Entries that are not JSON objects, that have no ``filename``, or whose
    filename does not map to a module are skipped. A missing or ``null``
    ``dependencies`` value is treated as "no dependencies". Only dependencies
    whose filename belongs to another entry of ``arch_data`` take part in the
    comparison, and a module never counts as a dependency of itself.

    Args:
        arch_data: Parsed contents of one ``architecture.json`` (a list of entries).
        project_root: Directory used to resolve each entry's prompt file.

    Returns:
        Human-readable warning strings (empty if no mismatches / nothing to check).
    """
    warnings: List[str] = []

    # Only JSON objects can carry "filename"/"dependencies"; stray scalars or
    # lists in the architecture array would otherwise crash on ``.get``.
    entries = [entry for entry in arch_data if isinstance(entry, dict)]

    filename_to_basename: Dict[str, str] = {}
    for entry in entries:
        fn = entry.get("filename") or ""
        if not fn:
            continue
        b = extract_module_from_include(fn)
        if b:
            filename_to_basename[fn] = b

    for entry in entries:
        fn = entry.get("filename") or ""
        if not fn:
            continue
        mod_base = filename_to_basename.get(fn)
        if not mod_base:
            continue

        prompt_path = _resolve_architecture_prompt_path(fn, project_root)
        if not prompt_path.is_file():
            warnings.append(
                f"Cross-validation skipped for architecture entry {fn!r}: "
                f"prompt file not found at {prompt_path}"
            )
            continue

        # Modules pulled in through <include>, with the first include text that
        # referenced each one kept as evidence for the warning message.
        include_modules: set[str] = set()
        include_proof: Dict[str, str] = {}
        for inc in extract_includes_from_file(prompt_path):
            m = extract_module_from_include(inc)
            if m and m != mod_base:
                include_modules.add(m)
                include_proof.setdefault(m, inc)

        # Modules declared in architecture.json, with the first dependency
        # filename that named each one kept as evidence.
        arch_modules: set[str] = set()
        dependency_proof: Dict[str, str] = {}
        # ``or []`` covers an explicit ``"dependencies": null`` in the JSON.
        for dep_fn in entry.get("dependencies") or []:
            if not isinstance(dep_fn, str):
                # Non-string items cannot be architecture filenames.
                continue
            db = filename_to_basename.get(dep_fn)
            if db and db != mod_base:
                arch_modules.add(db)
                dependency_proof.setdefault(db, dep_fn)

        for d in sorted(arch_modules - include_modules):
            dep_fn_proof = dependency_proof.get(d)
            extra = f" ({dep_fn_proof!r})" if dep_fn_proof else ""
            warnings.append(
                f"architecture.json / <include> mismatch: {fn!r} declares dependency on module "
                f"{d!r}{extra} but the prompt has no <include> of that module's prompt"
            )

        for i in sorted(include_modules - arch_modules):
            inc_s = include_proof.get(i, "")
            warnings.append(
                f"architecture.json / <include> mismatch: {fn!r} <include>s module {i!r} "
                f"({inc_s!r}) but architecture.json dependencies do not list that module"
            )

    return warnings
46 changes: 46 additions & 0 deletions pdd/architecture_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,52 @@ def find_architecture_for_project(project_root: Path) -> List[Path]:
return results


def find_project_root(start: Optional[Path] = None) -> Path:
    """Find the nearest enclosing directory that holds ``.pddrc`` or ``.git``.

    The search inspects the resolved starting directory and then its ancestors,
    nearest first, looking at no more than 20 directories in total.

    Args:
        start: Where the upward search begins; the current working directory
            is used when omitted.

    Returns:
        The first directory containing a marker, or the resolved starting
        directory when no marker is found within the search limit.
    """
    origin = (Path.cwd() if start is None else start).resolve()
    # The resolved start followed by its ancestors, capped at 20 candidates.
    for directory in (origin, *origin.parents)[:20]:
        if any((directory / marker).exists() for marker in (".pddrc", ".git")):
            return directory
    return origin


def load_combined_architecture_data(
    project_root: Path,
) -> Tuple[Optional[List[Dict[str, Any]]], Path]:
    """Load and merge all architecture.json lists under ``project_root`` (root + subdirs).

    Loading is best effort: a file that cannot be opened, is not valid UTF-8,
    or is not valid JSON is skipped, and so is a file whose top-level value is
    not a list.

    Args:
        project_root: Directory whose architecture files are discovered through
            ``find_architecture_for_project``.

    Returns:
        ``(combined_entries_or_none, primary_arch_path)`` where primary is the first
        file found (typically root ``architecture.json``). The first element is
        ``None`` when no file was found or no entries could be loaded; when no
        file was found the path defaults to ``project_root / "architecture.json"``.
    """
    arch_files = find_architecture_for_project(project_root)
    if not arch_files:
        return None, project_root / "architecture.json"

    primary_path = arch_files[0]
    combined: List[Dict[str, Any]] = []

    for arch_path in arch_files:
        # Keep the guarded region to the I/O and the parse only.
        try:
            with open(arch_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError, OSError):
            # UnicodeDecodeError is listed explicitly: it is neither an OSError
            # nor a JSONDecodeError, and one mis-encoded file must not abort
            # the merge of the remaining files.
            continue
        if isinstance(data, list):
            combined.extend(data)

    # An empty merge is reported as None so callers can tell "nothing loaded".
    return (combined or None), primary_path


def get_modules_for_issue(arch_data: List[dict], issue_number: int) -> List[dict]:
"""Filter architecture entries by origin.issue_number."""
return [
Expand Down
Loading
Loading