From e03d71e0803d9cbd77eeb95dfb7b6e8e03950b7f Mon Sep 17 00:00:00 2001 From: danielmeppiel Date: Fri, 22 May 2026 09:07:03 +0200 Subject: [PATCH] fix(applyTo): fold #1387 panel follow-ups (docs + DRY + YAML escape) Lands the panel recommendations on #1387 (closes #1366) that were filed as nits after the merge: - CHANGELOG: add Fixed entry under [Unreleased] documenting the comma-separated applyTo fix across Claude/Cursor/Windsurf, with the Copilot-verbatim carve-out spelled out (#1387, closes #1366). - docs(instructions-and-agents): demonstrate the comma-list contract with a worked example, document the brace-alternation carve-out, and clarify per-target rendering in "What compiles where". - docs(apm-usage/package-authoring): mirror the comma-list syntax note in the apm-usage skill resource (linting rule 4). - refactor(patterns): move _has_top_level_comma from context_optimizer into utils/patterns.py as has_top_level_comma (DRY: single source of truth for the brace-depth state machine). - security(integrators): emit YAML scalars via a new yaml_double_quote() helper that escapes backslashes, quotes, and control characters. Defence-in-depth against adversarial or copy-paste-mangled applyTo values; risk is LOW today (parse_apply_to strips whitespace and Windsurf strips newlines) but producer output is now well-formed even on hostile input. Added 11 unit tests including yaml.safe_load round-trip and a has_top_level_comma brace-aware suite. Lint silent; 175 tests pass (26 in test_patterns.py). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 1 + .../instructions-and-agents.md | 24 +++++++-- .../skills/apm-usage/package-authoring.md | 7 +++ src/apm_cli/compilation/context_optimizer.py | 25 ++------- .../integration/instruction_integrator.py | 12 ++--- src/apm_cli/utils/patterns.py | 47 ++++++++++++++++ tests/unit/utils/test_patterns.py | 54 ++++++++++++++++++- 7 files changed, 137 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bf6ae543..70a163d72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Comma-separated `applyTo` globs now correctly emit a YAML list under `globs:` / `paths:` in Claude, Cursor, and Windsurf instruction outputs (Copilot output unchanged -- it already preserves `applyTo` verbatim and handles comma-lists natively). The shared `parse_apply_to()` helper is brace-aware, so `**/*.{css,scss},**/*.py` is split on the top-level comma only. Closes a documented promise that was silently broken on the three non-Copilot harnesses. (#1387, closes #1366) - `apm update` against private Azure DevOps deps no longer fails on Windows with a misleading "az present but not logged in" diagnostic when the user IS signed in via `az login`. Root cause: Python's `subprocess.run(["az", ...])` -> `CreateProcessW` does not honor `PATHEXT` for non-`.exe` executables, so the Windows `az.cmd` wrapper could not be invoked even though `shutil.which("az")` resolved it. `AzureCliBearerProvider` now resolves the `az` binary via `shutil.which` once at construction and passes the absolute path to every subprocess call. As a defense-in-depth measure, the ADO `--update` preflight probe no longer strips `GIT_CONFIG_GLOBAL` / `GIT_CONFIG_NOSYSTEM` / `GIT_ASKPASS`, so Git Credential Manager can answer for Entra-cached ADO credentials whenever bearer acquisition is unavailable for any reason (sandbox, proxy, future PATH quirks). The actual clone path keeps its full gitconfig isolation. (#1430) - Root `.apm` hooks no longer duplicate after renaming the project directory or using git worktrees; Claude, Codex, Cursor, Gemini, and Windsurf hook configs stay idempotent across checkouts. The hook source-id is now derived from `apm.yml`'s `name` field instead of `install_path.name`, and `apm install` silently heals stale same-content entries from prior checkout basenames. Copilot is unaffected (its hooks live in per-file namespaces under `.github/hooks/`, not a shared merged config). (#1392, closes #1329) diff --git a/docs/src/content/docs/producer/author-primitives/instructions-and-agents.md b/docs/src/content/docs/producer/author-primitives/instructions-and-agents.md index d4c32a875..556182fe4 100644 --- a/docs/src/content/docs/producer/author-primitives/instructions-and-agents.md +++ b/docs/src/content/docs/producer/author-primitives/instructions-and-agents.md @@ -58,6 +58,22 @@ unconditional and gets folded into compiled context files (`AGENTS.md`, `GEMINI.md`) instead of a per-file rule directory. With it, each harness wraps the body in its own scoping syntax. +`applyTo` accepts either a single glob or a comma-separated list of +globs. Commas inside brace alternation (`**/*.{css,scss}`) are part of +the glob and are NOT treated as list separators -- only top-level +commas split the list. + +```markdown +--- +description: Frontend style rules +applyTo: "**/*.{css,scss},**/*.tsx" +--- +``` + +On Copilot the comma-list is preserved verbatim (Copilot splits it +natively). On Claude, Cursor, and Windsurf the list is expanded to a +YAML array under `paths:` / `globs:`. + ### Body conventions - Lead with bullets, not prose. Instructions are read by an agent @@ -72,10 +88,10 @@ it, each harness wraps the body in its own scoping syntax. | Target | Output path | Transform | |---|---|---| -| copilot | `.github/instructions/.instructions.md` | verbatim; `applyTo` preserved | -| claude | `.claude/rules/.md` | `applyTo` -> `paths:` list | -| cursor | `.cursor/rules/.mdc` | `applyTo` -> `globs:`; description auto-derived if missing | -| windsurf | `.windsurf/rules/.md` | `applyTo` -> `trigger: glob` + `globs:`; missing `applyTo` -> `trigger: always_on` | +| copilot | `.github/instructions/.instructions.md` | verbatim; `applyTo` preserved (comma-lists split natively by Copilot) | +| claude | `.claude/rules/.md` | `applyTo` -> `paths:` list (comma-lists expanded to YAML array) | +| cursor | `.cursor/rules/.mdc` | `applyTo` -> `globs:` (scalar for single glob, YAML array for comma-lists); description auto-derived if missing | +| windsurf | `.windsurf/rules/.md` | `applyTo` -> `trigger: glob` + `globs:` (scalar or YAML array); missing `applyTo` -> `trigger: always_on` | | codex | folded into `AGENTS.md` | compile-only, no per-file deploy | | gemini | folded into `GEMINI.md` | compile-only, no per-file deploy | | opencode | folded into `AGENTS.md` | compile-only, no per-file deploy | diff --git a/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md b/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md index 557dff2ab..1dcf070f7 100644 --- a/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md +++ b/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md @@ -131,6 +131,13 @@ tags: [security, validation] --- ``` +`applyTo` accepts a single glob (`"**/*.py"`) or a comma-separated list +(`"**/src/**,**/api/**"`). Commas inside brace alternation +(`**/*.{css,scss}`) are part of the glob and are NOT separators -- only +top-level commas split the list. On Copilot the value is preserved +verbatim; on Claude/Cursor/Windsurf comma-lists are expanded to a YAML +array under `paths:` / `globs:`. + ### 2. Chatmode (`*.chatmode.md`) Chat persona configuration. diff --git a/src/apm_cli/compilation/context_optimizer.py b/src/apm_cli/compilation/context_optimizer.py index 9cc8969ac..43271ad71 100644 --- a/src/apm_cli/compilation/context_optimizer.py +++ b/src/apm_cli/compilation/context_optimizer.py @@ -27,26 +27,7 @@ from ..primitives.models import Instruction from ..utils.exclude import should_exclude, validate_exclude_patterns from ..utils.paths import portable_relpath -from ..utils.patterns import parse_apply_to - - -def _has_top_level_comma(pattern: str) -> bool: - """Return True if ``pattern`` contains a comma outside any ``{...}`` group. - - Commas inside brace alternation (e.g. ``**/*.{css,scss}``) are part - of glob brace expansion and must not be treated as list separators. - """ - depth = 0 - for ch in pattern: - if ch == "{": - depth += 1 - elif ch == "}": - if depth > 0: - depth -= 1 - elif ch == "," and depth == 0: - return True - return False - +from ..utils.patterns import has_top_level_comma, parse_apply_to # CRITICAL: Shadow Click commands to prevent namespace collision # When this module is imported during 'apm compile', Click's active context @@ -687,7 +668,7 @@ def _extract_intended_directory_from_pattern(self, pattern: str) -> Path | None: """ # For comma-lists, only the first segment is consulted - the # placement still flows into a single directory. - if _has_top_level_comma(pattern): + if has_top_level_comma(pattern): segments = parse_apply_to(pattern) if not segments: return None @@ -749,7 +730,7 @@ def _file_matches_pattern(self, file_path: Path, pattern: str) -> bool: # segment match as a hit so list patterns mirror per-glob semantics. # Only split on top-level commas - commas inside brace alternation # (e.g. ``**/*.{css,scss}``) must stay attached for brace expansion. - if _has_top_level_comma(pattern): + if has_top_level_comma(pattern): segments = parse_apply_to(pattern) return any(self._file_matches_pattern(file_path, seg) for seg in segments) diff --git a/src/apm_cli/integration/instruction_integrator.py b/src/apm_cli/integration/instruction_integrator.py index 2e32b370c..f8dd6d2e9 100644 --- a/src/apm_cli/integration/instruction_integrator.py +++ b/src/apm_cli/integration/instruction_integrator.py @@ -16,7 +16,7 @@ from apm_cli.integration.base_integrator import BaseIntegrator, IntegrationResult from apm_cli.utils.path_security import ensure_path_within from apm_cli.utils.paths import portable_relpath -from apm_cli.utils.patterns import parse_apply_to +from apm_cli.utils.patterns import parse_apply_to, yaml_double_quote if TYPE_CHECKING: from apm_cli.integration.targets import TargetProfile @@ -282,10 +282,10 @@ def _convert_to_cursor_rules(content: str) -> str: parts.append(f"description: {description}") globs = parse_apply_to(apply_to) if len(globs) == 1: - parts.append(f'globs: "{globs[0]}"') + parts.append(f"globs: {yaml_double_quote(globs[0])}") elif globs: parts.append("globs:") - parts.extend(f' - "{g}"' for g in globs) + parts.extend(f" - {yaml_double_quote(g)}" for g in globs) parts.append("---") return "\n".join(parts) + "\n\n" + body.lstrip("\n") @@ -379,10 +379,10 @@ def _convert_to_windsurf_rules(content: str) -> str: if globs: parts.append("trigger: glob") if len(globs) == 1: - parts.append(f'globs: "{globs[0]}"') + parts.append(f"globs: {yaml_double_quote(globs[0])}") else: parts.append("globs:") - parts.extend(f' - "{g}"' for g in globs) + parts.extend(f" - {yaml_double_quote(g)}" for g in globs) else: parts.append("trigger: always_on") parts.append("---") @@ -434,7 +434,7 @@ def _convert_to_claude_rules(content: str) -> str: globs = parse_apply_to(apply_to) if globs: parts = ["---", "paths:"] - parts.extend(f' - "{g}"' for g in globs) + parts.extend(f" - {yaml_double_quote(g)}" for g in globs) parts.append("---") return "\n".join(parts) + "\n\n" + body.lstrip("\n") diff --git a/src/apm_cli/utils/patterns.py b/src/apm_cli/utils/patterns.py index f2cce7a69..05498ed2f 100644 --- a/src/apm_cli/utils/patterns.py +++ b/src/apm_cli/utils/patterns.py @@ -8,6 +8,53 @@ from __future__ import annotations +def has_top_level_comma(pattern: str) -> bool: + """Return True if ``pattern`` contains a comma outside any ``{...}`` group. + + Commas inside brace alternation (e.g. ``**/*.{css,scss}``) are part + of glob brace expansion and must not be treated as list separators. + Single source of truth for the comma-vs-brace discrimination; the + placement optimizer and the integrators both consume this so the + semantics of ``parse_apply_to`` and its callers stay in lock-step. + """ + depth = 0 + for ch in pattern: + if ch == "{": + depth += 1 + elif ch == "}": + if depth > 0: + depth -= 1 + elif ch == "," and depth == 0: + return True + return False + + +def yaml_double_quote(value: str) -> str: + """Escape ``value`` for embedding inside a YAML double-quoted scalar. + + Defence-in-depth for the instruction integrators that emit YAML + frontmatter via f-strings (``f' - "{g}"'``). A glob containing a + literal backslash, double-quote, or control character would break + the surrounding YAML if inlined verbatim; this helper escapes the + minimal set needed for the YAML 1.2 double-quoted form. Returns the + value already wrapped in the surrounding double quotes. + + Note: ``parse_apply_to`` already strips leading/trailing whitespace + per segment, and the Windsurf integrator strips newlines from the + raw frontmatter value before splitting, so the practical risk today + is near-zero -- this exists so emitted YAML stays well-formed even + on adversarial or copy-paste-mangled inputs. + """ + escaped = ( + value.replace("\\", "\\\\") + .replace('"', '\\"') + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t") + ) + return f'"{escaped}"' + + def parse_apply_to(value: str | None) -> list[str]: """Split a primitive ``applyTo`` value into individual glob patterns. diff --git a/tests/unit/utils/test_patterns.py b/tests/unit/utils/test_patterns.py index f7148915f..f788bca5e 100644 --- a/tests/unit/utils/test_patterns.py +++ b/tests/unit/utils/test_patterns.py @@ -1,6 +1,6 @@ """Tests for the applyTo pattern parser.""" -from apm_cli.utils.patterns import parse_apply_to +from apm_cli.utils.patterns import has_top_level_comma, parse_apply_to, yaml_double_quote class TestParseApplyTo: @@ -55,3 +55,55 @@ def test_nested_braces(self): "**/{a,{b,c}}", "**/*.py", ] + + +class TestHasTopLevelComma: + """Unit tests for has_top_level_comma().""" + + def test_no_comma(self): + assert has_top_level_comma("**/*.py") is False + + def test_top_level_comma(self): + assert has_top_level_comma("a,b") is True + + def test_brace_comma_only(self): + # Commas inside {...} are brace expansion, not separators. + assert has_top_level_comma("**/*.{css,scss}") is False + + def test_brace_comma_and_top_level(self): + assert has_top_level_comma("**/*.{css,scss},**/*.py") is True + + def test_nested_braces(self): + assert has_top_level_comma("**/{a,{b,c}}") is False + + def test_empty(self): + assert has_top_level_comma("") is False + + +class TestYamlDoubleQuote: + """Unit tests for yaml_double_quote() defence-in-depth escaping.""" + + def test_plain_glob(self): + assert yaml_double_quote("**/*.py") == '"**/*.py"' + + def test_escapes_double_quote(self): + assert yaml_double_quote('a"b') == '"a\\"b"' + + def test_escapes_backslash(self): + assert yaml_double_quote("a\\b") == '"a\\\\b"' + + def test_escapes_newline(self): + assert yaml_double_quote("a\nb") == '"a\\nb"' + + def test_escapes_carriage_return(self): + assert yaml_double_quote("a\rb") == '"a\\rb"' + + def test_escapes_tab(self): + assert yaml_double_quote("a\tb") == '"a\\tb"' + + def test_yaml_safe_load_roundtrip(self): + import yaml + + for value in ['a"b', "a\\b", "a\nb", "**/src/**", "**/*.{css,scss}"]: + yaml_doc = f"k: {yaml_double_quote(value)}\n" + assert yaml.safe_load(yaml_doc) == {"k": value}