microsoft · curtisman · Apr 24, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/ts/docs/architecture/actionGrammar.md b/ts/docs/architecture/actionGrammar.md
@@ -382,6 +382,199 @@ GrammarParseResult {
 6. Type-checks value expressions in two passes (see
    [Validation architecture](#validation-architecture) below)
 7. Produces the flat `Grammar` structure ready for matching
+8. Optionally runs the [Compile-time optimizer](#compile-time-optimizations)
+   to reshape the AST without changing match semantics
+
+### Compile-time optimizations
+
+`grammarOptimizer.ts` exposes two opt-in AST passes that reshape the
+compiled `Grammar` to reduce matcher work without changing match
+results. Both passes are off by default and individually controllable
+through `LoadGrammarRulesOptions.optimizations`:
+
+```typescript
+loadGrammarRules("agent.agr", text, {
+  optimizations: {
+    inlineSingleAlternatives: true,
+    factorCommonPrefixes: true,
+  },
+});
+```
+
+The optimizer runs after value-expression validation, so it operates on
+fully-compiled `CompiledValueNode`s. It is skipped entirely when the
+compile produced any errors (the AST may be partial).
+
+#### Pass 1 — Inline single-alternative `RulesPart`
+
+`inlineSingleAlternativeRules` walks all rule alternatives post-order
+and replaces an eligible `RulesPart` with the spread of its child
+rule's parts. This removes one layer of `ParentMatchState` push/pop and
+`finalizeNestedRule` in the matcher, which is common for named rules
+that simply delegate to a single sub-rule.
+
+A `RulesPart` is inlined only when **all** structural preconditions
+hold:
+
+- `part.rules.length === 1` and `!part.repeat && !part.optional`
+- The child rule has at least one part
+- `child.spacingMode === parentRule.spacingMode` (exact equality;
+  `undefined` is treated as a distinct "auto" mode at the matcher
+  level, not as "inherit from parent")
+- The part's `rules` array (the child body) is not shared with any other
+  `RulesPart` in the input AST. A pre-pass reference-counts every
+  `GrammarRule[]` array; inlining a shared body would duplicate the
+  child's parts at every call site and defeat the serializer's
+  identity-based dedup (see "Shared-rule identity preservation"
+  below).
+
+When the child carries an explicit `value` expression, one of three
+sub-strategies fires:
+
+- **Hoist** — parent has no value of its own and either has a single
+  part (this `RulesPart`) or captures via `part.variable`. The matcher
+  would have computed the parent's value at runtime via its
+  default-value rule using `child.value`; we synthesize that
+  assignment explicitly onto the parent's `value`.
+- **Substitute** — parent captures via `part.variable` AND has its
+  own value expression. The (α-renamed) `child.value` is substituted
+  for the captured variable in `parent.value`.
+- **Drop** — `child.value` is unobservable at runtime; inline only the
+  child's parts.
+
+For Substitute and Drop the child's top-level bindings are α-renamed
+to fresh opaque names (`__opt_inline_<n>`, per-parent counter) so
+they cannot collide with sibling parts the parent already has. Hoist
+into a single-part parent skips the rename — there are no siblings.
+
+When the child has no value expression and the parent captures via
+`part.variable`, the child must contain exactly one binding-friendly
+part (`wildcard`, `number`, or `rules`). The parent's variable name
+is re-targeted onto that single part in place. (This is the only
+case where a binding may legitimately move onto a nested
+`RulesPart` — the absence of a child value rules out the silent-drop
+hazard that the value-bearing branches handle via Hoist/Substitute.)
+
+#### Pass 2 — Common prefix factoring
+
+`factorCommonPrefixes` walks every `RulesPart` and groups alternatives
+that share a non-empty leading prefix. Each group is replaced by a
+single alternative that holds the shared prefix once, followed by a
+nested `RulesPart` containing the remaining suffixes:
+
+```
+play the song -> "song"        play the (song -> "song"
+play the track -> "track"  ⇒              | track -> "track"
+play the album -> "album"                 | album -> "album")
+```
+
+**Top-level factoring.** After nested factoring completes, the
+top-level `Grammar.rules` array is factored against itself — the
+matcher treats top-level alternatives the same way it treats inner
+`RulesPart` alternatives. The trie build/emit core (`factorRules`)
+operates on a flat `GrammarRule[]`, so it is reused directly at the
+top level without wrapping the array in a synthetic `RulesPart`.
+This intentionally **destroys the 1:1 correspondence between
+top-level rule indices and the original source**; downstream consumers
+that depend on that mapping must capture it before enabling
+`factorCommonPrefixes`.
+
+**Implementation.** Factoring is implemented as a trie build +
+post-order emission inside `factorRules` (with `factorRulesPart` as a
+thin `RulesPart`-aware wrapper that handles the `repeat`/`optional`
+bailout):
+
+- Each rule is inserted as a sequence of "atomic" steps. `StringPart`
+  explodes into one `(string, token)` edge per token in `value[]`,
+  so `["play", "song"]` and `["play", "album"]` share the `"play"`
+  edge but branch at the next token. `wildcard`, `number`, `rules`,
+  and `phraseSet` parts each yield one edge. `rules` edges key by
+  `rules` array identity.
+- `edgeKeyMatches` ignores variable _names_ on variable-bearing edges
+  but requires binding _presence parity_ on `rules` edges (so
+  `<Inner>` and `$(v:<Inner>)` do not silently merge into the same
+  child). In the current implementation this comparison is encoded
+  into a primitive `stepMergeKey` so that the trie children map can
+  perform an O(1) lookup instead of an O(siblings) scan.
+- Emission walks the trie post-order. Single-child / no-terminal
+  chains are path-compressed back into a flat parts array (with
+  adjacent `StringPart`s re-merged at the seam), and multi-member
+  nodes become wrapper `RulesPart`s.
+
+**Opaque canonical names.** Variable-bearing trie edges carry a fresh
+opaque canonical name (`__opt_v_<n>`) allocated per `factorRules`
+invocation, _not_ the first inserter's user-supplied variable name. This eliminates two collision classes
+that any "first inserter wins" scheme is vulnerable to: outer-scope
+shadow (a non-lead's value referencing an outer name that happens to
+match the lead's local) and bound-vs-unbound `rules` parity. Each
+inserter accumulates a `local → canonical` remap that is applied to
+its terminal's `value` expression at emission time. `remapValueVariables`
+expands object-shorthand `{ foo }` to `{ foo: <renamed> }` so the
+object field name stays the same.
+
+**Wrapper value capture.** When any factored member carries a value
+expression, the wrapper rule has more than one part and the matcher's
+default single-part value rule no longer fires. The optimizer
+generates a fresh `__opt_factor` / `__opt_factor_<n>` name (avoiding
+any name already bound in prefix or members), binds the suffix
+`RulesPart` to it, and sets the wrapper's `value` to
+`{ type: "variable", name: "__opt_factor" }` — preserving the suffix
+value through the new nesting level.
+
+**Local bailout on eligibility failure.** Per-fork eligibility checks
+run before each wrapper is built; on failure the would-be members
+are emitted as separate full rules with the canonical prefix
+prepended, losing factoring at _that fork only_. Factoring above and
+below the failing fork still applies. Failure reasons:
+
+- **Whole-consumed.** A member's parts were entirely consumed by the
+  prefix (empty-parts suffix) — the matcher cannot default-value
+  resolve an empty-parts rule inside a wrapped `RulesPart`.
+- **Mixed value presence.** Some members carry explicit `value`,
+  others rely on default-value semantics; wrapping would silently
+  drop the implicit values.
+- **Implicit-default multipart.** All members rely on default values
+  but at least one suffix would end up with more than one part,
+  where the matcher's single-part default-value policy no longer
+  applies.
+- **Cross-scope reference.** A suffix's value expression references
+  a canonical name bound by the prefix. Nested rule scope is fresh
+  in the matcher (entering a `RulesPart` resets `valueIds`), so the
+  suffix cannot see prefix bindings — bail out so each member emits
+  at the wrapper's level instead.
+
+The earlier "binding shadow" guard is no longer needed: opaque
+canonicals allocated globally per `factorRules` call cannot
+collide with each other.
+
+**No fixed-point loop.** Factoring is applied once per group of
+alternatives — the trie's grouping converges in a single pass and
+freshly synthesized suffix `RulesPart`s are intentionally not
+re-walked. When both passes are enabled, the optimizer runs Pass 1
+once more after Pass 2 so that any sub-`RulesPart`s inside the
+emitted suffixes that have become inline-eligible can collapse.
+
+**Shared-rule identity preservation.** Both passes memoize their
+output by `GrammarRule[]` array identity. The compiler points every
+reference to the same named rule (`<X>`) at the same underlying
+`rules` array so [grammarSerializer.ts](../../packages/actionGrammar/src/grammarSerializer.ts)
+can dedupe via `rulesToIndex.get(p.rules)`. The optimizer preserves
+that invariant: two `RulesPart`s that originally pointed at the same
+array still point at the same (possibly new) array after the pass —
+keeping `.ag.json` size proportional to unique rule bodies.
+
+#### Equivalence and benchmarks
+
+The `grammarOptimizer*.spec.ts` test suite covers unit behavior of
+both passes, regression repros for previously broken factoring
+patterns, structural-equivalence checks (every flag combination
+produces identical `matchGrammar` output across curated and
+real-agent grammars), and shared-array preservation. Standalone
+informational benchmarks live under
+[packages/actionGrammar/src/bench/](../../packages/actionGrammar/src/bench/);
+run them via `pnpm run bench:synthetic` and `pnpm run bench:real`
+from the package directory (a `pnpm run tsc` build is required first
+since the bench scripts execute the compiled `dist/bench/` output).
 
 ### Matching backend
 

diff --git a/ts/packages/actionGrammar/README.md b/ts/packages/actionGrammar/README.md
@@ -141,6 +141,24 @@ Key test suites:
 - `nfaRealGrammars.spec.ts` — End-to-end tests with production grammars
 - `dfa.spec.ts` — DFA compiler correctness
 - `dfaBenchmark.spec.ts` — Performance benchmarks
+- `grammarOptimizer*.spec.ts` — Compile-time AST optimizer (inline + factor passes)
+
+## Optimizer benchmarks
+
+Standalone benchmarks for the opt-in compile-time grammar optimizer
+([`src/bench/`](src/bench/)) are not part of the jest suite. They
+execute the compiled output, so a build is required first:
+
+```bash
+pnpm run tsc
+pnpm run bench:synthetic   # synthetic pass-through / wide-prefix grammars
+pnpm run bench:real        # real agent grammars (player, list, calendar)
+pnpm run bench             # both
+```
+
+Each script prints a per-configuration table comparing baseline,
+inline-only, factor-only, and both. The speedup column is green above
+1.10x and red below 0.90x of baseline; values in between are uncolored.
 
 ## Downstream consumers
 

diff --git a/ts/packages/actionGrammar/package.json b/ts/packages/actionGrammar/package.json
@@ -24,9 +24,13 @@
   },
   "files": [
     "dist",
-    "!dist/test"
+    "!dist/test",
+    "!dist/bench"
   ],
   "scripts": {
+    "bench": "npm run bench:synthetic && npm run bench:real",
+    "bench:real": "node ./dist/bench/grammarOptimizerBenchmark.js",
+    "bench:synthetic": "node ./dist/bench/grammarOptimizerSyntheticBenchmark.js",
     "build": "npm run tsc",
     "clean": "rimraf --glob dist *.tsbuildinfo *.done.build.log",
     "jest-esm": "node --no-warnings --experimental-vm-modules ./node_modules/jest/bin/jest.js",

diff --git a/ts/packages/actionGrammar/src/bench/benchUtil.ts b/ts/packages/actionGrammar/src/bench/benchUtil.ts
@@ -0,0 +1,119 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+/**
+ * Shared helpers for grammar optimizer benchmarks.
+ */
+
+import chalk from "chalk";
+import {
+    loadGrammarRulesNoThrow,
+    LoadGrammarRulesOptions,
+} from "../grammarLoader.js";
+import { matchGrammar } from "../grammarMatcher.js";
+import { GrammarPart } from "../grammarTypes.js";
+
+export const ITERATIONS = 500; // timed rounds per config; one round matches every request once
+
+export const CONFIGS: { name: string; opts: LoadGrammarRulesOptions }[] = [ // one table row each, in this order
+    { name: "baseline", opts: {} }, // empty options; keep first — later rows divide this row's time by their own
+    {
+        name: "inline", // inlining pass only
+        opts: { optimizations: { inlineSingleAlternatives: true } },
+    },
+    {
+        name: "factor", // prefix-factoring pass only
+        opts: { optimizations: { factorCommonPrefixes: true } },
+    },
+    {
+        name: "both", // both passes enabled together
+        opts: {
+            optimizations: {
+                inlineSingleAlternatives: true,
+                factorCommonPrefixes: true,
+            },
+        },
+    },
+];
+
+// Format a speedup ratio as "N.NNx" (min width 6): green above 1.1, red below 0.9.
+export function colorSpeedup(speedup: number): string {
+    const text = `${speedup.toFixed(2)}x`.padStart(6); // padded before any coloring is applied
+    if (speedup > 1.1) return chalk.green(text);
+    if (speedup < 0.9) return chalk.red(text);
+    return text; // neither threshold crossed: returned uncolored
+}
+
+export function timeMs(fn: () => void, iterations: number): number { // total ms for all calls
+    const start = performance.now(); // timestamp taken before the first call
+    for (let i = 0; i < iterations; i++) fn(); // fn runs back-to-back, `iterations` times
+    return performance.now() - start; // elapsed total, not a per-call average
+}
+
+export function countRulesParts( // counts parts of type "rules" reachable from grammar.rules
+    grammar: ReturnType<typeof loadGrammarRulesNoThrow>,
+): number {
+    if (!grammar) return 0; // a falsy grammar counts as zero
+    let count = 0;
+    const visit = (parts: GrammarPart[]) => {
+        for (const p of parts) {
+            if (p.type === "rules") {
+                count++; // once per occurrence: a body reached through two parts is counted twice
+                for (const r of p.rules) visit(r.parts); // NOTE(review): no visited set — presumably rule bodies are acyclic; confirm
+            }
+        }
+    };
+    for (const r of grammar.rules) visit(r.parts);
+    return count;
+}
+
+/**
+ * Print a `label`-headed table: for each CONFIGS entry, load `grammarText`
+ * (as `grammarName`) and time ITERATIONS rounds of matching `requests`.
+ */
+export function runBenchmark(
+    label: string,
+    grammarName: string,
+    grammarText: string,
+    requests: string[],
+): void {
+    console.log(`\n=== ${label} ===`);
+    console.log(
+        `| config    | RulesParts | match ms (${ITERATIONS}x) | speedup |`,
+    );
+    console.log(`|-----------|-----------:|---------------:|--------:|`);
+    let baselineMs = 0; // stays 0 until the "baseline" row has been timed
+    for (const cfg of CONFIGS) {
+        const errors: string[] = [];
+        const grammar = loadGrammarRulesNoThrow(
+            grammarName,
+            grammarText,
+            errors,
+            undefined,
+            cfg.opts,
+        );
+        if (!grammar) {
+            console.log(`[error] ${cfg.name}: ${errors.join("; ")}`);
+            continue; // no row is printed for a config that fails to load
+        }
+        const partCount = countRulesParts(grammar);
+        // Warm-up — also validates that the optimized grammar can run.
+        try {
+            for (const r of requests) matchGrammar(grammar, r);
+        } catch (e) {
+            // A thrown value is not guaranteed to be an Error; narrow first.
+            const reason = e instanceof Error ? e.message : String(e);
+            console.log(`[error] ${cfg.name} match failed: ${reason}`);
+            continue;
+        }
+        const ms = timeMs(() => {
+            for (const r of requests) matchGrammar(grammar, r);
+        }, ITERATIONS);
+        if (cfg.name === "baseline") baselineMs = ms;
+        const measurable = baselineMs > 0 && ms > 0; // else report 1x, not x/0
+        const speedup = measurable ? baselineMs / ms : 1;
+        console.log(
+            `| ${cfg.name.padEnd(9)} | ${String(partCount).padStart(10)} | ${ms.toFixed(1).padStart(14)} | ${colorSpeedup(speedup)} |`,
+        );
+    }
+}