From 6f15af1f14021ccb8cd83809b5ddba96665637f3 Mon Sep 17 00:00:00 2001
From: Curtis Man
Date: Mon, 27 Apr 2026 10:34:13 -0700
Subject: [PATCH 1/4] actionGrammar(fuzz): replace boolean feature flags with grouped weights/probabilities

Restructure FuzzFeatureFlags into sub-records grouped by area of impact
(partKinds, values, spacing, groups), so each knob's interpretation
(weight vs probability) is clear from its container and suffix.

- partKinds.{literal,ruleRef,wildcard,number}: relative weights for a
  weighted random pick of each part slot.
- values.attachProb: probability per eligible alternate of attaching a
  '-> value' expression.
- spacing.altProb / spacing.ruleProb: probability per alternate / per
  rule of attaching a [spacing=...] annotation.
- spacing.modes.{required,optional,none,auto}: relative weights for
  which spacing mode is picked when annotating.
- groups.{optionalProb,repeatProb}: reserved for (...)? and ()*/()+
  (not yet implemented).

Other changes:
- Add cloneFeatures() deep-clone helper.
- Add generic weightedPick() used by part-kind and spacing-mode
  selection.
- CLI --features uses dotted paths, e.g.
  --features partKinds.wildcard=5,values.attachProb=0.7,spacing.modes.required=3
- Test helper deep-merges the grouped feature record.

All 660 existing fuzz checks still pass.
--- .../actionGrammar/src/fuzz/fuzzHarness.ts | 12 +- .../actionGrammar/src/fuzz/fuzzRunner.ts | 123 +++++++--- .../src/fuzz/grammarGenerator.ts | 231 ++++++++++++++---- .../actionGrammar/test/grammarFuzz.spec.ts | 67 +++-- 4 files changed, 325 insertions(+), 108 deletions(-) diff --git a/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts b/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts index e23073f49..a67ef4c7b 100644 --- a/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts +++ b/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts @@ -69,12 +69,22 @@ export const DEFAULT_CONFIG: FuzzConfig = { seed: 0xc0ffee, count: 40, inputsPerGrammar: 6, - features: { ...DEFAULT_FEATURES }, + features: cloneFeatures(DEFAULT_FEATURES), validations: ["optimizer", "roundtrip-text", "roundtrip-json"], generator: { ...DEFAULT_GENERATOR_CONFIG }, verbose: false, }; +/** Deep clone of a {@link FuzzFeatureFlags} record. */ +export function cloneFeatures(f: FuzzFeatureFlags): FuzzFeatureFlags { + return { + partKinds: { ...f.partKinds }, + values: { ...f.values }, + spacing: { ...f.spacing, modes: { ...f.spacing.modes } }, + groups: { ...f.groups }, + }; +} + export type FuzzResult = { grammarIndex: number; grammarText: string; diff --git a/ts/packages/actionGrammar/src/fuzz/fuzzRunner.ts b/ts/packages/actionGrammar/src/fuzz/fuzzRunner.ts index 81cd0ab01..f49b536e4 100644 --- a/ts/packages/actionGrammar/src/fuzz/fuzzRunner.ts +++ b/ts/packages/actionGrammar/src/fuzz/fuzzRunner.ts @@ -32,6 +32,7 @@ import chalk from "chalk"; import { runFuzz, DEFAULT_CONFIG, + cloneFeatures, validateOptimizerEquivalence, validateTextRoundTrip, validateJsonRoundTrip, @@ -55,11 +56,21 @@ function printUsage(): void { " --seed PRNG seed (decimal or 0x hex, default: 0xc0ffee)", " --count Number of grammars to generate (default: 40)", " --inputs Extra random inputs per grammar (default: 6)", - " --features Comma-separated feature flags (default: literals,ruleRefs)", - " Literals are always implicitly enabled 
as the", - " fallback part kind. Other options:", - " ruleRefs, wildcards, numbers,", - " optionals (NYI), repeats (NYI), values, spacing", + " --features Comma-separated feature overrides.", + " Each entry is `path` (= weight 1) or", + " `path=` where `path` is a dotted", + " reference into the FuzzFeatureFlags tree:", + " partKinds.{literal,ruleRef,wildcard,number}", + " values.attachProb", + " spacing.{altProb,ruleProb}", + " spacing.modes.{required,optional,none,auto}", + " groups.{optionalProb,repeatProb} [NYI]", + " Fields named `*Prob` are probabilities in", + " [0,1]; other numeric fields are relative", + " weights for a weighted random pick. When", + " --features is given, all weights/probs reset", + " to 0 first; partKinds.literal stays at 1 as", + " the fallback part kind unless overridden.", " --validation Comma-separated validations (default: all)", " Options: optimizer, roundtrip-text, roundtrip-json", " --depth Max rules / nesting depth (default: 4)", @@ -72,7 +83,9 @@ function printUsage(): void { "", "Examples:", " node ./dist/fuzz/fuzzRunner.js --seed 42 --count 10", - " node ./dist/fuzz/fuzzRunner.js --features wildcards,values --validation optimizer --verbose", + " node ./dist/fuzz/fuzzRunner.js --features partKinds.wildcard,values.attachProb=0.7 --validation optimizer --verbose", + " node ./dist/fuzz/fuzzRunner.js --features partKinds.wildcard=5,partKinds.literal=1", + " node ./dist/fuzz/fuzzRunner.js --features spacing.altProb=0.3,spacing.modes.required=3", " node ./dist/fuzz/fuzzRunner.js --count 500 --seed 0xdeadbeef", " node ./dist/fuzz/fuzzRunner.js --replay ./repro-cases", "", @@ -80,18 +93,50 @@ function printUsage(): void { console.log(lines.join("\n")); } -const FEATURE_MAP: Record = { - literals: "literals", - ruleRefs: "ruleRefs", - rulerefs: "ruleRefs", - wildcards: "wildcards", - numbers: "numbers", - optionals: "optionals", - repeats: "repeats", - values: "values", - spacing: "spacingModes", +// Dotted-path setters into 
FuzzFeatureFlags. Each entry maps a +// canonical lower-case path (e.g. `partkinds.wildcard`) to a setter +// that writes the numeric value into the right slot of the record. +type FeatureSetter = (f: FuzzFeatureFlags, value: number) => void; +const FEATURE_PATHS: Record = { + "partkinds.literal": (f, v) => (f.partKinds.literal = v), + "partkinds.ruleref": (f, v) => (f.partKinds.ruleRef = v), + "partkinds.wildcard": (f, v) => (f.partKinds.wildcard = v), + "partkinds.number": (f, v) => (f.partKinds.number = v), + "values.attachprob": (f, v) => (f.values.attachProb = v), + "spacing.altprob": (f, v) => (f.spacing.altProb = v), + "spacing.ruleprob": (f, v) => (f.spacing.ruleProb = v), + "spacing.modes.required": (f, v) => (f.spacing.modes.required = v), + "spacing.modes.optional": (f, v) => (f.spacing.modes.optional = v), + "spacing.modes.none": (f, v) => (f.spacing.modes.none = v), + "spacing.modes.auto": (f, v) => (f.spacing.modes.auto = v), + "groups.optionalprob": (f, v) => (f.groups.optionalProb = v), + "groups.repeatprob": (f, v) => (f.groups.repeatProb = v), }; +/** Reset every feature value to 0 (used before applying --features). */ +function zeroAllFeatures(f: FuzzFeatureFlags): void { + for (const setter of Object.values(FEATURE_PATHS)) setter(f, 0); +} + +/** Iterate (path, value) pairs for diagnostic / summary printing. 
*/ +function* featureEntries( + f: FuzzFeatureFlags, +): Iterable { + yield ["partKinds.literal", f.partKinds.literal]; + yield ["partKinds.ruleRef", f.partKinds.ruleRef]; + yield ["partKinds.wildcard", f.partKinds.wildcard]; + yield ["partKinds.number", f.partKinds.number]; + yield ["values.attachProb", f.values.attachProb]; + yield ["spacing.altProb", f.spacing.altProb]; + yield ["spacing.ruleProb", f.spacing.ruleProb]; + yield ["spacing.modes.required", f.spacing.modes.required]; + yield ["spacing.modes.optional", f.spacing.modes.optional]; + yield ["spacing.modes.none", f.spacing.modes.none]; + yield ["spacing.modes.auto", f.spacing.modes.auto]; + yield ["groups.optionalProb", f.groups.optionalProb]; + yield ["groups.repeatProb", f.groups.repeatProb]; +} + const VALIDATION_MAP: Record = { optimizer: "optimizer", "roundtrip-text": "roundtrip-text", @@ -116,7 +161,7 @@ type ParsedArgs = { function parseArgs(argv: string[]): ParsedArgs { const config: FuzzConfig = { ...DEFAULT_CONFIG, - features: { ...DEFAULT_CONFIG.features }, + features: cloneFeatures(DEFAULT_CONFIG.features), generator: { ...DEFAULT_CONFIG.generator }, validations: [...DEFAULT_CONFIG.validations], }; @@ -163,27 +208,35 @@ function parseArgs(argv: string[]): ParsedArgs { break; case "--features": { if (!featuresExplicit) { - // First --features resets all to false, then enables - // the listed features. Literals are always kept on - // as the fallback part kind. - for (const k of Object.keys( - config.features, - ) as (keyof FuzzFeatureFlags)[]) { - config.features[k] = false; - } - config.features.literals = true; + // First --features resets all to 0, then applies + // the listed overrides. partKinds.literal stays at + // 1 as the fallback part kind unless overridden. 
+ zeroAllFeatures(config.features); + config.features.partKinds.literal = 1; featuresExplicit = true; } const parts = argv[++i].split(","); for (const p of parts) { - const key = FEATURE_MAP[p.trim().toLowerCase()]; - if (!key) { + const trimmed = p.trim(); + if (!trimmed) continue; + const eq = trimmed.indexOf("="); + const rawPath = eq >= 0 ? trimmed.slice(0, eq) : trimmed; + const rawValue = eq >= 0 ? trimmed.slice(eq + 1) : "1"; + const setter = FEATURE_PATHS[rawPath.trim().toLowerCase()]; + if (!setter) { + console.error( + `Unknown feature path: ${rawPath.trim()}. Valid paths: ${Object.keys(FEATURE_PATHS).join(", ")}`, + ); + process.exit(1); + } + const value = Number(rawValue); + if (!Number.isFinite(value) || value < 0) { console.error( - `Unknown feature: ${p.trim()}. Valid: ${Object.keys(FEATURE_MAP).join(", ")}`, + `Invalid value for feature '${rawPath.trim()}': ${rawValue}`, ); process.exit(1); } - config.features[key] = true; + setter(config.features, value); } break; } @@ -308,7 +361,7 @@ function replayReproCases(dir: string): number { const gen: GeneratedGrammar = { text: grammarText, testInputs: [], - usesValueExpressions: meta.features.values, + usesValueExpressions: meta.features.values.attachProb > 0, startValueRequired: false, }; @@ -385,11 +438,9 @@ function main(): void { } // Print configuration summary. - const enabledFeatures = ( - Object.entries(config.features) as [keyof FuzzFeatureFlags, boolean][] - ) - .filter(([, v]) => v) - .map(([k]) => k) + const enabledFeatures = Array.from(featureEntries(config.features)) + .filter(([, v]) => v > 0) + .map(([k, v]) => (v === 1 ? 
k : `${k}=${v}`)) .join(", "); console.log(chalk.bold("Grammar Fuzz Runner")); diff --git a/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts b/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts index 1d1175749..6a8da9355 100644 --- a/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts +++ b/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts @@ -7,9 +7,11 @@ * Generates structurally valid `.agr` grammar text along with inputs * that are guaranteed to match (and a few that deliberately do not). * - * Each grammar feature (wildcards, numbers, optional groups, spacing - * modes, value expressions, ...) is toggled independently via - * {@link FuzzFeatureFlags} so callers can explore specific dimensions. + * Generation is controlled by {@link FuzzFeatureFlags}, a record + * grouped by **area of impact** (part kinds, value expressions, + * spacing, groups). Within each group, fields named `*Prob` are + * probabilities in `[0, 1]`; other numeric fields are relative + * weights for a weighted random pick. */ // ── PRNG ────────────────────────────────────────────────────────────────────── @@ -36,36 +38,154 @@ export function intInRange(rng: () => number, lo: number, hi: number): number { // ── Feature flags ───────────────────────────────────────────────────────────── +/** + * Knobs that bias which kind of part the generator emits in each + * token slot of an alternative. Values are **relative weights** for + * a weighted random pick. `0` disables a kind; equal positive values + * yield uniform selection; larger values bias toward that kind + * (e.g. `wildcard: 5` with the others at `1` picks wildcards ~5x as + * often). `literal` is the safe fallback when no other kind is + * available in a position. + */ +export type PartKindWeights = { + /** Weight for literal string tokens. */ + literal: number; + /** Weight for `` references (DAG-shaped, no cycles). */ + ruleRef: number; + /** Weight for `$(varN:string)` wildcard captures. 
*/ + wildcard: number; + /** Weight for `$(varN:number)` numeric captures. */ + number: number; +}; + +/** + * Knobs controlling `-> value` expressions on alternates. + */ +export type ValueFeatures = { + /** + * Probability in `[0, 1]` of attaching a `-> value` expression + * to an alternate that has at least one capture. Clamped. + */ + attachProb: number; +}; + +/** + * Relative weights for which `[spacing=...]` mode to pick when an + * annotation is attached. Same semantics as {@link PartKindWeights}. + * If all four are 0, modes are picked uniformly. + */ +export type SpacingModeWeights = { + required: number; + optional: number; + none: number; + auto: number; +}; + +/** + * Knobs controlling `[spacing=...]` annotations on alternates and + * rules. Probabilities are independent: `altProb` is checked once + * per alternate; `ruleProb` once per rule. + */ +export type SpacingFeatures = { + /** Probability per alternate of attaching a spacing annotation. */ + altProb: number; + /** Probability per rule of attaching a spacing annotation. */ + ruleProb: number; + /** Relative weights for the spacing mode that gets picked. */ + modes: SpacingModeWeights; +}; + +/** + * Knobs for optional / repeat groups around parts. NOT YET + * IMPLEMENTED: accepted for forward compatibility but ignored by the + * generator. + */ +export type GroupFeatures = { + /** Probability per part of being wrapped in `(...)?`. */ + optionalProb: number; + /** Probability per part of being wrapped in `()*` / `()+`. */ + repeatProb: number; +}; + +/** + * Composable feature configuration for the random grammar generator. + * + * Knobs are grouped by **area of impact** so callers can tell at a + * glance which dimension they are tuning. Within each group: + * + * - Fields named `*Prob` are probabilities in `[0, 1]` (clamped). + * - Other numeric fields are relative weights for a weighted random + * pick (`0` disables; equal positive values are uniform). 
+ */ export type FuzzFeatureFlags = { - /** Literal string tokens (always safe to enable). */ - literals: boolean; - /** `` references (DAG-shaped, no cycles). */ - ruleRefs: boolean; - /** `$(varN:string)` wildcard captures. */ - wildcards: boolean; - /** `$(varN:number)` numeric captures. */ - numbers: boolean; - /** `(...)?` optional groups. NOT YET IMPLEMENTED: accepted but ignored by the generator. */ - optionals: boolean; - /** `()*` / `()+` repeat groups. NOT YET IMPLEMENTED: accepted but ignored by the generator. */ - repeats: boolean; - /** Value expressions after `->` (object literals, binary ops, etc.). */ - values: boolean; - /** Random `[spacing=required|optional|none|auto]` per rule/alternate. */ - spacingModes: boolean; + /** Which kind of part to emit in each token slot. */ + partKinds: PartKindWeights; + /** `-> value` expressions on alternates. */ + values: ValueFeatures; + /** `[spacing=...]` annotations and which modes to pick. */ + spacing: SpacingFeatures; + /** Optional / repeat groups around parts. NOT YET IMPLEMENTED. */ + groups: GroupFeatures; }; export const DEFAULT_FEATURES: FuzzFeatureFlags = { - literals: true, - ruleRefs: true, - wildcards: false, - numbers: false, - optionals: false, - repeats: false, - values: false, - spacingModes: false, + partKinds: { + literal: 1, + ruleRef: 1, + wildcard: 0, + number: 0, + }, + values: { + attachProb: 0, + }, + spacing: { + altProb: 0, + ruleProb: 0, + modes: { + required: 1, + optional: 1, + none: 1, + auto: 1, + }, + }, + groups: { + optionalProb: 0, + repeatProb: 0, + }, }; +function clamp01(x: number): number { + if (!(x > 0)) return 0; + if (x > 1) return 1; + return x; +} + +/** + * Weighted pick from `(item, weight)` entries. Negative weights are + * treated as 0. Returns `undefined` if all weights are <= 0. 
+ */ +function weightedPick( + rng: () => number, + entries: ReadonlyArray, +): T | undefined { + let total = 0; + for (const [, w] of entries) { + if (w > 0) total += w; + } + if (total <= 0) return undefined; + let r = rng() * total; + for (const [item, w] of entries) { + if (w <= 0) continue; + r -= w; + if (r < 0) return item; + } + // Numeric edge case: return the last positive-weight item. + for (let i = entries.length - 1; i >= 0; i--) { + if (entries[i][1] > 0) return entries[i][0]; + } + return undefined; +} + // ── Generation config ───────────────────────────────────────────────────────── export type GeneratorConfig = { @@ -107,7 +227,19 @@ type SpacingMode = (typeof SPACING_MODES)[number]; function spacingAnnotation(mode: SpacingMode): string { return ` [spacing=${mode}]`; } - +function pickSpacingMode( + rng: () => number, + weights: SpacingModeWeights, +): SpacingMode { + const entries: ReadonlyArray = [ + ["required", weights.required], + ["optional", weights.optional], + ["none", weights.none], + ["auto", weights.auto], + ]; + // Fall back to uniform if all weights are 0. + return weightedPick(rng, entries) ?? pick(rng, SPACING_MODES); +} // ── Internal state while building a single grammar ──────────────────────────── type RuleState = { @@ -280,8 +412,13 @@ export function buildRandomGrammar( } // Build value expression for this alternative if enabled. + // `features.values.attachProb` is the per-alternate attach + // probability (only eligible when captures exist). let valueText = ""; - if (features.values && altBoundVars.length > 0 && rng() < 0.7) { + if ( + altBoundVars.length > 0 && + rng() < clamp01(features.values.attachProb) + ) { const expr = buildValueExpr(rng, altBoundVars); valueText = ` -> ${expr}`; state.hasValue = true; @@ -298,8 +435,8 @@ export function buildRandomGrammar( // Per-alternate spacing annotation. 
let spacingText = ""; - if (features.spacingModes && rng() < 0.3) { - const mode = pick(rng, SPACING_MODES); + if (rng() < clamp01(features.spacing.altProb)) { + const mode = pickSpacingMode(rng, features.spacing.modes); spacingText = spacingAnnotation(mode); } @@ -321,8 +458,10 @@ export function buildRandomGrammar( const state = ruleStates[i]; // Rule-level spacing annotation. let ruleSpacing = ""; - if (features.spacingModes && rng() < 0.4) { - ruleSpacing = spacingAnnotation(pick(rng, SPACING_MODES)); + if (rng() < clamp01(features.spacing.ruleProb)) { + ruleSpacing = spacingAnnotation( + pickSpacingMode(rng, features.spacing.modes), + ); } lines.push( `<${ruleName(i)}>${ruleSpacing} = ${state.altTexts.join(" | ")};`, @@ -363,20 +502,18 @@ function choosePartKind( ruleIndex: number, ruleCount: number, ): PartKind { - const candidates: PartKind[] = []; - - // Literals are always a candidate when enabled (and always the fallback). - if (features.literals) candidates.push("literal"); - // Rule refs only when there's a forward rule available. - if (features.ruleRefs && ruleIndex + 1 < ruleCount) - candidates.push("ruleRef"); - if (features.wildcards) candidates.push("wildcard"); - if (features.numbers) candidates.push("number"); - - // Fallback: if nothing else is available, emit a literal. - if (candidates.length === 0) return "literal"; - - return pick(rng, candidates); + // ruleRef requires a forward rule to point at; otherwise force its + // weight to 0. Other kinds are always available. + const ruleRefAvailable = ruleIndex + 1 < ruleCount; + const kinds = features.partKinds; + const entries: ReadonlyArray = [ + ["literal", kinds.literal], + ["ruleRef", ruleRefAvailable ? kinds.ruleRef : 0], + ["wildcard", kinds.wildcard], + ["number", kinds.number], + ]; + // Fallback to literal if no kind has positive weight in this slot. + return weightedPick(rng, entries) ?? 
"literal"; } // ── Random input generator ──────────────────────────────────────────────────── diff --git a/ts/packages/actionGrammar/test/grammarFuzz.spec.ts b/ts/packages/actionGrammar/test/grammarFuzz.spec.ts index 9a766d1e3..df98de35a 100644 --- a/ts/packages/actionGrammar/test/grammarFuzz.spec.ts +++ b/ts/packages/actionGrammar/test/grammarFuzz.spec.ts @@ -22,6 +22,36 @@ import { // ── Helpers ─────────────────────────────────────────────────────────────────── +/** + * Deep-merge a partial feature override over the defaults. Each + * sub-group (`partKinds`, `values`, `spacing`, `groups`) is merged + * independently, with `spacing.modes` merged one level deeper. + */ +type FeaturesOverride = { + partKinds?: Partial; + values?: Partial; + spacing?: Partial> & { + modes?: Partial; + }; + groups?: Partial; +}; + +function mergeFeatures( + base: FuzzConfig["features"], + over: FeaturesOverride | undefined, +): FuzzConfig["features"] { + return { + partKinds: { ...base.partKinds, ...(over?.partKinds ?? {}) }, + values: { ...base.values, ...(over?.values ?? {}) }, + spacing: { + ...base.spacing, + ...(over?.spacing ?? {}), + modes: { ...base.spacing.modes, ...(over?.spacing?.modes ?? {}) }, + }, + groups: { ...base.groups, ...(over?.groups ?? {}) }, + }; +} + /** * Run the harness and emit one `it()` per result so Jest reports * individual grammar/input failures. @@ -29,7 +59,7 @@ import { function fuzzDescribe( name: string, configOverrides: Omit, "features" | "generator"> & { - features?: Partial; + features?: FeaturesOverride; generator?: Partial; }, ): void { @@ -38,10 +68,10 @@ function fuzzDescribe( const config: FuzzConfig = { ...DEFAULT_CONFIG, ...configOverrides, - features: { - ...DEFAULT_CONFIG.features, - ...(configOverrides.features ?? {}), - }, + features: mergeFeatures( + DEFAULT_CONFIG.features, + configOverrides.features, + ), generator: { ...DEFAULT_CONFIG.generator, ...(configOverrides.generator ?? 
{}), @@ -90,8 +120,7 @@ fuzzDescribe("Fuzz: optimizer equivalence (literals + ruleRefs)", { count: 40, inputsPerGrammar: 6, features: { - literals: true, - ruleRefs: true, + partKinds: { literal: 1, ruleRef: 1 }, }, generator: { maxRules: 4, @@ -106,11 +135,8 @@ fuzzDescribe("Fuzz: optimizer equivalence (wildcards + values)", { seed: 0xf0221, count: 30, features: { - literals: true, - ruleRefs: true, - wildcards: true, - numbers: true, - values: true, + partKinds: { literal: 1, ruleRef: 1, wildcard: 1, number: 1 }, + values: { attachProb: 0.7 }, }, validations: ["optimizer"], }); @@ -119,11 +145,8 @@ fuzzDescribe("Fuzz: parse-write round-trip", { seed: 0xf0222, count: 30, features: { - literals: true, - ruleRefs: true, - wildcards: true, - numbers: true, - values: true, + partKinds: { literal: 1, ruleRef: 1, wildcard: 1, number: 1 }, + values: { attachProb: 0.7 }, }, validations: ["roundtrip-text"], }); @@ -132,9 +155,8 @@ fuzzDescribe("Fuzz: spacing modes (optimizer equivalence)", { seed: 0xf0223, count: 30, features: { - literals: true, - ruleRefs: true, - spacingModes: true, + partKinds: { literal: 1, ruleRef: 1 }, + spacing: { altProb: 0.3, ruleProb: 0.4 }, }, validations: ["optimizer"], }); @@ -143,10 +165,7 @@ fuzzDescribe("Fuzz: JSON serialization round-trip", { seed: 0xf0224, count: 30, features: { - literals: true, - ruleRefs: true, - wildcards: true, - numbers: true, + partKinds: { literal: 1, ruleRef: 1, wildcard: 1, number: 1 }, }, validations: ["roundtrip-json"], }); From daa77ce62592b3c22418cfb19a58d8d45801a670 Mon Sep 17 00:00:00 2001 From: Curtis Man Date: Mon, 27 Apr 2026 10:48:16 -0700 Subject: [PATCH 2/4] actionGrammar(fuzz): implement optional and repeat group quantifiers The groups.optionalProb and groups.repeatProb feature knobs are now honored. For each emitted part, two independent rolls determine whether the part is wrapped in a quantifier group: - neither: bare part - optional only: (part)? 
- repeat only: (part)+ - both: (part)* The matching input emits the inner expansion exactly once, which satisfies all three quantifier forms. Quantifiers are applied uniformly across literals, rule references, wildcards, and numbers. Add two new fuzz dimensions: - Fuzz: optional / repeat groups (optimizer equivalence) - Fuzz: optional / repeat groups (parse-write round-trip) 900/900 fuzz checks pass. --- .../src/fuzz/grammarGenerator.ts | 56 ++++++++++++++----- .../actionGrammar/test/grammarFuzz.spec.ts | 20 +++++++ 2 files changed, 62 insertions(+), 14 deletions(-) diff --git a/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts b/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts index 6a8da9355..c45c27b3c 100644 --- a/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts +++ b/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts @@ -96,14 +96,24 @@ export type SpacingFeatures = { }; /** - * Knobs for optional / repeat groups around parts. NOT YET - * IMPLEMENTED: accepted for forward compatibility but ignored by the - * generator. + * Knobs for optional / repeat groups around individual parts. + * + * For each emitted part, the generator independently rolls + * `optionalProb` and `repeatProb`; the combination determines the + * quantifier wrapped around the part text: + * + * - neither: bare part (no group) + * - optional only: `(part)?` + * - repeat only: `(part)+` + * - both: `(part)*` + * + * The matching input always emits the inner expansion exactly once, + * which satisfies all three quantifier forms. */ export type GroupFeatures = { - /** Probability per part of being wrapped in `(...)?`. */ + /** Probability per part of being wrapped in an optional group. */ optionalProb: number; - /** Probability per part of being wrapped in `()*` / `()+`. */ + /** Probability per part of being wrapped in a repeat group. 
*/ repeatProb: number; }; @@ -124,7 +134,7 @@ export type FuzzFeatureFlags = { values: ValueFeatures; /** `[spacing=...]` annotations and which modes to pick. */ spacing: SpacingFeatures; - /** Optional / repeat groups around parts. NOT YET IMPLEMENTED. */ + /** Optional / repeat group quantifiers around individual parts. */ groups: GroupFeatures; }; @@ -381,34 +391,52 @@ export function buildRandomGrammar( for (let p = 0; p < partCount; p++) { const partKind = choosePartKind(rng, features, i, ruleCount); + let innerText: string; + let innerMatch: string[]; switch (partKind) { case "literal": { const lit = buildLiteralPart(rng, words); - partTexts.push(lit.text); - partMatch.push(...lit.matchTokens); + innerText = lit.text; + innerMatch = lit.matchTokens; break; } case "ruleRef": { const target = intInRange(rng, i + 1, ruleCount - 1); - partTexts.push(`<${ruleName(target)}>`); - partMatch.push(...ruleStates[target].firstAltMatch); + innerText = `<${ruleName(target)}>`; + innerMatch = ruleStates[target].firstAltMatch; break; } case "wildcard": { const wc = buildWildcardPart(rng, varCounter, words); - partTexts.push(wc.text); - partMatch.push(...wc.matchTokens); + innerText = wc.text; + innerMatch = wc.matchTokens; altBoundVars.push(wc.varName); break; } case "number": { const np = buildNumberPart(rng, varCounter); - partTexts.push(np.text); - partMatch.push(...np.matchTokens); + innerText = np.text; + innerMatch = np.matchTokens; altBoundVars.push(np.varName); break; } } + + // Optionally wrap the part in an optional / repeat + // group. The two probabilities are rolled + // independently and combined into a single quantifier. 
+ const optional = rng() < clamp01(features.groups.optionalProb); + const repeat = rng() < clamp01(features.groups.repeatProb); + let partText = innerText; + if (optional && repeat) partText = `(${innerText})*`; + else if (optional) partText = `(${innerText})?`; + else if (repeat) partText = `(${innerText})+`; + + partTexts.push(partText); + // The matching input includes the inner expansion + // exactly once: this satisfies `?` (present), `+` + // (one repetition), and `*` (one repetition) alike. + partMatch.push(...innerMatch); } // Build value expression for this alternative if enabled. diff --git a/ts/packages/actionGrammar/test/grammarFuzz.spec.ts b/ts/packages/actionGrammar/test/grammarFuzz.spec.ts index df98de35a..59998e3b3 100644 --- a/ts/packages/actionGrammar/test/grammarFuzz.spec.ts +++ b/ts/packages/actionGrammar/test/grammarFuzz.spec.ts @@ -169,3 +169,23 @@ fuzzDescribe("Fuzz: JSON serialization round-trip", { }, validations: ["roundtrip-json"], }); + +fuzzDescribe("Fuzz: optional / repeat groups (optimizer equivalence)", { + seed: 0xf0225, + count: 30, + features: { + partKinds: { literal: 1, ruleRef: 1 }, + groups: { optionalProb: 0.4, repeatProb: 0.3 }, + }, + validations: ["optimizer"], +}); + +fuzzDescribe("Fuzz: optional / repeat groups (parse-write round-trip)", { + seed: 0xf0226, + count: 30, + features: { + partKinds: { literal: 1, ruleRef: 1, wildcard: 1, number: 1 }, + groups: { optionalProb: 0.3, repeatProb: 0.3 }, + }, + validations: ["roundtrip-text", "roundtrip-json"], +}); From a7af3cf7899c043a9223e62d24fd29fad91dfe86 Mon Sep 17 00:00:00 2001 From: Curtis Man Date: Mon, 27 Apr 2026 10:56:45 -0700 Subject: [PATCH 3/4] actionGrammar(fuzz): broaden DEFAULT_FEATURES; add MINIMAL_FEATURES; fix capture scoping in quantifier groups DEFAULT_FEATURES now exercises every feature group out of the box so a caller using the defaults (including the CLI with no --features) gets a representative sweep across part kinds, value expressions, spacing, and 
quantifier groups. Literals stay weighted 2x to keep them dominant. Add MINIMAL_FEATURES export for callers that want the previous conservative baseline (only literals + rule references; every probability 0). The per-dimension specs now merge over MINIMAL_FEATURES so each test stays isolated to the dimension under test. Generator bug fix: variable captures (wildcard, number) wrapped in a quantifier group ((part)?, (part)+, (part)*) are no longer added to the alternate's bound-vars list, since those captures are not in scope for the alternate's value expression. Without the fix, the broader defaults exposed cases where value expressions referenced n5/v2/etc. that the matcher rejected as undefined. 900/900 fuzz spec checks pass; 3200/3200 CLI checks pass across two seeds with the new defaults. --- .../actionGrammar/src/fuzz/fuzzHarness.ts | 1 + .../src/fuzz/grammarGenerator.ts | 56 ++++++++++++++++++- .../actionGrammar/test/grammarFuzz.spec.ts | 11 ++-- 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts b/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts index a67ef4c7b..7204c1aab 100644 --- a/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts +++ b/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts @@ -404,6 +404,7 @@ export { makeRng, pick, intInRange, generateExtraInputs }; export type { FuzzFeatureFlags, GeneratorConfig, GeneratedGrammar }; export { DEFAULT_FEATURES, + MINIMAL_FEATURES, DEFAULT_GENERATOR_CONFIG, buildRandomGrammar, } from "./grammarGenerator.js"; diff --git a/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts b/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts index c45c27b3c..3e144eaf9 100644 --- a/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts +++ b/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts @@ -138,7 +138,50 @@ export type FuzzFeatureFlags = { groups: GroupFeatures; }; +/** + * Broad-coverage defaults for the fuzz generator. 
Every feature + * group is exercised so a caller who passes `DEFAULT_FEATURES` (or + * runs the CLI with no `--features`) gets a representative sweep + * across part kinds, value expressions, spacing, and quantifier + * groups. Literals are weighted 2x to keep them dominant since they + * are the cheap, always-valid baseline. + * + * For a minimum-coverage baseline (only literals + rule refs) use + * {@link MINIMAL_FEATURES}. + */ export const DEFAULT_FEATURES: FuzzFeatureFlags = { + partKinds: { + literal: 2, + ruleRef: 1, + wildcard: 1, + number: 1, + }, + values: { + attachProb: 0.5, + }, + spacing: { + altProb: 0.2, + ruleProb: 0.2, + modes: { + required: 1, + optional: 1, + none: 1, + auto: 1, + }, + }, + groups: { + optionalProb: 0.2, + repeatProb: 0.2, + }, +}; + +/** + * Minimum-coverage feature set: only literals and rule references are + * enabled, every probability is 0. Useful for narrow regression + * checks or as a starting point for callers that want to enable a + * single dimension at a time. 
+ */ +export const MINIMAL_FEATURES: FuzzFeatureFlags = { partKinds: { literal: 1, ruleRef: 1, @@ -393,6 +436,7 @@ export function buildRandomGrammar( let innerText: string; let innerMatch: string[]; + let captureVar: string | undefined; switch (partKind) { case "literal": { const lit = buildLiteralPart(rng, words); @@ -410,14 +454,14 @@ export function buildRandomGrammar( const wc = buildWildcardPart(rng, varCounter, words); innerText = wc.text; innerMatch = wc.matchTokens; - altBoundVars.push(wc.varName); + captureVar = wc.varName; break; } case "number": { const np = buildNumberPart(rng, varCounter); innerText = np.text; innerMatch = np.matchTokens; - altBoundVars.push(np.varName); + captureVar = np.varName; break; } } @@ -432,6 +476,14 @@ export function buildRandomGrammar( else if (optional) partText = `(${innerText})?`; else if (repeat) partText = `(${innerText})+`; + // Captures inside a quantifier group are not visible to + // the alternate's value expression (they're either + // optional or aggregated), so only expose unwrapped + // captures as bound variables. + if (captureVar !== undefined && !optional && !repeat) { + altBoundVars.push(captureVar); + } + partTexts.push(partText); // The matching input includes the inner expansion // exactly once: this satisfies `?` (present), `+` diff --git a/ts/packages/actionGrammar/test/grammarFuzz.spec.ts b/ts/packages/actionGrammar/test/grammarFuzz.spec.ts index 59998e3b3..ae40c8de2 100644 --- a/ts/packages/actionGrammar/test/grammarFuzz.spec.ts +++ b/ts/packages/actionGrammar/test/grammarFuzz.spec.ts @@ -18,6 +18,7 @@ import { type FuzzConfig, type FuzzResult, DEFAULT_CONFIG, + MINIMAL_FEATURES, } from "../src/fuzz/fuzzHarness.js"; // ── Helpers ─────────────────────────────────────────────────────────────────── @@ -64,14 +65,14 @@ function fuzzDescribe( }, ): void { describe(name, () => { - // Merge config. + // Merge config. 
Per-dimension tests intentionally start from + // MINIMAL_FEATURES (only literals + ruleRefs enabled) so they + // isolate the dimension under test rather than inheriting the + // broad-coverage defaults. const config: FuzzConfig = { ...DEFAULT_CONFIG, ...configOverrides, - features: mergeFeatures( - DEFAULT_CONFIG.features, - configOverrides.features, - ), + features: mergeFeatures(MINIMAL_FEATURES, configOverrides.features), generator: { ...DEFAULT_CONFIG.generator, ...(configOverrides.generator ?? {}), From 2f426a2f80fa39a82ee72f552693345297f1c078 Mon Sep 17 00:00:00 2001 From: Curtis Man Date: Mon, 27 Apr 2026 11:36:41 -0700 Subject: [PATCH 4/4] actionGrammar(fuzz): consolidate feature schema, multi-rep matching, picker tests Address review feedback on the grouped feature-flag refactor. grammarGenerator.ts: - Add FEATURE_FIELDS descriptor table as the single source of truth for the FuzzFeatureFlags schema (path + get + set per slot). The CLI parser, summary printer, and zero-out helper now derive from it instead of duplicating the field list. - Decouple groups.*Prob from values.attachProb: capture-bearing parts (wildcards, numbers) are never wrapped in quantifier groups, so every capture stays exposed to its alternate's value expression. - Implement true multi-rep matching for quantifier groups: '?' emits 0..1 inner copies, '+' emits 1..3, '*' emits 0..2. The previous always-one-copy behavior didn't actually exercise multi-rep semantics. - pickSpacingMode now returns undefined when every mode weight is 0 (callers skip the annotation rather than silently fall back to a uniform pick). - weightedPick: drop dead trailing fallback loop; track lastPositive in the first pass and return it on the rare floating-point edge. - Hoist clamp01() calls out of the inner part/alt/rule loops. - Export weightedPick, pickSpacingMode, clamp01 for unit testing. fuzzHarness.ts: - Add mergeFeatures() + FeaturesOverride and reuse them from the spec. 
- Add zeroAllFeatures() and featureEntries() derived from FEATURE_FIELDS. - Re-export the new symbols (FEATURE_FIELDS, weightedPick, pickSpacingMode, clamp01, FeatureFieldDescriptor). fuzzRunner.ts: - FEATURE_PATHS is now Object.fromEntries(FEATURE_FIELDS...) so adding a knob in one place propagates everywhere. - Drop the [NYI] marker on groups.{optionalProb,repeatProb}. - Error message lists canonical camelCase paths. - Help text adds an example clarifying that partKinds.literal stays at 1 as the fallback when --features is given. Tests: - grammarFuzz.spec.ts uses the harness's mergeFeatures + FeaturesOverride. - New grammarFuzzPicker.spec.ts: 8 unit tests covering weightedPick (zero/negative weights, ratio fidelity, uniform, zero-weight exclusion), pickSpacingMode (undefined on all-zero, exclusion, weight bias), and clamp01. 908/908 fuzz checks pass; full actionGrammar suite: 3636 passed. --- .../actionGrammar/src/fuzz/fuzzHarness.ts | 55 +++++ .../actionGrammar/src/fuzz/fuzzRunner.ts | 64 ++--- .../src/fuzz/grammarGenerator.ts | 226 ++++++++++++++---- .../actionGrammar/test/grammarFuzz.spec.ts | 34 +-- .../test/grammarFuzzPicker.spec.ts | 143 +++++++++++ 5 files changed, 405 insertions(+), 117 deletions(-) create mode 100644 ts/packages/actionGrammar/test/grammarFuzzPicker.spec.ts diff --git a/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts b/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts index 7204c1aab..d2ebacfa5 100644 --- a/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts +++ b/ts/packages/actionGrammar/src/fuzz/fuzzHarness.ts @@ -27,6 +27,7 @@ import { generateExtraInputs, DEFAULT_FEATURES, DEFAULT_GENERATOR_CONFIG, + FEATURE_FIELDS, type FuzzFeatureFlags, type GeneratorConfig, type GeneratedGrammar, @@ -85,6 +86,55 @@ export function cloneFeatures(f: FuzzFeatureFlags): FuzzFeatureFlags { }; } +/** Reset every numeric field in `f` to 0 in place. 
*/ +export function zeroAllFeatures(f: FuzzFeatureFlags): void { + for (const field of FEATURE_FIELDS) field.set(f, 0); +} + +/** Iterate `(path, value)` pairs in canonical order for diagnostic output. */ +export function* featureEntries( + f: FuzzFeatureFlags, +): Iterable { + for (const field of FEATURE_FIELDS) { + yield [field.path, field.get(f)]; + } +} + +/** + * Nested partial override of a {@link FuzzFeatureFlags} record, used + * by tests and other callers that prefer a structural literal to the + * dotted-path setter API. + */ +export type FeaturesOverride = { + partKinds?: Partial; + values?: Partial; + spacing?: Partial> & { + modes?: Partial; + }; + groups?: Partial; +}; + +/** + * Deep-merge a {@link FeaturesOverride} on top of `base`, returning a + * fresh record. Each sub-group is merged independently; + * `spacing.modes` is merged one level deeper. + */ +export function mergeFeatures( + base: FuzzFeatureFlags, + over: FeaturesOverride | undefined, +): FuzzFeatureFlags { + return { + partKinds: { ...base.partKinds, ...(over?.partKinds ?? {}) }, + values: { ...base.values, ...(over?.values ?? {}) }, + spacing: { + ...base.spacing, + ...(over?.spacing ?? {}), + modes: { ...base.spacing.modes, ...(over?.spacing?.modes ?? {}) }, + }, + groups: { ...base.groups, ...(over?.groups ?? 
{}) }, + }; +} + export type FuzzResult = { grammarIndex: number; grammarText: string; @@ -406,5 +456,10 @@ export { DEFAULT_FEATURES, MINIMAL_FEATURES, DEFAULT_GENERATOR_CONFIG, + FEATURE_FIELDS, buildRandomGrammar, + weightedPick, + pickSpacingMode, + clamp01, } from "./grammarGenerator.js"; +export type { FeatureFieldDescriptor } from "./grammarGenerator.js"; diff --git a/ts/packages/actionGrammar/src/fuzz/fuzzRunner.ts b/ts/packages/actionGrammar/src/fuzz/fuzzRunner.ts index f49b536e4..11fa983ff 100644 --- a/ts/packages/actionGrammar/src/fuzz/fuzzRunner.ts +++ b/ts/packages/actionGrammar/src/fuzz/fuzzRunner.ts @@ -33,6 +33,9 @@ import { runFuzz, DEFAULT_CONFIG, cloneFeatures, + zeroAllFeatures, + featureEntries, + FEATURE_FIELDS, validateOptimizerEquivalence, validateTextRoundTrip, validateJsonRoundTrip, @@ -64,13 +67,16 @@ function printUsage(): void { " values.attachProb", " spacing.{altProb,ruleProb}", " spacing.modes.{required,optional,none,auto}", - " groups.{optionalProb,repeatProb} [NYI]", + " groups.{optionalProb,repeatProb}", " Fields named `*Prob` are probabilities in", " [0,1]; other numeric fields are relative", " weights for a weighted random pick. When", " --features is given, all weights/probs reset", " to 0 first; partKinds.literal stays at 1 as", - " the fallback part kind unless overridden.", + " the fallback part kind unless overridden", + " (e.g. `--features partKinds.wildcard=5`", + " leaves literal=1, so wildcards are 5x as", + " common as literals).", " --validation Comma-separated validations (default: all)", " Options: optimizer, roundtrip-text, roundtrip-json", " --depth Max rules / nesting depth (default: 4)", @@ -93,49 +99,17 @@ function printUsage(): void { console.log(lines.join("\n")); } -// Dotted-path setters into FuzzFeatureFlags. Each entry maps a -// canonical lower-case path (e.g. `partkinds.wildcard`) to a setter -// that writes the numeric value into the right slot of the record. 
+// Dotted-path setters into FuzzFeatureFlags, derived from the +// canonical FEATURE_FIELDS table in grammarGenerator.ts. Keys are +// lower-cased so CLI lookup is case-insensitive; canonical (camelCase) +// paths are kept around for human-readable diagnostics. type FeatureSetter = (f: FuzzFeatureFlags, value: number) => void; -const FEATURE_PATHS: Record = { - "partkinds.literal": (f, v) => (f.partKinds.literal = v), - "partkinds.ruleref": (f, v) => (f.partKinds.ruleRef = v), - "partkinds.wildcard": (f, v) => (f.partKinds.wildcard = v), - "partkinds.number": (f, v) => (f.partKinds.number = v), - "values.attachprob": (f, v) => (f.values.attachProb = v), - "spacing.altprob": (f, v) => (f.spacing.altProb = v), - "spacing.ruleprob": (f, v) => (f.spacing.ruleProb = v), - "spacing.modes.required": (f, v) => (f.spacing.modes.required = v), - "spacing.modes.optional": (f, v) => (f.spacing.modes.optional = v), - "spacing.modes.none": (f, v) => (f.spacing.modes.none = v), - "spacing.modes.auto": (f, v) => (f.spacing.modes.auto = v), - "groups.optionalprob": (f, v) => (f.groups.optionalProb = v), - "groups.repeatprob": (f, v) => (f.groups.repeatProb = v), -}; - -/** Reset every feature value to 0 (used before applying --features). */ -function zeroAllFeatures(f: FuzzFeatureFlags): void { - for (const setter of Object.values(FEATURE_PATHS)) setter(f, 0); -} - -/** Iterate (path, value) pairs for diagnostic / summary printing. 
*/ -function* featureEntries( - f: FuzzFeatureFlags, -): Iterable { - yield ["partKinds.literal", f.partKinds.literal]; - yield ["partKinds.ruleRef", f.partKinds.ruleRef]; - yield ["partKinds.wildcard", f.partKinds.wildcard]; - yield ["partKinds.number", f.partKinds.number]; - yield ["values.attachProb", f.values.attachProb]; - yield ["spacing.altProb", f.spacing.altProb]; - yield ["spacing.ruleProb", f.spacing.ruleProb]; - yield ["spacing.modes.required", f.spacing.modes.required]; - yield ["spacing.modes.optional", f.spacing.modes.optional]; - yield ["spacing.modes.none", f.spacing.modes.none]; - yield ["spacing.modes.auto", f.spacing.modes.auto]; - yield ["groups.optionalProb", f.groups.optionalProb]; - yield ["groups.repeatProb", f.groups.repeatProb]; -} +const FEATURE_PATHS: Record = Object.fromEntries( + FEATURE_FIELDS.map((field) => [field.path.toLowerCase(), field.set]), +); +const CANONICAL_FEATURE_PATHS: readonly string[] = FEATURE_FIELDS.map( + (field) => field.path, +); const VALIDATION_MAP: Record = { optimizer: "optimizer", @@ -225,7 +199,7 @@ function parseArgs(argv: string[]): ParsedArgs { const setter = FEATURE_PATHS[rawPath.trim().toLowerCase()]; if (!setter) { console.error( - `Unknown feature path: ${rawPath.trim()}. Valid paths: ${Object.keys(FEATURE_PATHS).join(", ")}`, + `Unknown feature path: ${rawPath.trim()}. 
Valid paths: ${CANONICAL_FEATURE_PATHS.join(", ")}`, ); process.exit(1); } diff --git a/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts b/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts index 3e144eaf9..387b5ba92 100644 --- a/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts +++ b/ts/packages/actionGrammar/src/fuzz/grammarGenerator.ts @@ -207,7 +207,7 @@ export const MINIMAL_FEATURES: FuzzFeatureFlags = { }, }; -function clamp01(x: number): number { +export function clamp01(x: number): number { if (!(x > 0)) return 0; if (x > 1) return 1; return x; @@ -217,13 +217,17 @@ function clamp01(x: number): number { * Weighted pick from `(item, weight)` entries. Negative weights are * treated as 0. Returns `undefined` if all weights are <= 0. */ -function weightedPick( +export function weightedPick( rng: () => number, entries: ReadonlyArray, ): T | undefined { let total = 0; - for (const [, w] of entries) { - if (w > 0) total += w; + let lastPositive: T | undefined; + for (const [item, w] of entries) { + if (w > 0) { + total += w; + lastPositive = item; + } } if (total <= 0) return undefined; let r = rng() * total; @@ -232,13 +236,123 @@ function weightedPick( r -= w; if (r < 0) return item; } - // Numeric edge case: return the last positive-weight item. - for (let i = entries.length - 1; i >= 0; i--) { - if (entries[i][1] > 0) return entries[i][0]; - } - return undefined; + // Floating-point fall-through: by construction `r` should reach + // <0 above, but rounding can leave it at exactly 0 on the last + // entry. Return the last positive-weight item in that case. + return lastPositive; } +// ── Feature field descriptors ───────────────────────────────────────────────── + +/** + * Single source of truth for the {@link FuzzFeatureFlags} schema. + * Each descriptor knows its canonical dotted path and how to read / + * write its slot. All other tables (CLI parser, diagnostic + * summary, zero-out helper) derive from this list. 
+ */ +export type FeatureFieldDescriptor = { + /** Canonical dotted path, e.g. `"partKinds.wildcard"`. */ + readonly path: string; + /** Read the field's current value. */ + readonly get: (f: FuzzFeatureFlags) => number; + /** Write a value into the field. */ + readonly set: (f: FuzzFeatureFlags, value: number) => void; +}; + +export const FEATURE_FIELDS: readonly FeatureFieldDescriptor[] = [ + { + path: "partKinds.literal", + get: (f) => f.partKinds.literal, + set: (f, v) => { + f.partKinds.literal = v; + }, + }, + { + path: "partKinds.ruleRef", + get: (f) => f.partKinds.ruleRef, + set: (f, v) => { + f.partKinds.ruleRef = v; + }, + }, + { + path: "partKinds.wildcard", + get: (f) => f.partKinds.wildcard, + set: (f, v) => { + f.partKinds.wildcard = v; + }, + }, + { + path: "partKinds.number", + get: (f) => f.partKinds.number, + set: (f, v) => { + f.partKinds.number = v; + }, + }, + { + path: "values.attachProb", + get: (f) => f.values.attachProb, + set: (f, v) => { + f.values.attachProb = v; + }, + }, + { + path: "spacing.altProb", + get: (f) => f.spacing.altProb, + set: (f, v) => { + f.spacing.altProb = v; + }, + }, + { + path: "spacing.ruleProb", + get: (f) => f.spacing.ruleProb, + set: (f, v) => { + f.spacing.ruleProb = v; + }, + }, + { + path: "spacing.modes.required", + get: (f) => f.spacing.modes.required, + set: (f, v) => { + f.spacing.modes.required = v; + }, + }, + { + path: "spacing.modes.optional", + get: (f) => f.spacing.modes.optional, + set: (f, v) => { + f.spacing.modes.optional = v; + }, + }, + { + path: "spacing.modes.none", + get: (f) => f.spacing.modes.none, + set: (f, v) => { + f.spacing.modes.none = v; + }, + }, + { + path: "spacing.modes.auto", + get: (f) => f.spacing.modes.auto, + set: (f, v) => { + f.spacing.modes.auto = v; + }, + }, + { + path: "groups.optionalProb", + get: (f) => f.groups.optionalProb, + set: (f, v) => { + f.groups.optionalProb = v; + }, + }, + { + path: "groups.repeatProb", + get: (f) => f.groups.repeatProb, + set: (f, v) 
=> { + f.groups.repeatProb = v; + }, + }, +]; + // ── Generation config ───────────────────────────────────────────────────────── export type GeneratorConfig = { @@ -280,19 +394,25 @@ type SpacingMode = (typeof SPACING_MODES)[number]; function spacingAnnotation(mode: SpacingMode): string { return ` [spacing=${mode}]`; } -function pickSpacingMode( + +/** + * Pick a spacing mode by weight. Returns `undefined` if every mode + * weight is `0` so the caller can skip the annotation entirely + * (rather than silently falling back to a uniform pick). + */ +export function pickSpacingMode( rng: () => number, weights: SpacingModeWeights, -): SpacingMode { +): SpacingMode | undefined { const entries: ReadonlyArray = [ ["required", weights.required], ["optional", weights.optional], ["none", weights.none], ["auto", weights.auto], ]; - // Fall back to uniform if all weights are 0. - return weightedPick(rng, entries) ?? pick(rng, SPACING_MODES); + return weightedPick(rng, entries); } + // ── Internal state while building a single grammar ──────────────────────────── type RuleState = { @@ -413,6 +533,13 @@ export function buildRandomGrammar( const varCounter = { n: 0 }; let usesValueExpressions = false; + // Hoist clamped probabilities out of the inner loop. + const optionalProb = clamp01(features.groups.optionalProb); + const repeatProb = clamp01(features.groups.repeatProb); + const valueAttachProb = clamp01(features.values.attachProb); + const altSpacingProb = clamp01(features.spacing.altProb); + const ruleSpacingProb = clamp01(features.spacing.ruleProb); + // Build rules in reverse so rule i can reference rules > i. const ruleStates: RuleState[] = new Array(ruleCount); @@ -469,36 +596,49 @@ export function buildRandomGrammar( // Optionally wrap the part in an optional / repeat // group. The two probabilities are rolled // independently and combined into a single quantifier. 
- const optional = rng() < clamp01(features.groups.optionalProb); - const repeat = rng() < clamp01(features.groups.repeatProb); + // + // Captures inside a quantifier group are not visible + // to the alternate's value expression (they would be + // optional or aggregated), so we keep capture parts + // unwrapped. This decouples the `groups.*Prob` and + // `values.attachProb` dimensions: every capture stays + // exposed regardless of group rolls. + const canWrap = captureVar === undefined; + const optional = canWrap && rng() < optionalProb; + const repeat = canWrap && rng() < repeatProb; let partText = innerText; - if (optional && repeat) partText = `(${innerText})*`; - else if (optional) partText = `(${innerText})?`; - else if (repeat) partText = `(${innerText})+`; - - // Captures inside a quantifier group are not visible to - // the alternate's value expression (they're either - // optional or aggregated), so only expose unwrapped - // captures as bound variables. - if (captureVar !== undefined && !optional && !repeat) { + let repCount = 1; + if (optional && repeat) { + partText = `(${innerText})*`; + // `*` matches 0..N: emit 0..2 copies. + repCount = intInRange(rng, 0, 2); + } else if (optional) { + partText = `(${innerText})?`; + // `?` matches 0..1. + repCount = intInRange(rng, 0, 1); + } else if (repeat) { + partText = `(${innerText})+`; + // `+` matches 1..N: emit 1..3 copies. + repCount = intInRange(rng, 1, 3); + } + + if (captureVar !== undefined) { altBoundVars.push(captureVar); } partTexts.push(partText); - // The matching input includes the inner expansion - // exactly once: this satisfies `?` (present), `+` - // (one repetition), and `*` (one repetition) alike. - partMatch.push(...innerMatch); + // Replicate the inner expansion `repCount` times to + // exercise multi-rep semantics for `+` and `*` (and + // zero-rep elision for `?` and `*`). 
+ for (let r = 0; r < repCount; r++) + partMatch.push(...innerMatch); } // Build value expression for this alternative if enabled. // `features.values.attachProb` is the per-alternate attach // probability (only eligible when captures exist). let valueText = ""; - if ( - altBoundVars.length > 0 && - rng() < clamp01(features.values.attachProb) - ) { + if (altBoundVars.length > 0 && rng() < valueAttachProb) { const expr = buildValueExpr(rng, altBoundVars); valueText = ` -> ${expr}`; state.hasValue = true; @@ -513,11 +653,15 @@ export function buildRandomGrammar( } } - // Per-alternate spacing annotation. + // Per-alternate spacing annotation. If every mode weight + // is 0 the picker returns undefined and we skip the + // annotation rather than fall back to uniform. let spacingText = ""; - if (rng() < clamp01(features.spacing.altProb)) { + if (rng() < altSpacingProb) { const mode = pickSpacingMode(rng, features.spacing.modes); - spacingText = spacingAnnotation(mode); + if (mode !== undefined) { + spacingText = spacingAnnotation(mode); + } } state.altTexts.push( @@ -536,12 +680,14 @@ export function buildRandomGrammar( const lines: string[] = []; for (let i = ruleCount - 1; i >= 0; i--) { const state = ruleStates[i]; - // Rule-level spacing annotation. + // Rule-level spacing annotation. Skip when every mode weight + // is 0 (see per-alternate annotation above). 
let ruleSpacing = ""; - if (rng() < clamp01(features.spacing.ruleProb)) { - ruleSpacing = spacingAnnotation( - pickSpacingMode(rng, features.spacing.modes), - ); + if (rng() < ruleSpacingProb) { + const mode = pickSpacingMode(rng, features.spacing.modes); + if (mode !== undefined) { + ruleSpacing = spacingAnnotation(mode); + } } lines.push( `<${ruleName(i)}>${ruleSpacing} = ${state.altTexts.join(" | ")};`, diff --git a/ts/packages/actionGrammar/test/grammarFuzz.spec.ts b/ts/packages/actionGrammar/test/grammarFuzz.spec.ts index ae40c8de2..de62c6f29 100644 --- a/ts/packages/actionGrammar/test/grammarFuzz.spec.ts +++ b/ts/packages/actionGrammar/test/grammarFuzz.spec.ts @@ -15,44 +15,14 @@ import { runFuzz, + mergeFeatures, type FuzzConfig, type FuzzResult, + type FeaturesOverride, DEFAULT_CONFIG, MINIMAL_FEATURES, } from "../src/fuzz/fuzzHarness.js"; -// ── Helpers ─────────────────────────────────────────────────────────────────── - -/** - * Deep-merge a partial feature override over the defaults. Each - * sub-group (`partKinds`, `values`, `spacing`, `groups`) is merged - * independently, with `spacing.modes` merged one level deeper. - */ -type FeaturesOverride = { - partKinds?: Partial; - values?: Partial; - spacing?: Partial> & { - modes?: Partial; - }; - groups?: Partial; -}; - -function mergeFeatures( - base: FuzzConfig["features"], - over: FeaturesOverride | undefined, -): FuzzConfig["features"] { - return { - partKinds: { ...base.partKinds, ...(over?.partKinds ?? {}) }, - values: { ...base.values, ...(over?.values ?? {}) }, - spacing: { - ...base.spacing, - ...(over?.spacing ?? {}), - modes: { ...base.spacing.modes, ...(over?.spacing?.modes ?? {}) }, - }, - groups: { ...base.groups, ...(over?.groups ?? {}) }, - }; -} - /** * Run the harness and emit one `it()` per result so Jest reports * individual grammar/input failures. 
diff --git a/ts/packages/actionGrammar/test/grammarFuzzPicker.spec.ts b/ts/packages/actionGrammar/test/grammarFuzzPicker.spec.ts new file mode 100644 index 000000000..08280f3ba --- /dev/null +++ b/ts/packages/actionGrammar/test/grammarFuzzPicker.spec.ts @@ -0,0 +1,143 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * Unit tests for the fuzz generator's weighted-selection helpers. + * + * Covers the statistical contract of `weightedPick` and the + * mode-skipping behavior of `pickSpacingMode`, both of which are + * relied on by every other fuzz dimension. + */ + +import { + makeRng, + weightedPick, + pickSpacingMode, + clamp01, +} from "../src/fuzz/fuzzHarness.js"; + +describe("weightedPick", () => { + it("returns undefined when all weights are <= 0", () => { + const rng = makeRng(1); + expect( + weightedPick(rng, [ + ["a", 0], + ["b", 0], + ["c", -1], + ]), + ).toBeUndefined(); + }); + + it("never returns a zero-weight entry", () => { + const rng = makeRng(42); + for (let i = 0; i < 1000; i++) { + const r = weightedPick(rng, [ + ["a", 1], + ["b", 0], + ["c", 1], + ]); + expect(r === "a" || r === "c").toBe(true); + } + }); + + it("approximates the requested weight ratios", () => { + const rng = makeRng(0xc0ffee); + const counts: Record = { a: 0, b: 0, c: 0 }; + const N = 20000; + for (let i = 0; i < N; i++) { + const r = weightedPick(rng, [ + ["a", 8], + ["b", 1], + ["c", 1], + ])!; + counts[r]++; + } + // Expected: a ≈ 80%, b ≈ 10%, c ≈ 10%. Allow ±2% slack. 
+ expect(counts.a / N).toBeGreaterThan(0.78); + expect(counts.a / N).toBeLessThan(0.82); + expect(counts.b / N).toBeGreaterThan(0.08); + expect(counts.b / N).toBeLessThan(0.12); + expect(counts.c / N).toBeGreaterThan(0.08); + expect(counts.c / N).toBeLessThan(0.12); + }); + + it("is uniform when all positive weights are equal", () => { + const rng = makeRng(7); + const counts: Record = { a: 0, b: 0, c: 0, d: 0 }; + const N = 20000; + for (let i = 0; i < N; i++) { + const r = weightedPick(rng, [ + ["a", 1], + ["b", 1], + ["c", 1], + ["d", 1], + ])!; + counts[r]++; + } + for (const k of Object.keys(counts)) { + expect(counts[k] / N).toBeGreaterThan(0.22); + expect(counts[k] / N).toBeLessThan(0.28); + } + }); +}); + +describe("pickSpacingMode", () => { + it("returns undefined when every mode weight is 0", () => { + const rng = makeRng(1); + expect( + pickSpacingMode(rng, { + required: 0, + optional: 0, + none: 0, + auto: 0, + }), + ).toBeUndefined(); + }); + + it("never picks a zeroed mode", () => { + const rng = makeRng(99); + for (let i = 0; i < 500; i++) { + const r = pickSpacingMode(rng, { + required: 1, + optional: 0, + none: 1, + auto: 0, + }); + expect(r === "required" || r === "none").toBe(true); + } + }); + + it("biases toward the heaviest mode", () => { + const rng = makeRng(0xdeadbeef); + const counts: Record = { + required: 0, + optional: 0, + none: 0, + auto: 0, + }; + const N = 10000; + for (let i = 0; i < N; i++) { + const r = pickSpacingMode(rng, { + required: 7, + optional: 1, + none: 1, + auto: 1, + })!; + counts[r]++; + } + // required ≈ 70%, others ≈ 10% each. + expect(counts.required / N).toBeGreaterThan(0.66); + expect(counts.required / N).toBeLessThan(0.74); + }); +}); + +describe("clamp01", () => { + it("clamps below 0 and above 1", () => { + expect(clamp01(-1)).toBe(0); + expect(clamp01(0)).toBe(0); + expect(clamp01(0.5)).toBe(0.5); + expect(clamp01(1)).toBe(1); + expect(clamp01(2)).toBe(1); + expect(clamp01(NaN)).toBe(0); + }); +});