From ab7bdc3a648f3a2b315f77ee3e0a2fd74c00b978 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Wed, 22 Apr 2026 23:53:41 +0200 Subject: [PATCH] feat(sexpr): count-compare lowering + matches parse-time regex check + doc clarifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three followups from the v0.4.3 sexpr audit: 1. `(> (count ) N)` now lowers to a new `CountCompare` expr variant that evaluates the count against the store once and compares to an integer threshold. Previously the audit documented `(count ...)` as "meant for numeric comparisons" but no lowering existed — you could only use it as a standalone predicate (equivalent to `(exists true)`). Now every comparison operator (>, <, >=, <=, =, !=) accepts a `(count ...)` LHS with an integer RHS. 2. `(matches "")` validates the regex at lower time instead of silently returning false at runtime on malformed patterns. Closes the "mysterious empty result" footgun the audit flagged — users typing `(matches id "[")` used to see nothing match and spend time debugging; now they get a parse error with the compiler's message. Non-literal patterns (rare; from field interpolation) still use the runtime-lenient path. 3. docs/getting-started.md gains dedicated sections for count comparisons and regex validation, plus a note that dotted accessors like `links.satisfies.target` are not supported — use the purpose-built `linked-by` / `linked-from` / `linked-to` / `links-count` predicates. Closes the doc drift where the filter language's scope was implicit. Regression tests: - count_compare_gt_threshold — basic shape lowers - count_compare_requires_integer_rhs — string on the right errors - count_compare_all_six_operators_lower — all six comparison ops - matches_rejects_invalid_regex_at_lower_time — unclosed brackets produce parse error - matches_accepts_valid_regex — good patterns still work Updated sexpr_fuzz.rs expr_to_sexpr pretty-printer to handle the new Expr::CountCompare variant (fuzz roundtrip stays equivalent). Updated sexpr_predicate_matrix.rs test that pinned the old lenient regex behavior to the new strict behavior. Implements: REQ-004 Refs: DD-058 Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/getting-started.md | 38 ++++ rivet-core/src/sexpr_eval.rs | 203 +++++++++++++++++++++ rivet-core/tests/sexpr_fuzz.rs | 11 ++ rivet-core/tests/sexpr_predicate_matrix.rs | 16 +- 4 files changed, 265 insertions(+), 3 deletions(-) diff --git a/docs/getting-started.md b/docs/getting-started.md index 9a443ea..04ffe48 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -777,6 +777,44 @@ Quantifiers: `forall`, `exists`, `count`. Graph: `reachable-from`, `reachable-to`. +### Count comparisons + +`(count )` as a standalone form matches artifacts that exist in +the scope (equivalent to `(exists true)`). Wrapped in a +comparison, it counts artifacts and compares to a threshold: + +```bash +# At least one failing test? +rivet list --filter '(> (count (and (= type "test") (= status "failed"))) 0)' + +# Exactly three approved requirements with the safety tag? +rivet list --filter '(= (count (and (= type "requirement") (= status "approved") (has-tag "safety"))) 3)' +``` + +All six operators (`>`, `<`, `>=`, `<=`, `=`, `!=`) accept `(count …)` on +the left and an integer literal on the right. Any other shape produces +a parse error at lower time — no silent match failures. + +### Regex patterns in `matches` + +`(matches "")` validates the regex **at parse time**. An +invalid pattern produces an error with the compiler's complaint, not a +silent empty result. Doubled backslashes are needed inside the s-expr +string (`"\\d+"` not `"\d+"`): + +```bash +rivet list --filter '(matches id "^REQ-\\d+$")' # OK +rivet list --filter '(matches id "[unclosed")' # parse error with clear message +``` + +### Field accessors + +Only single-name field accessors are supported today. Dotted forms like +`links.satisfies.target` parse as a single symbol and currently resolve +to the empty string — they do not navigate nested structure. To filter +on links, use the purpose-built predicates (`linked-by`, `linked-from`, +`linked-to`, `links-count`) rather than field-path navigation. + --- ## Variant Management (Product Line Engineering) diff --git a/rivet-core/src/sexpr_eval.rs b/rivet-core/src/sexpr_eval.rs index 47e49cb..450a2b8 100644 --- a/rivet-core/src/sexpr_eval.rs +++ b/rivet-core/src/sexpr_eval.rs @@ -74,6 +74,12 @@ pub enum Expr { Exists(Box, Box), /// `(count )` — number of artifacts matching scope (compared via parent). Count(Box), + /// `(> (count ) N)` and friends — count-compared-to-threshold. + /// Created by the lowering path when a comparison's left operand is a + /// `(count ...)` form. Evaluates the count once and compares it to the + /// threshold — so authors can finally write + /// `(> (count (and (= type "test") (= status "passed"))) 10)`. + CountCompare(Box, CompOp, i64), // ── Graph traversal ───────────────────────────────────────────── /// `(reachable-from "REQ-001" "satisfies")` — true if current artifact is @@ -269,6 +275,34 @@ pub fn check(expr: &Expr, ctx: &EvalContext) -> bool { check(_scope, &scope_ctx) }) } + Expr::CountCompare(scope, op, threshold) => { + // Count artifacts matching the scope, compare to threshold. + // Requires store access; returns false when the filter is + // being evaluated without a store (e.g. single-artifact + // contexts) — matching the same shape as Forall / Exists. + let Some(store) = ctx.store else { + return false; + }; + let count: i64 = store + .iter() + .filter(|a| { + let scope_ctx = EvalContext { + artifact: a, + graph: ctx.graph, + store: ctx.store, + }; + check(scope, &scope_ctx) + }) + .count() as i64; + match op { + CompOp::Gt => count > *threshold, + CompOp::Lt => count < *threshold, + CompOp::Ge => count >= *threshold, + CompOp::Le => count <= *threshold, + CompOp::Eq => count == *threshold, + CompOp::Ne => count != *threshold, + } + } // Graph traversal Expr::ReachableFrom(start_id, link_type) => { @@ -662,6 +696,38 @@ fn lower_list(node: &crate::sexpr::SyntaxNode, errors: &mut Vec) -> }); return None; } + + // Special case: `(> (count ) N)` and friends. + // Detect a `(count ...)` form on the left, integer literal on + // the right, and lower to `CountCompare` so the count is + // evaluated over the store and compared to the threshold. + // This closes the audit-flagged gap where `count` had no + // usable lowering as a numeric operand. + if let Some(scope_expr) = try_extract_count_scope(&args[0], errors) { + let threshold = match extract_integer_literal(&args[1]) { + Some(n) => n, + None => { + errors.push(LowerError { + offset, + message: format!( + "'{form_name}' with '(count ...)' on the left requires an integer on the right — `(count ...)` counts artifacts and must be compared to a number" + ), + }); + return None; + } + }; + let op = match form_name.as_str() { + "=" => CompOp::Eq, + "!=" => CompOp::Ne, + ">" => CompOp::Gt, + "<" => CompOp::Lt, + ">=" => CompOp::Ge, + "<=" => CompOp::Le, + _ => unreachable!(), + }; + return Some(Expr::CountCompare(Box::new(scope_expr), op, threshold)); + } + let acc = extract_accessor(&args[0])?; let val = extract_value(&args[1])?; Some(match form_name.as_str() { @@ -719,6 +785,27 @@ fn lower_list(node: &crate::sexpr::SyntaxNode, errors: &mut Vec) -> } let acc = extract_accessor(&args[0])?; let val = extract_value(&args[1])?; + // Validate the regex at lower time when the pattern is a + // string literal. Without this, an invalid pattern would + // silently match nothing at runtime (consistent with our + // lenient filter semantics, but users hit "mysterious empty + // results" before the audit surfaced this). Non-literal + // patterns (rare; they'd come from field interpolation) + // retain the runtime-lenient behaviour. + if let Value::Str(ref pattern) = val { + if let Err(e) = regex::RegexBuilder::new(pattern) + .size_limit(1 << 20) + .build() + { + errors.push(LowerError { + offset, + message: format!( + "'matches' regex pattern does not compile: {e}" + ), + }); + return None; + } + } Some(Expr::Matches(acc, val)) } "contains" => { @@ -979,6 +1066,55 @@ fn extract_value(node: &crate::sexpr::SyntaxNode) -> Option { } } +/// If the node is a `(count )` form, lower the inner scope and +/// return the lowered Expr. Returns None if the node is not a list whose +/// head symbol is exactly "count" (other lists fall through to the normal +/// accessor extraction path). Errors during scope lowering are pushed +/// into the same error accumulator the caller is using. +fn try_extract_count_scope( + node: &crate::sexpr::SyntaxNode, + errors: &mut Vec, +) -> Option { + use crate::sexpr::SyntaxKind as SK; + if node.kind() != SK::List { + return None; + } + let children: Vec<_> = node + .children() + .filter(|c| matches!(c.kind(), SK::List | SK::Atom)) + .collect(); + if children.len() < 2 { + return None; + } + let head = extract_symbol(&children[0])?; + if head != "count" { + return None; + } + if children.len() != 2 { + errors.push(LowerError { + offset: node.text_range().start().into(), + message: "'count' requires exactly 1 argument (scope)".into(), + }); + return None; + } + lower_child(&children[1], errors) +} + +/// Extract an integer literal from an atom node. Returns None for any +/// other shape. Used by the comparison lowering to validate that +/// `(> (count ...) N)` has an actual integer on the right-hand side. +fn extract_integer_literal(node: &crate::sexpr::SyntaxNode) -> Option { + use crate::sexpr::SyntaxKind as SK; + if node.kind() != SK::Atom { + return None; + } + let token = node.first_token()?; + if token.kind() != SK::IntLit { + return None; + } + token.text().parse::().ok() +} + // ── Tests ─────────────────────────────────────────────────────────────── #[cfg(test)] @@ -1202,6 +1338,73 @@ mod tests { assert!(result.is_err()); } + // ── sexpr audit followups — v0.4.3 ────────────────────────────── + + // Followup #1: `(> (count ) N)` now lowers to CountCompare + // and evaluates against the store, so users can finally gate on + // "more than N matching artifacts". + #[test] + #[cfg_attr(miri, ignore)] + fn count_compare_gt_threshold() { + let expr = parse_filter("(> (count (= type \"requirement\")) 0)"); + assert!( + expr.is_ok(), + "(> (count ...) 0) must parse: {:?}", + expr.err() + ); + } + + #[test] + #[cfg_attr(miri, ignore)] + fn count_compare_requires_integer_rhs() { + // Passing a string on the right must error with a clear + // message pointing at the contract. + let expr = parse_filter("(> (count (= type \"test\")) \"ten\")"); + assert!(expr.is_err(), "non-integer RHS must error"); + let msg = format!("{:?}", expr.err().unwrap()); + assert!( + msg.contains("integer") || msg.contains("count"), + "error must mention the integer requirement: {msg}" + ); + } + + #[test] + #[cfg_attr(miri, ignore)] + fn count_compare_all_six_operators_lower() { + for op in &[">", "<", ">=", "<=", "=", "!="] { + let input = format!("({op} (count (= status \"approved\")) 5)"); + let expr = parse_filter(&input); + assert!( + expr.is_ok(), + "{op} must lower with count LHS: {:?}", + expr.err() + ); + } + } + + // Followup #2: invalid regex pattern in (matches) now fails at + // lower time with a clear message, instead of silently matching + // nothing at runtime. + #[test] + #[cfg_attr(miri, ignore)] + fn matches_rejects_invalid_regex_at_lower_time() { + let expr = parse_filter("(matches title \"[unclosed\")"); + assert!(expr.is_err(), "invalid regex must error at lower time"); + let msg = format!("{:?}", expr.err().unwrap()); + assert!( + msg.to_lowercase().contains("regex") + || msg.to_lowercase().contains("compile"), + "error must mention regex/compile: {msg}" + ); + } + + #[test] + #[cfg_attr(miri, ignore)] + fn matches_accepts_valid_regex() { + let expr = parse_filter("(matches title \"^REQ-\\\\d+$\")"); + assert!(expr.is_ok(), "valid regex must parse: {:?}", expr.err()); + } + // ── Logical equivalence unit tests ────────────────────────────── #[test] diff --git a/rivet-core/tests/sexpr_fuzz.rs b/rivet-core/tests/sexpr_fuzz.rs index dfb508d..a3f825d 100644 --- a/rivet-core/tests/sexpr_fuzz.rs +++ b/rivet-core/tests/sexpr_fuzz.rs @@ -225,6 +225,17 @@ fn expr_to_sexpr(e: &Expr) -> String { format!("(exists {} {})", expr_to_sexpr(scope), expr_to_sexpr(pred)) } Expr::Count(scope) => format!("(count {})", expr_to_sexpr(scope)), + Expr::CountCompare(scope, op, n) => { + let op_s = match op { + sexpr_eval::CompOp::Gt => ">", + sexpr_eval::CompOp::Lt => "<", + sexpr_eval::CompOp::Ge => ">=", + sexpr_eval::CompOp::Le => "<=", + sexpr_eval::CompOp::Eq => "=", + sexpr_eval::CompOp::Ne => "!=", + }; + format!("({op_s} (count {}) {n})", expr_to_sexpr(scope)) + } Expr::ReachableFrom(start, lt) => format!( "(reachable-from {} {})", value_to_sexpr(start), diff --git a/rivet-core/tests/sexpr_predicate_matrix.rs b/rivet-core/tests/sexpr_predicate_matrix.rs index 8dc0693..a596810 100644 --- a/rivet-core/tests/sexpr_predicate_matrix.rs +++ b/rivet-core/tests/sexpr_predicate_matrix.rs @@ -292,9 +292,19 @@ fn matches_no_match_for_non_matching_regex() { } #[test] -fn matches_invalid_regex_returns_false_safely() { - // Malformed regex — evaluator returns false instead of panicking. - assert!(!ok(r#"(matches id "[")"#, &base_artifact())); +fn matches_invalid_regex_is_parse_error() { + // v0.4.3: malformed regex patterns are rejected at lower time with + // a clear error rather than silently matching nothing at runtime. + // Previously this returned false safely; audit flagged that users + // mistake silent empty-match for "filter excluded everything" and + // waste debugging time. `err()` here exercises the lower path and + // asserts the diagnostic names the regex compile failure. + let errs = err(r#"(matches id "[")"#); + assert!( + errs.iter() + .any(|e| e.message.to_lowercase().contains("regex")), + "invalid regex must produce a parse error mentioning 'regex': got {errs:?}" + ); } #[test]