Skip to content

Commit

Permalink
cli: change --no-captures to --captures (all|implicit|none)
Browse files Browse the repository at this point in the history
When we added the WhichCaptures type, we didn't update the CLI to expose
the full functionality. This change does that.
  • Loading branch information
BurntSushi committed Nov 1, 2023
1 parent 6b72eec commit 662a8b9
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 20 deletions.
2 changes: 1 addition & 1 deletion regex-automata/src/nfa/thompson/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ const INIT: u64 = 14695981039346656037;
/// Specifically, one could observe the difference with std's hashmap via
/// something like the following benchmark:
///
/// hyperfine "regex-cli debug thompson -qr --no-captures '\w{90} ecurB'"
/// hyperfine "regex-cli debug thompson -qr --captures none '\w{90} ecurB'"
///
/// But to observe that difference, you'd have to modify the code to use
/// std's hashmap.
Expand Down
2 changes: 1 addition & 1 deletion regex-automata/src/nfa/thompson/range_trie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ impl State {
// Benchmarks suggest that binary search is just a bit faster than
// straight linear search. Specifically when using the debug tool:
//
// hyperfine "regex-cli debug thompson -qr --no-captures '\w{90} ecurB'"
// hyperfine "regex-cli debug thompson -qr --captures none '\w{90} ecurB'"
binary_search(&self.transitions, |t| range.start <= t.range.end)
}

Expand Down
52 changes: 52 additions & 0 deletions regex-cli/args/flags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,55 @@ impl std::str::FromStr for MatchKind {
Ok(MatchKind { kind })
}
}

/// Provides an implementation of the --captures flag, for use with Thompson
/// NFA configuration.
///
/// This is a thin wrapper around
/// `regex_automata::nfa::thompson::WhichCaptures`. It exists so that the flag
/// value can be given `Default` and `FromStr` implementations that are
/// specific to the command line tool.
#[derive(Debug)]
pub struct WhichCaptures {
    /// The capture configuration selected by the flag.
    pub which: regex_automata::nfa::thompson::WhichCaptures,
}

impl WhichCaptures {
    /// Help text for the `--captures` flag, built from the flag syntax, a
    /// short summary and a longer description of each accepted value
    /// (`all`, `implicit` and `none`).
    pub const USAGE: Usage = Usage::new(
        "--captures <which>",
        "One of: all, implicit or none.",
        r#"
Selects which capture states should be included in the Thompson NFA. The
choices are 'all' (the default), 'implicit' or 'none'.
'all' means that both explicit and implicit capture states are included.
'implicit' means that only implicit capture states are included. That is, the
Thompson NFA will only be able to report the overall match offsets and not the
match offsets of each explicit capture group.
'none' means that no capture states will be included. This is useful when
capture states aren't needed (like when building a DFA) or if they aren't
supported (like when building a reverse NFA).
"#,
    );
}

impl Default for WhichCaptures {
    /// Returns the flag value used when `--captures` is not given: `All`,
    /// matching the "'all' (the default)" wording in the usage text.
    fn default() -> WhichCaptures {
        let which = regex_automata::nfa::thompson::WhichCaptures::All;
        WhichCaptures { which }
    }
}

impl std::str::FromStr for WhichCaptures {
    type Err = anyhow::Error;

    /// Parses the value given to `--captures`.
    ///
    /// The accepted values are exactly `all`, `implicit` and `none`. The
    /// comparison is an exact string match, so any other input (including
    /// different casing) yields an error naming the unrecognized value.
    fn from_str(s: &str) -> anyhow::Result<WhichCaptures> {
        // Local alias so the match arms stay short. It is scoped to this
        // function and so cannot clash with the wrapper type of the same name.
        use regex_automata::nfa::thompson::WhichCaptures as Which;

        let which = match s {
            "all" => Which::All,
            "implicit" => Which::Implicit,
            "none" => Which::None,
            unk => anyhow::bail!("unrecognized captures option '{}'", unk),
        };
        Ok(Self { which })
    }
}
24 changes: 6 additions & 18 deletions regex-cli/args/thompson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ impl Configurable for Config {
Arg::Long("shrink") => {
self.thompson = self.thompson.clone().shrink(true);
}
Arg::Long("no-captures") => {
self.thompson = self
.thompson
.clone()
.which_captures(thompson::WhichCaptures::None);
Arg::Long("captures") => {
let which: flags::WhichCaptures =
args::parse(p, "--captures")?;
self.thompson =
self.thompson.clone().which_captures(which.which);
}
Arg::Long("line-terminator") => {
let byte: flags::OneByte =
Expand Down Expand Up @@ -136,19 +136,7 @@ spent shrinking the NFA can lead to far larger savings in the subsequent DFA
determinization.
"#,
),
Usage::new(
"--no-captures",
"Disable capture states.",
r#"
Disables capture states. By default, NFAs include special "capture" states that
instruct some regex engines (like the PikeVM) to record offset positions in
ancillary state.
It can be useful to disable capture states in order to reduce "clutter" in the
automaton when debugging it. Also, at time of writing, reverse NFAs require
that capture groups are disabled.
"#,
),
flags::WhichCaptures::USAGE,
Usage::new(
"--line-terminator",
"Set the line terminator used by line anchors.",
Expand Down

0 comments on commit 662a8b9

Please sign in to comment.