diff --git a/CHANGELOG.md b/CHANGELOG.md
index 25e7ecab9..576984721 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -187,7 +187,7 @@ More specifically, any ASCII character except for `[0-9A-Za-z<>]` can now be
escaped. Also, a new routine, `is_escapeable_character`, has been added to
`regex-syntax` to query whether a character is escapeable or not.
* [FEATURE #547](https://github.com/rust-lang/regex/issues/547):
-Add `Regex::captures_at`. This filles a hole in the API, but doesn't otherwise
+Add `Regex::captures_at`. This fills a hole in the API, but doesn't otherwise
introduce any new expressive power.
* [FEATURE #595](https://github.com/rust-lang/regex/issues/595):
Capture group names are now Unicode-aware. They can now begin with either a `_`
diff --git a/regex-automata/src/dfa/automaton.rs b/regex-automata/src/dfa/automaton.rs
index 2be080425..7e2be9a15 100644
--- a/regex-automata/src/dfa/automaton.rs
+++ b/regex-automata/src/dfa/automaton.rs
@@ -1074,7 +1074,7 @@ pub unsafe trait Automaton {
/// // encoding of any Unicode scalar value except for 'a', 'b' or 'c'.
/// // That translates to a much more complicated DFA, and also
/// // inhibits the 'accelerator' optimization that we are trying to
- /// // demostrate in this example.
+ /// // demonstrate in this example.
/// .syntax(syntax::Config::new().unicode(false).utf8(false))
/// .build("[^abc]+a")?;
///
diff --git a/regex-automata/src/dfa/dense.rs b/regex-automata/src/dfa/dense.rs
index 00086cc94..0ab923f55 100644
--- a/regex-automata/src/dfa/dense.rs
+++ b/regex-automata/src/dfa/dense.rs
@@ -2109,7 +2109,7 @@ impl> DFA {
/// let mut buf = vec![0; original_dfa.write_to_len()];
/// // This is guaranteed to succeed, because the only serialization error
/// // that can occur is when the provided buffer is too small. But
- /// // write_to_len guarantees a correct sie.
+ /// // write_to_len guarantees a correct size.
/// let written = original_dfa.write_to_native_endian(&mut buf).unwrap();
/// // But this is not guaranteed to succeed! In particular,
/// // deserialization requires proper alignment for &[u32], but our buffer
@@ -3336,7 +3336,7 @@ impl<'a> TransitionTable<&'a [u32]> {
///
/// # Safety
///
- /// This routine is not safe because it does not check the valdity of the
+ /// This routine is not safe because it does not check the validity of the
/// transition table itself. In particular, the transition table can be
/// quite large, so checking its validity can be somewhat expensive. An
/// invalid transition table is not safe because other code may rely on the
@@ -3929,7 +3929,7 @@ impl<'a> StartTable<&'a [u32]> {
///
/// # Safety
///
- /// This routine is not safe because it does not check the valdity of the
+ /// This routine is not safe because it does not check the validity of the
/// starting state IDs themselves. In particular, the number of starting
/// IDs can be of variable length, so it's possible that checking their
/// validity cannot be done in constant time. An invalid starting state
diff --git a/regex-automata/src/dfa/determinize.rs b/regex-automata/src/dfa/determinize.rs
index 0a645bc94..19f99f5d6 100644
--- a/regex-automata/src/dfa/determinize.rs
+++ b/regex-automata/src/dfa/determinize.rs
@@ -539,7 +539,7 @@ impl<'a> Runner<'a> {
}
let state = builder.to_state();
// States use reference counting internally, so we only need to count
- // their memroy usage once.
+ // their memory usage once.
self.memory_usage_state += state.memory_usage();
self.builder_states.push(state.clone());
self.cache.insert(state, id);
diff --git a/regex-automata/src/dfa/minimize.rs b/regex-automata/src/dfa/minimize.rs
index ccb16a923..fea925bdc 100644
--- a/regex-automata/src/dfa/minimize.rs
+++ b/regex-automata/src/dfa/minimize.rs
@@ -152,7 +152,7 @@ impl<'a> Minimizer<'a> {
// At this point, we now have a minimal partitioning of states, where
// each partition is an equivalence class of DFA states. Now we need to
- // use this partioning to update the DFA to only contain one state for
+ // use this partitioning to update the DFA to only contain one state for
// each partition.
// Create a map from DFA state ID to the representative ID of the
diff --git a/regex-automata/src/dfa/mod.rs b/regex-automata/src/dfa/mod.rs
index c289567d0..4bb870435 100644
--- a/regex-automata/src/dfa/mod.rs
+++ b/regex-automata/src/dfa/mod.rs
@@ -1,5 +1,5 @@
/*!
-A module for building and searching with determinstic finite automata (DFAs).
+A module for building and searching with deterministic finite automata (DFAs).
Like other modules in this crate, DFAs support a rich regex syntax with Unicode
features. DFAs also have extensive options for configuring the best space vs
@@ -267,7 +267,7 @@ the regexes in this module are almost universally slow to compile, especially
when they contain large Unicode character classes. For example, on my system,
compiling `\w{50}` takes about 1 second and almost 15MB of memory! (Compiling
a sparse regex takes about the same time but only uses about 1.2MB of
-memory.) Conversly, compiling the same regex without Unicode support, e.g.,
+memory.) Conversely, compiling the same regex without Unicode support, e.g.,
`(?-u)\w{50}`, takes under 1 millisecond and about 15KB of memory. For this
reason, you should only use Unicode character classes if you absolutely need
them! (They are enabled by default though.)
diff --git a/regex-automata/src/dfa/regex.rs b/regex-automata/src/dfa/regex.rs
index 2c3f4c21a..f39c1c055 100644
--- a/regex-automata/src/dfa/regex.rs
+++ b/regex-automata/src/dfa/regex.rs
@@ -590,7 +590,7 @@ impl Regex {
///
/// The type parameters are as follows:
///
-/// * `A` represents the type of the underyling DFA that implements the
+/// * `A` represents the type of the underlying DFA that implements the
/// [`Automaton`] trait.
///
/// The lifetime parameters are as follows:
diff --git a/regex-automata/src/hybrid/dfa.rs b/regex-automata/src/hybrid/dfa.rs
index 874b511e2..86963248f 100644
--- a/regex-automata/src/hybrid/dfa.rs
+++ b/regex-automata/src/hybrid/dfa.rs
@@ -32,7 +32,7 @@ use crate::{
},
};
-/// The mininum number of states that a lazy DFA's cache size must support.
+/// The minimum number of states that a lazy DFA's cache size must support.
///
/// This is checked at time of construction to ensure that at least some small
/// number of states can fit in the given capacity allotment. If we can't fit
@@ -2332,7 +2332,7 @@ impl<'i, 'c> Lazy<'i, 'c> {
"lazy DFA cache has been cleared {} times, \
which exceeds the limit of {}, \
AND its bytes searched per state is less \
- than the configured mininum of {}, \
+ than the configured minimum of {}, \
therefore lazy DFA is giving up \
(bytes searched since cache clear = {}, \
number of states = {})",
@@ -2348,7 +2348,7 @@ impl<'i, 'c> Lazy<'i, 'c> {
"lazy DFA cache has been cleared {} times, \
which exceeds the limit of {}, \
AND its bytes searched per state is greater \
- than the configured mininum of {}, \
+ than the configured minimum of {}, \
therefore lazy DFA is continuing! \
(bytes searched since cache clear = {}, \
number of states = {})",
@@ -2771,7 +2771,7 @@ enum StateSaver {
/// is stored in 'Saved' since it may have changed.
ToSave { id: LazyStateID, state: State },
/// An ID that of a state that has been persisted through a lazy DFA
- /// cache clearing. The ID recorded here corresonds to an ID that was
+ /// cache clearing. The ID recorded here corresponds to an ID that was
/// once marked as ToSave. The IDs are likely not equivalent even though
/// the states they point to are.
Saved(LazyStateID),
diff --git a/regex-automata/src/hybrid/mod.rs b/regex-automata/src/hybrid/mod.rs
index d6ed0e9f8..44e67e129 100644
--- a/regex-automata/src/hybrid/mod.rs
+++ b/regex-automata/src/hybrid/mod.rs
@@ -1,5 +1,5 @@
/*!
-A module for building and searching with lazy determinstic finite automata
+A module for building and searching with lazy deterministic finite automata
(DFAs).
Like other modules in this crate, lazy DFAs support a rich regex syntax with
diff --git a/regex-automata/src/hybrid/search.rs b/regex-automata/src/hybrid/search.rs
index 0d0bb8af5..f23283685 100644
--- a/regex-automata/src/hybrid/search.rs
+++ b/regex-automata/src/hybrid/search.rs
@@ -188,7 +188,7 @@ fn find_fwd_imp(
// mentioned above was a pretty big pessimization in some other
// cases. Namely, it resulted in too much ping-ponging into and out
// of the loop, which resulted in nearly ~2x regressions in search
- // time when compared to the originaly lazy DFA in the regex crate.
+ // time when compared to the originally lazy DFA in the regex crate.
// So I've removed the second loop unrolling that targets the
// self-transition case.
let mut prev_sid = sid;
diff --git a/regex-automata/src/lib.rs b/regex-automata/src/lib.rs
index 85fa27d73..62260a5ae 100644
--- a/regex-automata/src/lib.rs
+++ b/regex-automata/src/lib.rs
@@ -330,7 +330,7 @@ at search time and it requires the caller to opt into this.
There are other ways for regex engines to fail in this crate, but the above
two should represent the general theme of failures one can find. Dealing
-with these failures is, in part, one the reaponsibilities of the [meta regex
+with these failures is, in part, one the responsibilities of the [meta regex
engine](meta). Notice, for example, that the meta regex engine exposes an API
that never returns an error nor panics. It carefully manages all of the ways
in which the regex engines can fail and either avoids the predictable ones
diff --git a/regex-automata/src/meta/error.rs b/regex-automata/src/meta/error.rs
index f5911aebb..ea9a3160e 100644
--- a/regex-automata/src/meta/error.rs
+++ b/regex-automata/src/meta/error.rs
@@ -120,7 +120,7 @@ impl core::fmt::Display for BuildError {
///
/// The first is one where potential quadratic behavior has been detected.
/// In this case, whatever optimization that led to this behavior should be
-/// stopped, and the next best strategy shouldbe used.
+/// stopped, and the next best strategy should be used.
///
/// The second indicates that the underlying regex engine has failed for some
/// reason. This usually occurs because either a lazy DFA's cache has become
@@ -194,7 +194,7 @@ impl From for RetryError {
/// Note that this has convenient `From` impls that will automatically
/// convert a `MatchError` into this error. This works because the meta
/// regex engine internals guarantee that errors like `HaystackTooLong` and
-/// `UnsupportAnchored` will never occur. The only errors left are `Quit` and
+/// `UnsupportedAnchored` will never occur. The only errors left are `Quit` and
/// `GaveUp`, which both correspond to this "failure" error.
#[derive(Debug)]
pub(crate) struct RetryFailError {
diff --git a/regex-automata/src/meta/regex.rs b/regex-automata/src/meta/regex.rs
index cc6ac78cb..6e16ceedb 100644
--- a/regex-automata/src/meta/regex.rs
+++ b/regex-automata/src/meta/regex.rs
@@ -2277,7 +2277,7 @@ impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
/// explicitly separated from the the core regex object (such as a
/// [`thompson::NFA`](crate::nfa::thompson::NFA)) so that the read-only regex
/// object can be shared across multiple threads simultaneously without any
-/// synchronization. Conversly, a `Cache` must either be duplicated if using
+/// synchronization. Conversely, a `Cache` must either be duplicated if using
/// the same `Regex` from multiple threads, or else there must be some kind of
/// synchronization that guarantees exclusive access while it's in use by one
/// thread.
diff --git a/regex-automata/src/nfa/thompson/backtrack.rs b/regex-automata/src/nfa/thompson/backtrack.rs
index 4358652b8..1b4a82fc0 100644
--- a/regex-automata/src/nfa/thompson/backtrack.rs
+++ b/regex-automata/src/nfa/thompson/backtrack.rs
@@ -825,7 +825,7 @@ impl BoundedBacktracker {
#[inline]
pub fn max_haystack_len(&self) -> usize {
// The capacity given in the config is "bytes of heap memory," but the
- // capacity we use here is "number of bits." So conver the capacity in
+ // capacity we use here is "number of bits." So convert the capacity in
// bytes to the capacity in bits.
let capacity = 8 * self.get_config().get_visited_capacity();
let blocks = div_ceil(capacity, Visited::BLOCK_SIZE);
@@ -1845,7 +1845,7 @@ impl Visited {
/// Reset this visited set to work with the given bounded backtracker.
fn reset(&mut self, re: &BoundedBacktracker) {
// The capacity given in the config is "bytes of heap memory," but the
- // capacity we use here is "number of bits." So conver the capacity in
+ // capacity we use here is "number of bits." So convert the capacity in
// bytes to the capacity in bits.
let capacity = 8 * re.get_config().get_visited_capacity();
let blocks = div_ceil(capacity, Visited::BLOCK_SIZE);
diff --git a/regex-automata/src/nfa/thompson/compiler.rs b/regex-automata/src/nfa/thompson/compiler.rs
index f29e1e33d..497fc62b4 100644
--- a/regex-automata/src/nfa/thompson/compiler.rs
+++ b/regex-automata/src/nfa/thompson/compiler.rs
@@ -1008,7 +1008,7 @@ impl Compiler {
/// but no more than `max` times.
///
/// When `greedy` is true, then the preference is for the expression to
- /// match as much as possible. Otheriwse, it will match as little as
+ /// match as much as possible. Otherwise, it will match as little as
/// possible.
fn c_bounded(
&self,
@@ -1074,7 +1074,7 @@ impl Compiler {
/// integer is likely to run afoul of any configured size limits.)
///
/// When `greedy` is true, then the preference is for the expression to
- /// match as much as possible. Otheriwse, it will match as little as
+ /// match as much as possible. Otherwise, it will match as little as
/// possible.
fn c_at_least(
&self,
@@ -1155,7 +1155,7 @@ impl Compiler {
/// times.
///
/// When `greedy` is true, then the preference is for the expression to
- /// match as much as possible. Otheriwse, it will match as little as
+ /// match as much as possible. Otherwise, it will match as little as
/// possible.
fn c_zero_or_one(
&self,
diff --git a/regex-automata/src/nfa/thompson/error.rs b/regex-automata/src/nfa/thompson/error.rs
index 645bb065a..82648813b 100644
--- a/regex-automata/src/nfa/thompson/error.rs
+++ b/regex-automata/src/nfa/thompson/error.rs
@@ -3,7 +3,7 @@ use crate::util::{
primitives::{PatternID, StateID},
};
-/// An error that can occured during the construction of a thompson NFA.
+/// An error that can occurred during the construction of a thompson NFA.
///
/// This error does not provide many introspection capabilities. There are
/// generally only two things you can do with it:
@@ -161,13 +161,13 @@ impl core::fmt::Display for BuildError {
}
BuildErrorKind::TooManyPatterns { given, limit } => write!(
f,
- "attemped to compile {} patterns, \
+ "attempted to compile {} patterns, \
which exceeds the limit of {}",
given, limit,
),
BuildErrorKind::TooManyStates { given, limit } => write!(
f,
- "attemped to compile {} NFA states, \
+ "attempted to compile {} NFA states, \
which exceeds the limit of {}",
given, limit,
),
diff --git a/regex-automata/src/nfa/thompson/nfa.rs b/regex-automata/src/nfa/thompson/nfa.rs
index 6e46b04df..86131406c 100644
--- a/regex-automata/src/nfa/thompson/nfa.rs
+++ b/regex-automata/src/nfa/thompson/nfa.rs
@@ -1587,7 +1587,7 @@ pub enum State {
/// in case they are useful. But mostly, all you'll need is `next` and
/// `slot`.
Capture {
- /// The state to transtition to, unconditionally.
+ /// The state to transition to, unconditionally.
next: StateID,
/// The pattern ID that this capture belongs to.
pattern_id: PatternID,
diff --git a/regex-automata/src/nfa/thompson/pikevm.rs b/regex-automata/src/nfa/thompson/pikevm.rs
index d11590bec..d737fb71e 100644
--- a/regex-automata/src/nfa/thompson/pikevm.rs
+++ b/regex-automata/src/nfa/thompson/pikevm.rs
@@ -2297,7 +2297,7 @@ impl Counters {
trace!("===== START PikeVM Instrumentation Output =====");
// We take the top-K most occurring state sets. Otherwise the output
// is likely to be overwhelming. And we probably only care about the
- // most frequently occuring ones anyway.
+ // most frequently occurring ones anyway.
const LIMIT: usize = 20;
let mut set_counts = self.state_sets.iter().collect::, &u64)>>();
diff --git a/regex-automata/src/nfa/thompson/range_trie.rs b/regex-automata/src/nfa/thompson/range_trie.rs
index 2522e7fe0..2d43a5b6f 100644
--- a/regex-automata/src/nfa/thompson/range_trie.rs
+++ b/regex-automata/src/nfa/thompson/range_trie.rs
@@ -131,7 +131,7 @@ sequences of ranges are sorted, and any corresponding ranges are either
exactly equivalent or non-overlapping.
In effect, a range trie is building a DFA from a sequence of arbitrary byte
-ranges. But it uses an algoritm custom tailored to its input, so it is not as
+ranges. But it uses an algorithm custom tailored to its input, so it is not as
costly as traditional DFA construction. While it is still quite a bit more
costly than the forward case (which only needs Daciuk's algorithm), it winds
up saving a substantial amount of time if one is doing a full DFA powerset
@@ -188,7 +188,7 @@ pub struct RangeTrie {
/// A stack for traversing this trie to yield sequences of byte ranges in
/// lexicographic order.
iter_stack: RefCell>,
- /// A bufer that stores the current sequence during iteration.
+ /// A buffer that stores the current sequence during iteration.
iter_ranges: RefCell>,
/// A stack used for traversing the trie in order to (deeply) duplicate
/// a state. States are recursively duplicated when ranges are split.
@@ -622,7 +622,7 @@ struct NextIter {
}
/// The next state to process during insertion and any remaining ranges that we
-/// want to add for a partcular sequence of ranges. The first such instance
+/// want to add for a particular sequence of ranges. The first such instance
/// is always the root state along with all ranges given.
#[derive(Clone, Debug)]
struct NextInsert {
diff --git a/regex-automata/src/util/alphabet.rs b/regex-automata/src/util/alphabet.rs
index 76eca3936..018915bcb 100644
--- a/regex-automata/src/util/alphabet.rs
+++ b/regex-automata/src/util/alphabet.rs
@@ -132,7 +132,7 @@ impl Unit {
}
}
- /// If this unit is an "end of input" sentinel, then return the underyling
+ /// If this unit is an "end of input" sentinel, then return the underlying
/// sentinel value that was given to [`Unit::eoi`]. Otherwise return
/// `None`.
pub fn as_eoi(self) -> Option {
diff --git a/regex-automata/src/util/captures.rs b/regex-automata/src/util/captures.rs
index 30bcced49..60b6df7e2 100644
--- a/regex-automata/src/util/captures.rs
+++ b/regex-automata/src/util/captures.rs
@@ -1453,7 +1453,7 @@ impl GroupInfo {
/// sequence of patterns yields a sequence of possible group names. The
/// index of each pattern in the sequence corresponds to its `PatternID`,
/// and the index of each group in each pattern's sequence corresponds to
- /// its coresponding group index.
+ /// its corresponding group index.
///
/// While this constructor is very generic and therefore perhaps hard to
/// chew on, an example of a valid concrete type that can be passed to
diff --git a/regex-automata/src/util/determinize/mod.rs b/regex-automata/src/util/determinize/mod.rs
index 14aa9a8c3..30a82afb8 100644
--- a/regex-automata/src/util/determinize/mod.rs
+++ b/regex-automata/src/util/determinize/mod.rs
@@ -205,7 +205,7 @@ pub(crate) fn next(
&& unit.is_byte(lookm.get_line_terminator())
{
// Why only handle StartLF here and not Start? That's because Start
- // can only impact the starting state, which is speical cased in
+ // can only impact the starting state, which is special cased in
// start state handling.
builder.set_look_have(|have| have.insert(Look::StartLF));
}
@@ -412,7 +412,7 @@ pub(crate) fn epsilon_closure(
/// and whether this state is being generated for a transition over a word byte
/// when applicable) that are true immediately prior to transitioning into this
/// state (via `builder.look_have()`). The match pattern IDs should correspond
-/// to matches that occured on the previous transition, since all matches are
+/// to matches that occurred on the previous transition, since all matches are
/// delayed by one byte. The things that should _not_ be set are look-ahead
/// assertions (EndLF, End and whether the next byte is a word byte or not).
/// The builder state should also not have anything in `look_need` set, as this
diff --git a/regex-automata/src/util/determinize/state.rs b/regex-automata/src/util/determinize/state.rs
index 68f81f93c..e64123587 100644
--- a/regex-automata/src/util/determinize/state.rs
+++ b/regex-automata/src/util/determinize/state.rs
@@ -60,7 +60,7 @@ DFA state to check if it already exists. If it does, then there's no need to
freeze it into a `State`. It it doesn't exist, then `StateBuilderNFA::to_state`
can be called to freeze the builder into an immutable `State`. In either case,
`clear` should be called on the builder to turn it back into a
-`StateBuilderEmpty` that reuses the underyling memory.
+`StateBuilderEmpty` that reuses the underlying memory.
The main purpose for splitting the builder into these distinct types is to
make it impossible to do things like adding a pattern ID after adding an NFA
@@ -103,7 +103,7 @@ use crate::util::{
/// This type is intended to be used only in NFA-to-DFA conversion via powerset
/// construction.
///
-/// It may be cheaply cloned and accessed safely from mulitple threads
+/// It may be cheaply cloned and accessed safely from multiple threads
/// simultaneously.
#[derive(Clone, Eq, Hash, PartialEq, PartialOrd, Ord)]
pub(crate) struct State(Arc<[u8]>);
diff --git a/regex-automata/src/util/iter.rs b/regex-automata/src/util/iter.rs
index 4fda8dc4c..a789fa042 100644
--- a/regex-automata/src/util/iter.rs
+++ b/regex-automata/src/util/iter.rs
@@ -2,7 +2,7 @@
Generic helpers for iteration of matches from a regex engine in a haystack.
The principle type in this module is a [`Searcher`]. A `Searcher` provides
-its own lower level iterater-like API in addition to methods for constructing
+its own lower level iterator-like API in addition to methods for constructing
types that implement `Iterator`. The documentation for `Searcher` explains a
bit more about why these different APIs exist.
diff --git a/regex-automata/src/util/lazy.rs b/regex-automata/src/util/lazy.rs
index b9f013c88..de27a2a6e 100644
--- a/regex-automata/src/util/lazy.rs
+++ b/regex-automata/src/util/lazy.rs
@@ -159,7 +159,7 @@ mod lazy {
impl T> Lazy {
/// Get the underlying lazy value. If it hasn't been initialized
/// yet, then always attempt to initialize it (even if some other
- /// thread is initializing it) and atomicly attach it to this lazy
+ /// thread is initializing it) and atomically attach it to this lazy
/// value before returning it.
pub(super) fn get(&self) -> &T {
if let Some(data) = self.poll() {
diff --git a/regex-automata/src/util/look.rs b/regex-automata/src/util/look.rs
index 389a864ef..aee31b34e 100644
--- a/regex-automata/src/util/look.rs
+++ b/regex-automata/src/util/look.rs
@@ -972,8 +972,8 @@ impl core::fmt::Display for UnicodeWordBoundaryError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(
f,
- "Unicode-aware \\b and \\B are unavailabe because the \
- requiste data tables are missing, please enable the \
+ "Unicode-aware \\b and \\B are unavailable because the \
+ requisite data tables are missing, please enable the \
unicode-word-boundary feature"
)
}
diff --git a/regex-automata/src/util/prefilter/teddy.rs b/regex-automata/src/util/prefilter/teddy.rs
index e8383c0a6..02210a5ec 100644
--- a/regex-automata/src/util/prefilter/teddy.rs
+++ b/regex-automata/src/util/prefilter/teddy.rs
@@ -28,7 +28,7 @@ pub(crate) struct Teddy {
anchored_ac: aho_corasick::dfa::DFA,
/// The length of the smallest literal we look for.
///
- /// We use this as a hueristic to figure out whether this will be "fast" or
+ /// We use this as a heuristic to figure out whether this will be "fast" or
/// not. Generally, the longer the better, because longer needles are more
/// discriminating and thus reduce false positive rate.
#[cfg(feature = "perf-literal-multisubstring")]
diff --git a/regex-automata/src/util/search.rs b/regex-automata/src/util/search.rs
index 70affcb7f..2036fa3a4 100644
--- a/regex-automata/src/util/search.rs
+++ b/regex-automata/src/util/search.rs
@@ -274,7 +274,7 @@ impl<'h> Input<'h> {
/// requires that it only report a match that begins at the same offset
/// as the beginning of the search.
/// 4. The regex `a` is compiled with `Anchored::No` and searches `aba`
- /// startting at position `1`. Since the search is not anchored and
+ /// starting at position `1`. Since the search is not anchored and
/// the regex does not start with `^`, the search executes as if there
/// is a `(?s:.)*?` prefix that permits it to match anywhere. Thus, it
/// reports a match at `[2, 3]`.
@@ -745,7 +745,7 @@ impl<'h> Input<'h> {
///
/// # Example
///
- /// This shows where codepoint bounardies do and don't exist in valid
+ /// This shows where codepoint boundaries do and don't exist in valid
/// UTF-8.
///
/// ```
@@ -1179,7 +1179,7 @@ impl PatternSet {
/// impossible if you use the `pattern_len()` method as defined on any of
/// the regex engines in this crate. Namely, a regex will fail to build by
/// returning an error if the number of patterns given to it exceeds the
- /// limit. Therefore, the number of patterns in a valid regex is alwasys
+ /// limit. Therefore, the number of patterns in a valid regex is always
/// a correct capacity to provide here.
pub fn new(capacity: usize) -> PatternSet {
assert!(
diff --git a/regex-automata/src/util/syntax.rs b/regex-automata/src/util/syntax.rs
index 9260ea20c..78e3cf9a1 100644
--- a/regex-automata/src/util/syntax.rs
+++ b/regex-automata/src/util/syntax.rs
@@ -209,7 +209,7 @@ impl Config {
/// then `.` will match any character except for a new line character.
///
/// Note that `.` is impacted by whether the "unicode" setting is enabled
- /// or not. When Unicode is enabled (the defualt), `.` will match any UTF-8
+ /// or not. When Unicode is enabled (the default), `.` will match any UTF-8
/// encoding of any Unicode scalar value (sans a new line, depending on
/// whether this "dot matches new line" option is enabled). When Unicode
/// mode is disabled, `.` will match any byte instead. Because of this,
@@ -345,7 +345,7 @@ impl Config {
/// if callers want to put a limit on the amount of heap space used, then
/// they should impose a limit on the length, in bytes, of the concrete
/// pattern string. In particular, this is viable since the parser will
- /// limit itself to heap space proportional to the lenth of the pattern
+ /// limit itself to heap space proportional to the length of the pattern
/// string.
///
/// Note that a nest limit of `0` will return a nest limit error for most
diff --git a/regex-automata/src/util/wire.rs b/regex-automata/src/util/wire.rs
index a49dfaad1..8890ed4fd 100644
--- a/regex-automata/src/util/wire.rs
+++ b/regex-automata/src/util/wire.rs
@@ -415,7 +415,7 @@ pub(crate) fn alloc_aligned_buffer(size: usize) -> (Vec, usize) {
/// Reads a NUL terminated label starting at the beginning of the given slice.
///
/// If a NUL terminated label could not be found, then an error is returned.
-/// Similary, if a label is found but doesn't match the expected label, then
+/// Similarly, if a label is found but doesn't match the expected label, then
/// an error is returned.
///
/// Upon success, the total number of bytes read (including padding bytes) is
diff --git a/regex-capi/include/rure.h b/regex-capi/include/rure.h
index 01173b451..7b910e7d4 100644
--- a/regex-capi/include/rure.h
+++ b/regex-capi/include/rure.h
@@ -418,7 +418,7 @@ rure_options *rure_options_new(void);
void rure_options_free(rure_options *options);
/*
- * rure_options_size_limit sets the appoximate size limit of the compiled
+ * rure_options_size_limit sets the approximate size limit of the compiled
 * regular expression.
 *
 * This size limit roughly corresponds to the number of bytes occupied by a
diff --git a/regex-cli/args/dfa.rs b/regex-cli/args/dfa.rs
index 94793ee87..a912e68f4 100644
--- a/regex-cli/args/dfa.rs
+++ b/regex-cli/args/dfa.rs
@@ -152,7 +152,7 @@ a search command.)
"-C, --no-byte-classes",
"Disable byte classes.",
r#"
-This casues all bytes to be an equivalence class unto themselves. By default,
+This causes all bytes to be an equivalence class unto themselves. By default,
bytes are grouped into equivalence classes to reduce the size of the alphabet
for a DFA, and therefore decreases overall space usage.
diff --git a/regex-cli/args/hybrid.rs b/regex-cli/args/hybrid.rs
index e80d9bff8..2af91cbce 100644
--- a/regex-cli/args/hybrid.rs
+++ b/regex-cli/args/hybrid.rs
@@ -109,7 +109,7 @@ a search command.)
"-C, --no-byte-classes",
"Disable byte classes.",
r#"
-This casues all bytes to be an equivalence class unto themselves. By default,
+This causes all bytes to be an equivalence class unto themselves. By default,
bytes are grouped into equivalence classes to reduce the size of the alphabet
for a DFA, and therefore decreases overall space usage.
diff --git a/regex-cli/args/meta.rs b/regex-cli/args/meta.rs
index e5e7873f0..7c0b4042a 100644
--- a/regex-cli/args/meta.rs
+++ b/regex-cli/args/meta.rs
@@ -225,7 +225,7 @@ cannot get too big.
"-C, --no-byte-classes",
"Disable byte classes.",
r#"
-This casues all bytes to be an equivalence class unto themselves. By default,
+This causes all bytes to be an equivalence class unto themselves. By default,
bytes are grouped into equivalence classes to reduce the size of the alphabet
for a DFA, and therefore decreases overall space usage.
diff --git a/regex-cli/args/onepass.rs b/regex-cli/args/onepass.rs
index 4261e8b21..a664b7831 100644
--- a/regex-cli/args/onepass.rs
+++ b/regex-cli/args/onepass.rs
@@ -73,7 +73,7 @@ a search command.)
"-C, --no-byte-classes",
"Disable byte classes.",
r#"
-This casues all bytes to be an equivalence class unto themselves. By default,
+This causes all bytes to be an equivalence class unto themselves. By default,
bytes are grouped into equivalence classes to reduce the size of the alphabet
for a DFA, and therefore decreases overall space usage.
diff --git a/regex-cli/args/overlapping.rs b/regex-cli/args/overlapping.rs
index c925159e0..4403ccce4 100644
--- a/regex-cli/args/overlapping.rs
+++ b/regex-cli/args/overlapping.rs
@@ -32,7 +32,7 @@ impl Configurable for Config {
"Enable overlapping search.",
r#"
Enable overlapping search. When this is enabled, the regex matcher will
-atempt to report all possible matches. Generally speaking, when one enables
+attempt to report all possible matches. Generally speaking, when one enables
overlapping search, you also want to ensure that '--match-kind all' is given
as well. Otherwise the overlapping search is unlikely to work as one would
expect since any match semantics other than 'all' exclude some subset of
matches from
diff --git a/regex-cli/cmd/debug/literal.rs b/regex-cli/cmd/debug/literal.rs
index 715e1bbb9..9c82ec6e0 100644
--- a/regex-cli/cmd/debug/literal.rs
+++ b/regex-cli/cmd/debug/literal.rs
@@ -17,7 +17,7 @@ pub fn run(p: &mut Parser) -> anyhow::Result<()> {
Prints the debug representation of extract literals from a regex pattern.
Note that the literals this command prints by default should roughly reflect
-what regex-automata's meta regex engine does by defualt. In particular, this
+what regex-automata's meta regex engine does by default. In particular, this
will optimize the extracted literals and will do so under the presumption of
leftmost-first match semantics. The --no-optimize flag can be given to skip
this optimization step and instead get the literals precisely as they were
diff --git a/regex-lite/src/hir/parse.rs b/regex-lite/src/hir/parse.rs
index 0b406d1d8..cc3c21fe6 100644
--- a/regex-lite/src/hir/parse.rs
+++ b/regex-lite/src/hir/parse.rs
@@ -34,7 +34,7 @@ const ERR_UNOPENED_GROUP: &str = "found closing ')' without matching '('";
const ERR_LOOK_UNSUPPORTED: &str = "look-around is not supported";
const ERR_EMPTY_FLAGS: &str = "empty flag directive '(?)' is not allowed";
const ERR_MISSING_GROUP_NAME: &str =
- "exepcted capture group name, but got end of pattern";
+ "expected capture group name, but got end of pattern";
const ERR_INVALID_GROUP_NAME: &str = "invalid group name";
const ERR_UNCLOSED_GROUP_NAME: &str =
"expected end of capture group name, but got end of pattern";
diff --git a/regex-lite/src/nfa.rs b/regex-lite/src/nfa.rs
index 12404dab6..8f37a5451 100644
--- a/regex-lite/src/nfa.rs
+++ b/regex-lite/src/nfa.rs
@@ -375,7 +375,7 @@ impl Compiler {
/// but no more than `max` times.
///
/// When `greedy` is true, then the preference is for the expression to
- /// match as much as possible. Otheriwse, it will match as little as
+ /// match as much as possible. Otherwise, it will match as little as
/// possible.
fn c_bounded(
&self,
@@ -438,7 +438,7 @@ impl Compiler {
/// integer is likely to run afoul of any configured size limits.)
///
/// When `greedy` is true, then the preference is for the expression to
- /// match as much as possible. Otheriwse, it will match as little as
+ /// match as much as possible. Otherwise, it will match as little as
/// possible.
fn c_at_least(
&self,
@@ -506,7 +506,7 @@ impl Compiler {
/// times.
///
/// When `greedy` is true, then the preference is for the expression to
- /// match as much as possible. Otheriwse, it will match as little as
+ /// match as much as possible. Otherwise, it will match as little as
/// possible.
fn c_zero_or_one(
&self,
diff --git a/regex-lite/src/string.rs b/regex-lite/src/string.rs
index acf6aa00a..91b81d008 100644
--- a/regex-lite/src/string.rs
+++ b/regex-lite/src/string.rs
@@ -1343,7 +1343,7 @@ impl Regex {
/// unnamed group that is always present and corresponds to the entire
/// match.
///
- /// Since the implict unnamed group is always included in this length, the
+ /// Since the implicit unnamed group is always included in this length, the
/// length returned is guaranteed to be greater than zero.
///
/// # Example
@@ -2760,7 +2760,7 @@ impl RegexBuilder {
///
/// * Unless dot-matches-new-line mode is enabled, `.` will now match any
/// character except for `\n` and `\r`.
- /// * When multi-line mode is enabled, `^` will match immediatelly
+ /// * When multi-line mode is enabled, `^` will match immediately
/// following a `\n` or a `\r`. Similarly, `$` will match immediately
/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match between
/// `\r` and `\n`.
diff --git a/regex-syntax/src/hir/literal.rs b/regex-syntax/src/hir/literal.rs
index bcab2fb75..9461db989 100644
--- a/regex-syntax/src/hir/literal.rs
+++ b/regex-syntax/src/hir/literal.rs
@@ -23,7 +23,7 @@ effective literal optimizations:
to lead to substring search that is only a little faster than a regex search,
and thus the overhead of using literal optimizations in the first place might
make things slower overall.
-* The literals in your [`Seq`] shoudn't be too short. In general, longer is
+* The literals in your [`Seq`] shouldn't be too short. In general, longer is
better. A sequence corresponding to single bytes that occur frequently in the
haystack, for example, is probably a bad literal optimization because it's
likely to produce many false positive candidates. Longer literals are less
@@ -692,7 +692,7 @@ impl Default for ExtractKind {
/// from making assumptions about what literals are required in order to match
/// a particular [`Hir`] expression. Generally speaking, when a set is in this
/// state, literal optimizations are inhibited. A good example of a regex that
-/// will cause this sort of set to apppear is `[A-Za-z]`. The character class
+/// will cause this sort of set to appear is `[A-Za-z]`. The character class
/// is just too big (and also too narrow) to be usefully expanded into 52
/// different literals. (Note that the decision for when a seq should become
/// infinite is determined by the caller. A seq itself has no hard-coded
diff --git a/regex-syntax/src/hir/mod.rs b/regex-syntax/src/hir/mod.rs
index c6272cd4c..6c1d2745e 100644
--- a/regex-syntax/src/hir/mod.rs
+++ b/regex-syntax/src/hir/mod.rs
@@ -184,7 +184,7 @@ impl core::fmt::Display for ErrorKind {
/// matches.
///
/// For empty matches, those can occur at any position. It is the
-/// repsonsibility of the regex engine to determine whether empty matches are
+/// responsibility of the regex engine to determine whether empty matches are
/// permitted between the code units of a single codepoint.
///
/// # Stack space
diff --git a/regex-test/lib.rs b/regex-test/lib.rs
index 3287473e0..2b630666e 100644
--- a/regex-test/lib.rs
+++ b/regex-test/lib.rs
@@ -429,7 +429,7 @@ impl RegexTest {
matches
}
- /// Returns the matches expected by this test, includng the spans of any
+ /// Returns the matches expected by this test, including the spans of any
/// matching capture groups.
fn captures(&self) -> Vec {
self.matches.clone()
}
@@ -1432,7 +1432,7 @@ pub enum SearchKind {
/// Report matches as soon as they are found.
///
/// This is somewhat tricky to test, as this semantic is specified in terms
- /// of whatever the regex engine can do. For exmaple, an automata oriented
+ /// of whatever the regex engine can do. For example, an automata oriented
/// engine might be able to report a match earlier than a backtracking
/// engine.
Earliest,
diff --git a/src/builders.rs b/src/builders.rs
index 285331a82..d19a0ffe2 100644
--- a/src/builders.rs
+++ b/src/builders.rs
@@ -395,7 +395,7 @@ pub(crate) mod string {
///
/// * Unless dot-matches-new-line mode is enabled, `.` will now match
/// any character except for `\n` and `\r`.
- /// * When multi-line mode is enabled, `^` will match immediatelly
+ /// * When multi-line mode is enabled, `^` will match immediately
/// following a `\n` or a `\r`. Similarly, `$` will match immediately
/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match
/// between `\r` and `\n`.
@@ -973,7 +973,7 @@ pub(crate) mod string {
///
/// * Unless dot-matches-new-line mode is enabled, `.` will now match
/// any character except for `\n` and `\r`.
- /// * When multi-line mode is enabled, `^` will match immediatelly
+ /// * When multi-line mode is enabled, `^` will match immediately
/// following a `\n` or a `\r`. Similarly, `$` will match immediately
/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match
/// between `\r` and `\n`.
@@ -1563,7 +1563,7 @@ pub(crate) mod bytes {
///
/// * Unless dot-matches-new-line mode is enabled, `.` will now match
/// any character except for `\n` and `\r`.
- /// * When multi-line mode is enabled, `^` will match immediatelly
+ /// * When multi-line mode is enabled, `^` will match immediately
/// following a `\n` or a `\r`. Similarly, `$` will match immediately
/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match
/// between `\r` and `\n`.
@@ -2162,7 +2162,7 @@ pub(crate) mod bytes {
///
/// * Unless dot-matches-new-line mode is enabled, `.` will now match
/// any character except for `\n` and `\r`.
- /// * When multi-line mode is enabled, `^` will match immediatelly
+ /// * When multi-line mode is enabled, `^` will match immediately
/// following a `\n` or a `\r`. Similarly, `$` will match immediately
/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match
/// between `\r` and `\n`.
diff --git a/src/bytes.rs b/src/bytes.rs
index c81c1a43d..3f53a3ea5 100644
--- a/src/bytes.rs
+++ b/src/bytes.rs
@@ -44,7 +44,7 @@ let hay = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65";
// Notice that despite the `.*` at the end, it will only match valid UTF-8
// because Unicode mode was enabled with the `u` flag. Without the `u` flag,
-// the `.*` would match the rest of the bytes regardless of whehter they were
+// the `.*` would match the rest of the bytes regardless of whether they were
// valid UTF-8.
let (_, [title]) = re.captures(hay).unwrap().extract();
assert_eq!(title, b"\xE2\x98\x83");
diff --git a/src/regex/bytes.rs b/src/regex/bytes.rs
index 86437e1c2..6522ee7e3 100644
--- a/src/regex/bytes.rs
+++ b/src/regex/bytes.rs
@@ -1316,7 +1316,7 @@ impl Regex {
/// unnamed group that is always present and corresponds to the entire
/// match.
///
- /// Since the implict unnamed group is always included in this length, the
+ /// Since the implicit unnamed group is always included in this length, the
/// length returned is guaranteed to be greater than zero.
///
/// # Example
diff --git a/src/regex/string.rs b/src/regex/string.rs
index e8d625655..65a76740e 100644
--- a/src/regex/string.rs
+++ b/src/regex/string.rs
@@ -1315,7 +1315,7 @@ impl Regex {
/// unnamed group that is always present and corresponds to the entire
/// match.
///
- /// Since the implict unnamed group is always included in this length, the
+ /// Since the implicit unnamed group is always included in this length, the
/// length returned is guaranteed to be greater than zero.
///
/// # Example
diff --git a/tests/replace.rs b/tests/replace.rs
index 08b80a241..f26ae4603 100644
--- a/tests/replace.rs
+++ b/tests/replace.rs
@@ -39,7 +39,7 @@ replace!(
"",
"trim me"
);
-replace!(number_hypen, replace, r"(.)(.)", "ab", "$1-$2", "a-b");
+replace!(number_hyphen, replace, r"(.)(.)", "ab", "$1-$2", "a-b");
// replace!(number_underscore, replace, r"(.)(.)", "ab", "$1_$2", "a_b");
replace!(
simple_expand,