Skip to content

Commit

Permalink
lite: fix stack overflow test
Browse files Browse the repository at this point in the history
It turns out that we missed another case where the stack could overflow:
dropping a deeply nested Hir. Namely, since we permit deeply nested Hirs
to be constructed and only reject them after determining they are too
deeply nested, they still then need to be dropped. We fix this by
implementing a custom Drop impl that uses the heap to traverse the Hir
and drop things without using unbounded stack space.

An alternative way to fix this would be to adjust the parser somehow to
avoid building deeply nested Hir values in the first place. But that
seems trickier, so we just stick with this for now.
  • Loading branch information
BurntSushi committed Oct 15, 2023
1 parent 4ae1472 commit 0086dec
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 3 deletions.
60 changes: 60 additions & 0 deletions regex-lite/src/hir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,24 @@ impl Hir {
}
}

impl HirKind {
    /// Borrows the immediate sub-expressions of this kind as a slice.
    ///
    /// `Concat` and `Alternation` expose all of their sub-expressions,
    /// `Repetition` and `Capture` expose their single sub-expression as a
    /// one-element slice, and every other kind yields an empty slice.
    fn subs(&self) -> &[Hir] {
        match self {
            HirKind::Concat(children) | HirKind::Alternation(children) => {
                children
            }
            HirKind::Repetition(rep) => core::slice::from_ref(&rep.sub),
            HirKind::Capture(cap) => core::slice::from_ref(&cap.sub),
            HirKind::Empty
            | HirKind::Char(_)
            | HirKind::Class(_)
            | HirKind::Look(_) => &[],
        }
    }
}

#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct Class {
pub(crate) ranges: Vec<ClassRange>,
Expand Down Expand Up @@ -747,3 +765,45 @@ fn prev_char(ch: char) -> Option<char> {
// and U+E000 yields a valid scalar value.
Some(char::from_u32(u32::from(ch).checked_sub(1)?).unwrap())
}

impl Drop for Hir {
    /// Drops this expression using an explicit, heap-allocated work list
    /// instead of recursing through its sub-expressions, so that dropping
    /// a deeply nested `Hir` does not require unbounded stack space.
    fn drop(&mut self) {
        use core::mem;

        // Fast path: a leaf, a capture/repetition whose sub-expression has
        // no sub-expressions of its own, or an empty concat/alternation
        // needs no traversal. Returning here lets the fields be dropped
        // normally. This is also the path taken when the nodes popped in
        // the loop below are dropped, since by then their sub-expressions
        // have been moved out.
        let is_shallow = match *self.kind() {
            HirKind::Empty
            | HirKind::Char(_)
            | HirKind::Class(_)
            | HirKind::Look(_) => true,
            HirKind::Capture(ref cap) => cap.sub.kind.subs().is_empty(),
            HirKind::Repetition(ref rep) => {
                rep.sub.kind.subs().is_empty()
            }
            HirKind::Concat(ref children)
            | HirKind::Alternation(ref children) => children.is_empty(),
        };
        if is_shallow {
            return;
        }

        // Move the whole expression out of `self` (leaving a
        // `Hir::empty()` placeholder behind) and take it apart one node at
        // a time. Each node's sub-expressions are moved onto `pending`
        // before the node itself goes out of scope at the end of the
        // iteration.
        let mut pending = vec![mem::replace(self, Hir::empty())];
        while let Some(mut node) = pending.pop() {
            match node.kind {
                HirKind::Empty
                | HirKind::Char(_)
                | HirKind::Class(_)
                | HirKind::Look(_) => {}
                HirKind::Capture(ref mut cap) => {
                    pending.push(mem::replace(&mut cap.sub, Hir::empty()));
                }
                HirKind::Repetition(ref mut rep) => {
                    pending.push(mem::replace(&mut rep.sub, Hir::empty()));
                }
                HirKind::Concat(ref mut children)
                | HirKind::Alternation(ref mut children) => {
                    pending.extend(children.drain(..));
                }
            }
        }
    }
}
6 changes: 4 additions & 2 deletions regex-lite/src/hir/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1328,8 +1328,10 @@ fn into_class_item_range(hir: Hir) -> Result<char, Error> {
}
}

fn into_class_item_ranges(hir: Hir) -> Result<Vec<hir::ClassRange>, Error> {
match hir.kind {
fn into_class_item_ranges(
mut hir: Hir,
) -> Result<Vec<hir::ClassRange>, Error> {
match core::mem::replace(&mut hir.kind, HirKind::Empty) {
HirKind::Char(ch) => Ok(vec![hir::ClassRange { start: ch, end: ch }]),
HirKind::Class(hir::Class { ranges }) => Ok(ranges),
_ => Err(Error::new(ERR_CLASS_INVALID_ITEM)),
Expand Down
2 changes: 1 addition & 1 deletion regex-lite/tests/fuzz/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn captures_wrong_order_min() {
// Fuzz regression test: builds a pattern consisting of `.` followed by
// `1 << 15` (32768) `*` operators. The test has no assertions; it passes as
// long as neither constructing the regex nor (if construction succeeds)
// searching the empty string crashes.
//
// NOTE(review): the two `let Ok(re) = ...` lines below look like the removed
// and added sides of this hunk's one-line change — confirm against the diff.
#[test]
fn many_zero_to_many_reps() {
let pat = format!(".{}", "*".repeat(1 << 15));
// If the pattern is rejected, there is nothing further to check.
let Ok(re) = regex_lite::RegexBuilder::new(&pat).build() else { return };
let Ok(re) = regex_lite::Regex::new(&pat) else { return };
re.is_match("");
}

Expand Down

0 comments on commit 0086dec

Please sign in to comment.