Skip to content

Commit

Permalink
fix(lib): correctly handle miss for loop in loop (#393)
Browse files Browse the repository at this point in the history
* chore(test): extend cases for loop in loop

* fix(lib): correctly handle miss for loop in loop

* chore(lib): avoid cloning MIR

---------

Co-authored-by: Lukas Markeffsky <@>
  • Loading branch information
lukas-code authored Jun 2, 2024
1 parent 0bcfb6a commit cbd6218
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 32 deletions.
72 changes: 41 additions & 31 deletions logos-codegen/src/graph/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ use crate::mir::{Class, ClassUnicode, Literal, Mir};

impl<Leaf: Disambiguate + Debug> Graph<Leaf> {
/// Entry point for adding a `Mir` to the graph.
///
/// Delegates to `parse_mir`, passing `then` through unchanged, `None` for
/// both `miss` and `reserved`, and `false` for the final argument, and
/// returns the `NodeId` produced by that call.
pub fn regex(&mut self, mir: Mir, then: NodeId) -> NodeId {
// NOTE(review): this listing is a diff rendered without +/- markers, so the
// two calls below are the old and the new version of the same line.
// Old form: `mir` is moved into `parse_mir`.
self.parse_mir(mir, then, None, None, false)
// New form: `parse_mir` takes `&Mir`, so the value is only borrowed here.
self.parse_mir(&mir, then, None, None, false)
}

fn parse_mir(
&mut self,
mir: Mir,
mir: &Mir,
then: NodeId,
miss: Option<NodeId>,
reserved: Option<ReservedId>,
Expand All @@ -21,24 +21,36 @@ impl<Leaf: Disambiguate + Debug> Graph<Leaf> {
match mir {
Mir::Empty => then,
Mir::Loop(mir) => {
let miss = match miss {
Some(id) => self.merge(id, then),
None => then,
};
let this = match reserved {
Some(rid) => rid,
None => self.reserve(),
};
let reserved_first = reserved.unwrap_or_else(|| self.reserve());

let (new_then, new_miss);
if let Some(old_miss) = miss {
// We have to separate the first iteration from the other iterations,
// because the `old_miss` path must only be taken if we miss the first
// iteration.
let reserved_next = self.reserve();
new_then = self.parse_mir(
mir,
reserved_next.get(),
Some(then),
Some(reserved_next),
true,
);
new_miss = self.merge(old_miss, then);
} else {
new_then = reserved_first.get();
new_miss = then;
}

self.parse_mir(*mir, this.get(), Some(miss), Some(this), true)
self.parse_mir(mir, new_then, Some(new_miss), Some(reserved_first), true)
}
Mir::Maybe(mir) => {
let miss = match miss {
Some(id) => self.merge(id, then),
None => then,
};

self.parse_mir(*mir, then, Some(miss), reserved, true)
self.parse_mir(mir, then, Some(miss), reserved, true)
}
Mir::Alternation(alternation) => {
let mut fork = Fork::new().miss(miss);
Expand All @@ -57,7 +69,7 @@ impl<Leaf: Disambiguate + Debug> Graph<Leaf> {

self.insert_or_push(reserved, Rope::new(pattern, then).miss(miss))
}
Mir::Concat(mut concat) => {
Mir::Concat(concat) => {
// We'll be writing from the back, so need to allocate enough
// space here. Worst case scenario is all unicode codepoints
// producing 4 byte utf8 sequences
Expand All @@ -66,52 +78,50 @@ impl<Leaf: Disambiguate + Debug> Graph<Leaf> {
let mut end = ropebuf.len();
let mut then = then;

let mut handle_bytes = |graph: &mut Self, mir, then: &mut NodeId| match mir {
let mut handle_bytes = |graph: &mut Self, mir: &Mir, then: &mut NodeId| match mir {
Mir::Literal(Literal(bytes)) => {
cur -= bytes.len();
for (i, byte) in bytes.iter().enumerate() {
ropebuf[cur + i] = byte.into();
}
None
true
}
Mir::Class(Class::Unicode(class)) if is_one_ascii(&class, repeated) => {
Mir::Class(Class::Unicode(class)) if is_one_ascii(class, repeated) => {
cur -= 1;
ropebuf[cur] = class.ranges()[0].into();
None
true
}
Mir::Class(Class::Bytes(class)) if class.ranges().len() == 1 => {
cur -= 1;
ropebuf[cur] = class.ranges()[0].into();
None
true
}
mir => {
_ => {
if end > cur {
let rope = Rope::new(&ropebuf[cur..end], *then);

*then = graph.push(rope);
end = cur;
}

Some(mir)
false
}
};

for mir in concat.drain(1..).rev() {
if let Some(mir) = handle_bytes(self, mir, &mut then) {
for mir in concat[1..].iter().rev() {
if !handle_bytes(self, mir, &mut then) {
then = self.parse_mir(mir, then, None, None, false);
}
}

match handle_bytes(self, concat.remove(0), &mut then) {
None => {
let rope = Rope::new(&ropebuf[cur..end], then).miss(miss);

self.insert_or_push(reserved, rope)
}
Some(mir) => self.parse_mir(mir, then, miss, reserved, false),
let first_mir = &concat[0];
if handle_bytes(self, first_mir, &mut then) {
let rope = Rope::new(&ropebuf[cur..end], then).miss(miss);
self.insert_or_push(reserved, rope)
} else {
self.parse_mir(first_mir, then, miss, reserved, false)
}
}
Mir::Class(Class::Unicode(class)) if !is_ascii(&class, repeated) => {
Mir::Class(Class::Unicode(class)) if !is_ascii(class, repeated) => {
let mut ropes = class
.iter()
.flat_map(|range| Utf8Sequences::new(range.start(), range.end()))
Expand Down
8 changes: 7 additions & 1 deletion tests/tests/edgecase.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,13 +424,19 @@ mod loop_in_loop {
// Lexes a string of space-separated words built from `f` and `o` and checks
// the result for every word. Each expected entry pairs a result with the
// matched slice and a range; the ranges are the offsets of that slice within
// the input string.
// NOTE(review): `assert_lex` and the `Token::Foo` pattern are defined outside
// this view, so the exact pattern being exercised cannot be confirmed here.
#[test]
fn test_a_loop_in_a_loop() {
assert_lex(
// Old input (the diff is shown without +/- markers; this line is replaced
// by the one below).
"foo ffoo ffffooffoooo foooo foofffffoo",
// New input: same words as before, plus `f`, `ff`, `ffo` and `ffoofo`.
"foo ffoo ffffooffoooo foooo foofffffoo f ff ffo ffoofo",
&[
(Ok(Token::Foo), "foo", 0..3),
(Ok(Token::Foo), "ffoo", 4..8),
(Ok(Token::Foo), "ffffooffoooo", 9..21),
(Ok(Token::Foo), "foooo", 22..27),
(Ok(Token::Foo), "foofffffoo", 28..38),
// Expectations for the four appended words: a lone `f` is accepted, `ff`
// is an error, and `ffo` / `ffoofo` are each reported as two error
// slices (`ff` + `o`, and `ffoof` + `o`).
(Ok(Token::Foo), "f", 39..40),
(Err(()), "ff", 41..43),
(Err(()), "ff", 44..46),
(Err(()), "o", 46..47),
(Err(()), "ffoof", 48..53),
(Err(()), "o", 53..54),
],
);
}
Expand Down

0 comments on commit cbd6218

Please sign in to comment.