Skip to content

Commit

Permalink
Add {n,m} repetition range support (#278)
Browse files Browse the repository at this point in the history
* Add {n,m} regex support

* Added test for repeating character class
  • Loading branch information
Plaba committed Feb 26, 2023
1 parent 51c1f8c commit 1ecd6a4
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 4 deletions.
32 changes: 29 additions & 3 deletions logos-codegen/src/mir.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::convert::TryFrom;

use regex_syntax::hir::{Hir, HirKind, RepetitionKind};
use regex_syntax::hir::{Hir, HirKind, RepetitionKind, RepetitionRange};
use regex_syntax::ParserBuilder;

pub use regex_syntax::hir::{Class, ClassUnicode, Literal};
Expand Down Expand Up @@ -118,8 +118,34 @@ impl TryFrom<Hir> for Mir {
RepetitionKind::OneOrMore => {
Ok(Mir::Concat(vec![mir.clone(), Mir::Loop(Box::new(mir))]))
}
RepetitionKind::Range(..) => {
Err("#[regex]: {n,m} repetition range is currently unsupported.".into())
// Lower a counted repetition (`{n}`, `{n,}`, `{n,m}`) into an explicit
// `Mir::Concat` built from copies of the already-converted `mir`.
RepetitionKind::Range(range) => {
match range {
// `{n}`: exactly `n` copies of `mir`, one after another.
RepetitionRange::Exactly(n) => {
let mut out = Vec::with_capacity(n as usize);
for _ in 0..n {
out.push(mir.clone());
}
Ok(Mir::Concat(out))
}
// `{n,}`: `n` mandatory copies followed by a trailing `Mir::Loop`.
// The `OneOrMore` arm lowers to `mir` + `Loop(mir)`, so `Loop` presumably
// matches zero or more repetitions -- confirm against the `Mir` definition.
RepetitionRange::AtLeast(n) => {
// NOTE(review): capacity is reserved for `n` elements, but `n + 1` are
// pushed (the copies plus the loop), so the final push may reallocate.
let mut out = Vec::with_capacity(n as usize );
for _ in 0..n {
out.push(mir.clone());
}
// `mir` itself is moved into the loop here; no clone is needed for the last use.
out.push(Mir::Loop(Box::new(mir)));
Ok(Mir::Concat(out))
}
// `{n,m}`: `n` mandatory copies followed by `m - n` copies each wrapped in
// `Mir::Maybe`, for `m` elements in total.
RepetitionRange::Bounded(n, m) => {
let mut out = Vec::with_capacity(m as usize);
for _ in 0..n {
out.push(mir.clone());
}
// `n..m` is an empty range when `m <= n`, so nothing is added in that case.
for _ in n..m {
out.push(Mir::Maybe(Box::new(mir.clone())));
}
Ok(Mir::Concat(out))
}
}
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion tests/tests/lexer_modes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ enum Inner {
#[regex(r"\\u\{[^}]*\}")]
EscapedCodepoint,

#[regex(r"\\[0-7]{1,3}")]
EscapedOctal,

#[token(r#"\""#)]
EscapedQuote,

Expand Down Expand Up @@ -105,7 +108,7 @@ impl<'source> Iterator for ModeBridge<'source> {
fn iterating_modes() {
use Inner::*;
use Tokens::*;
let s = r#""Hello W\u{00f4}rld\n""#;
let s = r#""Hello W\u{00f4}\162ld\n""#;
let moded = ModeBridge {
mode: Modes::new(s),
};
Expand All @@ -115,6 +118,7 @@ fn iterating_modes() {
OuterToken(Outer::StartString),
InnerToken(Text),
InnerToken(EscapedCodepoint),
InnerToken(EscapedOctal),
InnerToken(Text),
InnerToken(EscapedNewline),
InnerToken(EndString),
Expand Down

0 comments on commit 1ecd6a4

Please sign in to comment.