diff --git a/.gitignore b/.gitignore index ccc4833436..fd3afa8a97 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /Cargo.lock /regex_macros/target /regex_macros/Cargo.lock +.*.swp diff --git a/src/re.rs b/src/re.rs index f1d76988d1..b883dfc18d 100644 --- a/src/re.rs +++ b/src/re.rs @@ -15,6 +15,7 @@ use std::borrow::IntoCow; use std::collections::HashMap; use std::fmt; use std::string::CowString; +use unicode::str::utf8_char_width; use compile::Program; use parse; @@ -881,7 +882,11 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> { // Don't accept empty matches immediately following a match. // i.e., no infinite loops please. if e == s && Some(self.last_end) == self.last_match { - self.last_end += 1; + if self.last_end >= self.search.len() { + return None; + } + self.last_end += + utf8_char_width(self.search.as_bytes()[self.last_end]); return self.next() } self.last_end = e; @@ -925,7 +930,11 @@ impl<'r, 't> Iterator for FindMatches<'r, 't> { // Don't accept empty matches immediately following a match. // i.e., no infinite loops please. if e == s && Some(self.last_end) == self.last_match { - self.last_end += 1; + if self.last_end >= self.search.len() { + return None; + } + self.last_end += + utf8_char_width(self.search.as_bytes()[self.last_end]); return self.next() } self.last_end = e; diff --git a/tests/tests.rs b/tests/tests.rs index c257a983eb..bbe994e8bc 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -64,6 +64,38 @@ fn range_ends_with_escape() { assert_eq!(ms, vec![(0, 1), (1, 2)]); } +#[test] +fn empty_match_find_iter() { + let re = regex!(r".*?"); + let ms: Vec<_> = re.find_iter("abc").collect(); + assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]); +} + +#[test] +fn empty_match_captures_iter() { + let re = regex!(r".*?"); + let ms: Vec<_> = re.captures_iter("abc") + .map(|c| c.pos(0).unwrap()) + .collect(); + assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]); +} + +#[test] +fn empty_match_unicode_find_iter() { + let re = regex!(r".*?"); + let ms: Vec<_> = re.find_iter("Ⅰ1Ⅱ2").collect(); + assert_eq!(ms, vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)]); +} + +#[test] +fn empty_match_unicode_captures_iter() { + let re = regex!(r".*?"); + let ms: Vec<_> = re.captures_iter("Ⅰ1Ⅱ2") + .map(|c| c.pos(0).unwrap()) + .collect(); + assert_eq!(ms, vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)]); +} + macro_rules! replace( ($name:ident, $which:ident, $re:expr, $search:expr, $replace:expr, $result:expr) => (