diff --git a/src/regex.nim b/src/regex.nim index b5991b1..3af49ce 100644 --- a/src/regex.nim +++ b/src/regex.nim @@ -426,6 +426,11 @@ template runeIncAt(s: string, n: var int) = else: n = s.len+1 +template findNextImpl( + s, pattern, m, i, isNext: untyped +): untyped = + matchImpl(s, pattern, m, {mfFindMatch}, i, isNext) + iterator findAll*( s: string, pattern: Regex, @@ -447,7 +452,7 @@ iterator findAll*( var i = start var m: RegexMatch while i <= len(s): - if not find(s, pattern, m, i): + if not findNextImpl(s, pattern, m, i, i > start): break elif m.boundaries.b >= m.boundaries.a: doAssert i < m.boundaries.b+1 @@ -497,7 +502,7 @@ iterator split*(s: string, sep: Regex): string {.inline, raises: [].} = m: RegexMatch # This is pretty much findAll while i <= len(s): - if not find(s, sep, m, i): + if not findNextImpl(s, sep, m, i, true): i = s.len+1 last = s.len+1 elif m.boundaries.b >= m.boundaries.a: @@ -539,7 +544,7 @@ func splitIncl*(s: string, sep: Regex): seq[string] {.inline, raises: [].} = skipFirst = true m: RegexMatch while i <= len(s): - if not find(s, sep, m, i): + if not findNextImpl(s, sep, m, i, true): i = s.len+1 last = s.len+1 elif m.boundaries.b >= m.boundaries.a: diff --git a/src/regex/nfamacro.nim b/src/regex/nfamacro.nim index dc25990..ed7d690 100644 --- a/src/regex/nfamacro.nim +++ b/src/regex/nfamacro.nim @@ -448,7 +448,8 @@ func matchImpl*( regex: static Regex, m: var RegexMatch, flags: static MatchFlags, - start = 0 + start = 0, + isContinuation = false ): bool {.inline.} = const eoeNode {.used.} = regex.eoeNode() # workaround Nim/issues/13252 @@ -468,7 +469,8 @@ func matchImpl*( smB = newSubmatches(regex.nfa.len) smA.add((0'i16, -1'i32, start .. start-1)) when mfFindMatch in flags: - if 0 <= start-1 and start-1 <= len(text)-1: + if isContinuation and + 0 <= start-1 and start-1 <= len(text)-1: cPrev = bwRuneAt(text, start-1).int32 while i < len(text): fastRuneAt(text, i, c, true) diff --git a/src/regex/nfamatch.nim b/src/regex/nfamatch.nim index 23c5537..808e1c3 100644 --- a/src/regex/nfamatch.nim +++ b/src/regex/nfamatch.nim @@ -100,7 +100,8 @@ func matchImpl*( regex: Regex, m: var RegexMatch, flags: static MatchFlags, - start = 0 + start = 0, + isContinuation = false ): bool {.inline.} = m.clear() var @@ -114,7 +115,8 @@ func matchImpl*( smB = newSubmatches(regex.nfa.len) smA.add((0'i16, -1'i32, start .. start-1)) when mfFindMatch in flags: - if 0 <= start-1 and start-1 <= len(text)-1: + if isContinuation and + 0 <= start-1 and start-1 <= len(text)-1: cPrev = bwRuneAt(text, start-1).int32 while i < len(text): fastRuneAt(text, i, c, true) diff --git a/tests/tests.nim b/tests/tests.nim index 363f88e..0b2b0ba 100644 --- a/tests/tests.nim +++ b/tests/tests.nim @@ -1681,26 +1681,18 @@ test "tmisc2": bazz// //""" check replace(input, re"(?m)$", "//") == expected - check(not find("foobarbar", re"^bar", m, start=3)) - check find("foobar\nbar", re"(?m)^bar", m, start=3) and + # We treat start as text[start..^1], see issue #64 + check find("foobarbar", re"^bar", m, start=3) + check find("foobarbar", re"^bar", m, start=3) and + m.boundaries == 3 .. 5 + check find("foobar\nbar", re"(?m)^bar", m, start=4) and m.boundaries == 7 .. 9 - check find("foo\nbar\nbar", re"(?m)^bar", m, start=3) and - m.boundaries == 4 .. 6 - check find("foo\nbar\nbar", re"(?m)^bar", m, start=4) and - m.boundaries == 4 .. 6 block: # The bounds must contain the empty match index check find("foo\nbar\nbar", re"(?m)^", m) and m.boundaries == 0 .. -1 check find("foo\nbar\nbar", re"(?m)^", m, start=1) and - m.boundaries == 4 .. 3 - check find("foo\nbar\nbar", re"(?m)^", m, start=4) and - m.boundaries == 4 .. 3 - check find("foo\nbar\nbar", re"(?m)^", m, start=5) and - m.boundaries == 8 .. 7 - check find("foo\nbar\nbar", re"(?m)^", m, start=8) and - m.boundaries == 8 .. 7 - check(not find("foo\nbar\nbar", re"(?m)^", m, start=9)) + m.boundaries == 1 .. 0 check find("foo\nbar\nbar", re"(?m)$", m) and m.boundaries == 3 .. 2 check find("foo\nbar\nbar", re"(?m)$", m, start=3) and @@ -1759,3 +1751,6 @@ test "tmisc2": check split("aaa", re"a") == @["", "", "", ""] check split("a\na\na", re"(?m)^") == @["a\n", "a\n", "a"] check split("\n\n", re"(?m)^") == @["\n", "\n"] + # issue #64 + check match("xabc", re"^abc$", m, start = 1) + check find("xabc", re"^abc$", m, start = 1)