Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 40 additions & 29 deletions src/regex.nim
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ This means the whole text needs to match the regex for this function to return `
.. code-block:: nim
:test:
let text = "nim c --styleCheck:hint --colors:off regex.nim"
var m: RegexMatch2
var m = RegexMatch2()
if match(text, re2"nim c (?:--(\w+:\w+) *)+ (\w+).nim", m):
doAssert text[m.group(0)] == "colors:off"
doAssert text[m.group(1)] == "regex"
Expand Down Expand Up @@ -229,7 +229,7 @@ match a given regex.
"""
var match = ""
var capture = ""
var m: RegexMatch2
var m = RegexMatch2()
if find(text, re2"(\w+)@\w+\.\w+", m):
match = text[m.boundaries]
capture = text[m.group(0)]
Expand Down Expand Up @@ -509,7 +509,7 @@ func group*(m: RegexMatch2, i: int): Slice[int] {.inline, raises: [].} =
## and they are included same as in PCRE.
runnableExamples:
let text = "abc"
var m: RegexMatch2
var m = RegexMatch2()
doAssert text.match(re2"(\w)+", m)
doAssert text[m.group(0)] == "c"

Expand All @@ -521,7 +521,7 @@ func group*(
## return slices for a given named group
runnableExamples:
let text = "abc"
var m: RegexMatch2
var m = RegexMatch2()
doAssert text.match(re2"(?P<foo>\w)+", m)
doAssert text[m.group("foo")] == "c"

Expand All @@ -530,7 +530,7 @@ func group*(
func groupsCount*(m: RegexMatch2): int {.inline, raises: [].} =
## return the number of capturing groups
runnableExamples:
var m: RegexMatch2
var m = RegexMatch2()
doAssert "ab".match(re2"(a)(b)", m)
doAssert m.groupsCount == 2

Expand All @@ -540,7 +540,7 @@ func groupNames*(m: RegexMatch2): seq[string] {.inline, raises: [].} =
## return the names of capturing groups.
runnableExamples:
let text = "hello world"
var m: RegexMatch2
var m = RegexMatch2()
doAssert text.match(re2"(?P<greet>hello) (?P<who>world)", m)
doAssert m.groupNames == @["greet", "who"]

Expand Down Expand Up @@ -581,7 +581,7 @@ func match*(
## is similar to ``find(text, re"^regex$", m)``
## but has better performance
runnableExamples:
var m: RegexMatch2
var m = RegexMatch2()
doAssert "abcd".match(re2"abcd", m)
doAssert not "abcd".match(re2"abc", m)

Expand All @@ -590,7 +590,7 @@ func match*(

func match*(s: string, pattern: Regex2): bool {.raises: [].} =
  ## Return whether `s` matches `pattern`, for callers that do not
  ## need the match data.
  ##
  ## The result is whatever `matchImpl` reports for `s` and
  ## `pattern.toRegex`.
  debugCheckUtf8(s, pattern)
  # Declared exactly once and explicitly initialized; it is only a
  # scratch argument for `matchImpl` and is dropped on return.
  var m = RegexMatch2()
  result = matchImpl(s, pattern.toRegex, m)

when defined(noRegexOpt):
Expand Down Expand Up @@ -631,8 +631,8 @@ iterator findAll*(
debugCheckUtf8(s, pattern)
var i = start
var i2 = start-1
var m: RegexMatch2
var ms: RegexMatches2
var m = RegexMatch2()
var ms = RegexMatches2()
while i <= len(s):
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, pattern.toRegex, ms, i)
Expand All @@ -649,6 +649,7 @@ func findAll*(
pattern: Regex2,
start = 0
): seq[RegexMatch2] {.raises: [].} =
result = newSeq[RegexMatch2]()
for m in findAll(s, pattern, start):
result.add m

Expand All @@ -670,7 +671,7 @@ iterator findAllBounds*(
debugCheckUtf8(s, pattern)
var i = start
var i2 = start-1
var ms: RegexMatches2
var ms = RegexMatches2()
let flags = {mfNoCaptures}
while i <= len(s):
doAssert(i > i2); i2 = i
Expand All @@ -687,6 +688,7 @@ func findAllBounds*(
pattern: Regex2,
start = 0
): seq[Slice[int]] {.raises: [].} =
result = newSeq[Slice[int]]()
for m in findAllBounds(s, pattern, start):
result.add m

Expand All @@ -699,7 +701,7 @@ func find*(
## search through the string looking for the first
## location where there is a match
runnableExamples:
var m: RegexMatch2
var m = RegexMatch2()
doAssert "abcd".find(re2"bc", m) and
m.boundaries == 1 .. 2
doAssert not "abcd".find(re2"de", m)
Expand Down Expand Up @@ -738,7 +740,7 @@ iterator split*(s: string, sep: Regex2): string {.inline, raises: [].} =
first, last, i = 0
i2 = -1
done = false
ms: RegexMatches2
ms = RegexMatches2()
flags = {mfNoCaptures}
while not done:
doAssert(i > i2); i2 = i
Expand All @@ -757,6 +759,7 @@ func split*(s: string, sep: Regex2): seq[string] {.raises: [].} =
doAssert split("11a22Ϊ33Ⓐ44弢55", re2"\d+") ==
@["", "a", "Ϊ", "Ⓐ", "弢", ""]

result = newSeq[string]()
for w in split(s, sep):
result.add w

Expand All @@ -770,12 +773,13 @@ func splitIncl*(s: string, sep: Regex2): seq[string] {.raises: [].} =

template ab: untyped = m.boundaries
debugCheckUtf8(s, sep)
result = newSeq[string]()
var
first, last, i = 0
i2 = -1
done = false
m: RegexMatch2
ms: RegexMatches2
m = RegexMatch2()
ms = RegexMatches2()
while not done:
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, sep.toRegex, ms, i)
Expand Down Expand Up @@ -967,7 +971,7 @@ proc toString(
proc toString(pattern: Regex2): string {.used.} =
  ## NFA to string representation.
  ## For debugging purposes
  # Declared exactly once, starting as the empty set; it is passed,
  # together with index 0, to the three-argument `toString` overload.
  var visited: set[int16] = {}
  result = pattern.toString(0, visited)

#
Expand Down Expand Up @@ -1013,6 +1017,7 @@ func group*(
func groupFirstCapture*(
  m: RegexMatch, i: int, text: string
): string {.inline, raises: [], deprecated.} =
  ## Slice of `text` for the first bounds yielded by `m.group(i)`;
  ## the empty string when nothing is yielded.
  result = ""
  for firstBounds in m.group(i):
    # Only the first item is wanted, so stop right after taking it.
    result = text[firstBounds]
    break

Expand Down Expand Up @@ -1083,7 +1088,7 @@ func match*(

func match*(s: string, pattern: Regex): bool {.raises: [], deprecated: "use match(string, Regex2) instead".} =
  ## Deprecated overload: return whether `s` matches `pattern`
  ## without exposing the match data.
  ##
  ## The result is whatever `matchImpl` reports for `s` and `pattern`.
  debugCheckUtf8 s
  # Declared exactly once and explicitly initialized; it is only a
  # scratch argument for `matchImpl` and is dropped on return.
  var m = RegexMatch()
  result = matchImpl(s, pattern, m)

iterator findAll*(
Expand All @@ -1094,8 +1099,8 @@ iterator findAll*(
debugCheckUtf8 s
var i = start
var i2 = start-1
var m: RegexMatch
var ms: RegexMatches
var m = RegexMatch()
var ms = RegexMatches()
while i <= len(s):
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, pattern, ms, i)
Expand All @@ -1112,6 +1117,7 @@ func findAll*(
pattern: Regex,
start = 0
): seq[RegexMatch] {.raises: [], deprecated: "use findAll(string, Regex2) instead".} =
result = newSeq[RegexMatch]()
for m in findAll(s, pattern, start):
result.add m

Expand All @@ -1123,7 +1129,7 @@ iterator findAllBounds*(
debugCheckUtf8 s
var i = start
var i2 = start-1
var ms: RegexMatches
var ms = RegexMatches()
while i <= len(s):
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, pattern, ms, i)
Expand All @@ -1139,12 +1145,14 @@ func findAllBounds*(
pattern: Regex,
start = 0
): seq[Slice[int]] {.raises: [], deprecated: "use findAllBounds(string, Regex2) instead".} =
result = newSeq[Slice[int]]()
for m in findAllBounds(s, pattern, start):
result.add m

func findAndCaptureAll*(
  s: string, pattern: Regex
): seq[string] {.raises: [], deprecated: "use findAll(string, Regex2) instead".} =
  ## Collect, in iteration order, the substring of `s` covered by the
  ## boundaries of each match produced by `findAll(s, pattern)`.
  result = newSeq[string]()
  for found in findAll(s, pattern):
    let span = found.boundaries
    result.add(s[span])

Expand Down Expand Up @@ -1173,7 +1181,7 @@ iterator split*(s: string, sep: Regex): string {.inline, raises: [], deprecated:
first, last, i = 0
i2 = -1
done = false
ms: RegexMatches
ms = RegexMatches()
while not done:
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, sep, ms, i)
Expand All @@ -1186,18 +1194,20 @@ iterator split*(s: string, sep: Regex): string {.inline, raises: [], deprecated:
first = ab.b+1

func split*(s: string, sep: Regex): seq[string] {.raises: [], deprecated: "use split(string, Regex2) instead".} =
  ## Gather every piece yielded by the `split` iterator for `s` and
  ## `sep` into a seq, preserving the iteration order.
  result = newSeq[string]()
  for piece in s.split(sep):
    result.add(piece)

func splitIncl*(s: string, sep: Regex): seq[string] {.raises: [], deprecated: "use splitIncl(string, Regex2) instead".} =
template ab: untyped = m.boundaries
debugCheckUtf8 s
result = newSeq[string]()
var
first, last, i = 0
i2 = -1
done = false
m: RegexMatch
ms: RegexMatches
m = RegexMatch()
ms = RegexMatches()
while not done:
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, sep, ms, i)
Expand Down Expand Up @@ -1231,7 +1241,7 @@ func endsWith*(s: string, pattern: Regex): bool {.raises: [], deprecated: "use e
debugCheckUtf8 s
result = false
var
m: RegexMatch
m = default(RegexMatch)
i = 0
while i < s.len:
result = match(s, pattern, m, i)
Expand Down Expand Up @@ -1329,7 +1339,7 @@ proc toString(
proc toString(pattern: Regex): string {.used.} =
  ## NFA to string representation.
  ## For debugging purposes
  # Declared exactly once, starting as the empty set; it is passed,
  # together with index 0, to the three-argument `toString` overload.
  var visited: set[int16] = {}
  result = pattern.toString(0, visited)

{.pop.} # {.push warning[Deprecated]: off.}
Expand All @@ -1340,7 +1350,7 @@ when isMainModule:
import ./regex/dotgraph

func toAtoms(s: string): string =
var groups: GroupsCapture
var groups = default(GroupsCapture)
let atoms = s
.parse
.toAtoms(groups)
Expand Down Expand Up @@ -1403,7 +1413,7 @@ when isMainModule:
doAssert r"[[:xdigit:]]".toAtoms == "[[0-9a-fA-F]]"
doAssert r"[[:alpha:][:digit:]]".toAtoms == "[[a-zA-Z][0-9]]"

var m: RegexMatch2
var m = RegexMatch2()
#doAssert match("abc", re2(r"abc", {reAscii}), m)
doAssert match("abc", re2"abc", m)
doAssert match("ab", re2"a(b|c)", m)
Expand Down Expand Up @@ -1571,6 +1581,7 @@ when isMainModule:

# subset of tests.nim
proc raisesMsg(pattern: string): string =
result = ""
try:
discard re2(pattern)
except RegexError:
Expand All @@ -1582,7 +1593,7 @@ when isMainModule:
(proc() = body)()

test:
var m: RegexMatch2
var m = RegexMatch2()
doAssert match("ac", re2"a(b|c)", m)
doAssert(not match("ad", re2"a(b|c)", m))
doAssert match("ab", re2"(ab)*", m)
Expand Down Expand Up @@ -1692,7 +1703,7 @@ when isMainModule:
m.captures == @[0 .. 3, reNonCapture, reNonCapture]
block:
var m = false
var matches: seq[string]
var matches = newSeq[string]()
match "abc", rex"(\w+)":
doAssert matches == @["abc"]
m = true
Expand Down
1 change: 1 addition & 0 deletions src/regex/common.nim
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ type
func verifyUtf8*(s: string): int =
## Return `-1` if `s` is a valid utf-8 string.
## Otherwise, return the index of the first bad char.
result = -1
var state = vusStart
var i = 0
let L = s.len
Expand Down
2 changes: 1 addition & 1 deletion src/regex/compiler.nim
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ when defined(regexDotDir):
func reImpl*(s: string, flags: RegexFlags = {}): Regex =
if regexArbitraryBytes notin flags and verifyUtf8(s) != -1:
raise newException(RegexError, "Invalid utf-8 regex")
var groups: GroupsCapture
var groups = default(GroupsCapture)
let rpn = s
.parse(flags)
.transformExp(groups, flags)
Expand Down
1 change: 1 addition & 0 deletions src/regex/dotgraph.nim
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import ./types
func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] =
doAssert not isEpsilonTransition(n)
doAssert nti <= n.next.len-1
result = newSeq[int]()
for i in nti+1 .. n.next.len-1:
if not isEpsilonTransition(nfa.s[n.next[i]]):
break
Expand Down
Loading