Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 58 additions & 46 deletions src/regex/nodematch.nim
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,45 @@ func swapCase*(r: Rune): Rune =
return
result = r.toUpper()

func matchAsciiSet(n: Node, r: Rune): bool =
  ## Match ``r`` against a set node that carries no shorthands.
  ## Only the node's code points and ranges are consulted.
  ## The outcome is the membership test for ``reInSet``, its
  ## negation for ``reNotSet``, and ``false`` for any other kind.
  assert n.shorthands.len == 0
  let isMember = r in n.cps or r in n.ranges
  case n.kind
  of reInSet: isMember
  of reNotSet: not isMember
  else: false

func matchShorthand(n: Node, r: Rune): bool =
  ## Match ``r`` against a node found in a set's shorthands.
  ## Each positive/negated pair shares a branch: the class
  ## predicate is evaluated once and flipped when the node is
  ## the negated kind. Set nodes are delegated to
  ## ``matchAsciiSet``. Any other kind fails a ``doAssert``.
  case n.kind
  of reWord, reNotAlphaNum:
    result = r.isWord() xor (n.kind == reNotAlphaNum)
  of reDigit, reNotDigit:
    result = r.isDecimal() xor (n.kind == reNotDigit)
  of reWhiteSpace, reNotWhiteSpace:
    result = r.isWhiteSpace() xor (n.kind == reNotWhiteSpace)
  of reUCC, reNotUCC:
    result = (r.unicodeCategory() in n.cc) xor (n.kind == reNotUCC)
  of reWordAscii, reNotAlphaNumAscii:
    result = r.isWordAscii() xor (n.kind == reNotAlphaNumAscii)
  of reDigitAscii, reNotDigitAscii:
    result = r.isDigitAscii() xor (n.kind == reNotDigitAscii)
  of reWhiteSpaceAscii, reNotWhiteSpaceAscii:
    result = r.isWhiteSpaceAscii() xor (n.kind == reNotWhiteSpaceAscii)
  of reInSet, reNotSet:
    result = matchAsciiSet(n, r)
  else:
    # Not a kind this function handles; always trips the assertion.
    doAssert false

func matchSet(n: Node, r: Rune): bool =
  ## Match ``r`` against a set node, including its shorthands.
  ## Code points and ranges are checked first; the shorthands are
  ## tried in order only if those miss, stopping at the first hit.
  ## The outcome is the membership test for ``reInSet``, its
  ## negation for ``reNotSet``, and ``false`` for any other kind.
  var found = r in n.cps or r in n.ranges
  if not found:
    for shorthand in n.shorthands:
      if matchShorthand(shorthand, r):
        found = true
        break
  case n.kind
  of reInSet: found
  of reNotSet: not found
  else: false

func match*(n: Node, r: Rune): bool {.inline.} =
## match for ``Node`` of matchable kind.
## Return whether the node matches
Expand All @@ -115,52 +154,25 @@ func match*(n: Node, r: Rune): bool {.inline.} =
if n.kind == reChar:
return n.cp == r
case n.kind
of reEOE:
r == invalidRune
of reWord:
r.isWord()
of reNotAlphaNum:
not r.isWord()
of reDigit:
r.isDecimal()
of reNotDigit:
not r.isDecimal()
of reWhiteSpace:
r.isWhiteSpace()
of reNotWhiteSpace:
not r.isWhiteSpace()
of reInSet, reNotSet:
var matches = (
r in n.cps or
r in n.ranges)
if not matches:
for nn in n.shorthands:
matches = nn.match(r)
if matches: break
((matches and n.kind == reInSet) or
(not matches and n.kind == reNotSet))
of reAny:
r != lineBreakRune
of reAnyNL:
true
of reCharCI:
r == n.cp or r == n.cp.swapCase()
of reWordAscii:
r.isWordAscii()
of reDigitAscii:
r.isDigitAscii()
of reWhiteSpaceAscii:
r.isWhiteSpaceAscii()
of reUCC:
r.unicodeCategory() in n.cc
of reNotAlphaNumAscii:
not r.isWordAscii()
of reNotDigitAscii:
not r.isDigitAscii()
of reNotWhiteSpaceAscii:
not r.isWhiteSpaceAscii()
of reNotUCC:
r.unicodeCategory() notin n.cc
of reEOE: r == invalidRune
of reWord: r.isWord()
of reNotAlphaNum: not r.isWord()
of reDigit: r.isDecimal()
of reNotDigit: not r.isDecimal()
of reWhiteSpace: r.isWhiteSpace()
of reNotWhiteSpace: not r.isWhiteSpace()
of reAny: r != lineBreakRune
of reAnyNL: true
of reCharCI: r == n.cp or r == n.cp.swapCase()
of reUCC: r.unicodeCategory() in n.cc
of reNotUCC: r.unicodeCategory() notin n.cc
of reWordAscii: r.isWordAscii()
of reNotAlphaNumAscii: not r.isWordAscii()
of reDigitAscii: r.isDigitAscii()
of reNotDigitAscii: not r.isDigitAscii()
of reWhiteSpaceAscii: r.isWhiteSpaceAscii()
of reNotWhiteSpaceAscii: not r.isWhiteSpaceAscii()
of reInSet, reNotSet: matchSet(n, r)
else:
assert n.kind == reChar
n.cp == r
3 changes: 1 addition & 2 deletions src/regex/parser.nim
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,7 @@ func parseSetEscapedSeq(sc: Scanner[Rune]): Node =

func parseAsciiSet(sc: Scanner[Rune]): Node =
## Parse an ascii set (i.e: ``[:ascii:]``).
## The ascii set will get expanded
## and merged with the outer set
## An expanded ascii set is returned.
let startPos = sc.pos
assert sc.peek == ":".toRune
discard sc.next()
Expand Down
2 changes: 2 additions & 0 deletions tests/tests.nim
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ proc raises(pattern: string): bool =
result = true

proc raisesMsg(pattern: string): string =
result = ""
try:
discard pattern.re()
except RegexError:
Expand Down Expand Up @@ -71,6 +72,7 @@ func findAllCapt(s: string, reg: Regex): seq[seq[seq[Slice[int]]]] =
result = map(
findAll(s, reg),
func (m: RegexMatch): seq[seq[Slice[int]]] =
result = newSeq[seq[Slice[int]]]()
for i in 0 .. m.groupsCount-1:
result.add m.group(i))

Expand Down
15 changes: 9 additions & 6 deletions tests/tests2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ proc raises(pattern: string): bool =
result = true

proc raisesMsg(pattern: string): string =
result = ""
try:
discard pattern.re2()
except RegexError:
Expand All @@ -52,7 +53,7 @@ proc raisesMsg(pattern: string): string =
proc matchWithCapt(s: string, pattern: static Regex2): seq[string] =
var m = RegexMatch2()
check match(s, pattern, m)
result.setLen m.captures.len
result = newSeq[string](m.captures.len)
for i, bounds in m.captures.pairs:
result[i] = s[bounds]

Expand All @@ -62,7 +63,7 @@ proc matchWithBounds(s: string, pattern: static Regex2): seq[Slice[int]] =
return m.captures

proc toStrCaptures(m: RegexMatch2, s: string): seq[string] =
result.setLen m.captures.len
result = newSeq[string](m.captures.len)
for i, bounds in m.captures.pairs:
result[i] = s[bounds]

Expand All @@ -75,6 +76,7 @@ func findAllCapt(s: string, reg: Regex2): seq[seq[Slice[int]]] =
result = map(
findAll(s, reg),
func (m: RegexMatch2): seq[Slice[int]] =
result = newSeq[Slice[int]]()
for i in 0 .. m.groupsCount-1:
result.add m.group(i))

Expand Down Expand Up @@ -104,17 +106,18 @@ template matchMacro(s, r: untyped): untyped =

template matchMacroCapt(s, r: untyped): untyped =
(func (): seq[string] =
result = newSeq[string]()
var m = false
let exp = s
match exp, r:
m = true
result = matches
result.add matches
check m)()

test "tmatch_macro":
block hasOwnScope:
var m = false
var matches: seq[string]
var matches = newSeq[string]()
match "abc", rex"(\w+)":
check matches == @["abc"]
m = true
Expand Down Expand Up @@ -2242,7 +2245,7 @@ test "treuse_regex_match":

test "tisInitialized":
block:
var re: Regex2
var re = default(Regex2)
check(not re.isInitialized)
re = re2"foo"
check re.isInitialized
Expand Down Expand Up @@ -3092,7 +3095,7 @@ test "tverifyutf8":
raisesInvalidUtf8 endsWith("\xff", re2"abc")
raisesInvalidUtf8 replace("\xff", re2"abc", "abc")
raisesInvalidUtf8 replace("\xff", re2"abc",
(proc (m: RegexMatch2, s: string): string = discard))
(proc (m: RegexMatch2, s: string): string = return ""))
raisesInvalidUtf8 escapeRe("\xff")

# bug: raises invalid utf8 regex in Nim 1.0 + js target
Expand Down
Loading