Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ tests/test_bug
docs/ugh
bin/*
bench/bench
config.nims
2 changes: 1 addition & 1 deletion regex.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ srcDir = "src"
skipDirs = @["tests", "bench", "docs"]

requires "nim >= 1.6.0"
requires "unicodedb >= 0.7.2"
requires "unicodedb >= 0.13.1"

template execTest(lang, target: static string) =
doAssert lang in ["c", "js"]
Expand Down
13 changes: 7 additions & 6 deletions src/regex/exptransformation.nim
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ import std/sets
import std/tables
import std/algorithm

import pkg/unicodedb/casing

import ./exptype
import ./types
import ./common
import ./scanner

# todo: can not use unicodeplus due to
# https://github.com/nim-lang/Nim/issues/7059
func swapCase(r: Rune): Rune =
# Note a character can be
# non-lower and non-upper
Expand Down Expand Up @@ -178,10 +178,12 @@ func applyFlag(n: var Node, f: Flag) =
else:
discard
of flagCaseInsensitive:
if n.kind == reChar and n.cp != n.cp.swapCase():
if n.kind == reChar and n.cp.hasCaseFolds:
n.kind = reCharCI
n.cp = n.cp.simpleCaseFold
# todo: apply recursively to
# shorthands of reInSet/reNotSet (e.g. [:ascii:])
# XXX add all casefolds that map to the cp instead of swapCase
if n.kind in {reInSet, reNotSet}:
var cps = newSeq[Rune]()
for cp in items n.cps:
Expand All @@ -190,9 +192,8 @@ func applyFlag(n: var Node, f: Flag) =
cps.add cp2
n.cps.add cps
for sl in n.ranges[0 .. ^1]:
let
cpa = sl.a.swapCase()
cpb = sl.b.swapCase()
let cpa = sl.a.swapCase()
let cpb = sl.b.swapCase()
if sl.a != cpa and sl.b != cpb:
n.ranges.add(cpa .. cpb)
of flagUnGreedy:
Expand Down
4 changes: 3 additions & 1 deletion src/regex/nfamacro.nim
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import std/tables
import std/sets
import std/algorithm

import pkg/unicodedb/casing
import pkg/unicodedb/properties
import pkg/unicodedb/types as utypes

Expand Down Expand Up @@ -124,7 +125,8 @@ func genMatch(c: NimNode, n: Node): NimNode =
quote do: true
of reCharCI:
let cp2Lit = newLit n.cp.swapCase().int32
quote do: `c` == `cpLit` or `c` == `cp2Lit`
let cp3Lit = newLit n.cp.simpleCaseFold().int32
quote do: `c` == `cpLit` or `c` == `cp2Lit` or simpleCaseFold(`c`) == Rune(`cp3Lit`)
of reWordAscii:
genWordAsciiMatch(c)
of reNotAlphaNumAscii:
Expand Down
11 changes: 2 additions & 9 deletions src/regex/nodematch.nim
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import std/unicode except `==`

import pkg/unicodedb/casing
import pkg/unicodedb/properties
import pkg/unicodedb/types as utypes

Expand Down Expand Up @@ -97,14 +98,6 @@ func isDigitAscii(r: Rune): bool {.inline.} =
else:
false

# todo: can not use unicodeplus due to
# https://github.com/nim-lang/Nim/issues/7059
func swapCase*(r: Rune): Rune =
  ## Returns `r` with its case flipped.
  ##
  ## If lowercasing changes `r`, the lowercase rune is returned;
  ## otherwise the result of uppercasing `r` is returned. A rune that
  ## is neither lower nor upper case therefore comes back unchanged,
  ## provided `toUpper` leaves it as is.
  let lowered = r.toLower()
  if lowered != r:
    # `r` was upper case: its lowercase form is the swapped rune.
    lowered
  else:
    # `r` was already lower case (or has no lowercase mapping).
    r.toUpper()

func matchAsciiSet(n: Node, r: Rune): bool =
assert n.shorthands.len == 0
result = r in n.cps or
Expand Down Expand Up @@ -162,7 +155,7 @@ func match*(n: Node, r: Rune): bool {.inline.} =
of reNotWhiteSpace: not r.isWhiteSpace()
of reAny: r != lineBreakRune
of reAnyNL: true
of reCharCI: r == n.cp or r == n.cp.swapCase()
of reCharCI: r == n.cp or n.cp == r.simpleCaseFold
of reUCC: r.unicodeCategory() in n.cc
of reNotUCC: r.unicodeCategory() notin n.cc
of reWordAscii: r.isWordAscii()
Expand Down
23 changes: 23 additions & 0 deletions tests/tests_misc.nim
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ func findAllCapt(s: string, reg: Regex2): seq[seq[Slice[int]]] =
result = map(
findAll(s, reg),
func (m: RegexMatch2): seq[Slice[int]] =
result = newSeq[Slice[int]]()
for i in 0 .. m.groupsCount-1:
result.add m.group(i))

Expand Down Expand Up @@ -696,3 +697,25 @@ test "rust_regression":
check findAllBounds(r"hiya \N{snowman} bye", re2"(\\N\{[^}]+})|([{}])") == @[5 .. 15]
check findAllCapt(r"hiya \N{snowman} bye", re2"(\\N\{[^}]+})|([{}])") ==
@[@[5 .. 15, nonCapture]]

# https://github.com/BurntSushi/rebar/pull/20
test "rebar":
  # Caseless matching of single characters: "ſ" (a two-byte UTF-8
  # character, see the `len` check below) must match "s" and "S", and
  # the other way around.
  block:
    check match("ſ", re2(r"s", {regexCaseless}))
    check match("s", re2(r"ſ", {regexCaseless}))
    check match("ſ", re2(r"S", {regexCaseless}))
    check match("S", re2(r"ſ", {regexCaseless}))
    # Match bounds are byte offsets, so "ſ" spans 0 .. 1.
    check "ſ".len == 2
    check findAllBounds("ſ", re2(r"s", {regexCaseless})) == @[0 .. 1]
    check findAllBounds("s", re2(r"ſ", {regexCaseless})) == @[0 .. 0]
    check findAllBounds("ſ", re2(r"S", {regexCaseless})) == @[0 .. 1]
    check findAllBounds("S", re2(r"ſ", {regexCaseless})) == @[0 .. 0]
    # XXX fix: the same pairs inside a character set are disabled
    # until they pass.
    #check match("s", re2(r"[ſ]", {regexCaseless}))
    #check match("ſ", re2(r"[s]", {regexCaseless}))
    # Plain ASCII letters, plus a character with no case ("@").
    check match("a", re2(r"A", {regexCaseless}))
    check match("A", re2(r"a", {regexCaseless}))
    check match("@", re2(r"@", {regexCaseless}))
    check findAllBounds("a", re2(r"A", {regexCaseless})) == @[0 .. 0]
    check findAllBounds("A", re2(r"a", {regexCaseless})) == @[0 .. 0]
    check findAllBounds("@", re2(r"@", {regexCaseless})) == @[0 .. 0]
Loading