From 0e223b00edbb430c217840327f99dbb3b9f36b0d Mon Sep 17 00:00:00 2001 From: nitely Date: Sat, 23 Nov 2024 12:32:35 -0300 Subject: [PATCH 1/8] wip --- src/regex/compiler.nim | 2 +- src/regex/nfamatch2.nim | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/regex/compiler.nim b/src/regex/compiler.nim index f025ffb..275976e 100644 --- a/src/regex/compiler.nim +++ b/src/regex/compiler.nim @@ -8,7 +8,7 @@ import ./litopt when defined(regexDotDir): import ./dotgraph -func reImpl*(s: string, flags: RegexFlags = {}): Regex {.inline.} = +func reImpl*(s: string, flags: RegexFlags = {}): Regex = if regexArbitraryBytes notin flags and verifyUtf8(s) != -1: raise newException(RegexError, "Invalid utf-8 regex") var groups: GroupsCapture diff --git a/src/regex/nfamatch2.nim b/src/regex/nfamatch2.nim index 6c8384c..6f0f437 100644 --- a/src/regex/nfamatch2.nim +++ b/src/regex/nfamatch2.nim @@ -18,7 +18,7 @@ type look: var Lookaround, start: int, flags: MatchFlags - ): bool {.noSideEffect, raises: [].} + ): bool {.nimcall, noSideEffect, raises: [].} BehindSig = proc ( smA, smB: var Submatches, capts: var Capts3, @@ -28,7 +28,7 @@ type look: var Lookaround, start, limit: int, flags: MatchFlags - ): int {.noSideEffect, raises: [].} + ): int {.nimcall, noSideEffect, raises: [].} Lookaround* = object ahead*: AheadSig behind*: BehindSig From 1fd8c6d2eb90ac5fe041b641e0c4991b8a46166b Mon Sep 17 00:00:00 2001 From: nitely Date: Sat, 23 Nov 2024 12:39:03 -0300 Subject: [PATCH 2/8] wip --- src/regex.nim | 54 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/regex.nim b/src/regex.nim index 5bdbff3..9dafcd0 100644 --- a/src/regex.nim +++ b/src/regex.nim @@ -498,7 +498,7 @@ when not defined(forceRegexAtRuntime): func re2*( s: static string, flags: static RegexFlags = {} - ): static[Regex2] {.inline.} = + ): static[Regex2] = ## Parse and compile a regular expression at compile-time toRegex2 reCt(s, flags) @@ -575,7 +575,7 @@ func match*( pattern: Regex2, m: var RegexMatch2, start = 0 -): bool {.inline, raises: [].} = +): bool {.raises: [].} = ## return a match if the whole string ## matches the regular expression. This ## is similar to ``find(text, re"^regex$", m)`` @@ -588,7 +588,7 @@ func match*( debugCheckUtf8(s, pattern) result = matchImpl(s, pattern.toRegex, m, start) -func match*(s: string, pattern: Regex2): bool {.inline, raises: [].} = +func match*(s: string, pattern: Regex2): bool {.raises: [].} = debugCheckUtf8(s, pattern) var m: RegexMatch2 result = matchImpl(s, pattern.toRegex, m) @@ -648,7 +648,7 @@ func findAll*( s: string, pattern: Regex2, start = 0 -): seq[RegexMatch2] {.inline, raises: [].} = +): seq[RegexMatch2] {.raises: [].} = for m in findAll(s, pattern, start): result.add m @@ -686,7 +686,7 @@ func findAllBounds*( s: string, pattern: Regex2, start = 0 -): seq[Slice[int]] {.inline, raises: [].} = +): seq[Slice[int]] {.raises: [].} = for m in findAllBounds(s, pattern, start): result.add m @@ -695,7 +695,7 @@ func find*( pattern: Regex2, m: var RegexMatch2, start = 0 -): bool {.inline, raises: [].} = +): bool {.raises: [].} = ## search through the string looking for the first ## location where there is a match runnableExamples: @@ -715,7 +715,7 @@ func find*( return false # XXX find shortest match; disable captures -func contains*(s: string, pattern: Regex2): bool {.inline, raises: [].} = +func contains*(s: string, pattern: Regex2): bool {.raises: [].} = runnableExamples: doAssert re2"bc" in "abcd" doAssert re2"(23)+" in "23232" @@ -751,7 +751,7 @@ iterator split*(s: string, sep: Regex2): string {.inline, raises: [].} = yield substr(s, first, last-1) first = ab.b+1 -func split*(s: string, sep: Regex2): seq[string] {.inline, raises: [].} = +func split*(s: string, sep: Regex2): seq[string] {.raises: [].} = ## return not matched substrings runnableExamples: doAssert split("11a22Ϊ33Ⓐ44弢55", re2"\d+") == @@ -760,7 +760,7 @@ func split*(s: string, sep: Regex2): seq[string] {.inline, raises: [].} = for w in split(s, sep): result.add w -func splitIncl*(s: string, sep: Regex2): seq[string] {.inline, raises: [].} = +func splitIncl*(s: string, sep: Regex2): seq[string] {.raises: [].} = ## return not matched substrings, including captured groups runnableExamples: let @@ -795,7 +795,7 @@ func startsWith*( s: string, pattern: Regex2, start = 0 -): bool {.inline, raises: [].} = +): bool {.raises: [].} = ## return whether the string ## starts with the pattern or not runnableExamples: @@ -805,7 +805,7 @@ func startsWith*( debugCheckUtf8(s, pattern) startsWithImpl2(s, pattern.toRegex, start) -func endsWith*(s: string, pattern: Regex2): bool {.inline, raises: [].} = +func endsWith*(s: string, pattern: Regex2): bool {.raises: [].} = ## return whether the string ## ends with the pattern or not runnableExamples: @@ -842,7 +842,7 @@ func replace*( pattern: Regex2, by: string, limit = 0 -): string {.inline, raises: [ValueError].} = +): string {.raises: [ValueError].} = ## Replace matched substrings. ## ## Matched groups can be accessed with ``$N`` @@ -887,7 +887,7 @@ func replace*( pattern: Regex2, by: proc (m: RegexMatch2, s: string): string, limit = 0 -): string {.inline, raises: [], effectsOf: by.} = +): string {.raises: [], effectsOf: by.} = ## Replace matched substrings. ## ## If ``limit`` is given, at most ``limit`` @@ -988,7 +988,7 @@ func re*( when not defined(forceRegexAtRuntime): func re*( s: static string - ): static[Regex] {.inline, deprecated: "use re2(static string) instead".} = + ): static[Regex] {.deprecated: "use re2(static string) instead".} = reCt(s) func toPattern*( @@ -1077,11 +1077,11 @@ func match*( pattern: Regex, m: var RegexMatch, start = 0 -): bool {.inline, raises: [], deprecated: "use match(string, Regex2, var RegexMatch2) instead".} = +): bool {.raises: [], deprecated: "use match(string, Regex2, var RegexMatch2) instead".} = debugCheckUtf8 s result = matchImpl(s, pattern, m, start) -func match*(s: string, pattern: Regex): bool {.inline, raises: [], deprecated: "use match(string, Regex2) instead".} = +func match*(s: string, pattern: Regex): bool {.raises: [], deprecated: "use match(string, Regex2) instead".} = debugCheckUtf8 s var m: RegexMatch result = matchImpl(s, pattern, m) @@ -1111,7 +1111,7 @@ func findAll*( s: string, pattern: Regex, start = 0 -): seq[RegexMatch] {.inline, raises: [], deprecated: "use findAll(string, Regex2) instead".} = +): seq[RegexMatch] {.raises: [], deprecated: "use findAll(string, Regex2) instead".} = for m in findAll(s, pattern, start): result.add m @@ -1138,17 +1138,17 @@ func findAllBounds*( s: string, pattern: Regex, start = 0 -): seq[Slice[int]] {.inline, raises: [], deprecated: "use findAllBounds(string, Regex2) instead".} = +): seq[Slice[int]] {.raises: [], deprecated: "use findAllBounds(string, Regex2) instead".} = for m in findAllBounds(s, pattern, start): result.add m func findAndCaptureAll*( s: string, pattern: Regex -): seq[string] {.inline, raises: [], deprecated: "use findAll(string, Regex2) instead".} = +): seq[string] {.raises: [], deprecated: "use findAll(string, Regex2) instead".} = for m in s.findAll(pattern): result.add s[m.boundaries] -func contains*(s: string, pattern: Regex): bool {.inline, raises: [], deprecated: "use contains(string, Regex2) instead".} = +func contains*(s: string, pattern: Regex): bool {.raises: [], deprecated: "use contains(string, Regex2) instead".} = for _ in findAllBounds(s, pattern): return true return false @@ -1158,7 +1158,7 @@ func find*( pattern: Regex, m: var RegexMatch, start = 0 -): bool {.inline, raises: [], deprecated: "use find(string, Regex2, var RegexMatch2) instead".} = +): bool {.raises: [], deprecated: "use find(string, Regex2, var RegexMatch2) instead".} = m.clear() for m2 in findAll(s, pattern, start): m.captures.add m2.captures @@ -1185,11 +1185,11 @@ iterator split*(s: string, sep: Regex): string {.inline, raises: [], deprecated: yield substr(s, first, last-1) first = ab.b+1 -func split*(s: string, sep: Regex): seq[string] {.inline, raises: [], deprecated: "use split(string, Regex2) instead".} = +func split*(s: string, sep: Regex): seq[string] {.raises: [], deprecated: "use split(string, Regex2) instead".} = for w in split(s, sep): result.add w -func splitIncl*(s: string, sep: Regex): seq[string] {.inline, raises: [], deprecated: "use splitIncl(string, Regex2) instead".} = +func splitIncl*(s: string, sep: Regex): seq[string] {.raises: [], deprecated: "use splitIncl(string, Regex2) instead".} = template ab: untyped = m.boundaries debugCheckUtf8 s var @@ -1215,7 +1215,7 @@ func splitIncl*(s: string, sep: Regex): seq[string] {.inline, raises: [], deprec func startsWith*( s: string, pattern: Regex, start = 0 -): bool {.inline, raises: [], deprecated: "use startsWith(string, Regex2) instead".} = +): bool {.raises: [], deprecated: "use startsWith(string, Regex2) instead".} = debugCheckUtf8 s startsWithImpl(s, pattern, start) @@ -1227,7 +1227,7 @@ template runeIncAt(s: string, n: var int) = else: n = s.len+1 -func endsWith*(s: string, pattern: Regex): bool {.inline, raises: [], deprecated: "use endsWith(string, Regex2) instead".} = +func endsWith*(s: string, pattern: Regex): bool {.raises: [], deprecated: "use endsWith(string, Regex2) instead".} = debugCheckUtf8 s result = false var @@ -1263,7 +1263,7 @@ func replace*( pattern: Regex, by: string, limit = 0 -): string {.inline, raises: [ValueError], deprecated: "use replace(string, Regex2, string) instead".} = +): string {.raises: [ValueError], deprecated: "use replace(string, Regex2, string) instead".} = debugCheckUtf8 s result = "" var @@ -1289,7 +1289,7 @@ func replace*( pattern: Regex, by: proc (m: RegexMatch, s: string): string, limit = 0 -): string {.inline, raises: [], effectsOf: by, deprecated: "use replace(string, Regex2, proc(RegexMatch2, string): string) instead".} = +): string {.raises: [], effectsOf: by, deprecated: "use replace(string, Regex2, proc(RegexMatch2, string): string) instead".} = debugCheckUtf8 s result = "" var i, j = 0 From 9b0d871af63459c08b1358c75cede974ce05ca57 Mon Sep 17 00:00:00 2001 From: nitely Date: Sat, 23 Nov 2024 12:52:59 -0300 Subject: [PATCH 3/8] wip --- src/regex/nfatype.nim | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/regex/nfatype.nim b/src/regex/nfatype.nim index ceb30e8..62e0c2b 100644 --- a/src/regex/nfatype.nim +++ b/src/regex/nfatype.nim @@ -263,13 +263,11 @@ when defined(js) and (NimMajor, NimMinor) >= (1, 6) and (NimMajor, NimMinor) <= #flags*: set[RegexFlag] litOpt*: LitOpt - {.push inline, noSideEffect.} - converter toRegex2*(r: Regex): Regex2 = + func toRegex2*(r: Regex): Regex2 = Regex2(nfa: r.nfa, groupsCount: r.groupsCount, namedGroups: r.namedGroups, litOpt: r.litOpt) - converter toRegex*(r: Regex2): Regex = + func toRegex*(r: Regex2): Regex = Regex(nfa: r.nfa, groupsCount: r.groupsCount, namedGroups: r.namedGroups, litOpt: r.litOpt) - {.pop.} else: type Regex2* = distinct Regex From 9f3ab57a2fe5b2226c8152fc53f0ba6624fbb893 Mon Sep 17 00:00:00 2001 From: nitely Date: Sat, 23 Nov 2024 13:33:33 -0300 Subject: [PATCH 4/8] wip --- src/regex.nim | 2 +- src/regex/nfatype.nim | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/regex.nim b/src/regex.nim index 9dafcd0..aea028b 100644 --- a/src/regex.nim +++ b/src/regex.nim @@ -674,7 +674,7 @@ iterator findAllBounds*( let flags = {mfNoCaptures} while i <= len(s): doAssert(i > i2); i2 = i - i = findSomeOptTpl(s, pattern.toRegex, ms, i, flags) + i = findSomeOptTpl(s, pattern.toRegex, ms, i) #debugEcho i if i < 0: break for ab in ms.bounds: diff --git a/src/regex/nfatype.nim b/src/regex/nfatype.nim index 62e0c2b..f4bdb2f 100644 --- a/src/regex/nfatype.nim +++ b/src/regex/nfatype.nim @@ -49,10 +49,9 @@ func `[]`*(capts: var Capts3, i, j: Natural): var Slice[int] {.inline.} = doAssert j <= capts.blockSize-1 result = capts.s[(i shl capts.blockSizeL2) + j] # i * blockSize -func `[]=`(capts: var Capts3, i, j: Natural, x: Slice[int]) {.inline.} = - doAssert i <= capts.len-1 - doAssert j <= capts.blockSize-1 - capts.s[(i shl capts.blockSizeL2) + j] = x +func blockIdx(capts: Capts3, blockNum: Natural): int {.inline.} = + assert blockNum <= capts.len-1 + blockNum shl capts.blockSizeL2 when defined(js): func jsLog2(x: Natural): int {.importjs: "Math.log2(@)".} @@ -133,18 +132,21 @@ func unfreeze*(capts: var Capts3, freezeId: CaptState) = func diverge*(capts: var Capts3, captIdx: CaptIdx): CaptIdx = if capts.free.len > 0: result = capts.free.pop - for i in 0 .. capts.blockSize-1: - capts[result, i] = nonCapture capts.states[result].to stsInitial else: result = capts.len.CaptIdx - for _ in 0 .. capts.blockSize-1: - capts.s.add nonCapture + capts.s.setLen(capts.s.len+capts.blockSize) capts.states.add stsInitial doAssert result == capts.states.len-1 if captIdx != -1: + let idx = capts.blockIdx(result) + let cidx = capts.blockIdx(captIdx) + for i in 0 .. capts.blockSize-1: + capts.s[idx+i] = capts.s[cidx+i] + else: + let idx = capts.blockIdx(result) for i in 0 .. capts.blockSize-1: - capts[result, i] = capts[captIdx, i] + capts.s[idx+i] = nonCapture func recycle*(capts: var Capts3) = ## Free recyclable entries @@ -372,13 +374,13 @@ func setLen*(item: var SmLookaroundItem, size: int) {.inline.} = item.a.setLen size item.b.setLen size -template last*(sm: var SmLookaround): untyped = +template last*(sm: SmLookaround): untyped = sm.s[sm.i-1] -template lastA*(sm: var SmLookaround): untyped = +template lastA*(sm: SmLookaround): untyped = last(sm).a -template lastB*(sm: var SmLookaround): untyped = +template lastB*(sm: SmLookaround): untyped = last(sm).b func grow*(sm: var SmLookaround) {.inline.} = From ffbfaa1ef9f8f9bfffd26d70f6158ca4b25d64e5 Mon Sep 17 00:00:00 2001 From: nitely Date: Sat, 23 Nov 2024 13:38:43 -0300 Subject: [PATCH 5/8] wip --- src/regex/nfatype.nim | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/regex/nfatype.nim b/src/regex/nfatype.nim index f4bdb2f..76f1947 100644 --- a/src/regex/nfatype.nim +++ b/src/regex/nfatype.nim @@ -394,6 +394,10 @@ func removeLast*(sm: var SmLookaround) {.inline.} = sm.i -= 1 when isMainModule: + func `[]=`(capts: var Capts3, i, j: Natural, x: Slice[int]) = + doAssert i <= capts.len-1 + doAssert j <= capts.blockSize-1 + capts.s[(i shl capts.blockSizeL2) + j] = x block: var capts = initCapts3(2) doAssert capts.len == 0 From d29157239ff797fa81a8a08ecd11156ee53aebd6 Mon Sep 17 00:00:00 2001 From: nitely Date: Sat, 23 Nov 2024 17:41:08 -0300 Subject: [PATCH 6/8] wip --- src/regex.nim | 2 +- src/regex/exptransformation.nim | 7 ++++--- src/regex/nfa.nim | 7 ++++--- src/regex/nfatype.nim | 3 +-- src/regex/nodematch.nim | 19 ++++++++++++------- src/regex/parser.nim | 7 ++++--- 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/regex.nim b/src/regex.nim index aea028b..9dafcd0 100644 --- a/src/regex.nim +++ b/src/regex.nim @@ -674,7 +674,7 @@ iterator findAllBounds*( let flags = {mfNoCaptures} while i <= len(s): doAssert(i > i2); i2 = i - i = findSomeOptTpl(s, pattern.toRegex, ms, i) + i = findSomeOptTpl(s, pattern.toRegex, ms, i, flags) #debugEcho i if i < 0: break for ab in ms.bounds: diff --git a/src/regex/exptransformation.nim b/src/regex/exptransformation.nim index 520848a..b643e5a 100644 --- a/src/regex/exptransformation.nim +++ b/src/regex/exptransformation.nim @@ -20,9 +20,10 @@ func swapCase(r: Rune): Rune = else: result = r -func check(cond: bool, msg: string) = - if not cond: - raise newException(RegexError, msg) +template check(cond, msg: untyped): untyped = + {.line: instantiationInfo(fullPaths = true).}: + if not cond: + raise newException(RegexError, msg) func fixEmptyOps(exp: Exp): Exp = ## Handle "|", "(|)", "a|", "|b", "||", "a||b", ... diff --git a/src/regex/nfa.nim b/src/regex/nfa.nim index 312e9b8..e31534a 100644 --- a/src/regex/nfa.nim +++ b/src/regex/nfa.nim @@ -4,9 +4,10 @@ import std/algorithm import ./types import ./common -func check(cond: bool, msg: string) = - if not cond: - raise newException(RegexError, msg) +template check(cond, msg: untyped): untyped = + {.line: instantiationInfo(fullPaths = true).}: + if not cond: + raise newException(RegexError, msg) type End = seq[int16] diff --git a/src/regex/nfatype.nim b/src/regex/nfatype.nim index 76f1947..981a1ef 100644 --- a/src/regex/nfatype.nim +++ b/src/regex/nfatype.nim @@ -138,13 +138,12 @@ func diverge*(capts: var Capts3, captIdx: CaptIdx): CaptIdx = capts.s.setLen(capts.s.len+capts.blockSize) capts.states.add stsInitial doAssert result == capts.states.len-1 + let idx = capts.blockIdx(result) if captIdx != -1: - let idx = capts.blockIdx(result) let cidx = capts.blockIdx(captIdx) for i in 0 .. capts.blockSize-1: capts.s[idx+i] = capts.s[cidx+i] else: - let idx = capts.blockIdx(result) for i in 0 .. capts.blockSize-1: capts.s[idx+i] = nonCapture diff --git a/src/regex/nodematch.nim b/src/regex/nodematch.nim index f4cfc12..b03bf65 100644 --- a/src/regex/nodematch.nim +++ b/src/regex/nodematch.nim @@ -1,4 +1,4 @@ -import std/unicode +import std/unicode except `==` import std/sets import pkg/unicodedb/properties @@ -7,6 +7,9 @@ import pkg/unicodedb/types as utypes import ./types import ./common +func `==`(a, b: Rune): bool {.inline.} = + a.int32 == b.int32 + func isWord(r: Rune): bool {.inline.} = utmWord in unicodeTypes(r) @@ -16,7 +19,7 @@ func isDecimal(r: Rune): bool {.inline.} = func isWordAscii(r: Rune): bool {.inline.} = ## return ``true`` if the given ## rune is in ``[A-Za-z0-9]`` range - case r.int + case r.int32 of 'A'.ord .. 'Z'.ord, 'a'.ord .. 'z'.ord, '0'.ord .. '9'.ord, @@ -26,8 +29,8 @@ func isWordAscii(r: Rune): bool {.inline.} = false template isWordBoundaryImpl(r, nxt, isWordProc): bool = - (r.int > -1 and isWordProc(r)) xor - (nxt.int > -1 and isWordProc(nxt)) + (r.int32 > -1 and isWordProc(r)) xor + (nxt.int32 > -1 and isWordProc(nxt)) func isWordBoundary(r: Rune, nxt: Rune): bool {.inline.} = ## check if current match @@ -77,7 +80,7 @@ func isWhiteSpace(r: Rune): bool {.inline.} = utmWhiteSpace in unicodeTypes(r) func isWhiteSpaceAscii(r: Rune): bool {.inline.} = - case r.int + case r.int32 of ' '.ord, '\t'.ord, '\L'.ord, @@ -89,7 +92,7 @@ func isWhiteSpaceAscii(r: Rune): bool {.inline.} = false func isDigitAscii(r: Rune): bool {.inline.} = - case r.int + case r.int32 of '0'.ord .. '9'.ord: true else: @@ -107,8 +110,10 @@ func match*(n: Node, r: Rune): bool {.inline.} = ## match for ``Node`` of matchable kind. ## Return whether the node matches ## the current character or not - if r.int < 0: + if r.int32 < 0: return n.kind == reEOE + if n.kind == reChar: + return n.cp == r case n.kind of reEOE: r == invalidRune diff --git a/src/regex/parser.nim b/src/regex/parser.nim index f067505..fe5ba3d 100644 --- a/src/regex/parser.nim +++ b/src/regex/parser.nim @@ -10,9 +10,10 @@ import ./types import ./common import ./scanner -func check(cond: bool, msg: string) {.inline.} = - if not cond: - raise newException(RegexError, msg) +template check(cond, msg: untyped): untyped = + {.line: instantiationInfo(fullPaths = true).}: + if not cond: + raise newException(RegexError, msg) func isAsciiPrintable(s: string): bool = result = true From 0d9db5b3b2fb8536f23d83c97d733e41ce367446 Mon Sep 17 00:00:00 2001 From: nitely Date: Sat, 23 Nov 2024 17:50:37 -0300 Subject: [PATCH 7/8] wip --- src/regex/parser.nim | 66 +++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/src/regex/parser.nim b/src/regex/parser.nim index fe5ba3d..7bd5332 100644 --- a/src/regex/parser.nim +++ b/src/regex/parser.nim @@ -10,11 +10,6 @@ import ./types import ./common import ./scanner -template check(cond, msg: untyped): untyped = - {.line: instantiationInfo(fullPaths = true).}: - if not cond: - raise newException(RegexError, msg) - func isAsciiPrintable(s: string): bool = result = true for c in s.runes: @@ -24,33 +19,42 @@ func isAsciiPrintable(s: string): bool = else: return false -func check(cond: bool, msg: string, at: int, exp: string) = - if not cond: - # todo: overflow checks - const spaces = repeat(' ', "\n".len) - var exp = exp.replace("\n", spaces) - var start = max(0, at-15) - var mark = at - var expMsg = msg - expMsg.add("\n") - if not exp.runeSubStr(start, at-1).isAsciiPrintable: - start = at-1 - let cleft = "~$# chars~" %% $start - mark = cleft.len+1 - expMsg.add(cleft) - elif start > 0: - let cleft = "~$# chars~" %% $start - mark = cleft.len+15 - expMsg.add(cleft) - expMsg.add(exp.runeSubStr(start, 30)) - if start+30 < exp.len: - expMsg.add("~$# chars~" %% $(exp.len - start - 30)) - expMsg.add("\n") - expMsg.add(strutils.align("^", mark)) - raise newException(RegexError, expMsg) +template check(cond, msg: untyped): untyped = + {.line: instantiationInfo(fullPaths = true).}: + if not cond: + raise newException(RegexError, msg) + +func formatMsg(msg: string, at: int, exp: string): string = + # todo: overflow checks + const spaces = repeat(' ', "\n".len) + var exp = exp.replace("\n", spaces) + var start = max(0, at-15) + var mark = at + var expMsg = msg + expMsg.add("\n") + if not exp.runeSubStr(start, at-1).isAsciiPrintable: + start = at-1 + let cleft = "~$# chars~" %% $start + mark = cleft.len+1 + expMsg.add(cleft) + elif start > 0: + let cleft = "~$# chars~" %% $start + mark = cleft.len+15 + expMsg.add(cleft) + expMsg.add(exp.runeSubStr(start, 30)) + if start+30 < exp.len: + expMsg.add("~$# chars~" %% $(exp.len - start - 30)) + expMsg.add("\n") + expMsg.add(strutils.align("^", mark)) -template prettyCheck(cond: bool, msg: string) {.dirty.} = - check(cond, msg, startPos, sc.raw) +template check(cond, msg, at, exp: untyped): untyped = + {.line: instantiationInfo(fullPaths = true).}: + if not cond: + raise newException(RegexError, formatMsg(msg, at, exp)) + +template prettyCheck(cond, msg: untyped): untyped {.dirty.} = + {.line: instantiationInfo(fullPaths = true).}: + check(cond, msg, startPos, sc.raw) func toShorthandNode(r: Rune): Node = ## the given character must be a shorthand or From d3c4d9645538dadd3ba2ba9876233faa1419d685 Mon Sep 17 00:00:00 2001 From: nitely Date: Sat, 23 Nov 2024 17:55:07 -0300 Subject: [PATCH 8/8] wip --- src/regex/parser.nim | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/regex/parser.nim b/src/regex/parser.nim index 7bd5332..4e41af0 100644 --- a/src/regex/parser.nim +++ b/src/regex/parser.nim @@ -27,25 +27,25 @@ template check(cond, msg: untyped): untyped = func formatMsg(msg: string, at: int, exp: string): string = # todo: overflow checks const spaces = repeat(' ', "\n".len) - var exp = exp.replace("\n", spaces) + let exp = exp.replace("\n", spaces) var start = max(0, at-15) var mark = at - var expMsg = msg - expMsg.add("\n") + result = msg + result.add("\n") if not exp.runeSubStr(start, at-1).isAsciiPrintable: start = at-1 let cleft = "~$# chars~" %% $start mark = cleft.len+1 - expMsg.add(cleft) + result.add(cleft) elif start > 0: let cleft = "~$# chars~" %% $start mark = cleft.len+15 - expMsg.add(cleft) - expMsg.add(exp.runeSubStr(start, 30)) + result.add(cleft) + result.add(exp.runeSubStr(start, 30)) if start+30 < exp.len: - expMsg.add("~$# chars~" %% $(exp.len - start - 30)) - expMsg.add("\n") - expMsg.add(strutils.align("^", mark)) + result.add("~$# chars~" %% $(exp.len - start - 30)) + result.add("\n") + result.add(strutils.align("^", mark)) template check(cond, msg, at, exp: untyped): untyped = {.line: instantiationInfo(fullPaths = true).}: