diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe0444f..4e2c11f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - nim: [1.6.18, 2.0.0, 2.2.0] + nim: [1.6.18, 1.6.20, 2.0.0, 2.0.14, 2.2.0] steps: - uses: actions/checkout@v2 - name: Run Tests diff --git a/bench/bench.nim b/bench/bench.nim index 8585659..937676c 100644 --- a/bench/bench.nim +++ b/bench/bench.nim @@ -244,4 +244,8 @@ when isMainModule: # open the log with KCachegrind $ nim c --debugger:native --threads:off -d:danger -d:useMalloc -o:bin/bench2 bench/bench2.nim && valgrind --tool=callgrind -v ./bin/bench2 + +# Bench + +$ nim c -r --threads:off -d:danger --mm:arc -o:bin/bench bench/bench.nim ]# diff --git a/src/regex/nfafindall.nim b/src/regex/nfafindall.nim index 40bc480..4aa4f0c 100644 --- a/src/regex/nfafindall.nim +++ b/src/regex/nfafindall.nim @@ -19,7 +19,7 @@ type s: seq[MatchItem] i: int RegexMatches* = object - a, b: Submatches + a, b: Pstates m: Matches c: Capts look: Lookaround @@ -46,22 +46,18 @@ func add(ms: var Matches, m: MatchItem) {.inline.} = func clear(ms: var Matches) {.inline.} = ms.i = 0 -template initMaybeImpl( +func initMaybeImpl( ms: var RegexMatches, size: int -) = - if ms.a == nil: - assert ms.b == nil - ms.a = newSubmatches size - ms.b = newSubmatches size - ms.look = initLook() - doAssert ms.a.cap >= size and - ms.b.cap >= size +) {.inline.} = + ms.a.reset size + ms.b.reset size + ms.look = initLook() -template initMaybeImpl( +func initMaybeImpl( ms: var RegexMatches, regex: Regex -) = +) {.inline.} = initMaybeImpl(ms, regex.nfa.s.len) func hasMatches(ms: RegexMatches): bool {.inline.} = @@ -130,7 +126,7 @@ func submatch( while nti < L: let isEoe = ntn.kind == reEoe let nt0 = nt - matched = not smB.hasState(nt) and + matched = nt notin smB and (ntn.match(c.Rune) or ntn.kind == reEoe) inc nti captx = capt @@ -158,10 +154,10 @@ func submatch( smA.clear() if not eoeFound: eoeFound = true - smA.add (0'i16, -1.CaptIdx, i .. i-1) + smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1) smi = -1 break - smB.add (nt0, captx, bounds.a .. i-1) + smB.add initPstate(nt0, captx, bounds.a .. i-1) inc smi swap smA, smB @@ -181,7 +177,7 @@ func findSomeImpl*( i = start.int iPrev = start.int optFlag = mfFindMatchOpt in flags - smA.add (0'i16, -1.CaptIdx, i .. i-1) + smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1) if start-1 in 0 .. text.len-1: cPrev = bwRuneAt(text, start-1).int32 while i < text.len: @@ -200,7 +196,7 @@ func findSomeImpl*( # else: # XXX clear captures if optFlag: return i - smA.add (0'i16, -1.CaptIdx, i .. i-1) + smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1) iPrev = i cPrev = c.int32 submatch(ms, text, regex, iPrev, cPrev, -1'i32) diff --git a/src/regex/nfafindall2.nim b/src/regex/nfafindall2.nim index 0507cc7..e8d950b 100644 --- a/src/regex/nfafindall2.nim +++ b/src/regex/nfafindall2.nim @@ -54,28 +54,24 @@ type bounds: Bounds Matches = seq[MatchItem] RegexMatches2* = object - a, b: Submatches + a, b: Pstates m: Matches c: Capts3 look: Lookaround -template initMaybeImpl( +func initMaybeImpl( ms: var RegexMatches2, size, groupsLen: int -) = - if ms.a == nil: - assert ms.b == nil - ms.a = newSubmatches size - ms.b = newSubmatches size - ms.c = initCapts3 groupsLen - ms.look = initLook() - doAssert ms.a.cap >= size and - ms.b.cap >= size +) {.inline.} = + ms.a.reset(size) + ms.b.reset(size) + ms.c.reset(groupsLen) + ms.look = initLook() -template initMaybeImpl( +func initMaybeImpl( ms: var RegexMatches2, regex: Regex -) = +) {.inline.} = initMaybeImpl(ms, regex.nfa.s.len, regex.groupsCount) func add(ms: var RegexMatches2, m: MatchItem) {.inline.} = @@ -170,7 +166,7 @@ func nextState( while nti < L: let isEoe = ntn.kind == reEoe let nt0 = nt - matched = not smB.hasState(nt) and + matched = nt notin smB and (ntn.match(c.Rune) or ntn.kind == reEoe) inc nti captx = capt @@ -187,10 +183,10 @@ func nextState( smA.clear() if not eoeFound: eoeFound = true - smA.add (0'i16, -1.CaptIdx, i .. i-1) + smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1) smi = -1 break - smB.add (nt0, captx, bounds.a .. i-1) + smB.add initPstate(nt0, captx, bounds.a .. i-1) inc smi swap smA, smB capts.recycle() @@ -214,7 +210,7 @@ func findSomeImpl*( flags = regex.flags.toMatchFlags + flags optFlag = mfFindMatchOpt in flags binFlag = mfBytesInput in flags - smA.add (0'i16, -1.CaptIdx, i .. i-1) + smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1) if start-1 in 0 .. text.len-1: cPrev = if binFlag: text[start-1].int32 @@ -236,7 +232,7 @@ func findSomeImpl*( return i if optFlag: return i - smA.add (0'i16, -1.CaptIdx, i .. i-1) + smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1) iPrev = i cPrev = c.int32 nextState(ms, text, regex, iPrev, cPrev, -1'i32, flags) diff --git a/src/regex/nfamacro.nim b/src/regex/nfamacro.nim index 5af2fb8..5a6839d 100644 --- a/src/regex/nfamacro.nim +++ b/src/regex/nfamacro.nim @@ -38,7 +38,6 @@ type ): NimNode {.nimcall, noSideEffect, raises: [].} Lookaround = object ahead, behind: Sig - smL: NimNode # todo: can not use unicodeplus due to # https://github.com/nim-lang/Nim/issues/7059 @@ -240,9 +239,7 @@ func genLookaroundMatch( look: Lookaround ): NimNode = template nfa: untyped = n.subExp.nfa - template smL: untyped = look.smL - let smlA = quote do: lastA(`smL`) - let smlB = quote do: lastB(`smL`) + defVars smlA, smlB var flags = {mfAnchored} if n.subExp.reverseCapts: flags.incl mfReverseCapts @@ -262,10 +259,9 @@ func genLookaroundMatch( `matched` = not `matched` let nfaLenLit = newLit nfa.s.len result = quote do: - grow `smL` - `smL`.last.setLen `nfaLenLit` + var `smlA` = initPstates(`nfaLenLit`) + var `smlB` = initPstates(`nfaLenLit`) `lookaroundStmt` - removeLast `smL` func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] = doAssert not isEpsilonTransition(n) @@ -293,7 +289,7 @@ func genMatchedBody( let eTransitions = getEpsilonTransitions(nfa, n, nti) if eTransitions.len == 0: return quote do: - add(`smB`, (`ntLit`, `capt`, `bounds2`)) + add(`smB`, initPstate(`ntLit`, `capt`, `bounds2`)) var matchedBody = newSeq[NimNode]() matchedBody.add quote do: `matched` = true @@ -325,7 +321,7 @@ func genMatchedBody( doAssert false matchedBody.add quote do: if `matched`: - add(`smB`, (`ntLit`, `captx`, `bounds2`)) + add(`smB`, initPstate(`ntLit`, `captx`, `bounds2`)) return newStmtList matchedBody func genNextState( @@ -339,10 +335,10 @@ func genNextState( #[ case n of 0: - if not smB.hasState(1): + if not smB.contains(1): if c == 'a': smB.add((1, capt, bounds)) - if not smB.hasState(4): + if not smB.contains(4): if c == 'b': smB.add((4, capt, bounds)) of 1: @@ -384,11 +380,11 @@ func genNextState( i, nti, nfa, look, flags) if mfAnchored in flags and s[nt].kind == reEoe: branchBodyN.add quote do: - if not hasState(`smB`, `ntLit`): + if not contains(`smB`, `ntLit`): `matchedBodyStmt` else: branchBodyN.add quote do: - if not hasState(`smB`, `ntLit`) and `matchCond`: + if not contains(`smB`, `ntLit`) and `matchCond`: `matchedBodyStmt` doAssert eoeOnly or branchBodyN.len > 0 if branchBodyN.len > 0: @@ -418,12 +414,15 @@ func nextState( flags: set[MatchFlag], eoeOnly = false ): NimNode = - defForVars n, capt, bounds + defForVars pstate + let n = quote do: `pstate`.ni + let capt = quote do: `pstate`.ci + let bounds = quote do: `pstate`.bounds let eoeBailOut = if mfAnchored in flags: quote do: if `n` == `eoe`: - if not hasState(`smB`, `n`): - add(`smB`, (`n`, `capt`, `bounds`)) + if not contains(`smB`, `n`): + add(`smB`, initPstate(`n`, `capt`, `bounds`)) break else: newEmptyNode() @@ -433,7 +432,7 @@ func nextState( flags, eoeOnly) result = quote do: `smB`.clear() - for `n`, `capt`, `bounds` in `smA`.items: + for `pstate` in `smA`.items: `eoeBailOut` `nextStateStmt` swap `smA`, `smB` @@ -483,7 +482,7 @@ func matchImpl( if `start`-1 in 0 .. `text`.len-1: `cPrev` = bwRuneAt(`text`, `start`-1).int32 clear(`smA`) - add(`smA`, (0'i16, `captIdx`, `i` .. `i`-1)) + add(`smA`, initPstate(0'i16, `captIdx`, `i` .. `i`-1)) while `i` < `text`.len: fastRuneAt(`text`, iNext, `c`, true) `nextStateStmt` @@ -534,7 +533,7 @@ func reversedMatchImpl( if `start` in 0 .. `text`.len-1: `cPrev` = runeAt(`text`, `start`).int32 clear(`smA`) - add(`smA`, (0'i16, `captIdx`, `i` .. `i`-1)) + add(`smA`, initPstate(0'i16, `captIdx`, `i` .. `i`-1)) while iNext > 0: bwFastRuneAt(`text`, iNext, `c`) `nextStateStmt` @@ -551,11 +550,11 @@ func reversedMatchImpl( `captsStmt` `matched` = `smA`.len > 0 -template look(smL: NimNode): untyped = +template look: untyped = Lookaround( ahead: matchImpl, - behind: reversedMatchImpl, - smL: smL) + behind: reversedMatchImpl + ) template constructSubmatches2( captures, txt, capts, capt, size: untyped @@ -578,24 +577,23 @@ proc matchImpl*(text, expLit, body: NimNode): NimNode = if not (expLit.kind == nnkCallStrLit and $expLit[0] == "rex"): error "not a regex literal; only rex\"regex\" is allowed", expLit let exp = expLit[1] - defVars smA, smB, capts, capt, matched, smL + defVars smA, smB, capts, capt, matched let regex = reCt(exp.strVal) let startLit = newLit 0 let flags: set[MatchFlag] = {} let matchImplStmt = matchImpl( smA, smB, capts, capt, matched, - text, startLit, regex.nfa, look(smL), flags) + text, startLit, regex.nfa, look(), flags) let nfaLenLit = newLit regex.nfa.s.len let nfaGroupsLen = int(regex.groupsCount) result = quote do: block: var - `smA` = newSubmatches `nfaLenLit` - `smB` = newSubmatches `nfaLenLit` + `smA` = initPstates `nfaLenLit` + `smB` = initPstates `nfaLenLit` `capts` = default(Capts) `capt` = -1'i32 `matched` = false - `smL` {.used.} = default(SmLookaround) `matchImplStmt` if `matched`: var matches {.used, inject.} = newSeq[string]() diff --git a/src/regex/nfamatch.nim b/src/regex/nfamatch.nim index 7bc34f0..9df4bfa 100644 --- a/src/regex/nfamatch.nim +++ b/src/regex/nfamatch.nim @@ -10,7 +10,7 @@ import ./nfatype type AheadSig = proc ( - smA, smB: var Submatches, + smA, smB: var Pstates, capts: var Capts, captIdx: var CaptIdx, text: string, @@ -20,7 +20,7 @@ type flags: set[MatchFlag] ): bool {.nimcall, noSideEffect, raises: [].} BehindSig = proc ( - smA, smB: var Submatches, + smA, smB: var Pstates, capts: var Capts, captIdx: var CaptIdx, text: string, @@ -32,19 +32,16 @@ type Lookaround* = object ahead*: AheadSig behind*: BehindSig - smL*: SmLookaround + #smL*: SmLookaround template lookAroundTpl*: untyped {.dirty.} = - template smL: untyped = look.smL - template smLa: untyped = smL.lastA - template smLb: untyped = smL.lastB template zNfa: untyped = ntn.subExp.nfa let flags2 = if ntn.subExp.reverseCapts: {mfAnchored, mfReverseCapts} else: {mfAnchored} - smL.grow() - smL.last.setLen zNfa.s.len + var smLa = initPstates(zNfa.s.len) + var smLb = initPstates(zNfa.s.len) matched = case ntn.kind of reLookahead: look.ahead( @@ -65,24 +62,26 @@ template lookAroundTpl*: untyped {.dirty.} = else: doAssert false false - smL.removeLast() template nextStateTpl(bwMatch = false): untyped {.dirty.} = template bounds2: untyped = when bwMatch: i .. bounds.b else: bounds.a .. i-1 template nt: untyped = nfa.s[n].next[nti] template ntn: untyped = nfa.s[nt] + template n: untyped = pstate.ni + template capt: untyped = pstate.ci + template bounds: untyped = pstate.bounds smB.clear() - for n, capt, bounds in items smA: + for pstate in items smA: if anchored and nfa.s[n].kind == reEoe: - if not smB.hasState n: - smB.add (n, capt, bounds) + if n notin smB: + smB.add initPstate(n, capt, bounds) break let L = nfa.s[n].next.len var nti = 0 while nti < L: let nt0 = nt - matched = not smB.hasState(nt) and + matched = nt notin smB and (ntn.match(c) or (anchored and ntn.kind == reEoe)) inc nti captx = capt @@ -107,11 +106,11 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} = discard inc nti if matched: - smB.add (nt0, captx, bounds2) + smB.add initPstate(nt0, captx, bounds2) swap smA, smB func matchImpl( - smA, smB: var Submatches, + smA, smB: var Pstates, capts: var Capts, captIdx: var CaptIdx, text: string, @@ -131,7 +130,7 @@ func matchImpl( if start-1 in 0 .. text.len-1: cPrev = bwRuneAt(text, start-1).int32 smA.clear() - smA.add (0'i16, captIdx, i .. i-1) + smA.add initPstate(0'i16, captIdx, i .. i-1) while i < text.len: fastRuneAt(text, iNext, c, true) nextStateTpl() @@ -151,7 +150,7 @@ func matchImpl( return smA.len > 0 func reversedMatchImpl( - smA, smB: var Submatches, + smA, smB: var Pstates, capts: var Capts, captIdx: var CaptIdx, text: string, @@ -174,7 +173,7 @@ func reversedMatchImpl( if start in 0 .. text.len-1: cPrev = text.runeAt(start).int32 smA.clear() - smA.add (0'i16, captIdx, i .. i-1) + smA.add initPstate(0'i16, captIdx, i .. i-1) while iNext > limit: bwFastRuneAt(text, iNext, c) nextStateTpl(bwMatch = true) @@ -188,17 +187,17 @@ func reversedMatchImpl( if iNext > 0: bwFastRuneAt(text, iNext, c) nextStateTpl(bwMatch = true) - for n, capt, bounds in items smA: - if nfa.s[n].kind == reEoe: + for pstate in items smA: + if nfa.s[pstate.ni].kind == reEoe: if mfReverseCapts in flags: - captIdx = reverse(capts, capt, captIdx) + captIdx = reverse(capts, pstate.ci, captIdx) else: - captIdx = capt - return bounds.a + captIdx = pstate.ci + return pstate.bounds.a return -1 func reversedMatchImpl*( - smA, smB: var Submatches, + smA, smB: var Pstates, text: string, nfa: Nfa, look: var Lookaround, @@ -223,8 +222,8 @@ func matchImpl*( ): bool = m.clear() var - smA = newSubmatches(regex.nfa.s.len) - smB = newSubmatches(regex.nfa.s.len) + smA = initPstates(regex.nfa.s.len) + smB = initPstates(regex.nfa.s.len) capts = default(Capts) capt = -1.CaptIdx look = initLook() @@ -241,8 +240,8 @@ func startsWithImpl*(text: string, regex: Regex, start: int): bool = # XXX optimize mfShortestMatch, mfNoCaptures template flags: untyped = {mfAnchored, mfShortestMatch, mfNoCaptures} var - smA = newSubmatches(regex.nfa.s.len) - smB = newSubmatches(regex.nfa.s.len) + smA = initPstates(regex.nfa.s.len) + smB = initPstates(regex.nfa.s.len) capts = default(Capts) capt = -1.CaptIdx look = initLook() diff --git a/src/regex/nfamatch2.nim b/src/regex/nfamatch2.nim index d66d7c6..9abe2aa 100644 --- a/src/regex/nfamatch2.nim +++ b/src/regex/nfamatch2.nim @@ -10,7 +10,7 @@ import ./nfatype type AheadSig = proc ( - smA, smB: var Submatches, + smA, smB: var Pstates, capts: var Capts3, captIdx: var CaptIdx, text: string, @@ -20,7 +20,7 @@ type flags: MatchFlags ): bool {.nimcall, noSideEffect, raises: [].} BehindSig = proc ( - smA, smB: var Submatches, + smA, smB: var Pstates, capts: var Capts3, captIdx: var CaptIdx, text: string, @@ -32,7 +32,7 @@ type Lookaround* = object ahead*: AheadSig behind*: BehindSig - smL*: SmLookaround + #smL*: SmLookaround func lookAround( ntn: Node, @@ -43,17 +43,16 @@ func lookAround( start: int, flags: MatchFlags ): bool = - template smL: untyped = look.smL - template smLa: untyped = smL.lastA - template smLb: untyped = smL.lastB template subNfa: untyped = ntn.subExp.nfa var flags2 = {mfAnchored} if ntn.subExp.reverseCapts: flags2.incl mfReverseCapts if mfBytesInput in flags: flags2.incl mfBytesInput - smL.grow() - smL.last.setLen subNfa.s.len + # XXX store lookaround number + count, and use a fixed + # size seq to reduce allocations; use look.smL + var smLa = initPstates(subNfa.s.len) + var smLb = initPstates(subNfa.s.len) result = case ntn.kind of reLookahead: look.ahead( @@ -74,7 +73,6 @@ func lookAround( else: doAssert false false - smL.removeLast() func epsilonMatch*( matched: var bool, @@ -120,7 +118,7 @@ func epsilonMatch*( discard func nextState( - smA, smB: var Submatches, + smA, smB: var Pstates, capts: var Capts3, look: var Lookaround, text: string, @@ -136,22 +134,25 @@ func nextState( if bwMatch: i .. bounds.b else: bounds.a .. i-1 template nt: untyped = nfa[n].next[nti] template ntn: untyped = nfa[nt] + template n: untyped = pstate.ni + template capt: untyped = pstate.ci + template bounds: untyped = pstate.bounds let anchored = mfAnchored in flags var captx = 0.CaptIdx var matched = true smB.clear() - for n, capt, bounds in items smA: + for pstate in items smA: if capt != -1: capts.keepAlive capt if anchored and nfa[n].kind == reEoe: - if not smB.hasState n: - smB.add (n, capt, bounds) + if n notin smB: + smB.add initPstate(n, capt, bounds) break let L = nfa[n].next.len var nti = 0 while nti < L: let nt0 = nt - matched = not smB.hasState(nt) and + matched = nt notin smB and (ntn.match(c) or (anchored and ntn.kind == reEoe)) inc nti captx = capt @@ -162,12 +163,12 @@ func nextState( ) inc nti if matched: - smB.add (nt0, captx, bounds2) + smB.add initPstate(nt0, captx, bounds2) swap smA, smB capts.recycle() func matchImpl( - smA, smB: var Submatches, + smA, smB: var Pstates, capts: var Capts3, captIdx: var CaptIdx, text: string, @@ -190,7 +191,7 @@ func matchImpl( else: bwRuneAt(text, start-1).int32 smA.clear() - smA.add (0'i16, captIdx, i .. i-1) + smA.add initPstate(0'i16, captIdx, i .. i-1) while i < text.len: if binFlag: c = text[iNext].Rune @@ -211,7 +212,7 @@ func matchImpl( return smA.len > 0 func reversedMatchImpl( - smA, smB: var Submatches, + smA, smB: var Pstates, capts: var Capts3, captIdx: var CaptIdx, text: string, @@ -236,7 +237,7 @@ func reversedMatchImpl( else: runeAt(text, start).int32 smA.clear() - smA.add (0'i16, captIdx, i .. i-1) + smA.add initPstate(0'i16, captIdx, i .. i-1) while iNext > limit: if binFlag: c = text[iNext-1].Rune @@ -258,14 +259,14 @@ func reversedMatchImpl( else: bwFastRuneAt(text, iNext, c) nextState(smA, smB, capts, look, text, nfa, i, cPrev, c, flags, bwMatch = true) - for n, capt, bounds in items smA: - if nfa.s[n].kind == reEoe: - captIdx = capt - return bounds.a + for pstate in items smA: + if nfa.s[pstate.ni].kind == reEoe: + captIdx = pstate.ci + return pstate.bounds.a return -1 func reversedMatchImpl*( - smA, smB: var Submatches, + smA, smB: var Pstates, text: string, nfa: Nfa, look: var Lookaround, @@ -295,8 +296,8 @@ func matchImpl*( m.clear() let flags = regex.flags.toMatchFlags + flags var - smA = newSubmatches(regex.nfa.s.len) - smB = newSubmatches(regex.nfa.s.len) + smA = initPstates(regex.nfa.s.len) + smB = initPstates(regex.nfa.s.len) capts = initCapts3(regex.groupsCount) captIdx = -1.CaptIdx look = initLook() @@ -323,8 +324,8 @@ func startsWithImpl2*( # XXX optimize mfShortestMatch, mfNoCaptures let flags = regex.flags.toMatchFlags + {mfAnchored, mfShortestMatch, mfNoCaptures} var - smA = newSubmatches(regex.nfa.s.len) - smB = newSubmatches(regex.nfa.s.len) + smA = initPstates(regex.nfa.s.len) + smB = initPstates(regex.nfa.s.len) capts = initCapts3(regex.groupsCount) captIdx = -1.CaptIdx look = initLook() diff --git a/src/regex/nfatype.nim b/src/regex/nfatype.nim index ff43a8f..3fd7069 100644 --- a/src/regex/nfatype.nim +++ b/src/regex/nfatype.nim @@ -23,7 +23,7 @@ const type # XXX int16 same as max parallel states or max regex len - # but it's used by PState and the old capts + # but it's used by Pstate and the old capts CaptIdx* = int32 Capts3* = object ## Seq of captures divided into blocks @@ -67,14 +67,19 @@ template fastLog2Tpl(x: Natural): untyped = else: fastLog2(x) +func reset*(capts: var Capts3, groupsLen: int) = + capts.freezeId = stsFrozen.a + capts.s.setLen 0 + capts.states.setLen 0 + capts.free.setLen 0 + if capts.groupsLen != groupsLen: + let blockSize = max(2, nextPowerOfTwo groupsLen) + capts.groupsLen = groupsLen + capts.blockSize = blockSize + capts.blockSizeL2 = fastLog2Tpl blockSize + func initCapts3*(groupsLen: int): Capts3 = - let blockSize = max(2, nextPowerOfTwo groupsLen) - Capts3( - groupsLen: groupsLen, - blockSize: blockSize, - blockSizeL2: fastLog2Tpl blockSize, - freezeId: stsFrozen.a - ) + reset(result, groupsLen) func check(curr, next: CaptState): bool = ## Check if transition from state curr to next is allowed @@ -300,35 +305,42 @@ func clear*(m: var RegexMatch2) {.inline.} = type NodeIdx* = int16 Bounds* = Slice[int] - PState* = tuple - ni: NodeIdx - ci: CaptIdx - bounds: Bounds - Submatches* = ref object - ## Parallel states would be a better name. + Pstate* = object + ni*: NodeIdx + ci*: CaptIdx + bounds*: Bounds + # XXX this is a ref because of Nim JS bugs; it works in +2.2.0 + Pstates* = ref object ## This is a sparse set - sx: seq[PState] + sx: seq[Pstate] ss: seq[int16] si: int16 -func newSubmatches*(size: int): Submatches {.inline.} = - result = new Submatches - result.sx = newSeq[PState](8) - result.ss = newSeq[int16](size) - result.si = 0 +func initPstate*(ni: NodeIdx, ci: CaptIdx, bounds: Bounds): Pstate {.inline.} = + Pstate(ni: ni, ci: ci, bounds: bounds) when defined(release): {.push checks: off.} -func `[]`*(sm: Submatches, i: int): PState {.inline.} = +func reset*(sm: var Pstates, size: int) {.inline.} = + if sm == nil: + sm = Pstates() + sm.sx.setLen 8 + sm.ss.setLen size + sm.si = 0 + +func initPstates*(size: int): Pstates {.inline.} = + reset(result, size) + +func `[]`*(sm: Pstates, i: int): lent Pstate {.inline.} = assert i < sm.si sm.sx[i] -func hasState*(sm: Submatches, n: int16): bool {.inline.} = +func contains*(sm: Pstates, n: int16): bool {.inline.} = sm.ss[n] < sm.si and sm.sx[sm.ss[n]].ni == n -func add*(sm: var Submatches, item: PState) {.inline.} = - assert(not sm.hasState(item.ni)) +func add*(sm: var Pstates, item: sink Pstate) {.inline.} = + assert(item.ni notin sm) assert sm.si <= sm.sx.len if (sm.si == sm.sx.len).unlikely: sm.sx.setLen(sm.sx.len * 2) @@ -336,68 +348,22 @@ func add*(sm: var Submatches, item: PState) {.inline.} = sm.ss[item.ni] = sm.si sm.si += 1'i16 -func len*(sm: Submatches): int {.inline.} = +func len*(sm: Pstates): int {.inline.} = sm.si -func clear*(sm: var Submatches) {.inline.} = +func clear*(sm: var Pstates) {.inline.} = sm.si = 0 -iterator items*(sm: Submatches): PState {.inline.} = +iterator items*(sm: Pstates): lent Pstate {.inline.} = for i in 0 .. sm.len-1: yield sm.sx[i] -# does not work in Nim <= 0.20 -#iterator mitems*(sm: Submatches): var PState {.inline.} = -# for i in 0 .. sm.len-1: -# yield sm.sx[i] - -func cap*(sm: Submatches): int {.inline.} = +func cap*(sm: Pstates): int {.inline.} = sm.ss.len -func setLen*(sm: var Submatches, size: int) {.inline.} = - sm.ss.setLen size - when defined(release): {.pop.} -# XXX maybe store the lookaround number + count, and use a fixed -# size seq to reduce allocations -type - SmLookaroundItem* = object - a, b: Submatches - SmLookaround* = object - s: seq[SmLookaroundItem] - i: int - -func setLen*(item: var SmLookaroundItem, size: int) {.inline.} = - if item.a == nil: - doAssert item.b == nil - item.a = newSubmatches size - item.b = newSubmatches size - else: - doAssert item.b != nil - item.a.setLen size - item.b.setLen size - -template last*(sm: SmLookaround): untyped = - sm.s[sm.i-1] - -template lastA*(sm: SmLookaround): untyped = - last(sm).a - -template lastB*(sm: SmLookaround): untyped = - last(sm).b - -func grow*(sm: var SmLookaround) {.inline.} = - doAssert sm.i <= sm.s.len - if sm.i == sm.s.len: - sm.s.setLen(max(1, sm.s.len) * 2) - sm.i += 1 - -func removeLast*(sm: var SmLookaround) {.inline.} = - doAssert sm.i > 0 - sm.i -= 1 - when isMainModule: func `[]=`(capts: var Capts3, i, j: Natural, x: Slice[int]) = doAssert i <= capts.len-1