From 029d04909bccd79469f77e02006ebb6dda32a3e3 Mon Sep 17 00:00:00 2001
From: nitely <ecastroborsani@gmail.com>
Date: Fri, 25 Aug 2023 05:15:04 -0300
Subject: [PATCH 1/9] ORC speedups

---
 bench/bench.nim         | 240 ++++++++++++++++++++--------------------
 bench/bench2.nim        |  23 ++++
 src/regex/nfa.nim       |  44 ++++----
 src/regex/nfamatch2.nim |  54 +++++----
 4 files changed, 198 insertions(+), 163 deletions(-)
 create mode 100644 bench/bench2.nim

diff --git a/bench/bench.nim b/bench/bench.nim
index 09967d9..8d48ec1 100644
--- a/bench/bench.nim
+++ b/bench/bench.nim
@@ -29,159 +29,163 @@ benchRelative(regex_sol, m):
     discard regex.match(text, pattern4, m2)
   doNotOptimizeAway(m2)
 
-benchRelative(regex_macro_sol, m):
-  var d: bool
-  for i in 0 ..< m:
-    regex.match text, regex.rex"\w*sol\w*":
-      d = true
-  doNotOptimizeAway(d)
+when isMainModule:
+  runBenchmarks()
 
-var dummyTextNums = """650-253-0001"""
+when false:
+  benchRelative(regex_macro_sol, m):
+    var d: bool
+    for i in 0 ..< m:
+      regex.match text, regex.rex"\w*sol\w*":
+        d = true
+    doNotOptimizeAway(d)
 
-var pattern_nums = re.re"^[0-9]+-[0-9]+-[0-9]+$"
+  var dummyTextNums = """650-253-0001"""
 
-bench(re_nums, m):
-  var d: bool
-  for i in 0 ..< m:
-    d = re.match(dummyTextNums, pattern_nums)
-  doNotOptimizeAway(d)
+  var pattern_nums = re.re"^[0-9]+-[0-9]+-[0-9]+$"
 
-const n_pattern_nums = regex.re2"[0-9]+-[0-9]+-[0-9]+"
-
-benchRelative(regex_nums, m):
-  var m2: regex.RegexMatch2
-  for i in 0 ..< m:
-    discard regex.match(dummyTextNums, n_pattern_nums, m2)
-  doNotOptimizeAway(m2)
+  bench(re_nums, m):
+    var d: bool
+    for i in 0 ..< m:
+      d = re.match(dummyTextNums, pattern_nums)
+    doNotOptimizeAway(d)
 
-benchRelative(regex_macro_nums, m):
-  var d: bool
-  for i in 0 ..< m:
-    regex.match text, regex.rex"[0-9]+-[0-9]+-[0-9]+":
-      d = true
-  doNotOptimizeAway(d)
+  const n_pattern_nums = regex.re2"[0-9]+-[0-9]+-[0-9]+"
 
-var pattern_nums2 = re.re"^[0-9]+..*$"
+  benchRelative(regex_nums, m):
+    var m2: regex.RegexMatch2
+    for i in 0 ..< m:
+      discard regex.match(dummyTextNums, n_pattern_nums, m2)
+    doNotOptimizeAway(m2)
 
-bench(re_nums2, m):
-  var d: bool
-  for i in 0 ..< m:
-    d = re.match(dummyTextNums, pattern_nums2)
-  doNotOptimizeAway(d)
+  benchRelative(regex_macro_nums, m):
+    var d: bool
+    for i in 0 ..< m:
+      regex.match text, regex.rex"[0-9]+-[0-9]+-[0-9]+":
+        d = true
+    doNotOptimizeAway(d)
 
-const n_pattern_nums2 = regex.re2"[0-9]+..*"
+  var pattern_nums2 = re.re"^[0-9]+..*$"
 
-benchRelative(regex_nums2, m):
-  var m3: regex.RegexMatch2
-  for i in 0 ..< m:
-    discard regex.match(dummyTextNums, n_pattern_nums2, m3)
-  doNotOptimizeAway(m3)
+  bench(re_nums2, m):
+    var d: bool
+    for i in 0 ..< m:
+      d = re.match(dummyTextNums, pattern_nums2)
+    doNotOptimizeAway(d)
 
-benchRelative(regex_macro_nums2, m):
-  var d: bool
-  for i in 0 ..< m:
-    regex.match text, regex.rex"[0-9]+..*":
-      d = true
-  doNotOptimizeAway(d)
+  const n_pattern_nums2 = regex.re2"[0-9]+..*"
 
-when false:  # XXX remove
-  var lits_find_re = re.re"do|re|mi|fa|sol"
+  benchRelative(regex_nums2, m):
+    var m3: regex.RegexMatch2
+    for i in 0 ..< m:
+      discard regex.match(dummyTextNums, n_pattern_nums2, m3)
+    doNotOptimizeAway(m3)
 
-  bench(re_lits_find, m):
-    var d: int
+  benchRelative(regex_macro_nums2, m):
+    var d: bool
     for i in 0 ..< m:
-      d = re.find(text, lits_find_re)
+      regex.match text, regex.rex"[0-9]+..*":
+        d = true
     doNotOptimizeAway(d)
 
-  const lits_find = regex.re2"do|re|mi|fa|sol"
+  when false:  # XXX remove
+    var lits_find_re = re.re"do|re|mi|fa|sol"
 
-  benchRelative(regex_lits_find, m):
-    var m2: regex.RegexMatch2
-    for i in 0 ..< m:
-      discard regex.find(text, lits_find, m2)
-    doNotOptimizeAway(m2)
+    bench(re_lits_find, m):
+      var d: int
+      for i in 0 ..< m:
+        d = re.find(text, lits_find_re)
+      doNotOptimizeAway(d)
 
-const bench_text = staticRead("input-text.txt")
+    const lits_find = regex.re2"do|re|mi|fa|sol"
 
-var email_find_all_re = re.re"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"
+    benchRelative(regex_lits_find, m):
+      var m2: regex.RegexMatch2
+      for i in 0 ..< m:
+        discard regex.find(text, lits_find, m2)
+      doNotOptimizeAway(m2)
 
-bench(re_email_find_all, m):
-  var d = 0
-  for i in 0 ..< m:
-    for _ in re.findAll(bench_text, email_find_all_re):
-      d += 1
-  doAssert d == 92
-  doNotOptimizeAway(d)
+  const bench_text = staticRead("input-text.txt")
 
-const email_find_all = regex.re2"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"
+  var email_find_all_re = re.re"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"
 
-benchRelative(regex_email_find_all, m):
-  var d = 0
-  for i in 0 ..< m:
-    for _ in regex.findAll(bench_text, email_find_all):
-      d += 1
-  doAssert d == 92
-  doNotOptimizeAway(d)
+  bench(re_email_find_all, m):
+    var d = 0
+    for i in 0 ..< m:
+      for _ in re.findAll(bench_text, email_find_all_re):
+        d += 1
+    doAssert d == 92
+    doNotOptimizeAway(d)
 
-var uri_find_all_re = re.re"[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"
+  const email_find_all = regex.re2"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"
 
-bench(re_uri_find_all, m):
-  var d = 0
-  for i in 0 ..< m:
-    for _ in re.findAll(bench_text, uri_find_all_re):
-      d += 1
-  doAssert d == 5301
-  doNotOptimizeAway(d)
+  benchRelative(regex_email_find_all, m):
+    var d = 0
+    for i in 0 ..< m:
+      for _ in regex.findAll(bench_text, email_find_all):
+        d += 1
+    doAssert d == 92
+    doNotOptimizeAway(d)
 
-const uri_find_all = regex.re2"[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"
+  var uri_find_all_re = re.re"[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"
 
-benchRelative(regex_uri_find_all, m):
-  var d = 0
-  for i in 0 ..< m:
-    for _ in regex.findAll(bench_text, uri_find_all):
-      d += 1
-  doAssert d == 5301
-  doNotOptimizeAway(d)
+  bench(re_uri_find_all, m):
+    var d = 0
+    for i in 0 ..< m:
+      for _ in re.findAll(bench_text, uri_find_all_re):
+        d += 1
+    doAssert d == 5301
+    doNotOptimizeAway(d)
 
-var ip_find_all_re = re.re"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"
+  const uri_find_all = regex.re2"[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"
 
-bench(re_ip_find_all, m):
-  var d = 0
-  for i in 0 ..< m:
-    for _ in re.findAll(bench_text, ip_find_all_re):
-      d += 1
-  doAssert d == 5
-  doNotOptimizeAway(d)
+  benchRelative(regex_uri_find_all, m):
+    var d = 0
+    for i in 0 ..< m:
+      for _ in regex.findAll(bench_text, uri_find_all):
+        d += 1
+    doAssert d == 5301
+    doNotOptimizeAway(d)
 
-const ip_find_all = regex.re2"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"
+  var ip_find_all_re = re.re"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"
 
-benchRelative(regex_ip_find_all, m):
-  var d = 0
-  for i in 0 ..< m:
-    for _ in regex.findAll(bench_text, ip_find_all):
-      d += 1
-  doAssert d == 5
-  doNotOptimizeAway(d)
+  bench(re_ip_find_all, m):
+    var d = 0
+    for i in 0 ..< m:
+      for _ in re.findAll(bench_text, ip_find_all_re):
+        d += 1
+    doAssert d == 5
+    doNotOptimizeAway(d)
 
-when true:
-  bench(runes, m):
+  const ip_find_all = regex.re2"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"
+
+  benchRelative(regex_ip_find_all, m):
     var d = 0
     for i in 0 ..< m:
-      for _ in bench_text.runes:
+      for _ in regex.findAll(bench_text, ip_find_all):
         d += 1
+    doAssert d == 5
     doNotOptimizeAway(d)
 
-bench(dummy, m):
-  for i in 0 ..< m:
-    memoryClobber()
+  when true:
+    bench(runes, m):
+      var d = 0
+      for i in 0 ..< m:
+        for _ in bench_text.runes:
+          d += 1
+      doNotOptimizeAway(d)
 
-when isMainModule:
-  runBenchmarks()
+  bench(dummy, m):
+    for i in 0 ..< m:
+      memoryClobber()
+
+  when isMainModule:
+    runBenchmarks()
 
-#[
-# Profiling:
-# (but extract the bench to another module without nimbench)
-# open the log with KCachegrind
+  #[
+  # Profiling:
+  # (but extract the bench to another module without nimbench)
+  # open the log with KCachegrind
 
-$ nim c --debugger:native --threads:off -d:danger -d:useMalloc -o:bin/bench2 bench/bench2.nim && valgrind --tool=callgrind -v ./bin/bench2
-]#
+  $ nim c --debugger:native --threads:off -d:danger -d:useMalloc -o:bin/bench2 bench/bench2.nim && valgrind --tool=callgrind -v ./bin/bench2
+  ]#
diff --git a/bench/bench2.nim b/bench/bench2.nim
new file mode 100644
index 0000000..9f09372
--- /dev/null
+++ b/bench/bench2.nim
@@ -0,0 +1,23 @@
+import unicode
+from regex import nil
+
+func genText(): string {.compileTime.} =
+  result = ""
+  for _ in 0 .. 100000:
+    result.add("a")
+  result.add("sol")
+  for _ in 0 .. 100000:
+    result.add("b")
+  #result.add("ฅ")
+const text = genText()
+
+const pattern4 = regex.re2(r"\w*sol\w*") #, {regex.RegexFlag.reAscii})
+
+proc runBenchmarks() =
+  var m2: regex.RegexMatch2
+  for i in 0 ..< 500:
+    discard regex.match(text, pattern4, m2)
+  echo m2.captures
+
+when isMainModule:
+  runBenchmarks()
\ No newline at end of file
diff --git a/src/regex/nfa.nim b/src/regex/nfa.nim
index cf16427..507c333 100644
--- a/src/regex/nfa.nim
+++ b/src/regex/nfa.nim
@@ -146,10 +146,10 @@ func eNfa*(exp: RpnExp): Enfa {.raises: [RegexError].} =
   result.s.add initSkipNode(states)
 
 type
-  Zclosure = seq[int16]
-  TeClosure = seq[(int16, Zclosure)]
+  Etransitions = seq[int16]  # xxx transitions
+  TeClosure = seq[(int16, Etransitions)]
 
-func isTransitionZ(n: Node): bool {.inline.} =
+func isEpsilonTransition(n: Node): bool {.inline.} =
   result = case n.kind
     of groupKind:
       n.isCapturing
@@ -163,24 +163,24 @@ func teClosure(
   eNfa: Enfa,
   state: int16,
   processing: var seq[int16],
-  zTransitions: Zclosure
+  eTransitions: Etransitions
 ) =
-  var zTransitionsCurr = zTransitions
-  if isTransitionZ eNfa.s[state]:
-    zTransitionsCurr.add state
+  var eTransitionsCurr = eTransitions
+  if isEpsilonTransition eNfa.s[state]:
+    eTransitionsCurr.add state
   if eNfa.s[state].kind in matchableKind + {reEOE}:
-    result.add (state, zTransitionsCurr)
+    result.add (state, eTransitionsCurr)
     return
   for i, s in pairs eNfa.s[state].next:
     # Enter loops only once. "a", re"(a*)*" -> ["a", ""]
     if eNfa.s[state].kind in repetitionKind:
       if s notin processing or i == int(eNfa.s[state].isGreedy):
         processing.add s
-        teClosure(result, eNfa, s, processing, zTransitionsCurr)
+        teClosure(result, eNfa, s, processing, eTransitionsCurr)
         discard processing.pop()
       # else skip loop
     else:
-      teClosure(result, eNfa, s, processing, zTransitionsCurr)
+      teClosure(result, eNfa, s, processing, eTransitionsCurr)
 
 func teClosure(
   result: var TeClosure,
@@ -189,9 +189,9 @@ func teClosure(
   processing: var seq[int16]
 ) =
   doAssert processing.len == 0
-  var zclosure: Zclosure
+  var eTransitions: Etransitions
   for s in eNfa.s[state].next:
-    teClosure(result, eNfa, s, processing, zclosure)
+    teClosure(result, eNfa, s, processing, eTransitions)
 
 when (NimMajor, NimMinor, NimPatch) < (1,4,0) and not declared(IndexDefect):
   # avoids a warning
@@ -206,7 +206,6 @@ func eRemoval*(eNfa: Enfa): Nfa {.raises: [].} =
   #echo eNfa
   result.s = newSeq[Node](eNfa.s.len)
   result.s.setLen 0
-  result.t.allZ.setLen eNfa.s.len
   var statesMap = newSeq[int16](eNfa.s.len)
   for i in 0 .. statesMap.len-1:
     statesMap[i] = -1
@@ -214,7 +213,6 @@ func eRemoval*(eNfa: Enfa): Nfa {.raises: [].} =
   result.s.add eNfa.s[start]
   statesMap[start] = 0'i16
   var closure: TeClosure
-  var zc: seq[Node]
   var qw = initDeque[int16](2)
   qw.addFirst start
   var qu: set[int16]
@@ -228,25 +226,21 @@ func eRemoval*(eNfa: Enfa): Nfa {.raises: [].} =
       doAssert false
     closure.setLen 0
     teClosure(closure, eNfa, qa, processing)
+    doAssert statesMap[qa] > -1
     result.s[statesMap[qa]].next.setLen 0
-    for qb, zclosure in closure.items:
+    for qb, eTransitions in closure.items:
+      for eti in eTransitions:
+        if statesMap[eti] == -1:
+          result.s.add eNfa.s[eti]
+          statesMap[eti] = result.s.len.int16-1
+        result.s[statesMap[qa]].next.add statesMap[eti]
       if statesMap[qb] == -1:
         result.s.add eNfa.s[qb]
         statesMap[qb] = result.s.len.int16-1
-      doAssert statesMap[qb] > -1
-      doAssert statesMap[qa] > -1
       result.s[statesMap[qa]].next.add statesMap[qb]
-      result.t.allZ[statesMap[qa]].add -1'i16
-      zc.setLen 0
-      for z in zclosure:
-        zc.add eNfa.s[z]
-      if zc.len > 0:
-        result.t.z.add zc
-        result.t.allZ[statesMap[qa]][^1] = int16(result.t.z.len-1)
       if qb notin qu:
         qu.incl qb
         qw.addFirst qb
-  result.t.allZ.setLen result.s.len
 
 func reverse(eNfa: Enfa): Enfa =
   template state0: untyped = int16(eNfa.s.len-1)
diff --git a/src/regex/nfamatch2.nim b/src/regex/nfamatch2.nim
index f3e1bdc..31e23ba 100644
--- a/src/regex/nfamatch2.nim
+++ b/src/regex/nfamatch2.nim
@@ -67,11 +67,22 @@ template lookAroundTpl*: untyped {.dirty.} =
     false
   smL.removeLast()
 
+func isEpsilonTransition(n: Node): bool {.inline.} =
+  result = case n.kind
+  of groupKind, assertionKind:
+    true
+  else:
+    false
+
+template s(nfa: openArray[Node]): untyped =
+  nfa
+
 template nextStateTpl(bwMatch = false): untyped {.dirty.} =
   template bounds2: untyped =
     when bwMatch: i .. bounds.b else: bounds.a .. i-1
   template captElm: untyped =
-    capts[captx, z.idx]
+    capts[captx, nfa.s[nt].idx]
+  template z: untyped = nfa.s[nt]
   smB.clear()
   for n, capt, bounds in items smA:
     if capt != -1:
@@ -80,24 +91,29 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
       if not smB.hasState n:
         smB.add (n, capt, bounds)
       break
-    for nti, nt in pairs nfa.s[n].next:
-      if smB.hasState nt:
-        continue
-      if not match(nfa.s[nt], c):
-        if not (anchored and nfa.s[nt].kind == reEoe):
+    matched = true
+    captx = capt
+    for nti, nt in pairs toOpenArray(nfa.s[n].next, 0, nfa.s[n].next.len-1):
+      if not isEpsilonTransition(nfa.s[n]):
+        if not matched:
+          matched = true
+          captx = capt
           continue
-      if nfa.t.allZ[n][nti] == -1'i16:
-        smB.add (nt, capt, bounds2)
+        if smB.hasState nt:
+          captx = capt
+          continue
+        if not match(nfa.s[nt], c):
+          if not (anchored and nfa.s[nt].kind == reEoe):
+            captx = capt
+            continue
+        smB.add (nt, captx, bounds2)
+        captx = capt
         continue
-      matched = true
-      captx = capt
-      for z in nfa.t.z[nfa.t.allZ[n][nti]]:
-        if not matched:
-          break
-        case z.kind
+      if not matched:
+        continue
+      case nfa.s[nt].kind
         of reGroupStart:
-          # XXX this can be avoided on 1st z loop iteration
-          #     and also on 1st nti loop iteration
+          # XXX this can be avoided in some cases?
           captx = capts.diverge captx
           if mfReverseCapts notin flags or
               captElm.a == nonCapture.a:
@@ -109,9 +125,9 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
             captElm.b = i-1
         of assertionKind - lookaroundKind:
           when bwMatch:
-            matched = match(z, c, cPrev.Rune)
+            matched = match(nfa.s[nt], c, cPrev.Rune)
           else:
-            matched = match(z, cPrev.Rune, c)
+            matched = match(nfa.s[nt], cPrev.Rune, c)
         of lookaroundKind:
           let freezed = capts.freeze()
           lookAroundTpl()
@@ -121,8 +137,6 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
         else:
           doAssert false
           discard
-      if matched:
-        smB.add (nt, captx, bounds2)
   swap smA, smB
   capts.recycle()
 

From 8fac4837d3b20fd14fe41f53f40bb81044a4eb7c Mon Sep 17 00:00:00 2001
From: nitely <ecastroborsani@gmail.com>
Date: Wed, 13 Sep 2023 20:55:13 -0300
Subject: [PATCH 2/9] Walk inline epsilon transitions in the NFA matchers

---
 src/regex.nim             |  5 +++-
 src/regex/nfafindall.nim  | 48 ++++++++++++++++-----------------
 src/regex/nfafindall2.nim | 56 +++++++++++++++++++--------------------
 src/regex/nfamatch.nim    | 23 +++++++---------
 src/regex/nfamatch2.nim   | 41 +++++++++-------------------
 src/regex/types.nim       |  7 +++++
 6 files changed, 86 insertions(+), 94 deletions(-)

diff --git a/src/regex.nim b/src/regex.nim
index 5beb1e1..d2aa408 100644
--- a/src/regex.nim
+++ b/src/regex.nim
@@ -301,7 +301,10 @@ import ./regex/nfamatch2
 when not defined(noRegexOpt):
   import ./regex/litopt
 
-const canUseMacro = (NimMajor, NimMinor) >= (1, 1)
+when not defined(noRegexMacro):
+  const canUseMacro = (NimMajor, NimMinor) >= (1, 1)
+else:
+  const canUseMacro = false
 
 when canUseMacro:
   import ./regex/nfamacro
diff --git a/src/regex/nfafindall.nim b/src/regex/nfafindall.nim
index 864d4d0..e748985 100644
--- a/src/regex/nfafindall.nim
+++ b/src/regex/nfafindall.nim
@@ -118,38 +118,37 @@ func submatch(
   template capt: untyped = ms.a[smi].ci
   template bounds: untyped = ms.a[smi].bounds
   template look: untyped = ms.look
+  template z: untyped = nfa[nt]
+  template nt: untyped = nfa[n].next[nti]
   smB.clear()
   var captx: int32
   var matched = true
   var eoeFound = false
   var smi = 0
   while smi < smA.len:
-    for nti, nt in nfa[n].next.pairs:
-      if smB.hasState nt:
-        continue
-      if nfa[nt].kind != reEoe and not match(nfa[nt], c.Rune):
-        continue
+    var nti = 0
+    while nti <= nfa[n].next.len-1:
       matched = true
       captx = capt
-      if tns.allZ[n][nti] > -1:
-        for z in tns.z[tns.allZ[n][nti]]:
-          if not matched:
-            break
-          case z.kind
-          of groupKind:
-            capts.add CaptNode(
-              parent: captx,
-              bound: i,
-              idx: z.idx)
-            captx = (capts.len-1).int32
-          of assertionKind - lookaroundKind:
-            matched = match(z, cPrev.Rune, c.Rune)
-          of lookaroundKind:
-            lookAroundTpl()
-          else:
-            assert false
-            discard
-      if matched:
+      while isEpsilonTransition(nfa[nt]) and matched:
+        case z.kind
+        of groupKind:
+          capts.add CaptNode(
+            parent: captx,
+            bound: i,
+            idx: z.idx)
+          captx = (capts.len-1).int32
+        of assertionKind - lookaroundKind:
+          matched = match(z, cPrev.Rune, c.Rune)
+        of lookaroundKind:
+          lookAroundTpl()
+        else:
+          assert false
+          discard
+        inc nti
+      if matched and
+          not smB.hasState(nt) and
+          (nfa[nt].match(c.Rune) or nfa[nt].kind == reEoe):
         if nfa[nt].kind == reEoe:
           #debugEcho "eoe ", bounds, " ", ms.m
           ms.m.add (captx, bounds.a .. i-1)
@@ -160,6 +159,7 @@ func submatch(
           smi = -1
           break
         smB.add (nt, captx, bounds.a .. i-1)
+      inc nti
     inc smi
   swap smA, smB
 
diff --git a/src/regex/nfafindall2.nim b/src/regex/nfafindall2.nim
index 5a2a7cd..1e47623 100644
--- a/src/regex/nfafindall2.nim
+++ b/src/regex/nfafindall2.nim
@@ -155,6 +155,8 @@ func submatch(
   template capt: untyped = ms.a[smi].ci
   template bounds: untyped = ms.a[smi].bounds
   template look: untyped = ms.look
+  template z: untyped = nfa[nt]
+  template nt: untyped = nfa[n].next[nti]
   smB.clear()
   var captx: int32
   var matched = true
@@ -163,36 +165,33 @@ func submatch(
   while smi < smA.len:
     if capt != -1:
       capts.keepAlive capt
-    for nti, nt in nfa[n].next.pairs:
-      if smB.hasState nt:
-        continue
-      if nfa[nt].kind != reEoe and not match(nfa[nt], c.Rune):
-        continue
+    var nti = 0
+    while nti <= nfa[n].next.len-1:
       matched = true
       captx = capt
-      if tns.allZ[n][nti] > -1:
-        for z in tns.z[tns.allZ[n][nti]]:
-          if not matched:
-            break
-          case z.kind
-          of reGroupStart:
-            captx = capts.diverge captx
-            capts[captx, z.idx].a = i
-          of reGroupEnd:
-            captx = capts.diverge captx
-            capts[captx, z.idx].b = i-1
-          of assertionKind - lookaroundKind:
-            matched = match(z, cPrev.Rune, c.Rune)
-          of lookaroundKind:
-            let freezed = capts.freeze()
-            lookAroundTpl()
-            capts.unfreeze freezed
-            if captx != -1:
-              capts.keepAlive captx
-          else:
-            assert false
-            discard
-      if matched:
+      while isEpsilonTransition(nfa[nt]) and matched:
+        case z.kind
+        of reGroupStart:
+          captx = capts.diverge captx
+          capts[captx, z.idx].a = i
+        of reGroupEnd:
+          captx = capts.diverge captx
+          capts[captx, z.idx].b = i-1
+        of assertionKind - lookaroundKind:
+          matched = match(z, cPrev.Rune, c.Rune)
+        of lookaroundKind:
+          let freezed = capts.freeze()
+          lookAroundTpl()
+          capts.unfreeze freezed
+          if captx != -1:
+            capts.keepAlive captx
+        else:
+          assert false
+          discard
+        inc nti
+      if matched and
+          not smB.hasState(nt) and
+          (nfa[nt].match(c.Rune) or nfa[nt].kind == reEoe):
         if nfa[nt].kind == reEoe:
           #debugEcho "eoe ", bounds, " ", ms.m
           ms.add (captx, bounds.a .. i-1)
@@ -203,6 +202,7 @@ func submatch(
           smi = -1
           break
         smB.add (nt, captx, bounds.a .. i-1)
+      inc nti
     inc smi
   swap smA, smB
   capts.recycle()
diff --git a/src/regex/nfamatch.nim b/src/regex/nfamatch.nim
index fc3db56..3c8fce0 100644
--- a/src/regex/nfamatch.nim
+++ b/src/regex/nfamatch.nim
@@ -70,26 +70,19 @@ template lookAroundTpl*: untyped {.dirty.} =
 template nextStateTpl(bwMatch = false): untyped {.dirty.} =
   template bounds2: untyped =
     when bwMatch: i .. bounds.b else: bounds.a .. i-1
+  template z: untyped = nfa.s[nt]
+  template nt: untyped = nfa.s[n].next[nti]
   smB.clear()
   for n, capt, bounds in items smA:
     if anchored and nfa.s[n].kind == reEoe:
       if not smB.hasState n:
         smB.add (n, capt, bounds)
       break
-    for nti, nt in pairs nfa.s[n].next:
-      if smB.hasState nt:
-        continue
-      if not match(nfa.s[nt], c):
-        if not (anchored and nfa.s[nt].kind == reEoe):
-          continue
-      if nfa.t.allZ[n][nti] == -1'i16:
-        smB.add (nt, capt, bounds2)
-        continue
+    var nti = 0
+    while nti <= nfa.s[n].next.len-1:
       matched = true
       captx = capt
-      for z in nfa.t.z[nfa.t.allZ[n][nti]]:
-        if not matched:
-          break
+      while isEpsilonTransition(nfa.s[nt]) and matched:
         case z.kind
         of groupKind:
           capts.add CaptNode(
@@ -107,8 +100,12 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
         else:
           doAssert false
           discard
-      if matched:
+        inc nti
+      if matched and
+          not smB.hasState(nt) and
+          (nfa.s[nt].match(c) or (anchored and nfa.s[nt].kind == reEoe)):
         smB.add (nt, captx, bounds2)
+      inc nti
   swap smA, smB
 
 func matchImpl(
diff --git a/src/regex/nfamatch2.nim b/src/regex/nfamatch2.nim
index 31e23ba..6606ceb 100644
--- a/src/regex/nfamatch2.nim
+++ b/src/regex/nfamatch2.nim
@@ -67,13 +67,6 @@ template lookAroundTpl*: untyped {.dirty.} =
     false
   smL.removeLast()
 
-func isEpsilonTransition(n: Node): bool {.inline.} =
-  result = case n.kind
-  of groupKind, assertionKind:
-    true
-  else:
-    false
-
 template s(nfa: openArray[Node]): untyped =
   nfa
 
@@ -83,6 +76,7 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
   template captElm: untyped =
     capts[captx, nfa.s[nt].idx]
   template z: untyped = nfa.s[nt]
+  template nt: untyped = nfa.s[n].next[nti]
   smB.clear()
   for n, capt, bounds in items smA:
     if capt != -1:
@@ -91,27 +85,12 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
       if not smB.hasState n:
         smB.add (n, capt, bounds)
       break
-    matched = true
-    captx = capt
-    for nti, nt in pairs toOpenArray(nfa.s[n].next, 0, nfa.s[n].next.len-1):
-      if not isEpsilonTransition(nfa.s[n]):
-        if not matched:
-          matched = true
-          captx = capt
-          continue
-        if smB.hasState nt:
-          captx = capt
-          continue
-        if not match(nfa.s[nt], c):
-          if not (anchored and nfa.s[nt].kind == reEoe):
-            captx = capt
-            continue
-        smB.add (nt, captx, bounds2)
-        captx = capt
-        continue
-      if not matched:
-        continue
-      case nfa.s[nt].kind
+    var nti = 0
+    while nti <= nfa.s[n].next.len-1:
+      matched = true
+      captx = capt
+      while isEpsilonTransition(nfa.s[nt]) and matched:
+        case nfa.s[nt].kind
         of reGroupStart:
           # XXX this can be avoided in some cases?
           captx = capts.diverge captx
@@ -137,6 +116,12 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
         else:
           doAssert false
           discard
+        inc nti
+      if matched and
+          not smB.hasState(nt) and
+          (nfa.s[nt].match(c) or (anchored and nfa.s[nt].kind == reEoe)):
+        smB.add (nt, captx, bounds2)
+      inc nti
   swap smA, smB
   capts.recycle()
 
diff --git a/src/regex/types.nim b/src/regex/types.nim
index 569a1dd..e3b329a 100644
--- a/src/regex/types.nim
+++ b/src/regex/types.nim
@@ -278,6 +278,13 @@ const
     reGroupEnd}
   groupStartKind* = {reGroupStart} + lookaroundKind
 
+func isEpsilonTransition*(n: Node): bool {.inline.} =
+  result = case n.kind
+  of groupKind, assertionKind:
+    true
+  else:
+    false
+
 func `$`*(n: Node): string =
   ## return the string representation
   ## of a `Node`. The string is always

From 0e598756a4696b2773fb99360b3408ec11ccd2b5 Mon Sep 17 00:00:00 2001
From: nitely <ecastroborsani@gmail.com>
Date: Thu, 14 Sep 2023 22:56:06 -0300
Subject: [PATCH 3/9] Collect epsilon transitions from NFA states in nfamacro

---
 src/regex/nfamacro.nim | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/regex/nfamacro.nim b/src/regex/nfamacro.nim
index 9f032df..a4535f8 100644
--- a/src/regex/nfamacro.nim
+++ b/src/regex/nfamacro.nim
@@ -4,6 +4,7 @@ import std/macros
 import std/unicode
 import std/tables
 import std/sets
+import std/algorithm
 
 import pkg/unicodedb/properties
 import pkg/unicodedb/types as utypes
@@ -268,6 +269,13 @@ func genLookaroundMatch(
     `lookaroundStmt`
     removeLast `smL`
 
+func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] =
+  for i in countdown(nti-1, 0):
+    if not isEpsilonTransition(nfa.s[n.next[i]]):
+      break
+    result.add n.next[i]
+  result.reverse()
+
 func genMatchedBody(
   smB, ntLit, capt, bounds, matched, captx,
   capts, charIdx, cPrev, c, text: NimNode,
@@ -276,19 +284,21 @@ func genMatchedBody(
   look: Lookaround,
   flags: set[MatchFlag]
 ): NimNode =
-  template t: untyped = nfa.t
+  template n: untyped = nfa.s[i]
+  template z: untyped = nfa.s[eti]
   let bounds2 = if mfBwMatch in flags:
     quote do: `charIdx` .. `bounds`.b
   else:
     quote do: `bounds`.a .. `charIdx`-1
-  if t.allZ[i][nti] == -1'i16:
+  let eTransitions = getEpsilonTransitions(nfa, n, nti)
+  if eTransitions.len == 0:
     return quote do:
       add(`smB`, (`ntLit`, `capt`, `bounds2`))
   var matchedBody: seq[NimNode]
   matchedBody.add quote do:
     `matched` = true
     `captx` = `capt`
-  for z in t.z[t.allZ[i][nti]]:
+  for eti in eTransitions:
     case z.kind
     of groupKind:
       let zIdx = newLit z.idx
@@ -347,10 +357,14 @@ func genNextState(
   for i in 0 .. s.len-1:
     if s[i].kind == reEoe:
       continue
+    if isEpsilonTransition(s[i]):
+      continue
     var branchBodyN: seq[NimNode]
     for nti, nt in s[i].next.pairs:
       if eoeOnly and s[nt].kind != reEoe:
         continue
+      if isEpsilonTransition(s[nt]):
+        continue
       let matchCond = case s[nt].kind
         of reEoe:
           quote do: `c` == -1'i32

From e3c5893207a0f72569ddbada1b6dbce810b208af Mon Sep 17 00:00:00 2001
From: nitely <ecastroborsani@gmail.com>
Date: Thu, 14 Sep 2023 23:28:43 -0300
Subject: [PATCH 4/9] Fold epsilon states into edge labels in dotgraph

---
 src/regex.nim          |  8 ++++----
 src/regex/dotgraph.nim | 27 +++++++++++++++++++--------
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/regex.nim b/src/regex.nim
index d2aa408..0c7dba4 100644
--- a/src/regex.nim
+++ b/src/regex.nim
@@ -1371,10 +1371,10 @@ when isMainModule:
 
   doAssert graph(Regex(re2"^a+$")) == """digraph graphname {
     0 [label="q0";color=blue];
-    1 [label="q1";color=black];
-    2 [label="q2";color=blue];
-    0 -> 1 [label="a, {^}, i=0"];
-    1 -> 1 [label="a, i=0"];1 -> 2 [label="{eoe}, {$}, i=1"];
+    2 [label="q1";color=black];
+    4 [label="q2";color=blue];
+    0 -> 2 [label="a, {^}, i=0"];
+    2 -> 2 [label="a, i=0"];2 -> 4 [label="{eoe}, {$}, i=1"];
 }
 """
 
diff --git a/src/regex/dotgraph.nim b/src/regex/dotgraph.nim
index a1ddd61..3548f4c 100644
--- a/src/regex/dotgraph.nim
+++ b/src/regex/dotgraph.nim
@@ -14,23 +14,34 @@ func color(n: Node): string =
 func graph*(nfa: Nfa): string =
   result = "digraph graphname {\n"
   let tab = "    "
+  var qi = 0
   for i, n in pairs nfa.s:
+    if isEpsilonTransition(n):
+      continue
     result.add tab
-    result.add($i & " [label=\"q" & $i & "\";color=" & n.color & "];")
+    result.add($i & " [label=\"q" & $qi & "\";color=" & n.color & "];")
     result.add '\n'
+    inc qi
   for i, n in pairs nfa.s:
     if n.next.len == 0:
       continue
+    if isEpsilonTransition(n):
+      continue
     result.add tab
-    for i2, n2 in pairs n.next:
-      var t = ""
-      if nfa.t.allZ[i][i2] > -1:
-        for i3, z in pairs nfa.t.z[nfa.t.allZ[i][i2]]:
-          if i3 > 0: t &= ", "
-          t &= $z
+    var t = ""
+    var ii = 0
+    for n2 in n.next:
+      if isEpsilonTransition(nfa.s[n2]):
+        if t.len > 0:
+          t &= ", "
+        t &= $nfa.s[n2]
+        continue
+      if t.len > 0:
         t = ", {" & t & "}"
-      let label = ($nfa.s[n2] & t & ", i=" & $i2).replace(r"\", r"\\")
+      let label = ($nfa.s[n2] & t & ", i=" & $ii).replace(r"\", r"\\")
       result.add($i & " -> " & $n2 & " [label=\"" & label & "\"];")
+      t = ""
+      inc ii
     result.add '\n'
   result.add "}\n"
 

From 3c6db18071379560a00508d1aeebe8d395cabb1c Mon Sep 17 00:00:00 2001
From: nitely <ecastroborsani@gmail.com>
Date: Thu, 14 Sep 2023 23:45:32 -0300
Subject: [PATCH 5/9] Skip epsilon states in toString debug helpers

---
 src/regex.nim        | 2 ++
 src/regex/litopt.nim | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/regex.nim b/src/regex.nim
index 0c7dba4..42e6459 100644
--- a/src/regex.nim
+++ b/src/regex.nim
@@ -1146,6 +1146,8 @@ proc toString(
   result = "["
   result.add($n)
   for nn in n.next:
+    if isEpsilonTransition(pattern.nfa.s[nn]):
+      continue
     result.add(", ")
     result.add(pattern.toString(nn, visited))
   result.add("]")
diff --git a/src/regex/litopt.nim b/src/regex/litopt.nim
index 7285ca0..f378da6 100644
--- a/src/regex/litopt.nim
+++ b/src/regex/litopt.nim
@@ -287,6 +287,8 @@ when isMainModule:
     result = "["
     result.add $n.cp
     for nn in n.next:
+      if isEpsilonTransition(nfa.s[nn]):
+        continue
       result.add ", "
       result.add toString(nfa, nn, visited)
     result.add "]"

From ba29d9079e66dbd747577ae40ece0ece565c9417 Mon Sep 17 00:00:00 2001
From: nitely <ecastroborsani@gmail.com>
Date: Fri, 15 Sep 2023 17:41:07 -0300
Subject: [PATCH 6/9] progress

---
 src/regex/nfafindall.nim  | 5 ++++-
 src/regex/nfafindall2.nim | 5 ++++-
 src/regex/nfamatch.nim    | 3 +++
 src/regex/nfamatch2.nim   | 3 +++
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/regex/nfafindall.nim b/src/regex/nfafindall.nim
index e748985..db953bc 100644
--- a/src/regex/nfafindall.nim
+++ b/src/regex/nfafindall.nim
@@ -143,9 +143,12 @@ func submatch(
         of lookaroundKind:
           lookAroundTpl()
         else:
-          assert false
+          doAssert false
           discard
         inc nti
+      while isEpsilonTransition(nfa[nt]):
+        # skip unmatched epsilons
+        inc nti
       if matched and
           not smB.hasState(nt) and
           (nfa[nt].match(c.Rune) or nfa[nt].kind == reEoe):
diff --git a/src/regex/nfafindall2.nim b/src/regex/nfafindall2.nim
index 1e47623..0868ce3 100644
--- a/src/regex/nfafindall2.nim
+++ b/src/regex/nfafindall2.nim
@@ -186,9 +186,12 @@ func submatch(
           if captx != -1:
             capts.keepAlive captx
         else:
-          assert false
+          doAssert false
           discard
         inc nti
+      while isEpsilonTransition(nfa[nt]):
+        # skip unmatched epsilons
+        inc nti
       if matched and
           not smB.hasState(nt) and
           (nfa[nt].match(c.Rune) or nfa[nt].kind == reEoe):
diff --git a/src/regex/nfamatch.nim b/src/regex/nfamatch.nim
index 3c8fce0..470bf4a 100644
--- a/src/regex/nfamatch.nim
+++ b/src/regex/nfamatch.nim
@@ -101,6 +101,9 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
           doAssert false
           discard
         inc nti
+      while isEpsilonTransition(nfa.s[nt]):
+        # skip unmatched epsilons
+        inc nti
       if matched and
           not smB.hasState(nt) and
           (nfa.s[nt].match(c) or (anchored and nfa.s[nt].kind == reEoe)):
diff --git a/src/regex/nfamatch2.nim b/src/regex/nfamatch2.nim
index 6606ceb..3c15a8b 100644
--- a/src/regex/nfamatch2.nim
+++ b/src/regex/nfamatch2.nim
@@ -117,6 +117,9 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
           doAssert false
           discard
         inc nti
+      while isEpsilonTransition(nfa.s[nt]):
+        # skip unmatched epsilons
+        inc nti
       if matched and
           not smB.hasState(nt) and
           (nfa.s[nt].match(c) or (anchored and nfa.s[nt].kind == reEoe)):

From 6fa5e1fd66fd5a01a601a42ff2def08a53b50a07 Mon Sep 17 00:00:00 2001
From: nitely <ecastroborsani@gmail.com>
Date: Fri, 15 Sep 2023 18:05:11 -0300
Subject: [PATCH 7/9] cleanup

---
 src/regex.nim             |  5 +--
 src/regex/nfa.nim         |  4 +--
 src/regex/nfafindall.nim  | 41 +++++++++++------------
 src/regex/nfafindall2.nim | 49 +++++++++++++--------------
 src/regex/nfamatch.nim    | 48 +++++++++++++--------------
 src/regex/nfamatch2.nim   | 69 ++++++++++++++++++---------------------
 src/regex/types.nim       |  6 ----
 7 files changed, 100 insertions(+), 122 deletions(-)

diff --git a/src/regex.nim b/src/regex.nim
index 42e6459..b958cfc 100644
--- a/src/regex.nim
+++ b/src/regex.nim
@@ -301,10 +301,7 @@ import ./regex/nfamatch2
 when not defined(noRegexOpt):
   import ./regex/litopt
 
-when not defined(noRegexMacro):
-  const canUseMacro = (NimMajor, NimMinor) >= (1, 1)
-else:
-  const canUseMacro = false
+const canUseMacro = (NimMajor, NimMinor) >= (1, 1)
 
 when canUseMacro:
   import ./regex/nfamacro
diff --git a/src/regex/nfa.nim b/src/regex/nfa.nim
index 507c333..f51afb4 100644
--- a/src/regex/nfa.nim
+++ b/src/regex/nfa.nim
@@ -149,7 +149,7 @@ type
   Etransitions = seq[int16]  # xxx transitions
   TeClosure = seq[(int16, Etransitions)]
 
-func isEpsilonTransition(n: Node): bool {.inline.} =
+func isEpsilonTransition2(n: Node): bool {.inline.} =
   result = case n.kind
     of groupKind:
       n.isCapturing
@@ -166,7 +166,7 @@ func teClosure(
   eTransitions: Etransitions
 ) =
   var eTransitionsCurr = eTransitions
-  if isEpsilonTransition eNfa.s[state]:
+  if isEpsilonTransition2 eNfa.s[state]:
     eTransitionsCurr.add state
   if eNfa.s[state].kind in matchableKind + {reEOE}:
     result.add (state, eTransitionsCurr)
diff --git a/src/regex/nfafindall.nim b/src/regex/nfafindall.nim
index db953bc..200efad 100644
--- a/src/regex/nfafindall.nim
+++ b/src/regex/nfafindall.nim
@@ -109,7 +109,6 @@ func submatch(
   i: int,
   cPrev, c: int32
 ) {.inline.} =
-  template tns: untyped = regex.nfa.t
   template nfa: untyped = regex.nfa.s
   template smA: untyped = ms.a
   template smB: untyped = ms.b
@@ -118,8 +117,8 @@ func submatch(
   template capt: untyped = ms.a[smi].ci
   template bounds: untyped = ms.a[smi].bounds
   template look: untyped = ms.look
-  template z: untyped = nfa[nt]
   template nt: untyped = nfa[n].next[nti]
+  template ntn: untyped = nfa[nt]
   smB.clear()
   var captx: int32
   var matched = true
@@ -130,29 +129,27 @@ func submatch(
     while nti <= nfa[n].next.len-1:
       matched = true
       captx = capt
-      while isEpsilonTransition(nfa[nt]) and matched:
-        case z.kind
-        of groupKind:
-          capts.add CaptNode(
-            parent: captx,
-            bound: i,
-            idx: z.idx)
-          captx = (capts.len-1).int32
-        of assertionKind - lookaroundKind:
-          matched = match(z, cPrev.Rune, c.Rune)
-        of lookaroundKind:
-          lookAroundTpl()
-        else:
-          doAssert false
-          discard
-        inc nti
-      while isEpsilonTransition(nfa[nt]):
-        # skip unmatched epsilons
+      while isEpsilonTransition(ntn):
+        if matched:
+          case ntn.kind
+          of groupKind:
+            capts.add CaptNode(
+              parent: captx,
+              bound: i,
+              idx: ntn.idx)
+            captx = (capts.len-1).int32
+          of assertionKind - lookaroundKind:
+            matched = match(ntn, cPrev.Rune, c.Rune)
+          of lookaroundKind:
+            lookAroundTpl()
+          else:
+            doAssert false
+            discard
         inc nti
       if matched and
           not smB.hasState(nt) and
-          (nfa[nt].match(c.Rune) or nfa[nt].kind == reEoe):
-        if nfa[nt].kind == reEoe:
+          (ntn.match(c.Rune) or ntn.kind == reEoe):
+        if ntn.kind == reEoe:
           #debugEcho "eoe ", bounds, " ", ms.m
           ms.m.add (captx, bounds.a .. i-1)
           smA.clear()
diff --git a/src/regex/nfafindall2.nim b/src/regex/nfafindall2.nim
index 0868ce3..fdd3500 100644
--- a/src/regex/nfafindall2.nim
+++ b/src/regex/nfafindall2.nim
@@ -146,7 +146,6 @@ func submatch(
   i: int,
   cPrev, c: int32
 ) {.inline.} =
-  template tns: untyped = regex.nfa.t
   template nfa: untyped = regex.nfa.s
   template smA: untyped = ms.a
   template smB: untyped = ms.b
@@ -155,8 +154,8 @@ func submatch(
   template capt: untyped = ms.a[smi].ci
   template bounds: untyped = ms.a[smi].bounds
   template look: untyped = ms.look
-  template z: untyped = nfa[nt]
   template nt: untyped = nfa[n].next[nti]
+  template ntn: untyped = nfa[nt]
   smB.clear()
   var captx: int32
   var matched = true
@@ -169,33 +168,31 @@ func submatch(
     while nti <= nfa[n].next.len-1:
       matched = true
       captx = capt
-      while isEpsilonTransition(nfa[nt]) and matched:
-        case z.kind
-        of reGroupStart:
-          captx = capts.diverge captx
-          capts[captx, z.idx].a = i
-        of reGroupEnd:
-          captx = capts.diverge captx
-          capts[captx, z.idx].b = i-1
-        of assertionKind - lookaroundKind:
-          matched = match(z, cPrev.Rune, c.Rune)
-        of lookaroundKind:
-          let freezed = capts.freeze()
-          lookAroundTpl()
-          capts.unfreeze freezed
-          if captx != -1:
-            capts.keepAlive captx
-        else:
-          doAssert false
-          discard
-        inc nti
-      while isEpsilonTransition(nfa[nt]):
-        # skip unmatched epsilons
+      while isEpsilonTransition(ntn):
+        if matched:
+          case ntn.kind
+          of reGroupStart:
+            captx = capts.diverge captx
+            capts[captx, ntn.idx].a = i
+          of reGroupEnd:
+            captx = capts.diverge captx
+            capts[captx, ntn.idx].b = i-1
+          of assertionKind - lookaroundKind:
+            matched = match(ntn, cPrev.Rune, c.Rune)
+          of lookaroundKind:
+            let freezed = capts.freeze()
+            lookAroundTpl()
+            capts.unfreeze freezed
+            if captx != -1:
+              capts.keepAlive captx
+          else:
+            doAssert false
+            discard
         inc nti
       if matched and
           not smB.hasState(nt) and
-          (nfa[nt].match(c.Rune) or nfa[nt].kind == reEoe):
-        if nfa[nt].kind == reEoe:
+          (ntn.match(c.Rune) or ntn.kind == reEoe):
+        if ntn.kind == reEoe:
           #debugEcho "eoe ", bounds, " ", ms.m
           ms.add (captx, bounds.a .. i-1)
           smA.clear()
diff --git a/src/regex/nfamatch.nim b/src/regex/nfamatch.nim
index 470bf4a..863d74a 100644
--- a/src/regex/nfamatch.nim
+++ b/src/regex/nfamatch.nim
@@ -38,14 +38,14 @@ template lookAroundTpl*: untyped {.dirty.} =
   template smL: untyped = look.smL
   template smLa: untyped = smL.lastA
   template smLb: untyped = smL.lastB
-  template zNfa: untyped = z.subExp.nfa
-  let flags2 = if z.subExp.reverseCapts:
+  template zNfa: untyped = ntn.subExp.nfa
+  let flags2 = if ntn.subExp.reverseCapts:
     {mfAnchored, mfReverseCapts}
   else:
     {mfAnchored}
   smL.grow()
   smL.last.setLen zNfa.s.len
-  matched = case z.kind
+  matched = case ntn.kind
   of reLookahead:
     look.ahead(
       smLa, smLb, capts, captx,
@@ -70,8 +70,8 @@ template lookAroundTpl*: untyped {.dirty.} =
 template nextStateTpl(bwMatch = false): untyped {.dirty.} =
   template bounds2: untyped =
     when bwMatch: i .. bounds.b else: bounds.a .. i-1
-  template z: untyped = nfa.s[nt]
   template nt: untyped = nfa.s[n].next[nti]
+  template ntn: untyped = nfa.s[nt]
   smB.clear()
   for n, capt, bounds in items smA:
     if anchored and nfa.s[n].kind == reEoe:
@@ -82,31 +82,29 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
     while nti <= nfa.s[n].next.len-1:
       matched = true
       captx = capt
-      while isEpsilonTransition(nfa.s[nt]) and matched:
-        case z.kind
-        of groupKind:
-          capts.add CaptNode(
-            parent: captx,
-            bound: i,
-            idx: z.idx)
-          captx = (capts.len-1).int32
-        of assertionKind - lookaroundKind:
-          when bwMatch:
-            matched = match(z, c, cPrev.Rune)
+      while isEpsilonTransition(ntn):
+        if matched:
+          case ntn.kind
+          of groupKind:
+            capts.add CaptNode(
+              parent: captx,
+              bound: i,
+              idx: ntn.idx)
+            captx = (capts.len-1).int32
+          of assertionKind - lookaroundKind:
+            when bwMatch:
+              matched = match(ntn, c, cPrev.Rune)
+            else:
+              matched = match(ntn, cPrev.Rune, c)
+          of lookaroundKind:
+            lookAroundTpl()
           else:
-            matched = match(z, cPrev.Rune, c)
-        of lookaroundKind:
-          lookAroundTpl()
-        else:
-          doAssert false
-          discard
-        inc nti
-      while isEpsilonTransition(nfa.s[nt]):
-        # skip unmatched epsilons
+            doAssert false
+            discard
         inc nti
       if matched and
           not smB.hasState(nt) and
-          (nfa.s[nt].match(c) or (anchored and nfa.s[nt].kind == reEoe)):
+          (ntn.match(c) or (anchored and ntn.kind == reEoe)):
         smB.add (nt, captx, bounds2)
       inc nti
   swap smA, smB
diff --git a/src/regex/nfamatch2.nim b/src/regex/nfamatch2.nim
index 3c15a8b..91c35e5 100644
--- a/src/regex/nfamatch2.nim
+++ b/src/regex/nfamatch2.nim
@@ -38,14 +38,14 @@ template lookAroundTpl*: untyped {.dirty.} =
   template smL: untyped = look.smL
   template smLa: untyped = smL.lastA
   template smLb: untyped = smL.lastB
-  template zNfa: untyped = z.subExp.nfa
-  let flags2 = if z.subExp.reverseCapts:
+  template zNfa: untyped = ntn.subExp.nfa
+  let flags2 = if ntn.subExp.reverseCapts:
     {mfAnchored, mfReverseCapts}
   else:
     {mfAnchored}
   smL.grow()
   smL.last.setLen zNfa.s.len
-  matched = case z.kind
+  matched = case ntn.kind
   of reLookahead:
     look.ahead(
       smLa, smLb, capts, captx,
@@ -67,16 +67,13 @@ template lookAroundTpl*: untyped {.dirty.} =
     false
   smL.removeLast()
 
-template s(nfa: openArray[Node]): untyped =
-  nfa
-
 template nextStateTpl(bwMatch = false): untyped {.dirty.} =
   template bounds2: untyped =
     when bwMatch: i .. bounds.b else: bounds.a .. i-1
   template captElm: untyped =
     capts[captx, nfa.s[nt].idx]
-  template z: untyped = nfa.s[nt]
   template nt: untyped = nfa.s[n].next[nti]
+  template ntn: untyped = nfa.s[nt]
   smB.clear()
   for n, capt, bounds in items smA:
     if capt != -1:
@@ -89,40 +86,38 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
     while nti <= nfa.s[n].next.len-1:
       matched = true
       captx = capt
-      while isEpsilonTransition(nfa.s[nt]) and matched:
-        case nfa.s[nt].kind
-        of reGroupStart:
-          # XXX this can be avoided in some cases?
-          captx = capts.diverge captx
-          if mfReverseCapts notin flags or
-              captElm.a == nonCapture.a:
-            captElm.a = i
-        of reGroupEnd:
-          captx = capts.diverge captx
-          if mfReverseCapts notin flags or
-              captElm.b == nonCapture.b:
-            captElm.b = i-1
-        of assertionKind - lookaroundKind:
-          when bwMatch:
-            matched = match(nfa.s[nt], c, cPrev.Rune)
+      while isEpsilonTransition(ntn):
+        if matched:
+          case ntn.kind
+          of reGroupStart:
+            # XXX this can be avoided in some cases?
+            captx = capts.diverge captx
+            if mfReverseCapts notin flags or
+                captElm.a == nonCapture.a:
+              captElm.a = i
+          of reGroupEnd:
+            captx = capts.diverge captx
+            if mfReverseCapts notin flags or
+                captElm.b == nonCapture.b:
+              captElm.b = i-1
+          of assertionKind - lookaroundKind:
+            when bwMatch:
+              matched = match(ntn, c, cPrev.Rune)
+            else:
+              matched = match(ntn, cPrev.Rune, c)
+          of lookaroundKind:
+            let freezed = capts.freeze()
+            lookAroundTpl()
+            capts.unfreeze freezed
+            if captx != -1:
+              capts.keepAlive captx
           else:
-            matched = match(nfa.s[nt], cPrev.Rune, c)
-        of lookaroundKind:
-          let freezed = capts.freeze()
-          lookAroundTpl()
-          capts.unfreeze freezed
-          if captx != -1:
-            capts.keepAlive captx
-        else:
-          doAssert false
-          discard
-        inc nti
-      while isEpsilonTransition(nfa.s[nt]):
-        # skip unmatched epsilons
+            doAssert false
+            discard
         inc nti
       if matched and
           not smB.hasState(nt) and
-          (nfa.s[nt].match(c) or (anchored and nfa.s[nt].kind == reEoe)):
+          (ntn.match(c) or (anchored and ntn.kind == reEoe)):
         smB.add (nt, captx, bounds2)
       inc nti
   swap smA, smB
diff --git a/src/regex/types.nim b/src/regex/types.nim
index e3b329a..c275fff 100644
--- a/src/regex/types.nim
+++ b/src/regex/types.nim
@@ -28,14 +28,8 @@ type
   # nfatype.nim
   Enfa* = object
     s*: seq[Node]
-  TransitionsAll* = seq[seq[int16]]
-  ZclosureStates* = seq[seq[Node]]
-  Transitions* = object
-    allZ*: TransitionsAll
-    z*: ZclosureStates
   Nfa* = object
     s*: seq[Node]
-    t*: Transitions
 
   # nodetype.nim
   Flag* = enum

From 86bf5381efaac70d35f6ccf13e1975a3be1c1a77 Mon Sep 17 00:00:00 2001
From: nitely <ecastroborsani@gmail.com>
Date: Fri, 15 Sep 2023 18:13:56 -0300
Subject: [PATCH 8/9] cleanup

---
 bench/bench.nim  | 240 +++++++++++++++++++++++------------------------
 bench/bench2.nim |  23 -----
 2 files changed, 118 insertions(+), 145 deletions(-)
 delete mode 100644 bench/bench2.nim

diff --git a/bench/bench.nim b/bench/bench.nim
index 8d48ec1..09967d9 100644
--- a/bench/bench.nim
+++ b/bench/bench.nim
@@ -29,163 +29,159 @@ benchRelative(regex_sol, m):
     discard regex.match(text, pattern4, m2)
   doNotOptimizeAway(m2)
 
-when isMainModule:
-  runBenchmarks()
+benchRelative(regex_macro_sol, m):
+  var d: bool
+  for i in 0 ..< m:
+    regex.match text, regex.rex"\w*sol\w*":
+      d = true
+  doNotOptimizeAway(d)
 
-when false:
-  benchRelative(regex_macro_sol, m):
-    var d: bool
-    for i in 0 ..< m:
-      regex.match text, regex.rex"\w*sol\w*":
-        d = true
-    doNotOptimizeAway(d)
+var dummyTextNums = """650-253-0001"""
 
-  var dummyTextNums = """650-253-0001"""
+var pattern_nums = re.re"^[0-9]+-[0-9]+-[0-9]+$"
 
-  var pattern_nums = re.re"^[0-9]+-[0-9]+-[0-9]+$"
+bench(re_nums, m):
+  var d: bool
+  for i in 0 ..< m:
+    d = re.match(dummyTextNums, pattern_nums)
+  doNotOptimizeAway(d)
 
-  bench(re_nums, m):
-    var d: bool
-    for i in 0 ..< m:
-      d = re.match(dummyTextNums, pattern_nums)
-    doNotOptimizeAway(d)
+const n_pattern_nums = regex.re2"[0-9]+-[0-9]+-[0-9]+"
 
-  const n_pattern_nums = regex.re2"[0-9]+-[0-9]+-[0-9]+"
+benchRelative(regex_nums, m):
+  var m2: regex.RegexMatch2
+  for i in 0 ..< m:
+    discard regex.match(dummyTextNums, n_pattern_nums, m2)
+  doNotOptimizeAway(m2)
 
-  benchRelative(regex_nums, m):
-    var m2: regex.RegexMatch2
-    for i in 0 ..< m:
-      discard regex.match(dummyTextNums, n_pattern_nums, m2)
-    doNotOptimizeAway(m2)
+benchRelative(regex_macro_nums, m):
+  var d: bool
+  for i in 0 ..< m:
+    regex.match text, regex.rex"[0-9]+-[0-9]+-[0-9]+":
+      d = true
+  doNotOptimizeAway(d)
 
-  benchRelative(regex_macro_nums, m):
-    var d: bool
-    for i in 0 ..< m:
-      regex.match text, regex.rex"[0-9]+-[0-9]+-[0-9]+":
-        d = true
-    doNotOptimizeAway(d)
+var pattern_nums2 = re.re"^[0-9]+..*$"
 
-  var pattern_nums2 = re.re"^[0-9]+..*$"
+bench(re_nums2, m):
+  var d: bool
+  for i in 0 ..< m:
+    d = re.match(dummyTextNums, pattern_nums2)
+  doNotOptimizeAway(d)
 
-  bench(re_nums2, m):
-    var d: bool
-    for i in 0 ..< m:
-      d = re.match(dummyTextNums, pattern_nums2)
-    doNotOptimizeAway(d)
+const n_pattern_nums2 = regex.re2"[0-9]+..*"
 
-  const n_pattern_nums2 = regex.re2"[0-9]+..*"
+benchRelative(regex_nums2, m):
+  var m3: regex.RegexMatch2
+  for i in 0 ..< m:
+    discard regex.match(dummyTextNums, n_pattern_nums2, m3)
+  doNotOptimizeAway(m3)
 
-  benchRelative(regex_nums2, m):
-    var m3: regex.RegexMatch2
-    for i in 0 ..< m:
-      discard regex.match(dummyTextNums, n_pattern_nums2, m3)
-    doNotOptimizeAway(m3)
+benchRelative(regex_macro_nums2, m):
+  var d: bool
+  for i in 0 ..< m:
+    regex.match text, regex.rex"[0-9]+..*":
+      d = true
+  doNotOptimizeAway(d)
+
+when false:  # XXX remove
+  var lits_find_re = re.re"do|re|mi|fa|sol"
 
-  benchRelative(regex_macro_nums2, m):
-    var d: bool
+  bench(re_lits_find, m):
+    var d: int
     for i in 0 ..< m:
-      regex.match text, regex.rex"[0-9]+..*":
-        d = true
+      d = re.find(text, lits_find_re)
     doNotOptimizeAway(d)
 
-  when false:  # XXX remove
-    var lits_find_re = re.re"do|re|mi|fa|sol"
+  const lits_find = regex.re2"do|re|mi|fa|sol"
 
-    bench(re_lits_find, m):
-      var d: int
-      for i in 0 ..< m:
-        d = re.find(text, lits_find_re)
-      doNotOptimizeAway(d)
-
-    const lits_find = regex.re2"do|re|mi|fa|sol"
+  benchRelative(regex_lits_find, m):
+    var m2: regex.RegexMatch2
+    for i in 0 ..< m:
+      discard regex.find(text, lits_find, m2)
+    doNotOptimizeAway(m2)
 
-    benchRelative(regex_lits_find, m):
-      var m2: regex.RegexMatch2
-      for i in 0 ..< m:
-        discard regex.find(text, lits_find, m2)
-      doNotOptimizeAway(m2)
+const bench_text = staticRead("input-text.txt")
 
-  const bench_text = staticRead("input-text.txt")
+var email_find_all_re = re.re"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"
 
-  var email_find_all_re = re.re"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"
+bench(re_email_find_all, m):
+  var d = 0
+  for i in 0 ..< m:
+    for _ in re.findAll(bench_text, email_find_all_re):
+      d += 1
+  doAssert d == 92
+  doNotOptimizeAway(d)
 
-  bench(re_email_find_all, m):
-    var d = 0
-    for i in 0 ..< m:
-      for _ in re.findAll(bench_text, email_find_all_re):
-        d += 1
-    doAssert d == 92
-    doNotOptimizeAway(d)
+const email_find_all = regex.re2"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"
 
-  const email_find_all = regex.re2"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"
+benchRelative(regex_email_find_all, m):
+  var d = 0
+  for i in 0 ..< m:
+    for _ in regex.findAll(bench_text, email_find_all):
+      d += 1
+  doAssert d == 92
+  doNotOptimizeAway(d)
 
-  benchRelative(regex_email_find_all, m):
-    var d = 0
-    for i in 0 ..< m:
-      for _ in regex.findAll(bench_text, email_find_all):
-        d += 1
-    doAssert d == 92
-    doNotOptimizeAway(d)
+var uri_find_all_re = re.re"[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"
 
-  var uri_find_all_re = re.re"[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"
+bench(re_uri_find_all, m):
+  var d = 0
+  for i in 0 ..< m:
+    for _ in re.findAll(bench_text, uri_find_all_re):
+      d += 1
+  doAssert d == 5301
+  doNotOptimizeAway(d)
 
-  bench(re_uri_find_all, m):
-    var d = 0
-    for i in 0 ..< m:
-      for _ in re.findAll(bench_text, uri_find_all_re):
-        d += 1
-    doAssert d == 5301
-    doNotOptimizeAway(d)
+const uri_find_all = regex.re2"[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"
 
-  const uri_find_all = regex.re2"[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"
+benchRelative(regex_uri_find_all, m):
+  var d = 0
+  for i in 0 ..< m:
+    for _ in regex.findAll(bench_text, uri_find_all):
+      d += 1
+  doAssert d == 5301
+  doNotOptimizeAway(d)
 
-  benchRelative(regex_uri_find_all, m):
-    var d = 0
-    for i in 0 ..< m:
-      for _ in regex.findAll(bench_text, uri_find_all):
-        d += 1
-    doAssert d == 5301
-    doNotOptimizeAway(d)
+var ip_find_all_re = re.re"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"
 
-  var ip_find_all_re = re.re"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"
+bench(re_ip_find_all, m):
+  var d = 0
+  for i in 0 ..< m:
+    for _ in re.findAll(bench_text, ip_find_all_re):
+      d += 1
+  doAssert d == 5
+  doNotOptimizeAway(d)
 
-  bench(re_ip_find_all, m):
-    var d = 0
-    for i in 0 ..< m:
-      for _ in re.findAll(bench_text, ip_find_all_re):
-        d += 1
-    doAssert d == 5
-    doNotOptimizeAway(d)
+const ip_find_all = regex.re2"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"
 
-  const ip_find_all = regex.re2"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"
+benchRelative(regex_ip_find_all, m):
+  var d = 0
+  for i in 0 ..< m:
+    for _ in regex.findAll(bench_text, ip_find_all):
+      d += 1
+  doAssert d == 5
+  doNotOptimizeAway(d)
 
-  benchRelative(regex_ip_find_all, m):
+when true:
+  bench(runes, m):
     var d = 0
     for i in 0 ..< m:
-      for _ in regex.findAll(bench_text, ip_find_all):
+      for _ in bench_text.runes:
         d += 1
-    doAssert d == 5
     doNotOptimizeAway(d)
 
-  when true:
-    bench(runes, m):
-      var d = 0
-      for i in 0 ..< m:
-        for _ in bench_text.runes:
-          d += 1
-      doNotOptimizeAway(d)
-
-  bench(dummy, m):
-    for i in 0 ..< m:
-      memoryClobber()
+bench(dummy, m):
+  for i in 0 ..< m:
+    memoryClobber()
 
-  when isMainModule:
-    runBenchmarks()
+when isMainModule:
+  runBenchmarks()
 
-  #[
-  # Profiling:
-  # (but extract the bench to another module without nimbench)
-  # open the log with KCachegrind
+#[
+# Profiling:
+# (but extract the bench to another module without nimbench)
+# open the log with KCachegrind
 
-  $ nim c --debugger:native --threads:off -d:danger -d:useMalloc -o:bin/bench2 bench/bench2.nim && valgrind --tool=callgrind -v ./bin/bench2
-  ]#
+$ nim c --debugger:native --threads:off -d:danger -d:useMalloc -o:bin/bench2 bench/bench2.nim && valgrind --tool=callgrind -v ./bin/bench2
+]#
diff --git a/bench/bench2.nim b/bench/bench2.nim
deleted file mode 100644
index 9f09372..0000000
--- a/bench/bench2.nim
+++ /dev/null
@@ -1,23 +0,0 @@
-import unicode
-from regex import nil
-
-func genText(): string {.compileTime.} =
-  result = ""
-  for _ in 0 .. 100000:
-    result.add("a")
-  result.add("sol")
-  for _ in 0 .. 100000:
-    result.add("b")
-  #result.add("ฅ")
-const text = genText()
-
-const pattern4 = regex.re2(r"\w*sol\w*") #, {regex.RegexFlag.reAscii})
-
-proc runBenchmarks() =
-  var m2: regex.RegexMatch2
-  for i in 0 ..< 500:
-    discard regex.match(text, pattern4, m2)
-  echo m2.captures
-
-when isMainModule:
-  runBenchmarks()
\ No newline at end of file

From bdfef55ce87488f4728aec5c89f7c344cedc6f8e Mon Sep 17 00:00:00 2001
From: nitely <ecastroborsani@gmail.com>
Date: Fri, 15 Sep 2023 18:19:02 -0300
Subject: [PATCH 9/9] remove todo

---
 src/regex/types.nim | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/regex/types.nim b/src/regex/types.nim
index c275fff..3c62326 100644
--- a/src/regex/types.nim
+++ b/src/regex/types.nim
@@ -13,12 +13,6 @@ import ./common
 
 # XXX split nfatype.nim and nodetype.nim
 #     once acyclic imports are supported
-# XXX refactor transitions, add tIdx: int16
-#     to Node, make TransitionsAll dense;
-#     remove z and store transition Nodes in
-#     the NFA; flatten TransitionsAll to seq[int16]
-#     + delimiter (-1'i16) or set first bit of
-#     every last tn idx
 
 type
   # exptype.nim