Skip to content

Commit

Permalink
Improve NFA generation terseness
Browse files Browse the repository at this point in the history
Improve NFA generation, removing unnecessary epsilon transitions.
  • Loading branch information
marianobarrios committed May 14, 2016
1 parent 56cf28a commit c8adecb
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 25 deletions.
2 changes: 1 addition & 1 deletion src/main/scala/dregex/impl/LookaroundExpander.scala
Expand Up @@ -103,7 +103,7 @@ object LookaroundExpander extends StrictLogging {
throw new UnsupportedException("lookaround with variable-length prefix")
}
val suffix = juxt.values.slice(i + 1, juxt.values.size)
val wildcard = Rep(min = 0, max = -1, value = Wildcard)
val wildcard = Rep(min = 0, max = None, value = Wildcard)
TreeOperation(op, expandImpl(Juxt(prefix ++ suffix)), AtomTree(Juxt(prefix :+ value :+ wildcard)))
case Lookaround(Behind, cond, value) =>
throw new UnsupportedException("lookbehind")
Expand Down
67 changes: 54 additions & 13 deletions src/main/scala/dregex/impl/Nfa.scala
Expand Up @@ -38,8 +38,8 @@ object Nfa {
case Juxt(Seq(head)) =>
fromTreeImpl(head, from, to)

// doing this iteratively prevents stack overflows in the case of long literal strings
case Juxt(init :+ last) =>
// doing this iteratively prevents stack overflows in the case of long literal strings
var merged = Map[State, Map[NormTree.Char, Set[State]]]()
var prev = from
for (part <- init) {
Expand All @@ -49,13 +49,36 @@ object Nfa {
}
mergeTransitions(merged, fromTreeImpl(last, prev, to))

case Disj(Seq()) => Map()
case Disj(parts) => mergeTransitions(parts.map(part => fromTreeImpl(part, from, to)): _*)
case Disj(Seq()) =>
Map()

case Disj(parts) =>
mergeTransitions(parts.map(part => fromTreeImpl(part, from, to)): _*)

// trivial cases

case Rep(1, Some(1), value) =>
fromTreeImpl(value, from, to)

case Rep(0, Some(0), value) =>
Map(from -> Map(Epsilon -> Set(to)))

case Rep(n, -1, value) if n > 0 => fromTreeImpl(Juxt(Seq.fill(n)(value) :+ Rep(0, -1, value)), from, to)
case Rep(n, m, value) if n > 0 => fromTreeImpl(Juxt(Seq.fill(n)(value) :+ Rep(0, m - n, value)), from, to)
// infinite repetitions

case Rep(n, None, value) if n > 1 =>
val juxt = Juxt(Seq.fill(n)(value) :+ Rep(0, None, value))
fromTreeImpl(juxt, from, to)

case Rep(0, -1, value) =>
case Rep(1, None, value) =>
val int1 = new State
val int2 = new State
mergeTransitions(
fromTreeImpl(value, int1, int2),
Map(from -> Map(Epsilon -> Set(int1))),
Map(int2 -> Map(Epsilon -> Set(to))),
Map(int2 -> Map(Epsilon -> Set(int1))))

case Rep(0, None, value) =>
val int1 = new State
val int2 = new State
mergeTransitions(
Expand All @@ -64,12 +87,28 @@ object Nfa {
Map(int2 -> Map(Epsilon -> Set(to))),
Map(from -> Map(Epsilon -> Set(to))),
Map(int2 -> Map(Epsilon -> Set(int1))))

case Rep(0, 0, value) =>
Map(from -> Map(Epsilon -> Set(to)))

// doing this iteratively prevents stack overflows in the case of long repetitions
case Rep(0, m, value) if m > 0 =>

// finite repetitions

case Rep(n, Some(m), value) if n > 1 =>
val x = n - 1
val juxt = Juxt(Seq.fill(x)(value) :+ Rep(1, Some(m - x), value))
fromTreeImpl(juxt, from, to)

case Rep(1, Some(m), value) if m > 0 =>
// doing this iteratively prevents stack overflows in the case of long repetitions
val int1 = new State
var merged = fromTreeImpl(value, from, int1)
var prev = int1
for (i <- 1 until m - 1) {
val int = new State
merged = mergeTransitions(merged, fromTreeImpl(value, prev, int), Map(prev -> Map(Epsilon -> Set(to))))
prev = int
}
mergeTransitions(merged, fromTreeImpl(value, prev, to), Map(prev -> Map(Epsilon -> Set(to))))

case Rep(0, Some(m), value) if m > 0 =>
// doing this iteratively prevents stack overflows in the case of long repetitions
var merged = Map[State, Map[NormTree.Char, Set[State]]]()
var prev = from
for (i <- 0 until m - 1) {
Expand All @@ -78,8 +117,10 @@ object Nfa {
prev = int
}
mergeTransitions(merged, fromTreeImpl(value, prev, to), Map(prev -> Map(Epsilon -> Set(to))))


case c: Char => Map(from -> Map(c -> Set(to)))
case c: Char =>
Map(from -> Map(c -> Set(to)))

}

Expand Down
8 changes: 6 additions & 2 deletions src/main/scala/dregex/impl/NormTree.scala
Expand Up @@ -23,10 +23,14 @@ object NormTree {
override def toString = s"Disj(${values.mkString(", ")})"
}

case class Rep(min: Int, max: Int, value: Node) extends Node
/**
* A repetition of `value` occurring at least `min` times and at most `max` times.
* If `max` is `None`, the repetition is unbounded (infinite).
*/
case class Rep(min: Int, max: Option[Int], value: Node) extends Node

case class Juxt(values: Seq[Node]) extends Node {
override def toString = s"Juxt(${values.mkString(", ")})"
}

}
12 changes: 6 additions & 6 deletions src/main/scala/dregex/impl/RegexParser.scala
Expand Up @@ -142,24 +142,24 @@ class RegexParser extends JavaTokenParsers {
// Lazy quantifiers (by definition) don't change whether the text matches or not, so they can be ignored for our purposes

def quantifiedBranch = regexAtom ~ ("+" | "*" | "?") ~ "?".? ^^ {
case atom ~ "+" ~ _ => Rep(min = 1, max = -1, value = atom)
case atom ~ "*" ~ _ => Rep(min = 0, max = -1, value = atom)
case atom ~ "?" ~ _ => Rep(min = 0, max = 1, value = atom)
case atom ~ "+" ~ _ => Rep(min = 1, max = None, value = atom)
case atom ~ "*" ~ _ => Rep(min = 0, max = None, value = atom)
case atom ~ "?" ~ _ => Rep(min = 0, max = Some(1), value = atom)
}

def generalQuantifier = "{" ~ number ~ ("," ~ number.?).? ~ "}" ~ "?".? ^^ {
case _ ~ minVal ~ Some(comma ~ Some(maxVal)) ~ _ ~ _ =>
// Quantifiers of the form {min,max}
if (minVal <= maxVal)
(minVal, maxVal)
(minVal, Some(maxVal))
else
throw new InvalidRegexException("invalid range in quantifier")
case _ ~ minVal ~ Some(comma ~ None) ~ _ ~ _ =>
// Quantifiers of the form {min,}
(minVal, -1)
(minVal, None)
case _ ~ minVal ~ None ~ _ ~ _ =>
// Quantifiers of the form "{n}": the value is captured as "min", despite also being the max
(minVal, minVal)
(minVal, Some(minVal))
}

def generallyQuantifiedBranch = regexAtom ~ generalQuantifier ^^ {
Expand Down
6 changes: 3 additions & 3 deletions src/main/scala/dregex/impl/RegexTree.scala
Expand Up @@ -113,11 +113,11 @@ object RegexTree {

}

case class Rep(min: Int, max: Int, value: Node) extends SingleComplexPart {
case class Rep(min: Int, max: Option[Int], value: Node) extends SingleComplexPart {

def length = (min, max) match {
case (_, -1) => None
case (n, m) if n == m => Some(n)
case (_, None) => None
case (n, Some(m)) if n == m => Some(n)
case (_, _) => None
}

Expand Down

0 comments on commit c8adecb

Please sign in to comment.