Skip to content

Commit

Permalink
Improve logging
Browse files Browse the repository at this point in the history
  • Loading branch information
marianobarrios committed Apr 2, 2016
1 parent fda9237 commit 80871e7
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 41 deletions.
6 changes: 3 additions & 3 deletions src/main/scala/dregex/CompiledRegex.scala
Expand Up @@ -4,16 +4,16 @@ import com.typesafe.scalalogging.slf4j.StrictLogging
import dregex.impl.RegexTree
import dregex.impl.Operations
import dregex.impl.Util
import dregex.impl.Dfa

/**
* A fully-compiled regular expression that was generated from a string literal.
*/
class CompiledRegex private[dregex] (originalString: String, val parsedRegex: ParsedRegex, val universe: Universe)
extends Regex with StrictLogging {

val (dfa, t2) = Util.time(Operations.resolve(parsedRegex.metaTree, universe))
logger.trace(s"Time to resolve ${t2 / 1000} ms")
val dfa: Dfa = Operations.resolve(parsedRegex.metaTree, universe)

override def toString = originalString
override def toString = s"[$originalString] (DFA states: ${dfa.stateCount})"

}
97 changes: 66 additions & 31 deletions src/main/scala/dregex/Regex.scala
Expand Up @@ -3,12 +3,14 @@ package dregex
import dregex.impl.RegexParser
import dregex.impl.Dfa
import dregex.impl.NormTree
import com.typesafe.scalalogging.slf4j.StrictLogging
import dregex.impl.Util

/**
* A regular expression, ready to be tested against strings, or to take part in an operation against another.
* Internally, instances of this type have a DFA (Deterministic Finite Automaton).
*/
trait Regex {
trait Regex extends StrictLogging {

def dfa: Dfa
def universe: Universe
Expand All @@ -17,18 +19,18 @@ trait Regex {
if (other.universe != universe)
throw new Exception("cannot make operations between regex from different universes")
}

/**
* Return whether a string is matched by the regular expression (i.e. whether the string is included in the language
* generated by the expression).
* Return whether a string is matched by the regular expression (i.e. whether the string is included in the language
* generated by the expression).
* As the match is done using a DFA, its complexity is O(n), where n is the length of the string. It is constant
* with respect to the length of the expression.
*/
def matches(string: String): Boolean = {
val (result, _) = matchAndReport(string)
result
}

/**
* Similar to method [[matches]], except that it also returns how many characters were successfully matched in case of
* failure.
Expand Down Expand Up @@ -59,43 +61,55 @@ trait Regex {
* the DFA of the operands.
*/
def intersect(other: Regex): Regex = {
checkUniverse(other)
new SynteticRegex(dfa intersect other.dfa, universe)
val (res, time) = Util.time {
checkUniverse(other)
new SynteticRegex(dfa intersect other.dfa, universe)
}
logger.debug(s"$this and $other intersected in $time")
res
}

/**
* Subtract other regular expression from this one. The resulting expression will match the strings that are
* matched by this expression and are not matched by the other, and only those. Differences take O(n*m) time, where n
* matched by this expression and are not matched by the other, and only those. Differences take O(n*m) time, where n
* and m are the number of states of the DFA of the operands.
*/
def diff(other: Regex): Regex = {
checkUniverse(other)
new SynteticRegex(dfa diff other.dfa, universe)
val (res, time) = Util.time {
checkUniverse(other)
new SynteticRegex(dfa diff other.dfa, universe)
}
logger.debug(s"$this and $other diffed in $time")
res
}

/**
* Unite this regular expression with another. The resulting expression will match the strings that are matched by
* either of the operands, and only those. Unions take O(n*m) time, where n and m are the number of states of the DFA
* Unite this regular expression with another. The resulting expression will match the strings that are matched by
* either of the operands, and only those. Unions take O(n*m) time, where n and m are the number of states of the DFA
* of the operands.
*/
def union(other: Regex): Regex = {
checkUniverse(other)
new SynteticRegex(dfa union other.dfa, universe)
val (res, time) = Util.time {
checkUniverse(other)
new SynteticRegex(dfa union other.dfa, universe)
}
logger.debug(s"$this and $other unioned in $time")
res
}

/**
* Return whether this expression matches at least one string in common with another. Intersections take O(n*m) time,
* Return whether this expression matches at least one string in common with another. Intersections take O(n*m) time,
* where n and m are the number of states of the DFA of the operands.
*/
def doIntersect(other: Regex): Boolean = intersect(other).matchesAnything()

def isSubsetOf(other: Regex): Boolean = !(this diff other matchesAnything)
def isSubsetOf(other: Regex): Boolean = !(this diff other matchesAnything)

def isProperSubsetOf(other: Regex): Boolean = (this isSubsetOf other) && (other diff this matchesAnything)
def isProperSubsetOf(other: Regex): Boolean = (this isSubsetOf other) && (other diff this matchesAnything)

/**
* Return whether this regular expression is equivalent to other. Two regular expressions are equivalent if they
* match exactly the same set of strings. This operation takes O(n*m) time, where n and m are the number of states of
* match exactly the same set of strings. This operation takes O(n*m) time, where n and m are the number of states of
* the DFA of the operands.
*/
def equiv(other: Regex): Boolean = {
Expand All @@ -110,28 +124,49 @@ trait Regex {

}

object Regex {
object Regex extends StrictLogging {

def parse(regex: String): ParsedRegex = {
val (parsedRegex, time) = Util.time {
new ParsedRegex(RegexParser.parse(regex))
}
// log parsing time as trace, because this is usually quite fast
logger.trace(s"regex [$regex] parsed in $time")
parsedRegex
}

def parse(regex: String): ParsedRegex = new ParsedRegex(RegexParser.parse(regex))

def compile(regex: String): CompiledRegex = {
val tree = parse(regex)
new CompiledRegex(regex, tree, new Universe(Seq(tree)))
val (compiled, time) = Util.time {
new CompiledRegex(regex, tree, new Universe(Seq(tree)))
}
logger.debug(s"$compiled compiled in $time")
compiled
}

def compileParsed(originalString: String, tree: ParsedRegex, universe: Universe): CompiledRegex = {
new CompiledRegex(originalString, tree, universe)
val (compiled, time) = Util.time {
new CompiledRegex(originalString, tree, universe)
}
logger.debug(s"$compiled compiled in $time")
compiled
}

def compile(regexs: Seq[String]): Seq[(String, CompiledRegex)] = {
val trees = regexs.map(r => (r, parse(r)))
val universe = new Universe(trees.unzip._2)
for ((regex, tree) <- trees) yield regex -> new CompiledRegex(regex, tree, universe)
for ((regex, tree) <- trees) yield {
val (res, time) = Util.time {
regex -> new CompiledRegex(regex, tree, universe)
}
logger.debug(s"${res._2} compiled in $time")
res
}
}

/**
* Create a regular expression that does not match anything. Note that that is different from matching the empty
* string. Despite the theoretical equivalence of automata and regular expressions, in practice there is no regular
* Create a regular expression that does not match anything. Note that that is different from matching the empty
* string. Despite the theoretical equivalence of automata and regular expressions, in practice there is no regular
* expression that does not match anything.
*/
def nullRegex(u: Universe) = new SynteticRegex(Dfa.NothingDfa, u)
Expand Down
6 changes: 5 additions & 1 deletion src/main/scala/dregex/SynteticRegex.scala
Expand Up @@ -6,5 +6,9 @@ import dregex.impl.Dfa
* A regular expression that was generated by an operation between others (not parsing a string), so it lacks a
* literal expression or NFA.
*/
class SynteticRegex private[dregex] (val dfa: Dfa, val universe: Universe) extends Regex
class SynteticRegex private[dregex] (val dfa: Dfa, val universe: Universe) extends Regex {

override def toString = s"[synthetic] (DFA states: ${dfa.stateCount})"

}

8 changes: 6 additions & 2 deletions src/main/scala/dregex/Universe.scala
Expand Up @@ -4,11 +4,15 @@ import dregex.impl.Normalizer
import dregex.impl.NormTree

/**
* Represent the set of characters that is the union of the sets of characters of a group of regular expressions.
* Represent the set of characters that is the union of the sets of characters of a group of regular expressions.
* Regex must belong to the same Universe to be able to make operations between them.
*/
class Universe(parsedRegex: Seq[ParsedRegex]) {
val alphabet: Set[NormTree.SglChar] =

val alphabet: Set[NormTree.SglChar] =
parsedRegex.map(r => Normalizer.alphabet(r.tree)).flatten.map(NormTree.Lit(_)).toSet + NormTree.Other

// TODO: toString using hash

}

4 changes: 3 additions & 1 deletion src/main/scala/dregex/impl/Dfa.scala
Expand Up @@ -14,6 +14,8 @@ class Dfa(val impl: GenericDfa[State], val minimal: Boolean = false) extends Str

case class BiState(first: State, second: State)

lazy val stateCount = impl.stateCount

/*
* Intersections, unions and differences between DFA are done using the "product construction"
* The following pages include graphical examples of this technique:
Expand Down Expand Up @@ -109,7 +111,7 @@ class Dfa(val impl: GenericDfa[State], val minimal: Boolean = false) extends Str
}

/**
* Return whether a DFA matches anything. A DFA matches at least some language if there is a path from the initial
* Return whether a DFA matches anything. A DFA matches at least some language if there is a path from the initial
* state to any of the accepting states
*/
def matchesAnything(): Boolean = {
Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/dregex/impl/GenericDfa.scala
Expand Up @@ -14,6 +14,8 @@ case class GenericDfa[A](initial: A, transitions: Map[A, Map[NormTree.SglChar, A

lazy val allChars = transitions.values.map(_.keys).flatten.toSet

lazy val stateCount = allStates.size

/**
* Rewrite a DFA using canonical names for the states.
* Useful for simplifying the DFA product of intersections or NFA conversions.
Expand Down
5 changes: 3 additions & 2 deletions src/main/scala/dregex/impl/Util.scala
@@ -1,6 +1,7 @@
package dregex.impl

import com.typesafe.scalalogging.slf4j.StrictLogging
import java.time.Duration

object Util extends StrictLogging {

Expand All @@ -19,10 +20,10 @@ object Util extends StrictLogging {

def doIntersect[A](left: Set[A], right: Set[A]) = left exists right

def time[A](thunk: => A): (A, Long) = {
def time[A](thunk: => A): (A, Duration) = {
val start = System.nanoTime()
val res = thunk
val time = (System.nanoTime() - start) / 1000
val time = Duration.ofNanos(System.nanoTime() - start)
(res, time)
}

Expand Down
2 changes: 1 addition & 1 deletion src/test/resources/logback.xml
Expand Up @@ -19,7 +19,7 @@
</appender>

<root level="debug">
<!-- <appender-ref ref="FILE" /> -->
<appender-ref ref="FILE" />
<appender-ref ref="STDOUT"/>
</root>

Expand Down

0 comments on commit 80871e7

Please sign in to comment.