Improve logging

marianobarrios · Apr 2, 2016 · 80871e7 · 80871e7
1 parent fda9237
commit 80871e7
Show file tree

Hide file tree

Showing 8 changed files with 89 additions and 41 deletions.
diff --git a/src/main/scala/dregex/CompiledRegex.scala b/src/main/scala/dregex/CompiledRegex.scala
@@ -4,16 +4,16 @@ import com.typesafe.scalalogging.slf4j.StrictLogging
 import dregex.impl.RegexTree
 import dregex.impl.Operations
 import dregex.impl.Util
+import dregex.impl.Dfa
 
 /**
  * A fully-compiled regular expression that was generated from a string literal.
  */
 class CompiledRegex private[dregex] (originalString: String, val parsedRegex: ParsedRegex, val universe: Universe)
   extends Regex with StrictLogging {
 
-  val (dfa, t2) = Util.time(Operations.resolve(parsedRegex.metaTree, universe))
-  logger.trace(s"Time to resolve ${t2 / 1000} ms")
+  val dfa: Dfa = Operations.resolve(parsedRegex.metaTree, universe)
 
-  override def toString = originalString
+  override def toString = s"[$originalString] (DFA states: ${dfa.stateCount})"
 
 }
diff --git a/src/main/scala/dregex/Regex.scala b/src/main/scala/dregex/Regex.scala
@@ -3,12 +3,14 @@ package dregex
 import dregex.impl.RegexParser
 import dregex.impl.Dfa
 import dregex.impl.NormTree
+import com.typesafe.scalalogging.slf4j.StrictLogging
+import dregex.impl.Util
 
 /**
  * A regular expression, ready to be tested against strings, or to take part in an operation against another.
  * Internally, instances of this type have a DFA (Deterministic Finite Automaton).
  */
-trait Regex {
+trait Regex extends StrictLogging {
 
   def dfa: Dfa
   def universe: Universe
@@ -17,18 +19,18 @@ trait Regex {
     if (other.universe != universe)
       throw new Exception("cannot make operations between regex from different universes")
   }
-  
+
   /**
-   * Return whether a string is matched by the regular expression (i.e. whether the string is included in the language 
-   * generated by the expression). 
+   * Return whether a string is matched by the regular expression (i.e. whether the string is included in the language
+   * generated by the expression).
    * As the match is done using a DFA, its complexity is O(n), where n is the length of the string. It is constant
    * with respect to the length of the expression.
    */
   def matches(string: String): Boolean = {
     val (result, _) = matchAndReport(string)
     result
   }
-  
+
   /**
    * Similar to method [[matches]], except that also return how many characters were successfully matched in case of
    * failure.
@@ -59,43 +61,55 @@ trait Regex {
    * the DFA of the operands.
    */
   def intersect(other: Regex): Regex = {
-    checkUniverse(other)
-    new SynteticRegex(dfa intersect other.dfa, universe)
+    val (res, time) = Util.time {
+      checkUniverse(other)
+      new SynteticRegex(dfa intersect other.dfa, universe)
+    }
+    logger.debug(s"$this and $other intersected in $time")
+    res
   }
-  
+
   /**
    * Subtract other regular expression from this one. The resulting expression will match the strings that are
-   * matched this expression and are not matched by the other, and only those. Differences take O(n*m) time, where n 
+   * matched by this expression and are not matched by the other, and only those. Differences take O(n*m) time, where n
    * and m are the number of states of the DFA of the operands.
    */
   def diff(other: Regex): Regex = {
-    checkUniverse(other)
-    new SynteticRegex(dfa diff other.dfa, universe)
+    val (res, time) = Util.time {
+      checkUniverse(other)
+      new SynteticRegex(dfa diff other.dfa, universe)
+    }
+    logger.debug(s"$this and $other diffed in $time")
+    res
   }
-  
+
   /**
-   * Unite this regular expression with another. The resulting expression will match the strings that are matched by 
-   * either of the operands, and only those. Unions take O(n*m) time, where n and m are the number of states of the DFA 
+   * Unite this regular expression with another. The resulting expression will match the strings that are matched by
+   * either of the operands, and only those. Unions take O(n*m) time, where n and m are the number of states of the DFA
    * of the operands.
    */
   def union(other: Regex): Regex = {
-    checkUniverse(other)
-    new SynteticRegex(dfa union other.dfa, universe)
+    val (res, time) = Util.time {
+      checkUniverse(other)
+      new SynteticRegex(dfa union other.dfa, universe)
+    }
+    logger.debug(s"$this and $other unioned in $time")
+    res
   }
-  
+
   /**
-   * Return whether this expression matches at least one string in common with another. Intersections take O(n*m) time, 
+   * Return whether this expression matches at least one string in common with another. Intersections take O(n*m) time,
    * where n and m are the number of states of the DFA of the operands.
    */
   def doIntersect(other: Regex): Boolean = intersect(other).matchesAnything()
 
-  def isSubsetOf(other: Regex): Boolean =  !(this diff other matchesAnything)
+  def isSubsetOf(other: Regex): Boolean = !(this diff other matchesAnything)
 
-  def isProperSubsetOf(other: Regex): Boolean =  (this isSubsetOf other) && (other diff this matchesAnything) 
+  def isProperSubsetOf(other: Regex): Boolean = (this isSubsetOf other) && (other diff this matchesAnything)
 
   /**
    * Return whether this regular expression is equivalent to other. Two regular expressions are equivalent if they
-   * match exactly the same set of strings. This operation takes O(n*m) time, where n and m are the number of states of 
+   * match exactly the same set of strings. This operation takes O(n*m) time, where n and m are the number of states of
    * the DFA of the operands.
    */
   def equiv(other: Regex): Boolean = {
@@ -110,28 +124,49 @@ trait Regex {
 
 }
 
-object Regex {
+object Regex extends StrictLogging {
+
+  def parse(regex: String): ParsedRegex = {
+    val (parsedRegex, time) = Util.time {
+      new ParsedRegex(RegexParser.parse(regex))
+    }
+    // log parsing time at trace level, because parsing is usually quite fast
+    logger.trace(s"regex [$regex] parsed in $time")
+    parsedRegex
+  }
 
-  def parse(regex: String): ParsedRegex = new ParsedRegex(RegexParser.parse(regex))
-
   def compile(regex: String): CompiledRegex = {
     val tree = parse(regex)
-    new CompiledRegex(regex, tree, new Universe(Seq(tree)))
+    val (compiled, time) = Util.time {
+      new CompiledRegex(regex, tree, new Universe(Seq(tree)))
+    }
+    logger.debug(s"$compiled compiled in $time")
+    compiled
   }
 
   def compileParsed(originalString: String, tree: ParsedRegex, universe: Universe): CompiledRegex = {
-    new CompiledRegex(originalString, tree, universe)
+    val (compiled, time) = Util.time {
+      new CompiledRegex(originalString, tree, universe)
+    }
+    logger.debug(s"$compiled compiled in $time")
+    compiled
   }
-  
+
   def compile(regexs: Seq[String]): Seq[(String, CompiledRegex)] = {
     val trees = regexs.map(r => (r, parse(r)))
     val universe = new Universe(trees.unzip._2)
-    for ((regex, tree) <- trees) yield regex -> new CompiledRegex(regex, tree, universe)
+    for ((regex, tree) <- trees) yield {
+      val (res, time) = Util.time {
+        regex -> new CompiledRegex(regex, tree, universe)
+      }
+      logger.debug(s"${res._2} compiled in $time")
+      res
+    }
   }
-  
+
   /**
-   * Create a regular expression that does not match anything. Note that that is different from matching the empty 
-   * string. Despite the theoretical equivalence of automata and regular expressions, in practice there is no regular 
+   * Create a regular expression that does not match anything. Note that this is different from matching the empty
+   * string. Despite the theoretical equivalence of automata and regular expressions, in practice there is no regular
    * expression that does not match anything.
    */
   def nullRegex(u: Universe) = new SynteticRegex(Dfa.NothingDfa, u)

diff --git a/src/main/scala/dregex/SynteticRegex.scala b/src/main/scala/dregex/SynteticRegex.scala
@@ -6,5 +6,9 @@ import dregex.impl.Dfa
  * A regular expression that was generated by an operation between others (not parsing a string), so it lacks a 
  * literal expression or NFA.
  */
-class SynteticRegex private[dregex] (val dfa: Dfa, val universe: Universe) extends Regex
+class SynteticRegex private[dregex] (val dfa: Dfa, val universe: Universe) extends Regex {
+
+  override def toString = s"[synthetic] (DFA states: ${dfa.stateCount})"
+
+}
 
diff --git a/src/main/scala/dregex/Universe.scala b/src/main/scala/dregex/Universe.scala
@@ -4,11 +4,15 @@ import dregex.impl.Normalizer
 import dregex.impl.NormTree
 
 /**
- * Represent the set of characters that is the union of the sets of characters of a group of regular expressions. 
+ * Represent the set of characters that is the union of the sets of characters of a group of regular expressions.
  * Regex must belong to the same Universe to be able to make operations between them.
  */
 class Universe(parsedRegex: Seq[ParsedRegex]) {
-  val alphabet: Set[NormTree.SglChar] = 
+
+  val alphabet: Set[NormTree.SglChar] =
     parsedRegex.map(r => Normalizer.alphabet(r.tree)).flatten.map(NormTree.Lit(_)).toSet + NormTree.Other
+
+  // TODO: toString using hash
+
 }
 
diff --git a/src/main/scala/dregex/impl/Dfa.scala b/src/main/scala/dregex/impl/Dfa.scala
@@ -14,6 +14,8 @@ class Dfa(val impl: GenericDfa[State], val minimal: Boolean = false) extends Str
 
   case class BiState(first: State, second: State)
 
+  lazy val stateCount = impl.stateCount
+
   /*
    * Intersections, unions and differences between DFA are done using the "product construction"
    * The following pages include graphical examples of this technique:
@@ -109,7 +111,7 @@ class Dfa(val impl: GenericDfa[State], val minimal: Boolean = false) extends Str
   }
 
   /**
-   * Return whether a DFA matches anything. A DFA matches at least some language if there is a path from the initial 
+   * Return whether a DFA matches anything. A DFA matches at least some language if there is a path from the initial
    * state to any of the accepting states
    */
   def matchesAnything(): Boolean = {

diff --git a/src/main/scala/dregex/impl/GenericDfa.scala b/src/main/scala/dregex/impl/GenericDfa.scala
@@ -14,6 +14,8 @@ case class GenericDfa[A](initial: A, transitions: Map[A, Map[NormTree.SglChar, A
 
   lazy val allChars = transitions.values.map(_.keys).flatten.toSet
 
+  lazy val stateCount = allStates.size
+
   /**
    * Rewrite a DFA using canonical names for the states.
    * Useful for simplifying the DFA product of intersections or NFA conversions.

diff --git a/src/main/scala/dregex/impl/Util.scala b/src/main/scala/dregex/impl/Util.scala
@@ -1,6 +1,7 @@
 package dregex.impl
 
 import com.typesafe.scalalogging.slf4j.StrictLogging
+import java.time.Duration
 
 object Util extends StrictLogging {
 
@@ -19,10 +20,10 @@ object Util extends StrictLogging {
 
   def doIntersect[A](left: Set[A], right: Set[A]) = left exists right
 
-  def time[A](thunk: => A): (A, Long) = {
+  def time[A](thunk: => A): (A, Duration) = {
     val start = System.nanoTime()
     val res = thunk
-    val time = (System.nanoTime() - start) / 1000
+    val time = Duration.ofNanos(System.nanoTime() - start)
     (res, time)
   }
 

diff --git a/src/test/resources/logback.xml b/src/test/resources/logback.xml
@@ -19,7 +19,7 @@
 	</appender>
 
 	<root level="debug">
-		<!-- <appender-ref ref="FILE" /> -->
+		<appender-ref ref="FILE" />
 		<appender-ref ref="STDOUT"/>
 	</root>