Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

A 30x performance improvement in Textile parsing

  • Loading branch information...
commit ea6eb32198ac2370d7be531f512f5758c91f13ee 1 parent 033a6ab
@dpp dpp authored
View
6 lift-textile/pom.xml
@@ -22,6 +22,12 @@
<scope>test</scope>
</dependency>
<dependency>
+ <groupId>net.liftweb</groupId>
+ <artifactId>lift-util</artifactId>
+ <version>1.1-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.5</version>
View
106 lift-textile/src/main/scala/net/liftweb/textile/TextileParser.scala
@@ -89,7 +89,7 @@ object TextileParser {
}
val parser = new TextileParsers(urlRewrite, disableLinks)
- val lst = parser.document(new _root_.scala.util.parsing.input.CharArrayReader(toParse.toCharArray()))
+ val lst = parser.document(new MyReader(toParse.toCharArray(), 0))
lst map { it =>
val tr = findRefs(List(it))
@@ -101,6 +101,106 @@ object TextileParser {
Some(it)
} getOrElse None
}
+
+ import _root_.scala.util.parsing.input._
+ import collection.mutable.ArrayBuffer
+
+ /**
+  * A `CharArrayReader` that carries a table of line-start offsets alongside the
+  * source, and hands that same table to every reader and position derived from it.
+  *
+  * @param source the characters being read
+  * @param offset the index of the current character within `source`
+  * @param index  offsets at which lines start: 0, one entry after each '\n',
+  *               and finally `source.length`
+  */
+ private class MyReader(source: Array[Char], offset: Int, index: Array[Int]) extends CharArrayReader(source, offset) {
+
+   /** Creates a reader at `offset`, scanning `source` once to build the line-start table. */
+   def this(source: Array[Char], offset: Int) =
+     this(source, offset, {
+       val starts = new ArrayBuffer[Int]
+       starts += 0
+       var i = 0
+       while (i < source.length) {
+         // A new line begins immediately after every newline character.
+         if (source(i) == '\n') starts += (i + 1)
+         i += 1
+       }
+       // The end of the document closes the table.
+       starts += source.length
+       starts.toArray
+     })
+
+   /** The reader one character further on, or this reader when `offset` is already at the end. */
+   override def rest: CharSequenceReader =
+     if (offset >= source.length) this
+     else new MyReader(source, offset + 1, index)
+
+   /** The position of the first element in the reader, backed by the shared line-start table. */
+   override lazy val pos: Position = new MyOffsetPosition(source, offset, index)
+
+   /** Returns a reader consisting of all elements except the first `n` elements. */
+   override def drop(n: Int): CharSequenceReader =
+     new MyReader(source, offset + n, index)
+
+ }
+
+
+ /**
+  * A `Position` represented as an offset into a source document, adapted from
+  * `scala.util.parsing.input.OffsetPosition` (original author: Martin Odersky).
+  *
+  * The table of line starts is passed in by the caller instead of being computed
+  * by each position.
+  *
+  * @param source The source document
+  * @param offset The offset indicating the position
+  * @param index  The offsets of all line starts, including the first line,
+  *               followed by the document length (eof)
+  */
+ private case class MyOffsetPosition(source: java.lang.CharSequence, offset: Int, index: Array[Int]) extends Position {
+
+   /** The line number referred to by the position; line numbers start at 1 */
+   lazy val line: Int = {
+     var lo = 0
+     var hi = index.length - 1
+     // Binary search: narrow [lo, hi) until `lo` is the last line start <= offset.
+     while (lo + 1 < hi) {
+       val mid = (hi + lo) / 2
+       if (offset < index(mid)) hi = mid
+       else lo = mid
+     }
+     lo + 1
+   }
+
+   /** The column number referred to by the position; column numbers start at 1 */
+   lazy val column: Int = offset - index(line - 1) + 1
+
+   /** The contents of the line this position is on.
+    *
+    * @return the text of the line, not including its terminating newline
+    */
+   def lineContents: String = {
+     val start = index(line - 1)
+     val end = index(line)
+     // The span between two line starts ends with the line's own '\n' (the last
+     // line may have none); leave it out so the result is the line text only.
+     val stop = if (start < end && source.charAt(end - 1) == '\n') end - 1 else end
+     source.subSequence(start, stop).toString
+   }
+
+   /** Returns a string representation of the `Position', of the form `line.column' */
+   override def toString = line+"."+column
+
+   /** Compare this position to another. Offset-based positions are compared by
+    * offset; any other position is compared by line number, using the column
+    * to break a tie.
+    *
+    * @param that a `Position' to compare to this `Position'
+    * @return true if this position comes strictly before `that'
+    */
+   override def <(that: Position) = that match {
+     case OffsetPosition(_, that_offset) =>
+       this.offset < that_offset
+     // Positions of this class are compared by offset too, which avoids
+     // computing line and column just to order two positions.
+     case MyOffsetPosition(_, that_offset, _) =>
+       this.offset < that_offset
+     case _ =>
+       this.line < that.line ||
+       this.line == that.line && this.column < that.column
+   }
+ }
def toHtml(toParse: String, wikiUrlFunc: Option[RewriteFunc], disableLinks: Boolean): NodeSeq = {
parse(toParse, wikiUrlFunc, disableLinks).map(_.toHtml) getOrElse Text("")
@@ -153,7 +253,7 @@ object TextileParser {
lazy val beginl = Parser[Unit]{ in =>
- if(in.pos.column==1) Success((), in) else Failure("at column "+in.pos.column+", not beginning of line", in)
+ if(in.pos.column==1) Success((), in) else Failure("", in)
}
lazy val beginlS = beginl ~ rep(' ')
@@ -475,7 +575,7 @@ object TextileParser {
Parser[List[Textile] ~ List[Textile] ~ List[Attribute] ~ List[Textile]] =
formattedLineElem(m, rep(attribute))
- lazy val begOrSpace: Parser[Int] = beginl ^^^ 0 | rep1(' ') ^^ {case lst => lst.length}
+ lazy val begOrSpace: Parser[Int] = rep1(' ') ^^ {case lst => lst.length} | beginl ^^^ 0
lazy val spaceOrEnd: Parser[Int] = endOfLine ^^^ 0 | rep1(' ') ^^ {case lst => lst.length}
def formattedLineElem[Q <% Parser[Any]](m: Q, p: Parser[List[Attribute]]):
View
247 lift-textile/src/test/scala/net/lift/textile/TextileSpec.scala
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 WorldWide Conferencing, LLC
+ * Copyright 2008-2009 WorldWide Conferencing, LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,6 +21,8 @@ import _root_.org.specs.Sugar._
import _root_.scala.xml._
+import _root_.net.liftweb.util._
+
class TextileSpecTest extends Runner(TextileSpec) with JUnit with Console
object TextileSpec extends Specification {
import TextileParser._
@@ -148,7 +150,9 @@ A regular example.
}
}
- """VF-DE:
+
+ "Fast" in {
+ val s = """VF-DE:
IFC.ksh: 04/10/08 03:26:52: * * * * * * * * * * * * * * * start of IFC.ksh * * * * * * * * * * * * * * *
IFC.ksh: 04/10/08 15:34:30: * * * * * * * * * * * * * * * end of IFC.ksh * * * * * * * * * * * * * * *
VF-ES:
@@ -199,6 +203,245 @@ UK:
IFC.ksh: 04/11/08 03:38:41: * * * * * * * * * * * * * * * start of IFC.ksh * * * * * * * * * * * * * * *
IFC.ksh: 04/11/08 05:26:49: * * * * * * * * * * * * * * * end of IFC.ksh * * * * * * * * * * * * * * *"""
+ val (time, res) = Helpers.calcTime{
+ toHtml(s)
+ }
+
+ time must be < 1000L
+ }
+
+ "Fast 2" in {
+ val s = """I am <em>very</em> serious
+
+Observe -- very nice!
+
+Observe - tiny and brief.
+
+"Observe!"
+
+Hello Dude
+
+**Bold * Not Strong**
+
+
+my bold line **bold**
+
+**strong* Not Bold
+
+
+*strong*
+
+This is a single paragraph
+
+This is another paragraph
+
+I am <b>very</b> serious.
+
+This
+is a paragraph
+
+<pre>
+I am <b>very</b> serious.
+
+Oh, yes I am!!
+</pre>
+
+I spoke.
+And none replied.
+
+
+
+
+Observe...
+
+Observe: 2 x 2.
+
+one(TM), two(R), three(C).
+
+h1. Header 1
+second line of header 1
+
+h2. Header 2
+
+h3. Header 3
+
+An old text
+
+bq. A block quotation.
+
+Any old text
+
+This is covered elsewhere[1].
+
+fn1. Down here, in fact.
+
+I _believe_ every word.
+
+And then? She *fell*!
+
+I __know__.
+I **really** __know__.
+
+??Cat's Cradle?? by Vonnegut
+
+Convert with @r.to_html@
+
+I'm -sure- not sure.
+
+You are a +pleasant+ child.
+
+a ^2^ + b ^2^ = c ^2^
+
+log ~2~ x
+
+I'm %unaware% of most soft drinks.
+
+I'm %{color:red}unaware%
+of most soft drinks.
+
+http://hobix.com/textile/#attributes
+
+I searched "Google":http://google.com.
+
+CamelCase
+
+\\CamelCase
+
+ThreeHumpCamel
+
+Four4FourHumpCamel
+
+
+I am crazy about "Hobix":hobix
+and "it's":hobix "all":hobix I ever
+"link to":hobix!
+
+[hobix]http://hobix.com
+
+# A first item
+# A second item
+# A third
+
+# Fuel could be:
+## Coal
+## Gasoline
+## Electricity
+# Humans need only:
+## Water
+## Protein
+
+* A first item
+* A second item
+* A third
+
+* Fuel could be:
+** Coal
+** Gasoline
+** Electricity
+* Humans need only:
+** Water
+** Protein
+
+| name | age | sex |
+| joan | 24 | f |
+| archie | 29 | m |
+| bella | 45 | f |
+
+|_. name |_. age |_. sex |
+| joan | 24 | f |
+| archie | 29 | m |
+| bella | 45 | f |
+
+|_. attribute list |
+|<. align left |
+|>. align right|
+|=. center |
+|<>. justify this block |
+|^. valign top |
+|~. bottom |
+
+|\2. spans two cols |
+| col 1 | col 2 |
+
+|/3. spans 3 rows | a |
+| b |
+| c |
+
+|{background:#ddd}. Grey cell|
+
+table{border:1px solid black}.
+|This|is|a|row|
+|This|is|a|row|
+
+|This|is|a|row|
+{background:#ddd}. |This|is|grey|row|
+
+p<. align left
+
+p>. align right
+
+p=. centered
+
+p<>. justified
+
+p(. left ident 1em
+
+p((. left ident 2em
+
+p))). right ident 3em
+
+h2()>. Bingo.
+
+h3()>[no]{color:red}. Bingo
+
+<pre>
+<code>
+a.gsub!( /</, '' )
+</code>
+</pre>
+
+
+<div style='float:right;'>
+float right
+</div>
+
+<div style='float:right;'>
+
+h3. Sidebar
+
+"Hobix":http://hobix.com/
+"Ruby":http://ruby-lang.org/
+
+</div>
+
+The main text of the
+page goes here and will
+stay to the left of the
+sidebar.
+
+!http://hobix.com/sample.jpg!
+
+!http://hobix.com/sa.jpg(Bunny.)!
+
+!http://hobix.com/sample.jpg!:http://hobix.com/
+
+!>http://hobix.com/sample.jpg!
+
+And others sat all round the small
+machine and paid it to sing to them.
+
+We use CSS(Cascading Style Sheets).
+
+
+"""
+ val (time, res) = Helpers.calcTime{
+ toHtml(s)
+ }
+
+ time must be < 1000L
+ }
+
+
"""Hello:
* THis is a * on a line
* This is a *strong* line
Please sign in to comment.
Something went wrong with that request. Please try again.