Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Delimit formatted line elements with punctuation in addition to space

Refactor, allow proper parsing of nested elements
  • Loading branch information...
commit 0fb0a8e1df36fe2ae5eba609cf1d54011460f76b 1 parent c800244
@vdichev vdichev authored
View
88 textile/src/main/scala/net/liftweb/textile/TextileParser.scala
@@ -18,7 +18,7 @@ package net.liftweb {
package textile {
import _root_.scala.util.parsing.combinator.{Parsers, ImplicitConversions}
-import _root_.scala.xml.{Elem => XmlElem, MetaData, NodeSeq, Null, Text, TopScope, UnprefixedAttribute, Group, Node}
+import _root_.scala.xml.{Elem => XmlElem, Atom, MetaData, NodeSeq, Null, Text, TopScope, UnprefixedAttribute, Group, Node}
import _root_.scala.collection.mutable.HashMap
/**
@@ -230,14 +230,6 @@ object TextileParser {
def discard[T](p: Parser[T]): Parser[Unit] = p ^^ {x => ()}
- def peek[T](p: Parser[T]): Parser[T] = Parser { in =>
- p(in) match {
- case s @ Success(v, _) => Success(v, in)
- case e @ Error(msg, _) => Error(msg, in)
- case f @ Failure(msg, _) => Failure(msg, in)
- }
- }
-
lazy val document : Parser[Lst] = rep(paragraph) ^^ Lst
// final val Ch = '\032'
private def chrExcept(cs: Char*): Parser[Char] = elem("", {c => ('\032' :: cs.toList) forall (_ != c)}) //{x => !cs.contains(x)})
@@ -268,41 +260,28 @@ object TextileParser {
/**
* Line elements make up paragraphs and block elements
*/
- lazy val lineElem : Parser[Textile] = {
+ def lineElemParser(st: => Parser[Textile] = strong, em: => Parser[Textile] = emph) : Parser[Textile] = {
not(blankLine) ~> (endOfLine | image | footnote_def |
anchor | dimension | elipsis |
copyright | trademark | registered |
emDash |
- enDash | italic | emph | bold |
+ enDash | italic | em | bold |
cite | span | code | delete | insert |
- sup | sub | strong | html |
+ sup | sub | st | html |
single_quote | quote | acronym | charBlock)
}
+ lazy val lineElem : Parser[Textile] = lineElemParser()
/**
* If we've got an italic (__text__), the parser doesn't do well with a single underscore, so
- * we exclude looking for _emph_
+ * we exclude looking for _emph_ followed by another _
*/
- lazy val lineElem_notEmph : Parser[Textile] = {
- not(blankLine) ~> (endOfLine | image | footnote_def | anchor |
- dimension | elipsis |
- copyright | trademark | registered | emDash | enDash | italic |
- bold |
- cite | span| code | delete | insert| sup | sub | strong |
- html| single_quote | quote | acronym | charBlock)
- }
+ lazy val lineElem_inItalic : Parser[Textile] = lineElemParser(em = emphNotItalic)
/**
- * Don't look for *strong* if we're currently in a **bold** element
+ * Don't look for *strong* followed by * if we're currently in a **bold** element
*/
- lazy val lineElem_notStrong : Parser[Textile] = {
- not(blankLine) ~> (endOfLine | image | footnote_def | anchor |
- dimension | elipsis |
- copyright | trademark | registered | emDash | enDash | italic |
- emph |
- cite | span | code | delete | insert | sup |
- sub | bold | html| single_quote | quote | acronym | charBlock)
- }
+ lazy val lineElem_inBold : Parser[Textile] = lineElemParser(st = strongNotBold)
/**
@@ -575,18 +554,23 @@ object TextileParser {
{case fbl ~ abl => Bullet(fbl :: abl, numbered)}
}
- def formattedLineElem[Q <% Parser[Any]](m: Q):
- Parser[List[Textile] ~ List[Textile] ~ List[Attribute] ~ List[Textile]] =
- formattedLineElem(m, rep(attribute))
+ def isStartPunct(c: Char) = str2chars(".,\"'?!;:").contains(c)
+ lazy val startPunct: Parser[Elem] = elem("startPunct", isStartPunct)
+
+ val punctRegex = java.util.regex.Pattern.compile("""\p{Punct}""")
+ def isPunct(c: Char) = punctRegex.matcher(c.toString).matches
+ lazy val punct: Parser[Elem] = elem("endPunct", isPunct)
- lazy val begOrSpace: Parser[Int] = rep1(' ') ^^ {case lst => lst.length} | beginl ^^^ 0
- lazy val spaceOrEnd: Parser[Int] = endOfLine ^^^ 0 | rep1(' ') ^^ {case lst => lst.length}
+ lazy val begPunctOrSpace: Parser[String] = beginl ^^^ "" |
+ (startPunct | ' ') ^^ { case lst => lst.toString }
+ lazy val spacePunctOrEnd: Parser[Int] = endOfLine ^^^ 0 |
+ (punct | ' ') ^^ { case lst => 1 }
- def formattedLineElem[Q <% Parser[Any]](m: Q, p: Parser[List[Attribute]]):
+ def formattedLineElem[Q <% Parser[Any]](m: Q, p: Parser[List[Attribute]] = rep(attribute), restrictSuffix: Parser[Elem] = failure("?"), le: Parser[Textile] = lineElem):
Parser[List[Textile] ~ List[Textile] ~ List[Attribute] ~ List[Textile]] =
- begOrSpace ~ (m ~> p) ~ (rep1(not(endOfLine | (m ~ (endOfLine | rep1(' ')))) ~> lineElem) <~ m) ~ peek(spaceOrEnd) ^^
- {case bg ~ attrs ~ ln ~ end =>
- val t1: List[Textile] ~ List[Textile] = new ~(List(CharBlock(" " * bg)),
+ begPunctOrSpace ~ (m ~> p) ~ (rep1(not(endOfLine | (m ~ (spacePunctOrEnd))) ~> le) <~ m) <~ not(restrictSuffix) ^^
+ {case bg ~ attrs ~ ln =>
+ val t1: List[Textile] ~ List[Textile] = new ~(List(CharBlock(bg)),
reduceCharBlocks(ln))
val t2 = new ~(t1, attrs)
new ~(t2, Nil)
@@ -596,10 +580,12 @@ object TextileParser {
// TODO: generalize formattedLineElem some more
lazy val bold: Parser[Textile] =
- formattedLineElem(accept("**")) ^^ flatten4(Special * "b")
+ formattedLineElem(accept("**"), le = lineElem_inBold) ^^ flatten4(Special * "b")
lazy val strong: Parser[Textile] = formattedLineElem('*') ^^ flatten4(Strong)
+ lazy val strongNotBold: Parser[Textile] = formattedLineElem('*', restrictSuffix = '*') ^^ flatten4(Strong)
+
lazy val cite: Parser[Textile] = formattedLineElem(accept("??")) ^^ flatten4(Cite)
lazy val code: Parser[Textile] = formattedLineElem('@') ^^ flatten4(Special * "code")
@@ -652,11 +638,13 @@ object TextileParser {
lazy val sub : Parser[Textile] = formattedLineElem('~') ^^ flatten4(Special * "sub")
- lazy val italic : Parser[Textile] = formattedLineElem(accept("__")) ^^ flatten4(Special * "i")
+ lazy val italic : Parser[Textile] = formattedLineElem(accept("__"), le = lineElem_inItalic) ^^ flatten4(Special * "i")
lazy val emph : Parser[Textile] = formattedLineElem('_') ^^ flatten4(Emph)
- lazy val quote : Parser[Textile] = formattedLineElem('"', success(Nil)) ^^ flatten4((f, x, y, lst) => Quoted(f, x, lst))
+ lazy val emphNotItalic : Parser[Textile] = formattedLineElem('_', restrictSuffix = '_') ^^ flatten4(Emph)
+
+ lazy val quote : Parser[Textile] = formattedLineElem('"', success(Nil), ':') ^^ flatten4((f, x, y, lst) => Quoted(f, x, lst))
def reduceCharBlocks(in : List[Textile]) : List[Textile] =
{
@@ -870,7 +858,21 @@ object TextileParser {
// drop the last EOL to prevent needless <br />
// TODO: find a better solution.. it's not quite clear to me where newlines are meaningful
- private def flattenAndDropLastEOL(elems : List[Textile]) = ((elems match {case Nil => Nil case x => (x.last match { case EOL => elems.init case _ => elems})})).flatMap{e => e.toHtml.toList}
+ private def flattenAndDropLastEOL(elems : List[Textile]) = normalize(
+ (elems match {
+ case _ :: _ if elems.last == EOL => elems.init
+ case _ => elems
+ }).flatMap(_.toHtml)
+ ).toList
+
+ // reduce adjacent text elements into one
+ def normalize(nodes: Seq[Node]): Seq[Node] = nodes match {
+ case s @ Seq() => s
+ case Seq(head: Atom[String], tail @ _*) =>
+ val (text, rest) = tail span { _.isInstanceOf[Atom[String]] }
+ Text(text.foldLeft(head.text) { _ + _.text}) +: normalize(rest)
+ case Seq(head, tail @ _*) => head +: tail
+ }
case class Paragraph(elems : List[Textile], attrs: List[Attribute]) extends ATextile(elems, attrs) {
override def toHtml : NodeSeq = {
View
22 textile/src/test/scala/net/liftweb/textile/TextileSpec.scala
@@ -79,6 +79,18 @@ object TextileSpec extends Specification {
toHtml("**Hello World**") must ==/(<p><b>Hello World</b></p>)
}
+ "End line elements with punctuation" in {
+ toHtml("And then? She *fell*!") must ==/(<p>And then? She <strong>fell</strong>!</p>)
+ }
+
+ "Start line elements with punctuation" in {
+ toHtml("see:_Hello word_") must ==/(<p>see:<em>Hello word</em></p>)
+ }
+
+ "**Bold * Not Strong**" in {
+ toHtml("**Bold * Not Strong**") must ==/(<p><b>Bold * Not Strong</b></p>)
+ }
+
"Make other things bold" in {
toHtml("Dude this is **Hello World** kind of stuff") must ==/(<p>Dude this is <b>Hello World</b> kind of stuff</p>)
}
@@ -92,17 +104,17 @@ object TextileSpec extends Specification {
}
"Observe -- very nice!" in {
- toHtml("Observe -- very nice!") must ==/(<p>Observe &#8212; very nice!</p>)
+ toHtml("Observe -- very nice!") must ==/(<p>Observe \u2014 very nice!</p>)
}
"Observe - tiny and brief." in {
- toHtml("Observe - tiny and brief.") must ==/(<p>Observe &#8211; tiny and brief.</p>)
+ toHtml("Observe - tiny and brief.") must ==/(<p>Observe \u2013 tiny and brief.</p>)
}
"\"Observe!\"" in {
val ret = toHtml("\"Observe!\"")
- ret must ==/(<p>&#8220;Observe!&#8221;</p>)
+ ret must ==/(<p>\u201cObserve!\u201d</p>)
}
"A simple example." in {
@@ -138,6 +150,10 @@ A regular example.
toHtml("a link http://yahoo.com inside") must ==/(<p>a link <a href="http://yahoo.com">http://yahoo.com</a> inside</p>)
}
+ "a quoted link" in {
+ toHtml("""I searched "Google":http://google.com.""") must ==/(<p>I searched <a href="http://google.com.">Google</a></p>)
+ }
+
"deal with a very long line of text" in {
val sb = new StringBuilder()
(1 to 10000).foreach(i => sb.append(i.toString+" "))
Please sign in to comment.
Something went wrong with that request. Please try again.