Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Delimit formatted line elements with punctuation in addition to space

Refactor, allow proper parsing of nested elements
  • Loading branch information...
commit 0fb0a8e1df36fe2ae5eba609cf1d54011460f76b 1 parent c800244
@vdichev vdichev authored
View
88 textile/src/main/scala/net/liftweb/textile/TextileParser.scala
@@ -18,7 +18,7 @@ package net.liftweb {
package textile {
import _root_.scala.util.parsing.combinator.{Parsers, ImplicitConversions}
-import _root_.scala.xml.{Elem => XmlElem, MetaData, NodeSeq, Null, Text, TopScope, UnprefixedAttribute, Group, Node}
+import _root_.scala.xml.{Elem => XmlElem, Atom, MetaData, NodeSeq, Null, Text, TopScope, UnprefixedAttribute, Group, Node}
import _root_.scala.collection.mutable.HashMap
/**
@@ -230,14 +230,6 @@ object TextileParser {
def discard[T](p: Parser[T]): Parser[Unit] = p ^^ {x => ()}
- def peek[T](p: Parser[T]): Parser[T] = Parser { in =>
- p(in) match {
- case s @ Success(v, _) => Success(v, in)
- case e @ Error(msg, _) => Error(msg, in)
- case f @ Failure(msg, _) => Failure(msg, in)
- }
- }
-
lazy val document : Parser[Lst] = rep(paragraph) ^^ Lst
// final val Ch = '\032'
private def chrExcept(cs: Char*): Parser[Char] = elem("", {c => ('\032' :: cs.toList) forall (_ != c)}) //{x => !cs.contains(x)})
@@ -268,41 +260,28 @@ object TextileParser {
/**
* Line elements make up paragraphs and block elements
*/
- lazy val lineElem : Parser[Textile] = {
+ def lineElemParser(st: => Parser[Textile] = strong, em: => Parser[Textile] = emph) : Parser[Textile] = {
not(blankLine) ~> (endOfLine | image | footnote_def |
anchor | dimension | elipsis |
copyright | trademark | registered |
emDash |
- enDash | italic | emph | bold |
+ enDash | italic | em | bold |
cite | span | code | delete | insert |
- sup | sub | strong | html |
+ sup | sub | st | html |
single_quote | quote | acronym | charBlock)
}
+ lazy val lineElem : Parser[Textile] = lineElemParser()
/**
* If we've got an italic (__text__), the parser doesn't do well with a single underscore, so
- * we exclude looking for _emph_
+ * we exclude looking for _emph_ followed by another _
*/
- lazy val lineElem_notEmph : Parser[Textile] = {
- not(blankLine) ~> (endOfLine | image | footnote_def | anchor |
- dimension | elipsis |
- copyright | trademark | registered | emDash | enDash | italic |
- bold |
- cite | span| code | delete | insert| sup | sub | strong |
- html| single_quote | quote | acronym | charBlock)
- }
+ lazy val lineElem_inItalic : Parser[Textile] = lineElemParser(em = emphNotItalic)
/**
- * Don't look for *strong* if we're currently in a **bold** element
+ * Don't look for *strong* followed by * if we're currently in a **bold** element
*/
- lazy val lineElem_notStrong : Parser[Textile] = {
- not(blankLine) ~> (endOfLine | image | footnote_def | anchor |
- dimension | elipsis |
- copyright | trademark | registered | emDash | enDash | italic |
- emph |
- cite | span | code | delete | insert | sup |
- sub | bold | html| single_quote | quote | acronym | charBlock)
- }
+ lazy val lineElem_inBold : Parser[Textile] = lineElemParser(st = strongNotBold)
/**
@@ -575,18 +554,23 @@ object TextileParser {
{case fbl ~ abl => Bullet(fbl :: abl, numbered)}
}
- def formattedLineElem[Q <% Parser[Any]](m: Q):
- Parser[List[Textile] ~ List[Textile] ~ List[Attribute] ~ List[Textile]] =
- formattedLineElem(m, rep(attribute))
+ def isStartPunct(c: Char) = str2chars(".,\"'?!;:").contains(c)
+ lazy val startPunct: Parser[Elem] = elem("startPunct", isStartPunct)
+
+ val punctRegex = java.util.regex.Pattern.compile("""\p{Punct}""")
+ def isPunct(c: Char) = punctRegex.matcher(c.toString).matches
+ lazy val punct: Parser[Elem] = elem("endPunct", isPunct)
- lazy val begOrSpace: Parser[Int] = rep1(' ') ^^ {case lst => lst.length} | beginl ^^^ 0
- lazy val spaceOrEnd: Parser[Int] = endOfLine ^^^ 0 | rep1(' ') ^^ {case lst => lst.length}
+ // What may precede a formatted element: the beginning of the line (yielding
+ // the empty string), or exactly one start-punctuation character or space
+ // (yielding that character as a one-character string).
+ lazy val begPunctOrSpace: Parser[String] =
+   beginl ^^^ "" | (startPunct | ' ') ^^ (_.toString)
+
+ // What may follow a formatted element: the end of the line (yielding 0), or
+ // exactly one punctuation character or space (yielding 1).
+ lazy val spacePunctOrEnd: Parser[Int] =
+   endOfLine ^^^ 0 | (punct | ' ') ^^^ 1
- def formattedLineElem[Q <% Parser[Any]](m: Q, p: Parser[List[Attribute]]):
+ def formattedLineElem[Q <% Parser[Any]](m: Q, p: Parser[List[Attribute]] = rep(attribute), restrictSuffix: Parser[Elem] = failure("?"), le: Parser[Textile] = lineElem):
Parser[List[Textile] ~ List[Textile] ~ List[Attribute] ~ List[Textile]] =
- begOrSpace ~ (m ~> p) ~ (rep1(not(endOfLine | (m ~ (endOfLine | rep1(' ')))) ~> lineElem) <~ m) ~ peek(spaceOrEnd) ^^
- {case bg ~ attrs ~ ln ~ end =>
- val t1: List[Textile] ~ List[Textile] = new ~(List(CharBlock(" " * bg)),
+ begPunctOrSpace ~ (m ~> p) ~ (rep1(not(endOfLine | (m ~ (spacePunctOrEnd))) ~> le) <~ m) <~ not(restrictSuffix) ^^
+ {case bg ~ attrs ~ ln =>
+ val t1: List[Textile] ~ List[Textile] = new ~(List(CharBlock(bg)),
reduceCharBlocks(ln))
val t2 = new ~(t1, attrs)
new ~(t2, Nil)
@@ -596,10 +580,12 @@ object TextileParser {
// TODO: generalize formattedLineElem some more
lazy val bold: Parser[Textile] =
- formattedLineElem(accept("**")) ^^ flatten4(Special * "b")
+ formattedLineElem(accept("**"), le = lineElem_inBold) ^^ flatten4(Special * "b")
lazy val strong: Parser[Textile] = formattedLineElem('*') ^^ flatten4(Strong)
+ lazy val strongNotBold: Parser[Textile] = formattedLineElem('*', restrictSuffix = '*') ^^ flatten4(Strong)
+
lazy val cite: Parser[Textile] = formattedLineElem(accept("??")) ^^ flatten4(Cite)
lazy val code: Parser[Textile] = formattedLineElem('@') ^^ flatten4(Special * "code")
@@ -652,11 +638,13 @@ object TextileParser {
lazy val sub : Parser[Textile] = formattedLineElem('~') ^^ flatten4(Special * "sub")
- lazy val italic : Parser[Textile] = formattedLineElem(accept("__")) ^^ flatten4(Special * "i")
+ lazy val italic : Parser[Textile] = formattedLineElem(accept("__"), le = lineElem_inItalic) ^^ flatten4(Special * "i")
lazy val emph : Parser[Textile] = formattedLineElem('_') ^^ flatten4(Emph)
- lazy val quote : Parser[Textile] = formattedLineElem('"', success(Nil)) ^^ flatten4((f, x, y, lst) => Quoted(f, x, lst))
+ lazy val emphNotItalic : Parser[Textile] = formattedLineElem('_', restrictSuffix = '_') ^^ flatten4(Emph)
+
+ lazy val quote : Parser[Textile] = formattedLineElem('"', success(Nil), ':') ^^ flatten4((f, x, y, lst) => Quoted(f, x, lst))
def reduceCharBlocks(in : List[Textile]) : List[Textile] =
{
@@ -870,7 +858,21 @@ object TextileParser {
// drop the last EOL to prevent needless <br />
// TODO: find a better solution.. it's not quite clear to me where newlines are meaningful
- private def flattenAndDropLastEOL(elems : List[Textile]) = ((elems match {case Nil => Nil case x => (x.last match { case EOL => elems.init case _ => elems})})).flatMap{e => e.toHtml.toList}
+ private def flattenAndDropLastEOL(elems : List[Textile]) = {
+   // A trailing EOL would only render as a needless <br />, so it is left out.
+   val kept = if (elems.lastOption.exists(_ == EOL)) elems.init else elems
+   // Render every element, then let normalize merge neighbouring text nodes.
+   normalize(kept.flatMap(_.toHtml)).toList
+ }
+
+ /**
+  * Collapses every run of adjacent text atoms into a single Text node.
+  *
+  * Every Atom in the input (including one that stands alone) comes out as a
+  * plain Text carrying the same text; all other nodes are passed through
+  * unchanged and in their original order.
+  *
+  * The whole sequence is scanned, so runs that follow a non-text node are
+  * merged as well. A single left-to-right fold is used instead of recursion,
+  * so the call depth does not grow with the number of nodes.
+  *
+  * @param nodes the rendered nodes, possibly containing adjacent text atoms
+  * @return the same nodes with each run of atoms replaced by one Text
+  */
+ def normalize(nodes: Seq[Node]): Seq[Node] =
+   // Atom[_] rather than Atom[String]: the type argument is erased at runtime,
+   // so the narrower pattern could never actually be checked.
+   nodes.foldLeft(List.empty[Node]) {
+     // the previous node is text too: extend it instead of adding a new node
+     case ((prev: Atom[_]) :: done, next: Atom[_]) => Text(prev.text + next.text) :: done
+     // first atom of a run: start a fresh Text
+     case (done, next: Atom[_]) => Text(next.text) :: done
+     // any other node ends the current run and is kept as it is
+     case (done, next) => next :: done
+   }.reverse
case class Paragraph(elems : List[Textile], attrs: List[Attribute]) extends ATextile(elems, attrs) {
override def toHtml : NodeSeq = {
View
22 textile/src/test/scala/net/liftweb/textile/TextileSpec.scala
@@ -79,6 +79,18 @@ object TextileSpec extends Specification {
toHtml("**Hello World**") must ==/(<p><b>Hello World</b></p>)
}
+ "End line elements with punctuation" in {
+ toHtml("And then? She *fell*!") must ==/(<p>And then? She <strong>fell</strong>!</p>)
+ }
+
+ "Start line elements with punctuation" in {
+ toHtml("see:_Hello word_") must ==/(<p>see:<em>Hello word</em></p>)
+ }
+
+ "**Bold * Not Strong**" in {
+ toHtml("**Bold * Not Strong**") must ==/(<p><b>Bold * Not Strong</b></p>)
+ }
+
"Make other things bold" in {
toHtml("Dude this is **Hello World** kind of stuff") must ==/(<p>Dude this is <b>Hello World</b> kind of stuff</p>)
}
@@ -92,17 +104,17 @@ object TextileSpec extends Specification {
}
"Observe -- very nice!" in {
- toHtml("Observe -- very nice!") must ==/(<p>Observe &#8212; very nice!</p>)
+ toHtml("Observe -- very nice!") must ==/(<p>Observe \u2014 very nice!</p>)
}
"Observe - tiny and brief." in {
- toHtml("Observe - tiny and brief.") must ==/(<p>Observe &#8211; tiny and brief.</p>)
+ toHtml("Observe - tiny and brief.") must ==/(<p>Observe \u2013 tiny and brief.</p>)
}
"\"Observe!\"" in {
val ret = toHtml("\"Observe!\"")
- ret must ==/(<p>&#8220;Observe!&#8221;</p>)
+ ret must ==/(<p>\u201cObserve!\u201d</p>)
}
"A simple example." in {
@@ -138,6 +150,10 @@ A regular example.
toHtml("a link http://yahoo.com inside") must ==/(<p>a link <a href="http://yahoo.com">http://yahoo.com</a> inside</p>)
}
+ "a quoted link" in {
+ toHtml("""I searched "Google":http://google.com.""") must ==/(<p>I searched <a href="http://google.com.">Google</a></p>)
+ }
+
"deal with a very long line of text" in {
val sb = new StringBuilder()
(1 to 10000).foreach(i => sb.append(i.toString+" "))
Please sign in to comment.
Something went wrong with that request. Please try again.