Skip to content

Commit

Permalink
Fix exception in XhtmlParser with HTML5 doctype
Browse files Browse the repository at this point in the history
Make characters, including whitespace, after the `html' identifier in
<!DOCTYPE html> be optional.  Otherwise, the following errors are
emitted:

    file.xml:2:15: whitespace expected
    <html xmlns="http://www.w3.org/1999/xhtml">
                  ^
    file.xml:2:15: document must contain exactly one element

And a java.lang.NullPointerException is thrown.

Unfortunately, testing with Source.fromString doesn't reproduce the
NullPointerException that occurs with Source.fromFile.
  • Loading branch information
ashawley committed Sep 25, 2018
1 parent 33538b1 commit 61a2691
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 1 deletion.
52 changes: 52 additions & 0 deletions jvm/src/test/scala/scala/xml/parsing/ConstructingParserTest.scala
Expand Up @@ -4,6 +4,7 @@ package parsing
import scala.io.Source
import org.junit.Test
import scala.xml.JUnitAssertsForXML.{ assertEquals => assertXml }
import org.junit.Assert.assertEquals

class ConstructingParserTest {

Expand All @@ -19,4 +20,55 @@ class ConstructingParserTest {

}

/*
 * Example of using SYSTEM in DOCTYPE.
 *
 * Parses a document whose DOCTYPE carries a SYSTEM external identifier and
 * asserts that the parsed document's node sequence equals the `book` literal.
 */
@Test
def docbookTest: Unit = {
  val xml =
    """|<!DOCTYPE docbook SYSTEM 'docbook.dtd'>
       |<book>
       | <title>Book</title>
       | <chapter>
       | <title>Chapter</title>
       | <para>Text</para>
       | </chapter>
       |</book>""".stripMargin

  // NOTE(review): whitespace inside this XML literal becomes text nodes, so its
  // indentation presumably has to mirror the input string above for the
  // comparison to succeed -- confirm against the checked-in file.
  val expected = <book>
<title>Book</title>
<chapter>
<title>Chapter</title>
<para>Text</para>
</chapter>
</book>

  // In-memory Source over the string; reportError is overridden with an empty
  // body so parser diagnostics are not printed during the test.
  val source = new Source {
    val iter = xml.iterator
    override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err) = {}
  }

  // The second argument is presumably the whitespace-preservation flag -- confirm
  // against ConstructingParser.fromSource.
  val doc = ConstructingParser.fromSource(source, true).document

  assertEquals(expected, doc.theSeq)
}

/*
 * Unsupported use of lowercase DOCTYPE and SYSTEM.
 *
 * The test passes only when parsing raises scala.xml.parsing.FatalError,
 * as declared by the `expected` attribute of the @Test annotation.
 */
@Test(expected = classOf[scala.xml.parsing.FatalError])
def docbookFail: Unit = {
  val lowercaseDoctype =
    """|<!doctype docbook system 'docbook.dtd'>
       |<book>
       |<title>Book</title>
       |<chapter>
       |<title>Chapter</title>
       |<para>Text</para>
       |</chapter>
       |</book>""".stripMargin

  // In-memory Source whose reportError does nothing, so diagnostics produced
  // while parsing the malformed prolog are not printed.
  val quietSource = new Source {
    val iter = lowercaseDoctype.iterator
    override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err): Unit = ()
  }

  val parser = ConstructingParser.fromSource(quietSource, true)
  parser.content(TopScope)
}
}
61 changes: 61 additions & 0 deletions jvm/src/test/scala/scala/xml/parsing/XhtmlParserTest.scala
@@ -0,0 +1,61 @@
package scala.xml
package parsing

import scala.io.Source

import org.junit.Test
import org.junit.Assert.assertEquals

/*
 * Tests that XhtmlParser accepts documents starting with an HTML5 or an
 * HTML 4.01 Strict DOCTYPE and yields the expected element tree.
 *
 * NOTE(review): whitespace inside the XML literals becomes text nodes, so
 * their indentation presumably has to mirror the corresponding input strings --
 * confirm against the checked-in file.
 */
class XhtmlParserTest {

  /* HTML5 doctype (`<!DOCTYPE html>`); presumably the regression test for issue 259. */
  @Test
  def issue259: Unit = {
    val expected = <html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
</head>
<body>
<p>Text</p>
</body>
</html>

    val html5Input =
      """|<!DOCTYPE html>
         |<html xmlns="http://www.w3.org/1999/xhtml">
         | <head>
         | <meta charset="utf-8"/>
         | </head>
         | <body>
         | <p>Text</p>
         | </body>
         |</html>""".stripMargin

    val parsed = XhtmlParser(Source.fromString(html5Input)).theSeq
    assertEquals(expected, parsed)
  }

  /* HTML 4.01 Strict doctype with a PUBLIC identifier split across two lines. */
  @Test
  def html4Strict: Unit = {
    // NOTE(review): this literal declares the XHTML namespace although the input
    // <html> element does not; presumably namespace scope does not take part in
    // node equality -- confirm.
    val expected = <html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Title</title>
</head>
<body>
<p>Text</p>
</body>
</html>

    val html4Input =
      """|<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
         | "http://www.w3.org/TR/html4/strict.dtd">
         |<html>
         | <head>
         | <title>Title</title>
         | </head>
         | <body>
         | <p>Text</p>
         | </body>
         |</html>""".stripMargin

    val parsed = XhtmlParser(Source.fromString(html4Input)).theSeq
    assertEquals(expected, parsed)
  }
}
2 changes: 1 addition & 1 deletion shared/src/main/scala/scala/xml/parsing/MarkupParser.scala
Expand Up @@ -518,7 +518,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
xToken("DOCTYPE")
xSpace()
val n = xName
xSpace()
xSpaceOpt()
//external ID
if ('S' == ch || 'P' == ch) {
extID = externalID()
Expand Down

0 comments on commit 61a2691

Please sign in to comment.