Skip to content

Commit

Permalink
HtmlParser: Parse DOCTYPE case-insensitively
Browse files Browse the repository at this point in the history
  • Loading branch information
tindzk committed Sep 1, 2019
1 parent 29f4888 commit 41d9dae
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/main/scala-js/pine/HtmlParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ object HtmlParser {
if (!html.startsWith("<")) throw new ParseError("Does not start with tag")
val reader = new Reader(html)
val node =
if (reader.prefix("<!DOCTYPE"))
if (reader.prefixIgnoreCase("<!DOCTYPE"))
DomParser.parse(html, "text/html").documentElement
else {
// Skip leading comments
Expand Down
18 changes: 14 additions & 4 deletions src/main/scala/pine/Reader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ private[pine] class Reader(data: String) {
result
}

/** Peeks at the upcoming input: yields up to `n` characters starting at the
  * current pointer. Fewer characters are returned when the input ends first.
  * The pointer itself is left where it was.
  */
def take(n: Int): String = {
  val until = offset + n
  data.slice(offset, until)
}

/** Checks whether the character at the current pointer equals `value`.
  *
  * The pointer is not moved. No bounds check is performed here, so the caller
  * must ensure that there is still input left to inspect.
  */
def lookahead(value: Char): Boolean = {
  val current = data.charAt(offset)
  current == value
}

Expand All @@ -21,16 +24,23 @@ private[pine] class Reader(data: String) {

/** Returns true if `value` matches and places pointer afterwards */
def prefix(value: Char): Boolean =
if (data(offset) != value) false
else {
data(offset) == value && {
offset += 1
true
}

/** Returns true if `value` matches and places pointer afterwards */
def prefix(value: String): Boolean =
if (!rest().startsWith(value)) false
else {
rest().startsWith(value) && {
offset += value.length
true
}

/** Case-insensitive variant of `prefix`: compares the upcoming input against
  * `value` ignoring letter case.
  *
  * On a match the pointer is moved past the matched characters and `true` is
  * returned; otherwise the pointer stays put and the result is `false`.
  */
def prefixIgnoreCase(value: String): Boolean = {
  val len = value.length
  if (take(len).equalsIgnoreCase(value)) {
    // Consume the matched prefix
    offset += len
    true
  } else false
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/pine/internal/HtmlParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ object HtmlParser {
}

def skipDocType(reader: Reader): Unit =
if (reader.prefix("<!DOCTYPE"))
if (reader.prefixIgnoreCase("<!DOCTYPE"))
reader.collect('>').orElse(expected(reader, ">"))

def skipXml(reader: Reader): Unit =
Expand Down
8 changes: 8 additions & 0 deletions src/test/scala/pine/HtmlParserSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,14 @@ class HtmlParserSpec extends FunSuite {
assert(node.toHtml == html)
}

test("Parse DOCTYPE (2)") {
  // A lower-case `<!doctype` must be accepted just like `<!DOCTYPE`, since the
  // keyword is matched case-insensitively.
  // See https://html.spec.whatwg.org/multipage/syntax.html#the-doctype
  val input = """<!doctype html><html><head lang="en"></head><body><span>42</span></body></html>"""
  // The rendered output is expected to carry the upper-case spelling
  val expected = input.replace("<!doctype", "<!DOCTYPE")
  val parsed = HtmlParser.fromString(input)
  assert(parsed.toHtml == expected)
}

test("Do not handle CDATA") {
val html = """<script>// <![CDATA[
var x = 42;
Expand Down

0 comments on commit 41d9dae

Please sign in to comment.