Skip to content
This repository
Browse code

Fix that HTML::Node.parse would blow up on unclosed CDATA sections.

If an unclosed CDATA section is encountered and parsing is strict, an
exception will be raised. Otherwise, we consider the remainder of the line to
be the section contents. This is consistent with HTML::Tokenizer#scan_tag.

Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
  • Loading branch information...
commit 1a1822726189f3cfff305dea62e5cfbdbc0da577 1 parent 838cb1a
Jeffrey Hardy authored October 22, 2008 jeremy committed October 23, 2008
9  actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
@@ -150,7 +150,14 @@ def parse(parent, line, pos, content, strict=true)
150 150
           end
151 151
 
152 152
           if scanner.skip(/!\[CDATA\[/)
153  
-            scanner.scan_until(/\]\]>/)
  153
+            unless scanner.skip_until(/\]\]>/)
  154
+              if strict
  155
+                raise "expected ]]> (got #{scanner.rest.inspect} for #{content})"
  156
+              else
  157
+                scanner.skip_until(/\Z/)
  158
+              end
  159
+            end
  160
+
154 161
             return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
155 162
           end
156 163
           
21  actionpack/test/controller/html-scanner/node_test.rb
@@ -65,4 +65,25 @@ def test_parse_with_unclosed_tag
65 65
     assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) }
66 66
     assert node.attributes.has_key?("onmouseover")
67 67
   end
  68
+
  69
+  def test_parse_with_valid_cdata_section
  70
+    s = "<![CDATA[<span>contents</span>]]>"
  71
+    node = nil
  72
+    assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) }
  73
+    assert_kind_of HTML::CDATA, node
  74
+    assert_equal '<span>contents</span>', node.content
  75
+  end
  76
+
  77
+  def test_parse_strict_with_unterminated_cdata_section
  78
+    s = "<![CDATA[neverending..."
  79
+    assert_raise(RuntimeError) { HTML::Node.parse(nil,0,0,s) }
  80
+  end
  81
+
  82
+  def test_parse_relaxed_with_unterminated_cdata_section
  83
+    s = "<![CDATA[neverending..."
  84
+    node = nil
  85
+    assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) }
  86
+    assert_kind_of HTML::CDATA, node
  87
+    assert_equal 'neverending...', node.content
  88
+  end
68 89
 end
10  actionpack/test/controller/html-scanner/sanitizer_test.rb
@@ -17,6 +17,8 @@ def test_strip_tags
17 17
     %{This is a test.\n\n\nIt no longer contains any HTML.\n}, sanitizer.sanitize(
18 18
     %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}))
19 19
     assert_equal "This has a  here.", sanitizer.sanitize("This has a <!-- comment --> here.")
  20
+    assert_equal "This has a  here.", sanitizer.sanitize("This has a <![CDATA[<section>]]> here.")
  21
+    assert_equal "This has an unclosed ", sanitizer.sanitize("This has an unclosed <![CDATA[<section>]] here...")
20 22
     [nil, '', '   '].each { |blank| assert_equal blank, sanitizer.sanitize(blank) }
21 23
   end
22 24
 
@@ -243,6 +245,14 @@ def test_should_sanitize_img_vbscript
243 245
     assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'
244 246
   end
245 247
 
  248
+  def test_should_sanitize_cdata_section
  249
+    assert_sanitized "<![CDATA[<span>section</span>]]>", "&lt;![CDATA[&lt;span>section&lt;/span>]]>"
  250
+  end
  251
+
  252
+  def test_should_sanitize_unterminated_cdata_section
  253
+    assert_sanitized "<![CDATA[<span>neverending...", "&lt;![CDATA[&lt;span>neverending...]]>"
  254
+  end
  255
+
246 256
 protected
247 257
   def assert_sanitized(input, expected = nil)
248 258
     @sanitizer ||= HTML::WhiteListSanitizer.new

0 notes on commit 1a18227

Please sign in to comment.
Something went wrong with that request. Please try again.