Skip to content

Commit

Permalink
feat: Remove useless filtered element content by default
Browse files Browse the repository at this point in the history
  • Loading branch information
rgrove committed Oct 14, 2018
1 parent 54dcf57 commit faf9a0f
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 30 deletions.
4 changes: 3 additions & 1 deletion lib/sanitize/config/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ module Config
# If this is an Array or Set of element names, then only the contents of
# the specified elements (when filtered) will be removed, and the contents
# of all other filtered elements will be left behind.
:remove_contents => false,
:remove_contents => %w[
iframe noembed noframes noscript script style
],

# Transformers allow you to filter or alter nodes using custom logic. See
# README.md for details and examples.
Expand Down
6 changes: 4 additions & 2 deletions lib/sanitize/transformers/clean_element.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,10 @@ def call(env)
end
end

unless @remove_all_contents || @remove_element_contents.include?(name)
node.add_previous_sibling(node.children)
unless node.children.empty?
unless @remove_all_contents || @remove_element_contents.include?(name)
node.add_previous_sibling(node.children)
end
end

node.unlink
Expand Down
6 changes: 1 addition & 5 deletions test/test_clean_comment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

# Special case: the comment markup is inside a <script>, which makes it
# text content and not an actual HTML comment.
@s.fragment("<script><!-- comment --></script>").must_equal '&lt;!-- comment --&gt;'
@s.fragment("<script><!-- comment --></script>").must_equal ''

Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script'])
.must_equal '<script><!-- comment --></script>'
Expand All @@ -40,10 +40,6 @@
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo <!-- <!-- <!-- --> --&gt; --&gt;bar'
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>&gt;bar</div>'

# Special case: the comment markup is inside a <script>, which makes it
# text content and not an actual HTML comment.
@s.fragment("<script><!-- comment --></script>").must_equal '&lt;!-- comment --&gt;'

Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script'])
.must_equal '<script><!-- comment --></script>'
end
Expand Down
64 changes: 50 additions & 14 deletions test/test_clean_element.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,22 @@
strings = {
:basic => {
:html => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',

:default => 'Lorem ipsum dolor sit amet .foo { color: #fff; } alert("hello world");',
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet .foo { color: #fff; } alert("hello world");',
:basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet .foo { color: #fff; } alert("hello world");',
:relaxed => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> alert("hello world");'
:default => 'Lorem ipsum dolor sit amet ',
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet ',
:basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
:relaxed => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> '
},

:malformed => {
:html => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',

:default => 'Lorem dolor sit amet alert("hello world");',
:restricted => 'Lorem <strong>dolor</strong> sit amet alert("hello world");',
:basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
:relaxed => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet alert("hello world");',
:default => 'Lorem dolor sit amet ',
:restricted => 'Lorem <strong>dolor</strong> sit amet ',
:basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
:relaxed => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet ',
},

:unclosed => {
:html => '<p>a</p><blockquote>b',

:default => ' a b ',
:restricted => ' a b ',
:basic => '<p>a</p><blockquote>b</blockquote>',
Expand All @@ -35,7 +32,6 @@

:malicious => {
:html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',

:default => 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
:basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
Expand Down Expand Up @@ -171,10 +167,10 @@
.must_equal 'foo bar baz quux'

Sanitize.fragment('<script>alert("<xss>");</script>')
.must_equal 'alert("&lt;xss&gt;");'
.must_equal ''

Sanitize.fragment('<<script>script>alert("<xss>");</<script>>')
.must_equal '&lt;script&gt;alert("&lt;xss&gt;");&lt;/&lt;script&gt;&gt;'
.must_equal '&lt;'

Sanitize.fragment('< script <>> alert("<xss>");</script>')
.must_equal '&lt; script &lt;&gt;&gt; alert("");'
Expand All @@ -196,6 +192,46 @@
.must_equal ''
end

it 'should escape the content of removed `plaintext` elements' do
Sanitize.fragment('<plaintext>hello! <script>alert(0)</script>')
.must_equal 'hello! &lt;script&gt;alert(0)&lt;/script&gt;'
end

it 'should escape the content of removed `xmp` elements' do
Sanitize.fragment('<xmp>hello! <script>alert(0)</script></xmp>')
.must_equal 'hello! &lt;script&gt;alert(0)&lt;/script&gt;'
end

it 'should not preserve the content of removed `iframe` elements' do
Sanitize.fragment('<iframe>hello! <script>alert(0)</script></iframe>')
.must_equal ''
end

it 'should not preserve the content of removed `noembed` elements' do
Sanitize.fragment('<noembed>hello! <script>alert(0)</script></noembed>')
.must_equal ''
end

it 'should not preserve the content of removed `noframes` elements' do
Sanitize.fragment('<noframes>hello! <script>alert(0)</script></noframes>')
.must_equal ''
end

it 'should not preserve the content of removed `noscript` elements' do
Sanitize.fragment('<noscript>hello! <script>alert(0)</script></noscript>')
.must_equal ''
end

it 'should not preserve the content of removed `script` elements' do
Sanitize.fragment('<script>hello! <script>alert(0)</script></script>')
.must_equal ''
end

it 'should not preserve the content of removed `style` elements' do
Sanitize.fragment('<style>hello! <script>alert(0)</script></style>')
.must_equal ''
end

strings.each do |name, data|
it "should clean #{name} HTML" do
Sanitize.fragment(data[:html]).must_equal(data[:default])
Expand Down
6 changes: 3 additions & 3 deletions test/test_malicious_html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@

it 'should not be possible to inject <script> via a malformed <img> tag' do
@s.fragment('<img """><script>alert("XSS")</script>">').
must_equal '<img>alert("XSS")"&gt;'
must_equal '<img>"&gt;'
end

it 'should not be possible to inject protocol-based JS' do
Expand Down Expand Up @@ -117,12 +117,12 @@
describe '<script>' do
it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]).
must_equal 'alert(1)'
must_equal ''
end

it 'should not be possible to inject <script> via extraneous open brackets' do
@s.fragment(%[<<script>alert("XSS");//<</script>]).
must_equal '&lt;alert("XSS");//&lt;'
must_equal '&lt;'
end
end

Expand Down
4 changes: 2 additions & 2 deletions test/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
end

it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
Sanitize.fragment('foo <script>bar').must_equal 'foo bar'
Sanitize.fragment('foo <style>bar').must_equal 'foo bar'
Sanitize.fragment('foo <script>bar').must_equal 'foo '
Sanitize.fragment('foo <style>bar').must_equal 'foo '
end

it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
Expand Down
6 changes: 3 additions & 3 deletions test/test_sanitize.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

it 'should sanitize an HTML document' do
@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>')
.must_equal "<html>Lorem ipsum dolor sit amet alert(\"hello world\");</html>"
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
end

it 'should not modify the input string' do
Expand All @@ -42,7 +42,7 @@
describe '#fragment' do
it 'should sanitize an HTML fragment' do
@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
.must_equal 'Lorem ipsum dolor sit amet alert("hello world");'
.must_equal 'Lorem ipsum dolor sit amet '
end

it 'should not modify the input string' do
Expand Down Expand Up @@ -71,7 +71,7 @@
doc.xpath('/html/body/node()').each {|node| frag << node }

@s.node!(frag)
frag.to_html.must_equal 'Lorem ipsum dolor sit amet alert("hello world");'
frag.to_html.must_equal 'Lorem ipsum dolor sit amet '
end

describe "when the given node is a document and <html> isn't whitelisted" do
Expand Down

0 comments on commit faf9a0f

Please sign in to comment.