Permalink
Browse files

trying to fix a bug in script content stripping

  • Loading branch information...
originell committed Mar 4, 2012
1 parent f8d95b1 commit a2780e4b60560a8cb1ed67adfa3260256c6d652c
Showing with 46 additions and 3 deletions.
  1. +19 −1 bleach/sanitizer.py
  2. +27 −2 bleach/tests/test_security.py
View
@@ -12,6 +12,7 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
allowed_svg_properties = []
skip_token = False
+ previous_token = {}
def sanitize_token(self, token):
"""Sanitize a token either by HTML-encoding or dropping.
@@ -30,6 +31,7 @@ def sanitize_token(self, token):
isinstance(self.allowed_attributes, dict)):
self.wildcard_attributes = self.allowed_attributes.get('*', [])
+
if token['type'] in (tokenTypes['StartTag'], tokenTypes['EndTag'],
tokenTypes['EmptyTag']):
if token['name'] in self.allowed_elements:
@@ -71,14 +73,27 @@ def sanitize_token(self, token):
attrs['style'] = self.sanitize_css(attrs['style'])
token['data'] = [(name, val) for name, val in
attrs.items()]
+ self.previous_token = token
return token
elif self.strip_scripts and 'script' in token['name']:
- self.skip_token = True
+ if self.skip_token and not (
+ 'data' in self.previous_token and
+ isinstance(self.previous_token['data'],
+ basestring) and
+ ('"' in self.previous_token['data'] or
+ "'" in self.previous_token['data'])
+ ):
+ self.skip_token = False
+ else:
+ self.skip_token = True
+ self.previous_token = token
pass
elif self.strip_disallowed_elements:
+ self.previous_token = token
pass
else:
self.skip_token = False
+ self.previous_token = token
if token['type'] == tokenTypes['EndTag']:
token['data'] = '</%s>' % token['name']
elif token['data']:
@@ -93,13 +108,16 @@ def sanitize_token(self, token):
del token["name"]
return token
elif self.skip_token:
+ self.previous_token = token
pass
elif token['type'] == tokenTypes['Comment']:
self.skip_token = False
if not self.strip_html_comments:
+ self.previous_token = token
return token
else:
self.skip_token = False
+ self.previous_token = token
return token
def sanitize_css(self, style):
@@ -98,10 +98,10 @@ def test_strip_script_contents():
'<scr<script>function know_how(to) { alert("Write JavaScript"); }'
'<script></script></scr>'
'</p>', '&lt;p&gt;Hello &lt;/scr&gt;&lt;/p&gt;'),
- ('<p>Hello '
+ ('<p>My dear '
'<scr<script>function know_how(to) { alert("<script>"); }'
'<script></script></scr>'
- '</p>', '&lt;p&gt;Hello &lt;/scr&gt;&lt;/p&gt;')
+ '</p>', '&lt;p&gt;My dear &lt;/scr&gt;&lt;/p&gt;')
)
def check(teststr, expected_output):
@@ -111,6 +111,31 @@ def check(teststr, expected_output):
yield check, test, output
+def test_strip_with_strip_script_contents():
+    """Test the combination of strip=True with strip_script_content=True.
+
+    Every case is cleaned with an empty tag whitelist (``tags=[]``), so all
+    markup is expected to be removed; the text between ``<script>`` and
+    ``</script>`` is expected to be dropped as well, while the text of the
+    other elements is kept.
+
+    This is a generator test: it yields one ``(check, input, expected)``
+    tuple per case.
+    """
+    tests = (
+        # A plain script element followed by ordinary markup.
+        ('<p>Ouh yeah '
+         '<script>function know_how(to) { alert("Write JS"); }'
+         '</script>'
+         '<a href="example.com/">This is a test link.</a>'
+         '<span> with a test span and <div>div</div></span>.'
+         '</p>', 'Ouh yeah This is a test link. with a test span and div.'),
+        # "<script>" also occurs inside the script body, both as bare
+        # text and inside a quoted JavaScript string.
+        ('<p>Good bye. '
+         '<script>function die(gotohell) { <script>alert("With you bad '
+         '<script> javascript skills."); }'
+         '</script>'
+         '<a href="example.com/">This is a test link.</a>'
+         '<span> with a test span and <div>div</div></span>.'
+         '</p>', 'Good bye. This is a test link. with a test span and div.'),
+    )
+
+    def check(teststr, expected_output):
+        """Assert that cleaning *teststr* produces *expected_output*."""
+        eq_(expected_output, clean(teststr, tags=[], strip=True,
+                                   strip_script_content=True))
+
+    for test, output in tests:
+        yield check, test, output
+
+
def test_nasty():
"""Nested, broken up, multiple tags, are still foiled!"""
test = ('<scr<script></script>ipt type="text/javascript">alert("foo");</'

0 comments on commit a2780e4

Please sign in to comment.