Permalink
Browse files

Specify an HTML tokenizer when calling linkify.

This change requires bleach v1.1.4 or later. It prevents unknown tags from
being unfairly escaped during the linkify step (which causes problems).
  • Loading branch information...
mythmon committed Jul 16, 2012
1 parent 3d0e4b2 commit 64daa0999004c738e4788047b363945355e2dca5
Showing with 4 additions and 2 deletions.
  1. +1 −1 setup.py
  2. +3 −1 wikimarkup/parser.py
View
@@ -13,6 +13,6 @@
url='http://www.github.com/dcramer/py-wikimarkup/',
zip_safe=False,
include_package_data=True,
- install_requires=['bleach'],
+ install_requires=['bleach>=1.1.4'],
package_data = { '': ['README.rst'] },
)
View
@@ -23,6 +23,8 @@
import bleach
+from html5lib.tokenizer import HTMLTokenizer
+
# a few patterns we use later
MW_COLON_STATE_TEXT = 0
@@ -1711,7 +1713,7 @@ def parse(self, text, show_toc=True, tags=ALLOWED_TAGS,
if utf8:
text.encode("utf-8")
# Pass output through bleach and linkify
- text = bleach.linkify(text, nofollow=nofollow)
+ text = bleach.linkify(text, nofollow=nofollow, tokenizer=HTMLTokenizer)
return bleach.clean(text, tags=self.tags, attributes=attributes,
styles=styles, strip_comments=False)

0 comments on commit 64daa09

Please sign in to comment.