Skip to content

Commit

Permalink
Merge pull request #180 from chripede/patch-2
Browse files (browse the repository at this point in the history)
Add inline_style option
  • Loading branch information
scoder committed Jul 24, 2016
2 parents b3ffdd9 + c355493 commit 0a43d0b
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
7 changes: 6 additions & 1 deletion src/lxml/html/clean.py
Expand Up @@ -112,7 +112,10 @@ class Cleaner(object):
Removes any comments.
``style``:
Removes any style tags or attributes.
Removes any style tags.
``inline_style``:
Removes any style attributes.
``links``:
Removes any ``<link>`` tags
Expand Down Expand Up @@ -191,6 +194,7 @@ class Cleaner(object):
javascript = True
comments = True
style = False
inline_style = False
links = True
meta = True
page_structure = True
Expand Down Expand Up @@ -314,6 +318,7 @@ def __call__(self, doc):
kill_tags.add(etree.ProcessingInstruction)
if self.style:
kill_tags.add('style')
if self.inline_style:
etree.strip_attributes(doc, 'style')
if self.links:
kill_tags.add('link')
Expand Down
23 changes: 21 additions & 2 deletions src/lxml/html/tests/test_clean.txt
Expand Up @@ -122,7 +122,7 @@
</body>
</html>

>>> print(Cleaner(style=True, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
>>> print(Cleaner(style=True, inline_style=True, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
<html>
<head>
</head>
Expand All @@ -142,6 +142,26 @@
</body>
</html>

>>> print(Cleaner(style=True, inline_style=False, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
<html>
<head>
</head>
<body>
<a href="">a link</a>
<a href="">a control char link</a>
<a href="">data</a>
<a href="#">another link</a>
<p>a paragraph</p>
<div style="display: none">secret EVIL!</div>
of EVIL!
Password:
<a href="evil-site" rel="nofollow">spam spam SPAM!</a>
<a href="http://example.com" rel="author nofollow">Author</a>
<a href="http://example.com" rel="nofollow">Text</a>
<img src="evil!">
</body>
</html>

>>> print(Cleaner(links=False, page_structure=False, javascript=True, host_whitelist=['example.com'], whitelist_tags=None).clean_html(doc))
<html>
<head>
Expand All @@ -165,4 +185,3 @@
<img src="evil!">
</body>
</html>

0 comments on commit 0a43d0b

Please sign in to comment.