Skip to content

Commit cb6dca7

Browse files
committed
Make css sanitization non-default, fix docs, fix tests (#633)
To use CSS sanitization, you now have to install the css extras, which installs tinycss2. Additionally, I reworked CSS sanitization so that it is encapsulated in a class, making it easier for developers to provide their own sanitizer if they want to. I changed ALLOWED_CSS_PROPERTIES (previously called styles) to match what html5lib has, and I updated the tests and documentation accordingly.
1 parent efc224d commit cb6dca7

File tree

11 files changed

+271
-173
lines changed

11 files changed

+271
-173
lines changed

bleach/__init__.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from bleach.sanitizer import (
66
ALLOWED_ATTRIBUTES,
77
ALLOWED_PROTOCOLS,
8-
ALLOWED_STYLES,
98
ALLOWED_TAGS,
109
Cleaner,
1110
)
@@ -24,10 +23,10 @@ def clean(
2423
text,
2524
tags=ALLOWED_TAGS,
2625
attributes=ALLOWED_ATTRIBUTES,
27-
styles=ALLOWED_STYLES,
2826
protocols=ALLOWED_PROTOCOLS,
2927
strip=False,
3028
strip_comments=True,
29+
css_sanitizer=None,
3130
):
3231
"""Clean an HTML fragment of malicious content and return it
3332
@@ -59,26 +58,26 @@ def clean(
5958
:arg dict attributes: allowed attributes; can be a callable, list or dict;
6059
defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
6160
62-
:arg list styles: allowed list of css styles; defaults to
63-
``bleach.sanitizer.ALLOWED_STYLES``
64-
6561
:arg list protocols: allowed list of protocols for links; defaults
6662
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
6763
6864
:arg bool strip: whether or not to strip disallowed elements
6965
7066
:arg bool strip_comments: whether or not to strip HTML comments
7167
68+
:arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
69+
sanitizing style attribute values and style text; defaults to None
70+
7271
:returns: cleaned text as unicode
7372
7473
"""
7574
cleaner = Cleaner(
7675
tags=tags,
7776
attributes=attributes,
78-
styles=styles,
7977
protocols=protocols,
8078
strip=strip,
8179
strip_comments=strip_comments,
80+
css_sanitizer=css_sanitizer,
8281
)
8382
return cleaner.clean(text)
8483

bleach/css_sanitizer.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import tinycss2
2+
3+
4+
ALLOWED_CSS_PROPERTIES = frozenset(
5+
(
6+
"azimuth",
7+
"background-color",
8+
"border-bottom-color",
9+
"border-collapse",
10+
"border-color",
11+
"border-left-color",
12+
"border-right-color",
13+
"border-top-color",
14+
"clear",
15+
"color",
16+
"cursor",
17+
"direction",
18+
"display",
19+
"elevation",
20+
"float",
21+
"font",
22+
"font-family",
23+
"font-size",
24+
"font-style",
25+
"font-variant",
26+
"font-weight",
27+
"height",
28+
"letter-spacing",
29+
"line-height",
30+
"overflow",
31+
"pause",
32+
"pause-after",
33+
"pause-before",
34+
"pitch",
35+
"pitch-range",
36+
"richness",
37+
"speak",
38+
"speak-header",
39+
"speak-numeral",
40+
"speak-punctuation",
41+
"speech-rate",
42+
"stress",
43+
"text-align",
44+
"text-decoration",
45+
"text-indent",
46+
"unicode-bidi",
47+
"vertical-align",
48+
"voice-family",
49+
"volume",
50+
"white-space",
51+
"width",
52+
)
53+
)
54+
55+
56+
ALLOWED_SVG_PROPERTIES = frozenset(
57+
(
58+
"fill",
59+
"fill-opacity",
60+
"fill-rule",
61+
"stroke",
62+
"stroke-width",
63+
"stroke-linecap",
64+
"stroke-linejoin",
65+
"stroke-opacity",
66+
)
67+
)
68+
69+
70+
class CSSSanitizer:
    """Reduce a CSS declaration list to an allow-list of properties.

    :arg allowed_css_properties: collection of CSS property names to keep;
        names are compared against each declaration's lowercased name;
        defaults to ``ALLOWED_CSS_PROPERTIES``

    :arg allowed_svg_properties: collection of SVG property names to keep,
        checked in addition to ``allowed_css_properties``; defaults to
        ``ALLOWED_SVG_PROPERTIES``

    """

    def __init__(
        self,
        allowed_css_properties=ALLOWED_CSS_PROPERTIES,
        allowed_svg_properties=ALLOWED_SVG_PROPERTIES,
    ):
        self.allowed_css_properties = allowed_css_properties
        self.allowed_svg_properties = allowed_svg_properties

    def sanitize_css(self, style):
        """Sanitize a CSS declaration list such as a style attribute value

        The text is parsed with ``tinycss2.parse_declaration_list``. Only
        declarations whose lowercased property name is in
        ``allowed_css_properties`` or ``allowed_svg_properties`` are kept.

        :arg str style: the CSS declaration list to sanitize

        :returns: the serialized, stripped text of the kept tokens, or ``""``
            if nothing was parsed or nothing was kept

        """
        parsed = tinycss2.parse_declaration_list(style)

        if not parsed:
            return ""

        new_tokens = []
        for token in parsed:
            if token.type == "declaration":
                if (
                    token.lower_name in self.allowed_css_properties
                    or token.lower_name in self.allowed_svg_properties
                ):
                    new_tokens.append(token)
            elif token.type in ("comment", "whitespace"):
                # Only keep a comment/whitespace token after something has
                # already been kept, and never two of the same type in a row.
                if new_tokens and new_tokens[-1].type != token.type:
                    new_tokens.append(token)

            # NOTE: every other node type (at-rules, parse errors) is
            # deliberately not handled and is silently dropped from the
            # output. Nothing is printed or logged for them.

        if not new_tokens:
            return ""

        return tinycss2.serialize(new_tokens).strip()

bleach/html5lib_shim.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
) # noqa: E402 module level import not at top of file
3737
from bleach._vendor.html5lib.filters.sanitizer import (
3838
allowed_protocols,
39+
allowed_css_properties,
40+
allowed_svg_properties,
3941
) # noqa: E402 module level import not at top of file
4042
from bleach._vendor.html5lib.filters.sanitizer import (
4143
Filter as SanitizerFilter,

bleach/sanitizer.py

Lines changed: 31 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import warnings
44

55
from bleach._vendor.parse import urlparse
6-
import tinycss2
76
from xml.sax.saxutils import unescape
87

98
from bleach import html5lib_shim
@@ -33,9 +32,6 @@
3332
"acronym": ["title"],
3433
}
3534

36-
#: List of allowed styles
37-
ALLOWED_STYLES = []
38-
3935
#: List of allowed protocols
4036
ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
4137

@@ -85,11 +81,11 @@ def __init__(
8581
self,
8682
tags=ALLOWED_TAGS,
8783
attributes=ALLOWED_ATTRIBUTES,
88-
styles=ALLOWED_STYLES,
8984
protocols=ALLOWED_PROTOCOLS,
9085
strip=False,
9186
strip_comments=True,
9287
filters=None,
88+
css_sanitizer=None,
9389
):
9490
"""Initializes a Cleaner
9591
@@ -99,9 +95,6 @@ def __init__(
9995
:arg dict attributes: allowed attributes; can be a callable, list or dict;
10096
defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
10197
102-
:arg list styles: allowed list of css styles; defaults to
103-
``bleach.sanitizer.ALLOWED_STYLES``
104-
10598
:arg list protocols: allowed list of protocols for links; defaults
10699
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
107100
@@ -118,14 +111,17 @@ def __init__(
118111
Using filters changes the output of ``bleach.Cleaner.clean``.
119112
Make sure the way the filters change the output are secure.
120113
114+
:arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
115+
sanitizing style attribute values and style text; defaults to None
116+
121117
"""
122118
self.tags = tags
123119
self.attributes = attributes
124-
self.styles = styles
125120
self.protocols = protocols
126121
self.strip = strip
127122
self.strip_comments = strip_comments
128123
self.filters = filters or []
124+
self.css_sanitizer = css_sanitizer
129125

130126
self.parser = html5lib_shim.BleachHTMLParser(
131127
tags=self.tags,
@@ -175,11 +171,10 @@ def clean(self, text):
175171
attributes=self.attributes,
176172
strip_disallowed_elements=self.strip,
177173
strip_html_comments=self.strip_comments,
174+
css_sanitizer=self.css_sanitizer,
178175
# html5lib-sanitizer things
179176
allowed_elements=self.tags,
180-
allowed_css_properties=self.styles,
181177
allowed_protocols=self.protocols,
182-
allowed_svg_properties=[],
183178
)
184179

185180
# Apply any filters after the BleachSanitizerFilter
@@ -242,36 +237,40 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
242237
def __init__(
243238
self,
244239
source,
240+
allowed_elements=ALLOWED_TAGS,
245241
attributes=ALLOWED_ATTRIBUTES,
242+
allowed_protocols=ALLOWED_PROTOCOLS,
246243
strip_disallowed_elements=False,
247244
strip_html_comments=True,
245+
css_sanitizer=None,
248246
**kwargs,
249247
):
250248
"""Creates a BleachSanitizerFilter instance
251249
252250
:arg Treewalker source: stream
253251
254-
:arg list tags: allowed list of tags; defaults to
252+
:arg list allowed_elements: allowed list of tags; defaults to
255253
``bleach.sanitizer.ALLOWED_TAGS``
256254
257255
:arg dict attributes: allowed attributes; can be a callable, list or dict;
258256
defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
259257
260-
:arg list styles: allowed list of css styles; defaults to
261-
``bleach.sanitizer.ALLOWED_STYLES``
262-
263-
:arg list protocols: allowed list of protocols for links; defaults
258+
:arg list allowed_protocols: allowed list of protocols for links; defaults
264259
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
265260
266261
:arg bool strip_disallowed_elements: whether or not to strip disallowed
267262
elements
268263
269264
:arg bool strip_html_comments: whether or not to strip HTML comments
270265
266+
:arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
267+
sanitizing style attribute values and style text; defaults to None
268+
271269
"""
272270
self.attr_filter = attribute_filter_factory(attributes)
273271
self.strip_disallowed_elements = strip_disallowed_elements
274272
self.strip_html_comments = strip_html_comments
273+
self.css_sanitizer = css_sanitizer
275274

276275
# filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
277276
warnings.filterwarnings(
@@ -280,7 +279,12 @@ def __init__(
280279
category=DeprecationWarning,
281280
module="bleach._vendor.html5lib",
282281
)
283-
return super().__init__(source, **kwargs)
282+
return super().__init__(
283+
source,
284+
allowed_elements=allowed_elements,
285+
allowed_protocols=allowed_protocols,
286+
**kwargs,
287+
)
284288

285289
def sanitize_stream(self, token_iterator):
286290
for token in token_iterator:
@@ -542,7 +546,16 @@ def allow_token(self, token):
542546

543547
# If it's a style attribute, sanitize it
544548
if namespaced_name == (None, "style"):
545-
val = self.sanitize_css(val)
549+
if self.css_sanitizer:
550+
val = self.css_sanitizer.sanitize_css(val)
551+
else:
552+
# FIXME(willkg): if style is allowed, but no
553+
# css_sanitizer was set up, then this is probably a
554+
# mistake and we should raise an error here
555+
#
556+
# For now, we're going to set the value to "" because
557+
# there was no sanitizer set
558+
val = ""
546559

547560
# At this point, we want to keep the attribute, so add it in
548561
attrs[namespaced_name] = val
@@ -594,37 +607,3 @@ def disallowed_token(self, token):
594607

595608
del token["name"]
596609
return token
597-
598-
def sanitize_css(self, style):
599-
"""Sanitizes css in style tags"""
600-
parsed = tinycss2.parse_declaration_list(style)
601-
602-
if not parsed:
603-
return ""
604-
605-
# decl.name.lower() in self.allowed_css_properties
606-
# or decl.name.lower() in self.allowed_svg_properties
607-
608-
new_tokens = []
609-
for token in parsed:
610-
if token.type == "at-rule":
611-
print("omg")
612-
elif token.type == "declaration":
613-
if (
614-
token.lower_name in self.allowed_css_properties
615-
or token.lower_name in self.allowed_svg_properties
616-
):
617-
new_tokens.append(token)
618-
elif token.type in ("comment", "whitespace"):
619-
if new_tokens and new_tokens[-1].type != token.type:
620-
new_tokens.append(token)
621-
# Declaration
622-
# AtRule
623-
# Comment
624-
# WhitespaceToken
625-
# ParseError
626-
627-
if not new_tokens:
628-
return ""
629-
630-
return tinycss2.serialize(new_tokens).strip()

0 commit comments

Comments
 (0)