33import warnings
44
55from bleach ._vendor .parse import urlparse
6- import tinycss2
76from xml .sax .saxutils import unescape
87
98from bleach import html5lib_shim
3332 "acronym" : ["title" ],
3433}
3534
36- #: List of allowed styles
37- ALLOWED_STYLES = []
38-
3935#: List of allowed protocols
4036ALLOWED_PROTOCOLS = ["http" , "https" , "mailto" ]
4137
@@ -85,11 +81,11 @@ def __init__(
8581 self ,
8682 tags = ALLOWED_TAGS ,
8783 attributes = ALLOWED_ATTRIBUTES ,
88- styles = ALLOWED_STYLES ,
8984 protocols = ALLOWED_PROTOCOLS ,
9085 strip = False ,
9186 strip_comments = True ,
9287 filters = None ,
88+ css_sanitizer = None ,
9389 ):
9490 """Initializes a Cleaner
9591
@@ -99,9 +95,6 @@ def __init__(
9995 :arg dict attributes: allowed attributes; can be a callable, list or dict;
10096 defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
10197
102- :arg list styles: allowed list of css styles; defaults to
103- ``bleach.sanitizer.ALLOWED_STYLES``
104-
10598 :arg list protocols: allowed list of protocols for links; defaults
10699 to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
107100
@@ -118,14 +111,17 @@ def __init__(
118111 Using filters changes the output of ``bleach.Cleaner.clean``.
119112 Make sure the way the filters change the output is secure.
120113
114+ :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
115+ sanitizing style attribute values and style text; defaults to None
116+
121117 """
122118 self .tags = tags
123119 self .attributes = attributes
124- self .styles = styles
125120 self .protocols = protocols
126121 self .strip = strip
127122 self .strip_comments = strip_comments
128123 self .filters = filters or []
124+ self .css_sanitizer = css_sanitizer
129125
130126 self .parser = html5lib_shim .BleachHTMLParser (
131127 tags = self .tags ,
@@ -175,11 +171,10 @@ def clean(self, text):
175171 attributes = self .attributes ,
176172 strip_disallowed_elements = self .strip ,
177173 strip_html_comments = self .strip_comments ,
174+ css_sanitizer = self .css_sanitizer ,
178175 # html5lib-sanitizer things
179176 allowed_elements = self .tags ,
180- allowed_css_properties = self .styles ,
181177 allowed_protocols = self .protocols ,
182- allowed_svg_properties = [],
183178 )
184179
185180 # Apply any filters after the BleachSanitizerFilter
@@ -242,36 +237,40 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
242237 def __init__ (
243238 self ,
244239 source ,
240+ allowed_elements = ALLOWED_TAGS ,
245241 attributes = ALLOWED_ATTRIBUTES ,
242+ allowed_protocols = ALLOWED_PROTOCOLS ,
246243 strip_disallowed_elements = False ,
247244 strip_html_comments = True ,
245+ css_sanitizer = None ,
248246 ** kwargs ,
249247 ):
250248 """Creates a BleachSanitizerFilter instance
251249
252250 :arg Treewalker source: stream
253251
254- :arg list tags : allowed list of tags; defaults to
252+ :arg list allowed_elements : allowed list of tags; defaults to
255253 ``bleach.sanitizer.ALLOWED_TAGS``
256254
257255 :arg dict attributes: allowed attributes; can be a callable, list or dict;
258256 defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
259257
260- :arg list styles: allowed list of css styles; defaults to
261- ``bleach.sanitizer.ALLOWED_STYLES``
262-
263- :arg list protocols: allowed list of protocols for links; defaults
258+ :arg list allowed_protocols: allowed list of protocols for links; defaults
264259 to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
265260
266261 :arg bool strip_disallowed_elements: whether or not to strip disallowed
267262 elements
268263
269264 :arg bool strip_html_comments: whether or not to strip HTML comments
270265
266+ :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
267+ sanitizing style attribute values and style text; defaults to None
268+
271269 """
272270 self .attr_filter = attribute_filter_factory (attributes )
273271 self .strip_disallowed_elements = strip_disallowed_elements
274272 self .strip_html_comments = strip_html_comments
273+ self .css_sanitizer = css_sanitizer
275274
276275 # filter out the html5lib deprecation warnings (which tell users to use bleach) triggered from BleachSanitizerFilter init
277276 warnings .filterwarnings (
@@ -280,7 +279,12 @@ def __init__(
280279 category = DeprecationWarning ,
281280 module = "bleach._vendor.html5lib" ,
282281 )
283- return super ().__init__ (source , ** kwargs )
282+ return super ().__init__ (
283+ source ,
284+ allowed_elements = allowed_elements ,
285+ allowed_protocols = allowed_protocols ,
286+ ** kwargs ,
287+ )
284288
285289 def sanitize_stream (self , token_iterator ):
286290 for token in token_iterator :
@@ -542,7 +546,16 @@ def allow_token(self, token):
542546
543547 # If it's a style attribute, sanitize it
544548 if namespaced_name == (None , "style" ):
545- val = self .sanitize_css (val )
549+ if self .css_sanitizer :
550+ val = self .css_sanitizer .sanitize_css (val )
551+ else :
552+ # FIXME(willkg): if style is allowed, but no
553+ # css_sanitizer was set up, then this is probably a
554+ # mistake and we should raise an error here
555+ #
556+ # For now, we're going to set the value to "" because
557+ # there was no sanitizer set
558+ val = ""
546559
547560 # At this point, we want to keep the attribute, so add it in
548561 attrs [namespaced_name ] = val
@@ -594,37 +607,3 @@ def disallowed_token(self, token):
594607
595608 del token ["name" ]
596609 return token
597-
598- def sanitize_css (self , style ):
599- """Sanitizes css in style tags"""
600- parsed = tinycss2 .parse_declaration_list (style )
601-
602- if not parsed :
603- return ""
604-
605- # decl.name.lower() in self.allowed_css_properties
606- # or decl.name.lower() in self.allowed_svg_properties
607-
608- new_tokens = []
609- for token in parsed :
610- if token .type == "at-rule" :
611- print ("omg" )
612- elif token .type == "declaration" :
613- if (
614- token .lower_name in self .allowed_css_properties
615- or token .lower_name in self .allowed_svg_properties
616- ):
617- new_tokens .append (token )
618- elif token .type in ("comment" , "whitespace" ):
619- if new_tokens and new_tokens [- 1 ].type != token .type :
620- new_tokens .append (token )
621- # Declaration
622- # AtRule
623- # Comment
624- # WhitespaceToken
625- # ParseError
626-
627- if not new_tokens :
628- return ""
629-
630- return tinycss2 .serialize (new_tokens ).strip ()
0 commit comments