This repository has been archived by the owner. It is now read-only.
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
586 lines (523 sloc) 16.7 KB
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
# The Original Code is reddit.
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
# All portions of the code written by reddit are Copyright (c) 2006-2015 reddit
# Inc. All Rights Reserved.
"""Parse and validate a safe subset of CSS.
The goal of this validation is not to ensure functionally correct stylesheets
but rather that the stylesheet is safe to show to downstream users. This
includes:

* not generating requests to third party hosts (information leak)
* xss via strange syntax in buggy browsers

Beyond that, every effort is made to allow the full gamut of modern CSS.
"""
import itertools
import re
import unicodedata
import tinycss2
from pylons.i18n import N_
from r2.lib.contrib import rcssmin
from r2.lib.utils import tup
__all__ = ["validate_css"]
# Import-time sanity checks: identifiers are lowercased before being looked up
# in these collections, so every entry must itself be lowercase.
# NOTE(review): VENDOR_PREFIXES, SAFE_PROPERTIES and SAFE_FUNCTIONS (as well as
# MAX_SIZE_KIB and SIMPLE_TOKEN_TYPES, which are used further down) are not
# defined in the visible part of this file -- confirm their definitions are
# in place before these statements run.
assert all(prefix == prefix.lower() for prefix in VENDOR_PREFIXES)
# the "filter" property cannot be safely added while IE9 is allowed to
# use subreddit stylesheets. see explanation here:
# "filter",
# NOTE(review): the link the comment above refers to is missing.
assert all(name == name.lower() for name in SAFE_PROPERTIES)
assert all(name == name.lower() for name in SAFE_FUNCTIONS)

# Maps every ValidationError.error_code to its message template.  The
# %(...)s placeholders are named after the keys of the error's
# message_params.  "%%%%" collapses to a literal "%%" after one round of
# %-formatting, so users see "%%imagename%%".
ERROR_MESSAGES = {
    "IMAGE_NOT_FOUND": N_('no image found with name "%(name)s"'),
    "NON_PLACEHOLDER_URL": N_("only uploaded images are allowed; reference "
                              "them with the %%%%imagename%%%% system"),
    "SYNTAX_ERROR": N_("syntax error: %(message)s"),
    "UNKNOWN_AT_RULE": N_("@%(keyword)s is not allowed"),
    "UNKNOWN_PROPERTY": N_('unknown property "%(name)s"'),
    "UNKNOWN_FUNCTION": N_('unknown function "%(function)s"'),
    "UNEXPECTED_TOKEN": N_('unexpected token "%(token)s"'),
    "BACKSLASH": N_("backslashes are not allowed"),
    "CONTROL_CHARACTER": N_("control characters are not allowed"),
    "TOO_BIG": N_("the stylesheet is too big. maximum size: %(size)d KiB"),
}

# Matches a url() value consisting solely of a %%name%% placeholder; the single
# capture group is the image name (ASCII letters, digits and hyphens only).
SUBREDDIT_IMAGE_URL_PLACEHOLDER = re.compile(r"\A%%([a-zA-Z0-9\-]+)%%\Z")
def strip_vendor_prefix(identifier):
    """Return ``identifier`` with a leading vendor prefix removed.

    The first entry of VENDOR_PREFIXES that ``identifier`` starts with is
    cut off; an identifier without any known prefix is returned unchanged.
    Callers pass already-lowercased names, which is why the prefixes are
    required to be lowercase.
    """
    for prefix in VENDOR_PREFIXES:
        if identifier.startswith(prefix):
            return identifier[len(prefix):]
    return identifier
class ValidationError(object):
    """A single problem found while validating a stylesheet.

    Attributes:
        line: 1-based line number the problem was found on (0 is used for
            errors that do not belong to a particular line).
        error_code: key into ERROR_MESSAGES identifying the problem.
        message_params: dict of values for the message's placeholders.
    """

    def __init__(self, line_number, error_code, message_params=None):
        self.line = line_number
        self.error_code = error_code
        self.message_params = message_params or {}
        # note: _source_lines is added to these objects by the parser

    # NOTE(review): the two accessors below read like attributes; if callers
    # use them without calling, a @property decorator is missing -- confirm.
    def offending_line(self):
        """Return the source line this error refers to.

        An empty string is returned when the parser never attached source
        lines to this error or when ``line`` does not point inside them
        (e.g. the line-0 "TOO_BIG" error).
        """
        source_lines = getattr(self, "_source_lines", None)
        if not source_lines or not 1 <= self.line <= len(source_lines):
            return ""
        return source_lines[self.line - 1]

    def message_key(self):
        """Return the message template registered for ``error_code``."""
        return ERROR_MESSAGES[self.error_code]
class StylesheetValidator(object):
    """Validate a tinycss2-parsed stylesheet against the safe CSS subset.

    ``images`` maps subreddit image names to their URLs.

    Every ``validate_*`` method returns ``None``, a single ValidationError
    or an iterable of ValidationErrors; ``validate_list`` flattens those
    results into one stream of errors.
    """

    def __init__(self, images):
        self.images = images

    def validate_url(self, url_node):
        """Check a url token and swap its placeholder for the image URL.

        Only ``%%name%%`` placeholders naming a known image are accepted.
        On success the node is rewritten in place and None is returned.
        """
        m = SUBREDDIT_IMAGE_URL_PLACEHOLDER.match(url_node.value)
        if not m:
            return ValidationError(url_node.source_line, "NON_PLACEHOLDER_URL")

        image_name = m.group(1)
        if image_name not in self.images:
            return ValidationError(url_node.source_line, "IMAGE_NOT_FOUND",
                                   {"name": image_name})

        # rewrite the url value to the actual url of the image
        url_node.value = self.images[image_name]

    def validate_function(self, function_node):
        """Reject functions outside SAFE_FUNCTIONS, then check the arguments."""
        function_name = strip_vendor_prefix(function_node.lower_name)

        if function_name not in SAFE_FUNCTIONS:
            return ValidationError(function_node.source_line,
                                   "UNKNOWN_FUNCTION",
                                   {"function": function_node.name})
        # property: attr(something url)
        elif function_name == "attr":
            # attr() with a "url" type would let an attribute value be used
            # as a URL, sidestepping the placeholder-only rule
            for argument in function_node.arguments:
                if argument.type == "ident" and argument.lower_value == "url":
                    return ValidationError(argument.source_line,
                                           "NON_PLACEHOLDER_URL")

        return self.validate_component_values(function_node.arguments)

    def validate_block(self, block):
        """Validate the component values inside a [] or () block."""
        return self.validate_component_values(block.content)

    def validate_component_values(self, component_values):
        """Validate a flat list of component values (preludes, values, ...)."""
        return self.validate_list(component_values, {
            # {} blocks are technically part of component values but i don't
            # know of any actual valid uses for them in selectors etc. and they
            # can cause issues with e.g.
            # Safari 5: p[foo=bar{}*{background:green}]{background:red}
            "[] block": self.validate_block,
            "() block": self.validate_block,
            "url": self.validate_url,
            "function": self.validate_function,
        }, ignored_types=SIMPLE_TOKEN_TYPES)

    def validate_declaration(self, declaration):
        """Reject properties outside SAFE_PROPERTIES, then check the value."""
        if strip_vendor_prefix(declaration.lower_name) not in SAFE_PROPERTIES:
            return ValidationError(declaration.source_line, "UNKNOWN_PROPERTY",
                                   {"name": declaration.name})
        return self.validate_component_values(declaration.value)

    def validate_declaration_list(self, declarations):
        """Validate the declarations (and nested at-rules) of a rule body."""
        return self.validate_list(declarations, {
            "at-rule": self.validate_at_rule,
            "declaration": self.validate_declaration,
        })

    def validate_qualified_rule(self, rule):
        """Validate a rule's prelude and the declarations in its body."""
        prelude_errors = self.validate_component_values(rule.prelude)
        declarations = tinycss2.parse_declaration_list(rule.content)
        declaration_errors = self.validate_declaration_list(declarations)
        return itertools.chain(prelude_errors, declaration_errors)

    def validate_at_rule(self, rule):
        """Validate an at-rule; only media, keyframes and page are allowed.

        The keyword is compared with any vendor prefix stripped.
        """
        prelude_errors = self.validate_component_values(rule.prelude)

        keyword = strip_vendor_prefix(rule.lower_at_keyword)
        if keyword in ("media", "keyframes"):
            # these contain nested rules
            rules = tinycss2.parse_rule_list(rule.content)
            rule_errors = self.validate_rule_list(rules)
        elif keyword == "page":
            # the body is a declaration list, like a qualified rule's
            rule_errors = self.validate_qualified_rule(rule)
        else:
            return ValidationError(rule.source_line, "UNKNOWN_AT_RULE",
                                   {"keyword": rule.at_keyword})

        return itertools.chain(prelude_errors, rule_errors)

    def validate_rule_list(self, rules):
        """Validate a list of qualified rules and at-rules."""
        return self.validate_list(rules, {
            "qualified-rule": self.validate_qualified_rule,
            "at-rule": self.validate_at_rule,
        })

    def validate_list(self, nodes, validators_by_type, ignored_types=None):
        """Yield the errors found in ``nodes``.

        ``validators_by_type`` maps a node type to the callable that checks
        nodes of that type.  A node without a validator is reported as an
        unexpected token unless its type is listed in ``ignored_types``.
        """
        for node in nodes:
            if node.type == "error":
                yield ValidationError(node.source_line, "SYNTAX_ERROR",
                                      {"message": node.message})
                # already reported; don't flag it as an unexpected token too
                continue
            elif node.type == "literal":
                if node.value == ";":
                    # if we're seeing a semicolon as a literal, it's in a place
                    # that doesn't fit naturally in the syntax.
                    # Safari 5 will treat this as two color properties:
                    # color: calc(;color:red;);
                    message = "semicolons are not allowed in this context"
                    yield ValidationError(node.source_line, "SYNTAX_ERROR",
                                          {"message": message})
                    continue

            validator = validators_by_type.get(node.type)

            if validator:
                # a validator may return nothing, one error or many;
                # presumably tup() wraps a lone value in a tuple and passes
                # iterables through -- confirm against r2.lib.utils
                for error in tup(validator(node)):
                    if error:
                        yield error
            elif not ignored_types or node.type not in ignored_types:
                yield ValidationError(node.source_line,
                                      "UNEXPECTED_TOKEN",
                                      {"token": node.type})

    def check_for_evil_codepoints(self, source_lines):
        """Yield an error for each line with a backslash or control character.

        A line is reported once, for its first offending character, so a
        hostile stylesheet cannot inflate the error list.
        """
        for line_number, line_text in enumerate(source_lines, start=1):
            for codepoint in line_text:
                # IE<8: *{color: expression\28 alert\28 1 \29 \29 }
                if codepoint == "\\":
                    yield ValidationError(line_number, "BACKSLASH")
                    break
                # accept these characters that get classified as control
                elif codepoint in ("\t", "\n", "\r"):
                    continue
                # Safari: *{font-family:'foobar\x03;background:url(evil);';}
                elif unicodedata.category(codepoint).startswith("C"):
                    yield ValidationError(line_number, "CONTROL_CHARACTER")
                    break

    def parse_and_validate(self, stylesheet_source):
        """Parse ``stylesheet_source`` and validate it.

        Returns a ``(serialized, errors)`` two-tuple.  ``errors`` is sorted
        by line number; ``serialized`` is the minified, UTF-8 encoded
        stylesheet when there are no errors and an empty string otherwise.
        """
        if len(stylesheet_source) > (MAX_SIZE_KIB * 1024):
            return "", [ValidationError(0, "TOO_BIG", {"size": MAX_SIZE_KIB})]

        nodes = tinycss2.parse_stylesheet(stylesheet_source)

        source_lines = stylesheet_source.splitlines()

        backslash_errors = self.check_for_evil_codepoints(source_lines)
        validation_errors = self.validate_rule_list(nodes)

        errors = []
        for error in itertools.chain(backslash_errors, validation_errors):
            # lets the error look up the text of its offending line
            error._source_lines = source_lines
            errors.append(error)
        errors.sort(key=lambda e: e.line)

        if not errors:
            serialized = rcssmin.cssmin(tinycss2.serialize(nodes))
        else:
            serialized = ""

        return serialized.encode("utf-8"), errors
def validate_css(stylesheet, images):
    """Validate and re-serialize the user submitted stylesheet.

    images is a mapping of subreddit image names to their URLs.  The
    re-serialized stylesheet will have %%name%% tokens replaced with their
    appropriate URLs.

    The return value is a two-tuple of the re-serialized (and minified)
    stylesheet and a list of errors.  If the list is empty, the stylesheet is
    valid.

    """
    # the stylesheet must already be decoded text, not a byte string
    assert isinstance(stylesheet, unicode)
    validator = StylesheetValidator(images)
    return validator.parse_and_validate(stylesheet)