Permalink
Browse files

Disallow surrogate escapes in dicts and lists in the config

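In Dict.to_str() and List.to_str() we use json.dump to serialize the value.
However, by default json.dump writes characters outside the Basic Multilingual
Plane as UTF-16 surrogate escapes, which YAML does not turn back into the
original character when loading them, so the value does not survive a round
trip. Passing ensure_ascii=False does not help either, as YAML then refuses to
read the character at all.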

>>> yaml.load(json.dumps({'\U00010000': True}))
{'\ud800\udc00': True}

>>> yaml.load(json.dumps({'\U00010000': True}, ensure_ascii=False))
yaml.reader.ReaderError: unacceptable character #x10000: special characters are not allowed

See:
https://stackoverflow.com/a/38552626/2085149
https://news.ycombinator.com/item?id=12798032
  • Loading branch information...
The-Compiler committed Jul 4, 2017
1 parent fa0f4e1 commit 9ac2dbcc80330c6090ecdce656046931e8cf591b
Showing with 22 additions and 1 deletion.
  1. +17 −0 qutebrowser/config/configtypes.py
  2. +5 −1 tests/unit/config/test_configtypes.py
@@ -181,6 +181,17 @@ def _basic_str_validation(self, value):
raise configexc.ValidationError(
value, "may not contain unprintable chars!")
def _validate_surrogate_escapes(self, full_value, value):
"""Make sure the given value doesn't contain surrogate escapes.
This is used for values passed to json.dump, as it can't handle those.
"""
if not isinstance(value, str):
return
if any(ord(c) > 0xFFFF for c in value):
raise configexc.ValidationError(
full_value, "may not contain surrogate escapes!")
def _validate_valid_values(self, value):
"""Validate value against possible values.
@@ -418,6 +429,9 @@ def to_py(self, value):
if not value:
return []
for val in value:
self._validate_surrogate_escapes(value, val)
if self.length is not None and len(value) != self.length:
raise configexc.ValidationError(value, "Exactly {} values need to "
"be set!".format(self.length))
@@ -1089,6 +1103,9 @@ def to_py(self, value):
return self._fill_fixed_keys({})
self._validate_keys(value)
for key, val in value.items():
self._validate_surrogate_escapes(value, key)
self._validate_surrogate_escapes(value, val)
d = {self.keytype.to_py(key): self.valtype.to_py(val)
for key, val in value.items()}
@@ -429,6 +429,8 @@ def test_lengths_invalid(self, klass, minlen, maxlen):
({'minlen': 2, 'maxlen': 3}, 'abc'),
# valid_values
({'valid_values': configtypes.ValidValues('abcd')}, 'abcd'),
# Surrogate escapes are allowed in strings
({}, '\U00010000'),
])
def test_to_py(self, klass, kwargs, val):
assert klass(**kwargs).to_py(val) == val
@@ -535,7 +537,7 @@ def test_from_str_invalid(self, klass, val):
def test_to_py(self, klass, val):
assert klass().to_py(val) == val
@pytest.mark.parametrize('val', [[42], '["foo"]'])
@pytest.mark.parametrize('val', [[42], '["foo"]', ['\U00010000']])
def test_to_py_invalid(self, klass, val):
with pytest.raises(configexc.ValidationError):
klass().to_py(val)
@@ -1427,6 +1429,8 @@ def test_to_py_valid(self, klass, keytype, valtype, val):
assert klass(keytype=keytype, valtype=valtype).to_py(val) == val
@pytest.mark.parametrize('val', [
{'\U00010000': 'foo'}, # UTF-16 surrogate in key
{'foo': '\U00010000'}, # UTF-16 surrogate in value
{0: 'foo'}, # Invalid key type
{'foo': 0}, # Invalid value type
])

0 comments on commit 9ac2dbc

Please sign in to comment.