Skip to content

Commit

Permalink
only use '&' as a query string separator
Browse files Browse the repository at this point in the history
Origin: python/cpython#24297
Last-Update: 2021-04-03

 CVE-2021-23336

Gbp-Pq: Name CVE-2021-23336.patch
  • Loading branch information
gladk committed Apr 5, 2021
1 parent f131219 commit b1f98fd
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 42 deletions.
7 changes: 5 additions & 2 deletions Doc/library/cgi.rst
Expand Up @@ -188,6 +188,9 @@ A form submitted via POST that also has a query string will contain both
Added support for the context management protocol to the
:class:`FieldStorage` class.

.. versionchanged:: 3.5.3-1+deb9u4
Added the *separator* parameter.


Higher Level Interface
----------------------
Expand Down Expand Up @@ -277,10 +280,10 @@ These are useful if you want more control, or if you want to employ some of the
algorithms implemented in this module in other circumstances.


.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False)
.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator="&")

Parse a query in the environment or from a file (the file defaults to
``sys.stdin``). The *keep_blank_values* and *strict_parsing* parameters are
``sys.stdin``). The *keep_blank_values*, *strict_parsing* and *separator* parameters are
passed to :func:`urllib.parse.parse_qs` unchanged.


Expand Down
15 changes: 13 additions & 2 deletions Doc/library/urllib.parse.rst
Expand Up @@ -135,7 +135,7 @@ or on combining URL components into a URL string.
now raise :exc:`ValueError`.


.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace')
.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', separator='&')

Parse a query string given as a string argument (data of type
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a
Expand All @@ -156,6 +156,9 @@ or on combining URL components into a URL string.
percent-encoded sequences into Unicode characters, as accepted by the
:meth:`bytes.decode` method.

The optional argument *separator* is the symbol to use for separating the
query arguments. It defaults to ``&``.

Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
parameter set to ``True``) to convert such dictionaries into query
strings.
Expand All @@ -164,8 +167,13 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.2
Add *encoding* and *errors* parameters.

.. versionchanged:: 3.5.3-1+deb9u4
Added the *separator* parameter with a default value of ``&``. Python
versions earlier than Python 3.7.10 allowed using both ``;`` and ``&`` as
query parameter separators. This has been changed to allow only a single
separator character, with ``&`` as the default separator.

.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace')
.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', separator='&')

Parse a query string given as a string argument (data of type
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of
Expand All @@ -185,6 +193,9 @@ or on combining URL components into a URL string.
percent-encoded sequences into Unicode characters, as accepted by the
:meth:`bytes.decode` method.

The optional argument *separator* is the symbol to use for separating the
query arguments. It defaults to ``&``.

Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
query strings.

Expand Down
20 changes: 12 additions & 8 deletions Lib/cgi.py
Expand Up @@ -117,7 +117,7 @@ def closelog():
# 0 ==> unlimited input
maxlen = 0

def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0, separator='&'):
"""Parse a query in the environment or from a file (default stdin)
Arguments, all optional:
Expand All @@ -136,6 +136,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
strict_parsing: flag indicating what to do with parsing errors.
If false (the default), errors are silently ignored.
If true, errors raise a ValueError exception.
separator: str. The symbol to use for separating the query arguments.
Defaults to &.
"""
if fp is None:
fp = sys.stdin
Expand All @@ -156,7 +159,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
if environ['REQUEST_METHOD'] == 'POST':
ctype, pdict = parse_header(environ['CONTENT_TYPE'])
if ctype == 'multipart/form-data':
return parse_multipart(fp, pdict)
return parse_multipart(fp, pdict, separator=separator)
elif ctype == 'application/x-www-form-urlencoded':
clength = int(environ['CONTENT_LENGTH'])
if maxlen and clength > maxlen:
Expand All @@ -180,7 +183,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
qs = ""
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
encoding=encoding)
encoding=encoding, separator=separator)


# parse query string function called from urlparse,
Expand All @@ -198,7 +201,7 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
DeprecationWarning, 2)
return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)

def parse_multipart(fp, pdict):
def parse_multipart(fp, pdict, separator='&'):
"""Parse multipart input.
Arguments:
Expand Down Expand Up @@ -404,7 +407,7 @@ class FieldStorage:
"""
def __init__(self, fp=None, headers=None, outerboundary=b'',
environ=os.environ, keep_blank_values=0, strict_parsing=0,
limit=None, encoding='utf-8', errors='replace'):
limit=None, encoding='utf-8', errors='replace', separator='&'):
"""Constructor. Read multipart/* until last part.
Arguments, all optional:
Expand Down Expand Up @@ -448,6 +451,7 @@ def __init__(self, fp=None, headers=None, outerboundary=b'',
method = 'GET'
self.keep_blank_values = keep_blank_values
self.strict_parsing = strict_parsing
self.separator = separator
if 'REQUEST_METHOD' in environ:
method = environ['REQUEST_METHOD'].upper()
self.qs_on_post = None
Expand Down Expand Up @@ -673,7 +677,7 @@ def read_urlencoded(self):
self.list = []
query = urllib.parse.parse_qsl(
qs, self.keep_blank_values, self.strict_parsing,
encoding=self.encoding, errors=self.errors)
encoding=self.encoding, errors=self.errors, separator=self.separator)
for key, value in query:
self.list.append(MiniFieldStorage(key, value))
self.skip_lines()
Expand All @@ -689,7 +693,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
if self.qs_on_post:
query = urllib.parse.parse_qsl(
self.qs_on_post, self.keep_blank_values, self.strict_parsing,
encoding=self.encoding, errors=self.errors)
encoding=self.encoding, errors=self.errors, separator=self.separator)
for key, value in query:
self.list.append(MiniFieldStorage(key, value))

Expand Down Expand Up @@ -727,7 +731,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):

part = klass(self.fp, headers, ib, environ, keep_blank_values,
strict_parsing,self.limit-self.bytes_read,
self.encoding, self.errors)
self.encoding, self.errors, self.separator)
self.bytes_read += part.bytes_read
self.list.append(part)
if part.done or self.bytes_read >= self.length > 0:
Expand Down
29 changes: 23 additions & 6 deletions Lib/test/test_cgi.py
Expand Up @@ -54,12 +54,9 @@ def do_test(buf, method):
("", ValueError("bad query field: ''")),
("&", ValueError("bad query field: ''")),
("&&", ValueError("bad query field: ''")),
(";", ValueError("bad query field: ''")),
(";&;", ValueError("bad query field: ''")),
# Should the next few really be valid?
("=", {}),
("=&=", {}),
("=;=", {}),
# This rest seem to make sense
("=a", {'': ['a']}),
("&=a", ValueError("bad query field: ''")),
Expand All @@ -74,8 +71,6 @@ def do_test(buf, method):
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
'cuyer': ['r'],
Expand Down Expand Up @@ -178,7 +173,29 @@ def test_strict(self):
self.assertEqual(fs.getvalue(key), expect_val)
else:
self.assertEqual(fs.getvalue(key), expect_val[0])

def test_separator(self):
parse_semicolon = [
("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
(";", ValueError("bad query field: ''")),
(";;", ValueError("bad query field: ''")),
("=;a", ValueError("bad query field: 'a'")),
(";b=a", ValueError("bad query field: ''")),
("b;=a", ValueError("bad query field: 'b'")),
("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
]
for orig, expect in parse_semicolon:
env = {'QUERY_STRING': orig}
fs = cgi.FieldStorage(separator=';', environ=env)
if isinstance(expect, dict):
for key in expect.keys():
expect_val = expect[key]
self.assertIn(key, fs)
if len(expect_val) > 1:
self.assertEqual(fs.getvalue(key), expect_val)
else:
self.assertEqual(fs.getvalue(key), expect_val[0])
def test_log(self):
cgi.log("Testing")

Expand Down
64 changes: 44 additions & 20 deletions Lib/test/test_urlparse.py
Expand Up @@ -32,16 +32,10 @@
(b"&a=b", [(b'a', b'b')]),
(b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
(b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
(";", []),
(";;", []),
(";a=b", [('a', 'b')]),
("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
("a=1;a=2", [('a', '1'), ('a', '2')]),
(b";", []),
(b";;", []),
(b";a=b", [(b'a', b'b')]),
(b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
(b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
(";a=b", [(';a', 'b')]),
("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
(b";a=b", [(b';a', b'b')]),
(b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
]

parse_qs_test_cases = [
Expand All @@ -65,16 +59,10 @@
(b"&a=b", {b'a': [b'b']}),
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
(b"a=1&a=2", {b'a': [b'1', b'2']}),
(";", {}),
(";;", {}),
(";a=b", {'a': ['b']}),
("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=1;a=2", {'a': ['1', '2']}),
(b";", {}),
(b";;", {}),
(b";a=b", {b'a': [b'b']}),
(b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
(b"a=1;a=2", {b'a': [b'1', b'2']}),
(";a=b", {';a': ['b']}),
("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
(b";a=b", {b';a': [b'b']}),
(b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
]

class UrlParseTestCase(unittest.TestCase):
Expand Down Expand Up @@ -867,6 +855,42 @@ def test_parse_qsl_encoding(self):
errors="ignore")
self.assertEqual(result, [('key', '\u0141-')])

def test_parse_qs_separator(self):
parse_qs_semicolon_cases = [
(";", {}),
(";;", {}),
(";a=b", {'a': ['b']}),
("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=1;a=2", {'a': ['1', '2']}),
(b";", {}),
(b";;", {}),
(b";a=b", {b'a': [b'b']}),
(b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
(b"a=1;a=2", {b'a': [b'1', b'2']}),
]
for orig, expect in parse_qs_semicolon_cases:
with self.subTest("Original: %s, Expected: %s"%(orig, expect)):
result = urllib.parse.parse_qs(orig, separator=';')
self.assertEqual(result, expect, "Error parsing %r" % orig)

def test_parse_qsl_separator(self):
parse_qsl_semicolon_cases = [
(";", []),
(";;", []),
(";a=b", [('a', 'b')]),
("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
("a=1;a=2", [('a', '1'), ('a', '2')]),
(b";", []),
(b";;", []),
(b";a=b", [(b'a', b'b')]),
(b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
(b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
]
for orig, expect in parse_qsl_semicolon_cases:
with self.subTest("Original: %s, Expected: %s"%(orig, expect)):
result = urllib.parse.parse_qsl(orig, separator=';')
self.assertEqual(result, expect, "Error parsing %r" % orig)

def test_urlencode_sequences(self):
# Other tests incidentally urlencode things; test non-covered cases:
# Sequence and object values.
Expand Down
18 changes: 14 additions & 4 deletions Lib/urllib/parse.py
Expand Up @@ -571,7 +571,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
return ''.join(res)

def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'):
encoding='utf-8', errors='replace', separator='&'):
"""Parse a query given as a string argument.
Arguments:
Expand All @@ -591,10 +591,13 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
encoding and errors: specify how to decode percent-encoded sequences
into Unicode characters, as accepted by the bytes.decode() method.
separator: str. The symbol to use for separating the query arguments.
Defaults to &.
"""
parsed_result = {}
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
encoding=encoding, errors=errors)
encoding=encoding, errors=errors, separator=separator)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
Expand All @@ -603,7 +606,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
return parsed_result

def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'):
encoding='utf-8', errors='replace', separator='&'):
"""Parse a query given as a string argument.
Arguments:
Expand All @@ -623,10 +626,17 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding and errors: specify how to decode percent-encoded sequences
into Unicode characters, as accepted by the bytes.decode() method.
separator: str. The symbol to use for separating the query arguments.
Defaults to &.
Returns a list, as G-d intended.
"""
qs, _coerce_result = _coerce_args(qs)
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]

if not separator or (not isinstance(separator, (str, bytes))):
raise ValueError("Separator must be of type string or bytes.")

pairs = [s1 for s1 in qs.split(separator)]
r = []
for name_value in pairs:
if not name_value and not strict_parsing:
Expand Down

0 comments on commit b1f98fd

Please sign in to comment.