diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst index 41219ee..116a69c 100644 --- a/Doc/library/cgi.rst +++ b/Doc/library/cgi.rst @@ -188,6 +188,9 @@ A form submitted via POST that also has a query string will contain both Added support for the context management protocol to the :class:`FieldStorage` class. +.. versionchanged:: 3.5.3-1+deb9u4 + Added the *separator* parameter. + Higher Level Interface ---------------------- @@ -277,10 +280,10 @@ These are useful if you want more control, or if you want to employ some of the algorithms implemented in this module in other circumstances. -.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False) +.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator="&") Parse a query in the environment or from a file (the file defaults to - ``sys.stdin``). The *keep_blank_values* and *strict_parsing* parameters are + ``sys.stdin``). The *keep_blank_values*, *strict_parsing* parameters and *separator* parameters are passed to :func:`urllib.parse.parse_qs` unchanged. diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 5378d8a..e6060bb 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -135,7 +135,7 @@ or on combining URL components into a URL string. now raise :exc:`ValueError`. -.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace') +.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', separator='&') Parse a query string given as a string argument (data of type :mimetype:`application/x-www-form-urlencoded`). Data are returned as a @@ -156,6 +156,9 @@ or on combining URL components into a URL string. percent-encoded sequences into Unicode characters, as accepted by the :meth:`bytes.decode` method. + The optional argument *separator* is the symbol to use for separating the + query arguments. It defaults to ``&``. + Use the :func:`urllib.parse.urlencode` function (with the ``doseq`` parameter set to ``True``) to convert such dictionaries into query strings. @@ -164,8 +167,13 @@ or on combining URL components into a URL string. .. versionchanged:: 3.2 Add *encoding* and *errors* parameters. + .. versionchanged:: 3.5.3-1+deb9u4 + Added *separator* parameter with the default value of ``&``. Python + versions earlier than Python 3.7.10 allowed using both ``;`` and ``&`` as + query parameter separator. This has been changed to allow only a single + separator key, with ``&`` as the default separator. -.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace') +.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', separator='&') Parse a query string given as a string argument (data of type :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of @@ -185,6 +193,9 @@ or on combining URL components into a URL string. percent-encoded sequences into Unicode characters, as accepted by the :meth:`bytes.decode` method. + The optional argument *separator* is the symbol to use for separating the + query arguments. It defaults to ``&``. + Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into query strings. diff --git a/Lib/cgi.py b/Lib/cgi.py index 189c6d5..76b5c9b 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -117,7 +117,7 @@ def closelog(): # 0 ==> unlimited input maxlen = 0 -def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): +def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0, separator='&'): """Parse a query in the environment or from a file (default stdin) Arguments, all optional: @@ -136,6 +136,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): strict_parsing: flag indicating what to do with parsing errors. If false (the default), errors are silently ignored. If true, errors raise a ValueError exception. + + separator: str. The symbol to use for separating the query arguments. + Defaults to &. """ if fp is None: fp = sys.stdin @@ -156,7 +159,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): if environ['REQUEST_METHOD'] == 'POST': ctype, pdict = parse_header(environ['CONTENT_TYPE']) if ctype == 'multipart/form-data': - return parse_multipart(fp, pdict) + return parse_multipart(fp, pdict, separator=separator) elif ctype == 'application/x-www-form-urlencoded': clength = int(environ['CONTENT_LENGTH']) if maxlen and clength > maxlen: @@ -180,7 +183,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): qs = "" environ['QUERY_STRING'] = qs # XXX Shouldn't, really return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing, - encoding=encoding) + encoding=encoding, separator=separator) # parse query string function called from urlparse, @@ -198,7 +201,7 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): DeprecationWarning, 2) return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing) -def parse_multipart(fp, pdict): +def parse_multipart(fp, pdict, separator='&'): """Parse multipart input. Arguments: @@ -404,7 +407,7 @@ class FieldStorage: """ def __init__(self, fp=None, headers=None, outerboundary=b'', environ=os.environ, keep_blank_values=0, strict_parsing=0, - limit=None, encoding='utf-8', errors='replace'): + limit=None, encoding='utf-8', errors='replace', separator='&'): """Constructor. Read multipart/* until last part. Arguments, all optional: @@ -448,6 +451,7 @@ def __init__(self, fp=None, headers=None, outerboundary=b'', method = 'GET' self.keep_blank_values = keep_blank_values self.strict_parsing = strict_parsing + self.separator = separator if 'REQUEST_METHOD' in environ: method = environ['REQUEST_METHOD'].upper() self.qs_on_post = None @@ -673,7 +677,7 @@ def read_urlencoded(self): self.list = [] query = urllib.parse.parse_qsl( qs, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors) + encoding=self.encoding, errors=self.errors, separator=self.separator) for key, value in query: self.list.append(MiniFieldStorage(key, value)) self.skip_lines() @@ -689,7 +693,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): if self.qs_on_post: query = urllib.parse.parse_qsl( self.qs_on_post, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors) + encoding=self.encoding, errors=self.errors, separator=self.separator) for key, value in query: self.list.append(MiniFieldStorage(key, value)) @@ -727,7 +731,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): part = klass(self.fp, headers, ib, environ, keep_blank_values, strict_parsing,self.limit-self.bytes_read, - self.encoding, self.errors) + self.encoding, self.errors, self.separator) self.bytes_read += part.bytes_read self.list.append(part) if part.done or self.bytes_read >= self.length > 0: diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py index ab9f6ab..bedcc13 100644 --- a/Lib/test/test_cgi.py +++ b/Lib/test/test_cgi.py @@ -54,12 +54,9 @@ def do_test(buf, method): ("", ValueError("bad query field: ''")), ("&", ValueError("bad query field: ''")), ("&&", ValueError("bad query field: ''")), - (";", ValueError("bad query field: ''")), - (";&;", ValueError("bad query field: ''")), # Should the next few really be valid? ("=", {}), ("=&=", {}), - ("=;=", {}), # This rest seem to make sense ("=a", {'': ['a']}), ("&=a", ValueError("bad query field: ''")), @@ -74,8 +71,6 @@ def do_test(buf, method): ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), ("a=a+b&a=b+a", {'a': ['a b', 'b a']}), ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), - ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), - ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env", {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'], 'cuyer': ['r'], @@ -178,7 +173,29 @@ def test_strict(self): self.assertEqual(fs.getvalue(key), expect_val) else: self.assertEqual(fs.getvalue(key), expect_val[0]) - + def test_separator(self): + parse_semicolon = [ + ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}), + ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), + (";", ValueError("bad query field: ''")), + (";;", ValueError("bad query field: ''")), + ("=;a", ValueError("bad query field: 'a'")), + (";b=a", ValueError("bad query field: ''")), + ("b;=a", ValueError("bad query field: 'b'")), + ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), + ("a=a+b;a=b+a", {'a': ['a b', 'b a']}), + ] + for orig, expect in parse_semicolon: + env = {'QUERY_STRING': orig} + fs = cgi.FieldStorage(separator=';', environ=env) + if isinstance(expect, dict): + for key in expect.keys(): + expect_val = expect[key] + self.assertIn(key, fs) + if len(expect_val) > 1: + self.assertEqual(fs.getvalue(key), expect_val) + else: + self.assertEqual(fs.getvalue(key), expect_val[0]) def test_log(self): cgi.log("Testing") diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index c6378b9..1b73438 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -32,16 +32,10 @@ (b"&a=b", [(b'a', b'b')]), (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), - (";", []), - (";;", []), - (";a=b", [('a', 'b')]), - ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), - ("a=1;a=2", [('a', '1'), ('a', '2')]), - (b";", []), - (b";;", []), - (b";a=b", [(b'a', b'b')]), - (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), - (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), + (";a=b", [(';a', 'b')]), + ("a=a+b;b=b+c", [('a', 'a b;b=b c')]), + (b";a=b", [(b';a', b'b')]), + (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), ] parse_qs_test_cases = [ @@ -65,16 +59,10 @@ (b"&a=b", {b'a': [b'b']}), (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), (b"a=1&a=2", {b'a': [b'1', b'2']}), - (";", {}), - (";;", {}), - (";a=b", {'a': ['b']}), - ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), - ("a=1;a=2", {'a': ['1', '2']}), - (b";", {}), - (b";;", {}), - (b";a=b", {b'a': [b'b']}), - (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), - (b"a=1;a=2", {b'a': [b'1', b'2']}), + (";a=b", {';a': ['b']}), + ("a=a+b;b=b+c", {'a': ['a b;b=b c']}), + (b";a=b", {b';a': [b'b']}), + (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}), ] class UrlParseTestCase(unittest.TestCase): @@ -867,6 +855,42 @@ def test_parse_qsl_encoding(self): errors="ignore") self.assertEqual(result, [('key', '\u0141-')]) + def test_parse_qs_separator(self): + parse_qs_semicolon_cases = [ + (";", {}), + (";;", {}), + (";a=b", {'a': ['b']}), + ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), + ("a=1;a=2", {'a': ['1', '2']}), + (b";", {}), + (b";;", {}), + (b";a=b", {b'a': [b'b']}), + (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), + (b"a=1;a=2", {b'a': [b'1', b'2']}), + ] + for orig, expect in parse_qs_semicolon_cases: + with self.subTest("Original: %s, Expected: %s"%(orig, expect)): + result = urllib.parse.parse_qs(orig, separator=';') + self.assertEqual(result, expect, "Error parsing %r" % orig) + + def test_parse_qsl_separator(self): + parse_qsl_semicolon_cases = [ + (";", []), + (";;", []), + (";a=b", [('a', 'b')]), + ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), + ("a=1;a=2", [('a', '1'), ('a', '2')]), + (b";", []), + (b";;", []), + (b";a=b", [(b'a', b'b')]), + (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), + (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), + ] + for orig, expect in parse_qsl_semicolon_cases: + with self.subTest("Original: %s, Expected: %s"%(orig, expect)): + result = urllib.parse.parse_qsl(orig, separator=';') + self.assertEqual(result, expect, "Error parsing %r" % orig) + def test_urlencode_sequences(self): # Other tests incidentally urlencode things; test non-covered cases: # Sequence and object values. diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 7158c50..6b70c14 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -571,7 +571,7 @@ def unquote(string, encoding='utf-8', errors='replace'): return ''.join(res) def parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): + encoding='utf-8', errors='replace', separator='&'): """Parse a query given as a string argument. Arguments: @@ -591,10 +591,13 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding and errors: specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. + + separator: str. The symbol to use for separating the query arguments. + Defaults to &. """ parsed_result = {} pairs = parse_qsl(qs, keep_blank_values, strict_parsing, - encoding=encoding, errors=errors) + encoding=encoding, errors=errors, separator=separator) for name, value in pairs: if name in parsed_result: parsed_result[name].append(value) @@ -603,7 +606,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, return parsed_result def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): + encoding='utf-8', errors='replace', separator='&'): """Parse a query given as a string argument. Arguments: @@ -623,10 +626,17 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding and errors: specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. + separator: str. The symbol to use for separating the query arguments. + Defaults to &. + Returns a list, as G-d intended. """ qs, _coerce_result = _coerce_args(qs) - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + + if not separator or (not isinstance(separator, (str, bytes))): + raise ValueError("Separator must be of type string or bytes.") + + pairs = [s1 for s1 in qs.split(separator)] r = [] for name_value in pairs: if not name_value and not strict_parsing: