From 23240ed6a5445b14fda2c7f6deed43b5dd09c996 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 29 Apr 2026 13:37:14 +0300 Subject: [PATCH 1/3] gh-149028: Make cookies parsing more lenient Accept RFC 2965 syntax for quoted values and RFC 6265 syntax for unquoted values. Fix non-linear complexity in regexps. --- Lib/http/cookies.py | 71 +++++++++++++++++++++-------------- Lib/test/test_http_cookies.py | 12 +++--- 2 files changed, 49 insertions(+), 34 deletions(-) diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index 660fec4f1be865..f4dcf41dbcb49c 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -450,30 +450,37 @@ def OutputString(self, attrs=None): # specifications. I have since discovered that MSIE 3.0x doesn't # follow the character rules outlined in those specs. As a # result, the parsing rules here are less strict. -# +# Currently, it is a hybrid of RFC 2109/2965 (for quoted strings) +# and RFC 6265. + +# token, defined in RFC 2616, Section 2.2 +_token = r"[\w\d!#$%&'*+\-.^_`|~]+" +# cookie-name, defined in RFC 6265, Section 4.1.1 +_StrictKeyPattern = re.compile(_token) +# quoted-string, defined in RFC 2616, Section 2.2 +_quoted_string = fr'"(?:\\[\x00-\x7f]|[^"\x00-\x1f]|[\t\r\n])*+"' +# value, defined in RFC 2965, Section 3.1 +_StrictRFC2965ValuePattern = re.compile(fr'{_token}|{_quoted_string}') +# cookie-value, defined in RFC 6265, Section 4.1.1 +_cookie_octet = r"[\w\d!#$%&'()*+\-./:<=>?@\[\]^_`{|}~]" +_StrictRFC6265ValuePattern = re.compile(fr'{_cookie_octet}*|"{_cookie_octet}*+"') +# hybrid pattern +_StrictValuePattern = re.compile(fr'{_cookie_octet}*|{_quoted_string}') -_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=" -_LegalValueChars = _LegalKeyChars + r'\[\]' _CookiePattern = re.compile(r""" - \s* # Optional whitespace at start of cookie - (?P # Start of group 'key' - [""" + _LegalKeyChars + r"""]+? # Any word of at least one letter - ) # End of group 'key' - ( # Optional group: there may not be a value. 
- \s*=\s* # Equal Sign - (?P # Start of group 'val' - "(?:\\"|.)*?" # Any double-quoted string - | # or - # Special case for "expires" attr - (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day - [\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Date and time in specific format - | # or - [""" + _LegalValueChars + r"""]* # Any word or empty string - ) # End of group 'val' - )? # End of optional value group - \s* # Any number of spaces. - (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe. + \s*+ # Optional whitespace at start of cookie + ([^=;]*+) # Name: any characters except "=" and ";" (RFC 6265) + (?: # Optional group: there may not be a value. + \s*+=\s*+ # Equal Sign + ( # Value: + "(?:\\.|[^"])*+" # Any double-quoted string (RFC 2109/2965) + | # or + [^;]*+ # Any characters except ";" (RFC 6265) + ) + )?+ # End of optional value group + \s*+ # Any number of spaces. + (?:;|\z) # Ending either at semicolon, or EOS. + """, re.ASCII | re.VERBOSE) # re.ASCII is needed for \s. # At long last, here is the cookie class. Using this class is almost just like @@ -547,21 +554,21 @@ def js_output(self, attrs=None): result.append(value.js_output(attrs)) return _nulljoin(result) - def load(self, rawdata): + def load(self, rawdata, *, strict=False): """Load cookies from a string (presumably HTTP_COOKIE) or from a dictionary. 
Loading cookies from a dictionary 'd' is equivalent to calling: map(Cookie.__setitem__, d.keys(), d.values()) """ if isinstance(rawdata, str): - self.__parse_string(rawdata) + self.__parse_string(rawdata, strict) else: # self.update() wouldn't call our custom __setitem__ for key, value in rawdata.items(): self[key] = value return - def __parse_string(self, str, patt=_CookiePattern): + def __parse_string(self, str, strict): i = 0 # Our starting point n = len(str) # Length of string parsed_items = [] # Parsed (type, key, value) triples @@ -575,13 +582,21 @@ def __parse_string(self, str, patt=_CookiePattern): # attacks). while 0 <= i < n: # Start looking for a cookie - match = patt.match(str, i) + match = _CookiePattern.match(str, i) if not match: # No more cookies break - key, value = match.group("key"), match.group("val") - i = match.end(0) + key, value = match.groups() + key = key.rstrip(' \t\r\n') + if value: + value = value.rstrip(' \t\r\n') + if strict: + if not _StrictKeyPattern.fullmatch(key): + break + if value and not _StrictValuePattern.fullmatch(value): + break + i = match.end() if key[0] == "$": if not morsel_seen: diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py index cfcbc17bd6df80..2edcea5f3fc8fd 100644 --- a/Lib/test/test_http_cookies.py +++ b/Lib/test/test_http_cookies.py @@ -50,12 +50,12 @@ def test_basic(self): )) }, - # gh-92936: allow double quote in cookie values + # gh-149028: allow any characters in unquoted cookie values { - 'data': 'cookie="{"key": "value"}"', + 'data': 'cookie={"key": "value"}', 'dict': {'cookie': '{"key": "value"}'}, 'repr': "", - 'output': 'Set-Cookie: cookie="{"key": "value"}"', + 'output': 'Set-Cookie: cookie={"key": "value"}', }, { 'data': 'key="some value; surrounded by quotes"', @@ -64,11 +64,11 @@ def test_basic(self): 'output': 'Set-Cookie: key="some value; surrounded by quotes"', }, { - 'data': 'session="user123"; preferences="{"theme": "dark"}"', + 'data': 'session="user123"; 
preferences={"theme": "dark"}', 'dict': {'session': 'user123', 'preferences': '{"theme": "dark"}'}, 'repr': "", 'output': '\n'.join(( - 'Set-Cookie: preferences="{"theme": "dark"}"', + 'Set-Cookie: preferences={"theme": "dark"}', 'Set-Cookie: session="user123"', )) } @@ -316,7 +316,7 @@ def test_invalid_cookies(self): 'Set-Cookie: foo=bar', 'Set-Cookie: foo', 'foo=bar; baz', 'baz; foo=bar', 'secure;foo=bar', 'Version=1;foo=bar'): - C.load(s) + C.load(s, strict=True) self.assertEqual(dict(C), {}) self.assertEqual(C.output(), '') From d5b5cbec60b99a752f537b7f72d15cb7af469a7d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 30 Apr 2026 11:46:29 +0300 Subject: [PATCH 2/3] Minimize the diff. Always strict key check, no strict value check. --- Lib/http/cookies.py | 31 ++++++++----------------------- Lib/test/test_http_cookies.py | 10 ++-------- 2 files changed, 10 insertions(+), 31 deletions(-) diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index f4dcf41dbcb49c..de877da3c03ded 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -453,20 +453,6 @@ def OutputString(self, attrs=None): # Currently, it is a hybrid of RFC 2109/2965 (for quoted strings) # and RFC 6265. 
-# token, defined in RFC 2616, Section 2.2 -_token = r"[\w\d!#$%&'*+\-.^_`|~]+" -# cookie-name, defined in RFC 6265, Section 4.1.1 -_StrictKeyPattern = re.compile(_token) -# quoted-string, defined in RFC 2616, Section 2.2 -_quoted_string = fr'"(?:\\[\x00-\x7f]|[^"\x00-\x1f]|[\t\r\n])*+"' -# value, defined in RFC 2965, Section 3.1 -_StrictRFC2965ValuePattern = re.compile(fr'{_token}|{_quoted_string}') -# cookie-value, defined in RFC 6265, Section 4.1.1 -_cookie_octet = r"[\w\d!#$%&'()*+\-./:<=>?@\[\]^_`{|}~]" -_StrictRFC6265ValuePattern = re.compile(fr'{_cookie_octet}*|"{_cookie_octet}*+"') -# hybrid pattern -_StrictValuePattern = re.compile(fr'{_cookie_octet}*|{_quoted_string}') - _CookiePattern = re.compile(r""" \s*+ # Optional whitespace at start of cookie ([^=;]*+) # Name: any characters except "=" and ";" (RFC 6265) @@ -554,21 +540,21 @@ def js_output(self, attrs=None): result.append(value.js_output(attrs)) return _nulljoin(result) - def load(self, rawdata, *, strict=False): + def load(self, rawdata): """Load cookies from a string (presumably HTTP_COOKIE) or from a dictionary. Loading cookies from a dictionary 'd' is equivalent to calling: map(Cookie.__setitem__, d.keys(), d.values()) """ if isinstance(rawdata, str): - self.__parse_string(rawdata, strict) + self.__parse_string(rawdata) else: # self.update() wouldn't call our custom __setitem__ for key, value in rawdata.items(): self[key] = value return - def __parse_string(self, str, strict): + def __parse_string(self, str, patt=_CookiePattern): i = 0 # Our starting point n = len(str) # Length of string parsed_items = [] # Parsed (type, key, value) triples @@ -582,7 +568,7 @@ def __parse_string(self, str, strict): # attacks). 
while 0 <= i < n: # Start looking for a cookie - match = _CookiePattern.match(str, i) + match = patt.match(str, i) if not match: # No more cookies break @@ -591,11 +577,10 @@ def __parse_string(self, str, strict): key = key.rstrip(' \t\r\n') if value: value = value.rstrip(' \t\r\n') - if strict: - if not _StrictKeyPattern.fullmatch(key): - break - if value and not _StrictValuePattern.fullmatch(value): - break + if not _is_legal_key(key): + break + if value and _has_control_character(value): + break i = match.end() if key[0] == "$": diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py index 2edcea5f3fc8fd..41c34231bd745a 100644 --- a/Lib/test/test_http_cookies.py +++ b/Lib/test/test_http_cookies.py @@ -314,9 +314,9 @@ def test_invalid_cookies(self): C = cookies.SimpleCookie() for s in (']foo=x', '[foo=x', 'blah]foo=x', 'blah[foo=x', 'Set-Cookie: foo=bar', 'Set-Cookie: foo', - 'foo=bar; baz', 'baz; foo=bar', + 'foo=bar; baz', 'baz; foo=bar', 'foo,bar=baz', 'secure;foo=bar', 'Version=1;foo=bar'): - C.load(s, strict=True) + C.load(s) self.assertEqual(dict(C), {}) self.assertEqual(C.output(), '') @@ -333,12 +333,6 @@ def test_pickle(self): C1 = pickle.loads(pickle.dumps(C, protocol=proto)) self.assertEqual(C1.output(), expected_output) - def test_illegal_chars(self): - rawdata = "a=b; c,d=e" - C = cookies.SimpleCookie() - with self.assertRaises(cookies.CookieError): - C.load(rawdata) - def test_comment_quoting(self): c = cookies.SimpleCookie() c['foo'] = '\N{COPYRIGHT SIGN}' From 73cac1d2f46065163bb9e981f30310df7f48b60d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 30 Apr 2026 13:31:28 +0300 Subject: [PATCH 3/3] Update docs. 
--- Doc/library/http.cookies.rst | 12 +++++++----- Doc/whatsnew/3.15.rst | 6 ++++-- .../2026-04-30-13-30-51.gh-issue-149028.QhZ93P.rst | 3 +++ 3 files changed, 14 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-30-13-30-51.gh-issue-149028.QhZ93P.rst diff --git a/Doc/library/http.cookies.rst b/Doc/library/http.cookies.rst index b3fcd21c7e2244..9db56530f43e99 100644 --- a/Doc/library/http.cookies.rst +++ b/Doc/library/http.cookies.rst @@ -27,9 +27,6 @@ in a cookie name (as :attr:`~Morsel.key`). .. versionchanged:: 3.3 Allowed '``:``' as a valid cookie name character. -.. versionchanged:: 3.15 - Allowed '``"``' as a valid cookie value character. - .. note:: On encountering an invalid cookie, :exc:`CookieError` is raised, so if your @@ -118,6 +115,11 @@ Cookie Objects for k, v in rawdata.items(): cookie[k] = v + .. versionchanged:: next + Allowed any characters except semicolon (``';'``) and control + characters in non-quoted cookie values. + The ``';'`` separator is now mandatory between name-value pairs. + .. _morsel-objects: @@ -315,8 +317,8 @@ The following example demonstrates how to use the :mod:`!http.cookies` module. Set-Cookie: string=seven >>> import json >>> C = cookies.SimpleCookie() - >>> C.load(f'cookies=7; mixins="{json.dumps({"chips": "dark chocolate"})}"; state=gooey') + >>> C.load(f'cookies=7; mixins={json.dumps({"chips": "dark chocolate"})}; state=gooey') >>> print(C) Set-Cookie: cookies=7 - Set-Cookie: mixins="{"chips": "dark chocolate"}" + Set-Cookie: mixins={"chips": "dark chocolate"} Set-Cookie: state=gooey diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index eb08f8c4ed69e7..8a6904c850ab96 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -941,8 +941,10 @@ http.client http.cookies ------------ -* Allow '``"``' double quotes in cookie values. - (Contributed by Nick Burns and Senthil Kumaran in :gh:`92936`.) 
+* Cookie parsing now allows any characters except semicolon (``';'``) + and control characters in non-quoted cookie values. + The ``';'`` separator is now mandatory between name-value pairs. + (Contributed by Serhiy Storchaka in :gh:`149028`.) http.server diff --git a/Misc/NEWS.d/next/Library/2026-04-30-13-30-51.gh-issue-149028.QhZ93P.rst b/Misc/NEWS.d/next/Library/2026-04-30-13-30-51.gh-issue-149028.QhZ93P.rst new file mode 100644 index 00000000000000..703487c795fcd4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-30-13-30-51.gh-issue-149028.QhZ93P.rst @@ -0,0 +1,3 @@ +Relax parsing :mod:`HTTP cookies <http.cookies>` values: allow any +characters except semicolon (``';'``) and control characters in non-quoted +cookie values. The ``';'`` separator is now mandatory between name-value pairs.