diff --git a/Doc/library/http.cookies.rst b/Doc/library/http.cookies.rst index 1122b30d29def0..925135ef84b5c4 100644 --- a/Doc/library/http.cookies.rst +++ b/Doc/library/http.cookies.rst @@ -116,6 +116,11 @@ Cookie Objects for k, v in rawdata.items(): cookie[k] = v + .. versionchanged:: next + Allowed any characters except semicolon (``';'``) and control + characters in non-quoted cookie values. + The ``';'`` separator is now mandatory between name-value pairs. + .. _morsel-objects: diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index a687ee5115be05..554a5944626990 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -947,6 +947,15 @@ http.client (Contributed by Alexander Enrique Urieles Nieto in :gh:`131724`.) +http.cookies +------------ + +* Cookie parsing now allows any characters except semicolon (``';'``) + and control characters in non-quoted cookie values. + The ``';'`` separator is now mandatory between name-value pairs. + (Contributed by Serhiy Storchaka in :gh:`149028`.) + + http.server ----------- diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index 5c5b14788dc2f0..3470a2c9907881 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -450,30 +450,23 @@ def OutputString(self, attrs=None): # specifications. I have since discovered that MSIE 3.0x doesn't # follow the character rules outlined in those specs. As a # result, the parsing rules here are less strict. -# +# Currently, it is a hybrid of RFC 2109/2965 (for quoted strings) +# and RFC 6265. -_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=" -_LegalValueChars = _LegalKeyChars + r'\[\]' _CookiePattern = re.compile(r""" - \s* # Optional whitespace at start of cookie - (?P # Start of group 'key' - [""" + _LegalKeyChars + r"""]+? # Any word of at least one letter - ) # End of group 'key' - ( # Optional group: there may not be a value. 
- \s*=\s* # Equal Sign - (?P # Start of group 'val' - "(?:[^\\"]|\\.)*" # Any double-quoted string - | # or - # Special case for "expires" attr - (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day - [\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Date and time in specific format - | # or - [""" + _LegalValueChars + r"""]* # Any word or empty string - ) # End of group 'val' - )? # End of optional value group - \s* # Any number of spaces. - (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe. + \s*+ # Optional whitespace at start of cookie + ([^=;]*+) # Name: any characters except "=" and ";" (RFC 6265) + (?: # Optional group: there may not be a value. + \s*+=\s*+ # Equal Sign + ( # Value: + "(?:[^\\"]|\\.)*+" # Any double-quoted string (RFC 2109/2965) + | # or + [^;]*+ # Any characters except ";" (RFC 6265) + ) + )?+ # End of optional value group + \s*+ # Any number of spaces. + (?:;|\z) # Ending either at semicolon, or EOS. + """, re.ASCII | re.VERBOSE) # re.ASCII is needed for \s. # At long last, here is the cookie class. 
Using this class is almost just like @@ -580,8 +573,15 @@ def __parse_string(self, str, patt=_CookiePattern): # No more cookies break - key, value = match.group("key"), match.group("val") - i = match.end(0) + key, value = match.groups() + key = key.rstrip(' \t\r\n') + if value: + value = value.rstrip(' \t\r\n') + if not _is_legal_key(key): + break + if value and _has_control_character(value): + break + i = match.end() if key[0] == "$": if not morsel_seen: diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py index 4884b07c95b9c5..67494329faa85a 100644 --- a/Lib/test/test_http_cookies.py +++ b/Lib/test/test_http_cookies.py @@ -48,7 +48,21 @@ def test_basic(self): 'Set-Cookie: d=r', 'Set-Cookie: f=h' )) - } + }, + + # gh-149028: allow any characters in non-quoted cookie values + { + 'data': 'cookie={"key": "value"}', + 'dict': {'cookie': '{"key": "value"}'}, + 'repr': "", + 'output': 'Set-Cookie: cookie={"key": "value"}', + }, + { + 'data': 'key="some value; surrounded by quotes"', + 'dict': {'key': 'some value; surrounded by quotes'}, + 'repr': "", + 'output': 'Set-Cookie: key="some value; surrounded by quotes"', + }, ] for case in cases: @@ -291,7 +305,7 @@ def test_invalid_cookies(self): C = cookies.SimpleCookie() for s in (']foo=x', '[foo=x', 'blah]foo=x', 'blah[foo=x', 'Set-Cookie: foo=bar', 'Set-Cookie: foo', - 'foo=bar; baz', 'baz; foo=bar', + 'foo=bar; baz', 'baz; foo=bar', 'foo,bar=baz', 'secure;foo=bar', 'Version=1;foo=bar'): C.load(s) self.assertEqual(dict(C), {}) @@ -310,12 +324,6 @@ def test_pickle(self): C1 = pickle.loads(pickle.dumps(C, protocol=proto)) self.assertEqual(C1.output(), expected_output) - def test_illegal_chars(self): - rawdata = "a=b; c,d=e" - C = cookies.SimpleCookie() - with self.assertRaises(cookies.CookieError): - C.load(rawdata) - def test_comment_quoting(self): c = cookies.SimpleCookie() c['foo'] = '\N{COPYRIGHT SIGN}' diff --git a/Misc/NEWS.d/next/Library/2026-04-30-13-30-51.gh-issue-149028.QhZ93P.rst 
b/Misc/NEWS.d/next/Library/2026-04-30-13-30-51.gh-issue-149028.QhZ93P.rst new file mode 100644 index 00000000000000..703487c795fcd4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-30-13-30-51.gh-issue-149028.QhZ93P.rst @@ -0,0 +1,3 @@ +Relax parsing :mod:`HTTP cookies <http.cookies>` values: allow any +characters except semicolon (``';'``) and control characters in non-quoted +cookie values. The ``';'`` separator is now mandatory between name-value pairs.