Skip to content

Commit

Permalink
httputil: Only strip tabs and spaces from header values
Browse files Browse the repository at this point in the history
The RFC specifies that only tabs and spaces should be stripped.
Removing additional whitespace characters can lead to framing
errors with certain proxies.
  • Loading branch information
bdarnell committed Jun 6, 2024
1 parent fb119c7 commit 8d721a8
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
7 changes: 5 additions & 2 deletions tornado/httputil.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@
from asyncio import Future # noqa: F401
import unittest # noqa: F401

# The only characters HTTP treats as strippable whitespace around header
# values: space and horizontal tab. Pass this explicitly to str.strip() and
# related methods; their no-argument form also removes other whitespace
# characters, which must be preserved in header values.
HTTP_WHITESPACE = " \t"


@lru_cache(1000)
def _normalize_header(name: str) -> str:
Expand Down Expand Up @@ -171,15 +174,15 @@ def parse_line(self, line: str) -> None:
# continuation of a multi-line header
if self._last_key is None:
raise HTTPInputError("first header line cannot start with whitespace")
new_part = " " + line.lstrip()
new_part = " " + line.lstrip(HTTP_WHITESPACE)
self._as_list[self._last_key][-1] += new_part
self._dict[self._last_key] += new_part
else:
try:
name, value = line.split(":", 1)
except ValueError:
raise HTTPInputError("no colon in header line")
self.add(name, value.strip())
self.add(name, value.strip(HTTP_WHITESPACE))

@classmethod
def parse(cls, headers: str) -> "HTTPHeaders":
Expand Down
19 changes: 19 additions & 0 deletions tornado/test/httputil_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,25 @@ def test_unicode_newlines(self):
gen_log.warning("failed while trying %r in %s", newline, encoding)
raise

def test_unicode_whitespace(self):
    # The HTTP standard allows only spaces and tabs to be stripped from
    # header values. Any other character that Python regards as
    # whitespace must be kept; for headers the relevant ones are those
    # that exist in the latin1 charset.
    #
    # Each case is (leading character, whether parsing should strip it).
    cases = (
        (" ", True),  # SPACE
        ("\t", True),  # TAB
        ("\u00a0", False),  # NON-BREAKING SPACE
        ("\u0085", False),  # NEXT LINE
    )
    for char, is_stripped in cases:
        parsed = HTTPHeaders.parse("Transfer-Encoding: %schunked" % char)
        # A stripped character disappears from the value; anything else
        # stays in front of it unchanged.
        value = "chunked" if is_stripped else "%schunked" % char
        expected = [("Transfer-Encoding", value)]
        self.assertEqual(expected, list(parsed.get_all()))

def test_optional_cr(self):
# Both CRLF and LF should be accepted as separators. CR should not be
# part of the data when followed by LF, but it is a normal char
Expand Down

0 comments on commit 8d721a8

Please sign in to comment.