Skip to content

Commit

Permalink
py3: Be able to read and write non-ASCII headers
Browse files Browse the repository at this point in the history
Python 3's stdlib is stricter than Python 2's about what a header name
may look like. As a result, if an account, container, or object had a
non-ASCII metadata name (non-ASCII values were fine), the proxy-server
would stop parsing at that header, dropping it and every header after
it. See https://bugs.python.org/issue37093 for more information.

This presented several problems:
- Since the non-ASCII header aborts parsing, we may lose important
  HTTP-level information like Content-Length or Transfer-Encoding.
- Since the offending header wouldn't get parsed, the client wouldn't
  even know what the problem was.
- Even if the client knew what the bad header was, it would have no way
  to clear it, as the server uses the same logic to parse incoming
  requests.

So, hack in our own header parsing if we detect that parsing was
aborted. Note that we also have to mangle bufferedhttp's putheader so we
can get non-ASCII headers to the backend servers.

Now, we can run the test_unicode_metadata tests in
test/functional/test_account.py and test/functional/test_container.py
under py2 against services running under py3.

Change-Id: I0f03c211f35a9a49e047a5718a9907b515ca88d7
  • Loading branch information
tipabu authored and matthewoliver committed Jul 3, 2019
1 parent bf3e254 commit 76fde89
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
22 changes: 22 additions & 0 deletions swift/common/bufferedhttp.py
Expand Up @@ -83,6 +83,23 @@ def __init__(self, sock, debuglevel=0, strict=0,
self.will_close = _UNKNOWN # conn will close at end of response
self._readline_buffer = b''

if not six.PY2:
def begin(self):
    """Read the status line and headers, recovering any dropped headers.

    The stdlib parser stops at the first header whose name is not
    ASCII (https://bugs.python.org/issue37093) and leaves that line and
    everything after it in the message payload. Parse that leftover
    text ourselves and add the headers back to ``self.headers``.
    """
    HTTPResponse.begin(self)
    leftover = self.headers.get_payload()
    # With a ``message/*`` Content-Type the parser wraps whatever follows
    # the headers in a single nested message, so the payload is a
    # one-element list rather than text. Unwrap it instead of crashing
    # on ``list.rstrip``.
    if isinstance(leftover, list) and len(leftover) == 1:
        leftover = leftover[0].get_payload()
    if not leftover or not isinstance(leftover, str):
        # Nothing was left unparsed, or it is in a shape we do not know
        # how to recover from; keep what the stdlib gave us.
        return

    # This shouldn't be here. We must've bumped up against
    # https://bugs.python.org/issue37093
    for line in leftover.rstrip('\r\n').split('\n'):
        if ':' not in line or line[:1] in ' \t':
            # Well, we're no more broken than we were before...
            # Should we support line folding?
            # How can/should we handle a bad header line?
            break
        header, value = line.split(':', 1)
        value = value.strip(' \t\n\r')
        self.headers.add_header(header, value)

def expect_response(self):
if self.fp:
self.fp.close()
Expand Down Expand Up @@ -198,6 +215,11 @@ def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
return HTTPConnection.putrequest(self, method, url, skip_host,
skip_accept_encoding)

def putheader(self, header, value, *values):
    """Queue a request header, allowing non-ASCII header names.

    Text header names are encoded as latin-1 before being handed to
    the stdlib so that non-ASCII names can be sent to the backend
    servers; names that are already bytes are passed through untouched.

    :param header: header name, as text or bytes
    :param value: header value
    :param values: optional additional values, forwarded unchanged to
                   ``HTTPConnection.putheader`` to match its signature
    """
    if not isinstance(header, bytes):
        header = header.encode('latin-1')
    HTTPConnection.putheader(self, header, value, *values)

def getexpect(self):
kwargs = {'method': self._method}
if hasattr(self, 'strict'):
Expand Down
26 changes: 26 additions & 0 deletions swift/common/wsgi.py
Expand Up @@ -464,6 +464,32 @@ def parse_request(self):
# else, mangled protocol, most likely; let base class deal with it
return wsgi.HttpProtocol.parse_request(self)

if not six.PY2:
def get_environ(self, *args, **kwargs):
    """Build the WSGI environ, recovering headers the stdlib dropped.

    The stdlib parser stops at the first header whose name is not
    ASCII (https://bugs.python.org/issue37093) and leaves that line and
    everything after it in the message payload. Parse that leftover
    text ourselves, adding each recovered header both to
    ``environ['headers_raw']`` and to the matching WSGI key.

    :returns: the environ dict from the base class, updated in place
    """
    environ = wsgi.HttpProtocol.get_environ(self, *args, **kwargs)
    header_payload = self.headers.get_payload()
    # With a ``message/*`` Content-Type the parser wraps whatever follows
    # the headers in a single nested message, so the payload is a
    # one-element list rather than text. Unwrap it instead of crashing
    # on ``list.rstrip``.
    if isinstance(header_payload, list) and len(header_payload) == 1:
        header_payload = header_payload[0].get_payload()
    if not header_payload or not isinstance(header_payload, str):
        # Nothing was left unparsed, or it is in a shape we do not know
        # how to recover from; return the environ untouched.
        return environ

    # This shouldn't be here. We must've bumped up against
    # https://bugs.python.org/issue37093
    headers_raw = list(environ['headers_raw'])
    for line in header_payload.rstrip('\r\n').split('\n'):
        if ':' not in line or line[:1] in ' \t':
            # Well, we're no more broken than we were before...
            # Should we support line folding?
            # Should we 400 a bad header line?
            break
        header, value = line.split(':', 1)
        value = value.strip(' \t\n\r')
        headers_raw.append((header, value))
        # Round-trip through latin1 bytes so that only ASCII letters
        # are upper-cased; non-ASCII characters keep their code points.
        wsgi_key = 'HTTP_' + header.replace('-', '_').encode(
            'latin1').upper().decode('latin1')
        if wsgi_key in ('HTTP_CONTENT_LENGTH',
                        'HTTP_CONTENT_TYPE'):
            # These two are stored without the HTTP_ prefix.
            wsgi_key = wsgi_key[5:]
        environ[wsgi_key] = value
    environ['headers_raw'] = tuple(headers_raw)
    return environ


class SwiftHttpProxiedProtocol(SwiftHttpProtocol):
"""
Expand Down

0 comments on commit 76fde89

Please sign in to comment.