Skip to content

Commit

Permalink
check and test for headers containing return characters or leading wh…
Browse files Browse the repository at this point in the history
…itespace
  • Loading branch information
nateprewitt committed Jul 2, 2016
1 parent 92fe51c commit 2669ab7
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 7 deletions.
1 change: 1 addition & 0 deletions AUTHORS.rst
Expand Up @@ -166,3 +166,4 @@ Patches and Suggestions
- Dmitry Dygalo (`@Stranger6667 <https://github.com/Stranger6667>`_)
- piotrjurkiewicz
- Jesse Shapiro <jesse@jesseshapiro.net> (`@haikuginger <https://github.com/haikuginger>`_)
- Nate Prewitt <nate.prewitt@gmail.com> (`@nateprewitt <https://github.com/nateprewitt>`_)
6 changes: 5 additions & 1 deletion requests/exceptions.py
Expand Up @@ -80,7 +80,11 @@ class InvalidSchema(RequestException, ValueError):


class InvalidURL(RequestException, ValueError):
""" The URL provided was somehow invalid. """
"""The URL provided was somehow invalid."""


class InvalidHeader(RequestException, ValueError):
"""The header value provided was somehow invalid."""


class ChunkedEncodingError(RequestException):
Expand Down
12 changes: 8 additions & 4 deletions requests/models.py
Expand Up @@ -27,7 +27,8 @@
from .utils import (
guess_filename, get_auth_from_url, requote_uri,
stream_decode_response_unicode, to_key_val_list, parse_header_links,
iter_slices, guess_json_utf, super_len, to_native_string)
iter_slices, guess_json_utf, super_len, to_native_string,
check_header_validity)
from .compat import (
cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO,
is_py2, chardet, builtin_str, basestring)
Expand Down Expand Up @@ -403,10 +404,13 @@ def prepare_url(self, url, params):
def prepare_headers(self, headers):
"""Prepares the given HTTP headers."""

self.headers = CaseInsensitiveDict()
if headers:
self.headers = CaseInsensitiveDict((to_native_string(name), value) for name, value in headers.items())
else:
self.headers = CaseInsensitiveDict()
for header in headers.items():
# Raise exception on invalid header value.
check_header_validity(header)
name, value = header
self.headers[to_native_string(name)] = value

def prepare_body(self, data, files, json=None):
"""Prepares the given HTTP body data."""
Expand Down
20 changes: 19 additions & 1 deletion requests/utils.py
Expand Up @@ -27,7 +27,7 @@
basestring)
from .cookies import RequestsCookieJar, cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import InvalidURL, FileModeWarning
from .exceptions import InvalidURL, InvalidHeader, FileModeWarning

_hush_pyflakes = (RequestsCookieJar,)

Expand Down Expand Up @@ -732,6 +732,24 @@ def to_native_string(string, encoding='ascii'):

return out

# Moved outside of function to avoid recompile every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')

def check_header_validity(header):

This comment has been minimized.

Copy link
@jeffherman

jeffherman Aug 9, 2016

Is there a cross-reference link to the specification or RFC for prohibiting non-string values? According to RFC7230 §3.2, the BNF for header field values is:

HTAB           =  %x09 ; horizontal tab
SP             =  %x20
CR             =  %x0D ; carriage return
LF             =  %x0A ; linefeed
CRLF           =  CR LF ; Internet standard newline
VCHAR          =  %x21-7E ; visible (printing) characters

obs-fold       = CRLF 1*( SP / HTAB )
obs-text       = %x80-FF

field-vchar    = VCHAR / obs-text

field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
               = (VCHAR / obs-text) [ 1*( SP / HTAB ) (VCHAR / obs-text) ]
               = (VCHAR / %x80-FF) [ 1*( SP / HTAB ) (VCHAR / %x80-FF) ]

field-value    = *( field-content / obs-fold )
               = *( ((VCHAR / %x80-FF) [ 1*( SP / HTAB ) (VCHAR / %x80-FF) ]) / (CRLF 1*( SP / HTAB )) )

This comment has been minimized.

Copy link
@Lukasa

Lukasa Aug 9, 2016

Member

This question doesn't make any sense. The RFC doesn't forbid non-string values in any literal sense because it's prohibiting something that cannot be done. Header fields are allowed to contain visible characters, the high bytes, and whitespace. Definitionally, then, they are strings (or at least stringish). We cannot, for example, send integers in big-endian form.

What Requests is refusing to do is guess how you want header field values to be rendered. We are now requiring that you pass us a string, which is something that we then do not have to do any guessing with.

This comment has been minimized.

Copy link
@sthompson0

sthompson0 Aug 9, 2016

So this means requests now expects callers to manually encode header fields that are non-string values?

This is breaking some service interface code I'm using and it will take some time to go through and update all of it.

It seems like a better approach would be to auto encode values the old way (or do whatever was being done before) and warn users that a breaking change to requests is coming in the future to give users time to update their code.

What was the old encoding method used?

This comment has been minimized.

Copy link
@sigmavirus24

sigmavirus24 Aug 9, 2016

Contributor

@sthompson0 the documentation has always stated that we expect string values for headers. We didn't have an old encoding method. It was always whatever httplib felt like encoding for whatever version of Python you used. I'm sorry that enforcing our API has broken your service interface code, but we didn't write that code and are under no obligation to fix this for you at the wrong level.

"""Verifies that header value doesn't contain leading whitespace or
return characters. This prevents unintended header injection.
:param header: tuple, in the format (name, value).
"""
name, value = header

if isinstance(value, bytes):
pat = _CLEAN_HEADER_REGEX_BYTE
else:
pat = _CLEAN_HEADER_REGEX_STR
if not pat.match(value):
raise InvalidHeader("Invalid return character or leading space in header: %s" % name)

def urldefragauth(url):
"""
Expand Down
43 changes: 42 additions & 1 deletion tests/test_requests.py
Expand Up @@ -23,7 +23,7 @@
from requests.exceptions import (
ConnectionError, ConnectTimeout, InvalidSchema, InvalidURL,
MissingSchema, ReadTimeout, Timeout, RetryError, TooManyRedirects,
ProxyError)
ProxyError, InvalidHeader)
from requests.models import PreparedRequest
from requests.structures import CaseInsensitiveDict
from requests.sessions import SessionRedirectMixin
Expand Down Expand Up @@ -1128,6 +1128,47 @@ def test_header_keys_are_native(self, httpbin):
assert 'unicode' in p.headers.keys()
assert 'byte' in p.headers.keys()

def test_header_validation(self,httpbin):
"""Ensure prepare_headers regex isn't flagging valid header contents."""
headers_ok = {'foo': 'bar baz qux',
'bar': '1',
'baz': '',
'qux': str.encode(u'fbbq')}
r = requests.get(httpbin('get'), headers=headers_ok)
assert r.request.headers['foo'] == headers_ok['foo']

def test_header_no_return_chars(self, httpbin):
"""Ensure that a header containing return character sequences raise an
exception. Otherwise, multiple headers are created from single string.
"""
headers_ret = {'foo': 'bar\r\nbaz: qux'}
headers_lf = {'foo': 'bar\nbaz: qux'}
headers_cr = {'foo': 'bar\rbaz: qux'}

# Test for newline
with pytest.raises(InvalidHeader):
r = requests.get(httpbin('get'), headers=headers_ret)
# Test for line feed
with pytest.raises(InvalidHeader):
r = requests.get(httpbin('get'), headers=headers_lf)
# Test for carriage return
with pytest.raises(InvalidHeader):
r = requests.get(httpbin('get'), headers=headers_cr)

def test_header_no_leading_space(self, httpbin):
"""Ensure headers containing leading whitespace raise
InvalidHeader Error before sending.
"""
headers_space = {'foo': ' bar'}
headers_tab = {'foo': ' bar'}

# Test for whitespace
with pytest.raises(InvalidHeader):
r = requests.get(httpbin('get'), headers=headers_space)
# Test for tab
with pytest.raises(InvalidHeader):
r = requests.get(httpbin('get'), headers=headers_tab)

@pytest.mark.parametrize('files', ('foo', b'foo', bytearray(b'foo')))
def test_can_send_objects_with_files(self, httpbin, files):
data = {'a': 'this is a string'}
Expand Down

0 comments on commit 2669ab7

Please sign in to comment.