Skip to content

Commit

Permalink
Merge pull request #1688 from dstufft/accept-encoding-identity
Browse files Browse the repository at this point in the history
Accept encoding identity
  • Loading branch information
dstufft committed Mar 27, 2014
2 parents 6e173eb + 190824b commit aca5182
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGES.txt
Expand Up @@ -19,6 +19,10 @@

* Added a virtualenv-specific configuration file. (:pull:`1364`)

* Send `Accept-Encoding: identity` when downloading files in an attempt to
convince some servers which double-compress the downloaded file to stop doing
so. (:pull:`1688`)


**1.5.4 (2014-02-21)**

Expand Down
50 changes: 48 additions & 2 deletions pip/download.py
Expand Up @@ -447,7 +447,30 @@ def resp_read(chunk_size):
# Special case for urllib3.
try:
for chunk in resp.raw.stream(
chunk_size, decode_content=False):
chunk_size,
# We use decode_content=False here because we do not
# want urllib3 to mess with the raw bytes we get
# from the server. If we decompress inside of
# urllib3 then we cannot verify the checksum
# because the checksum will be of the compressed
# file. This breakage will only occur if the
# server adds a Content-Encoding header, which
# depends on how the server was configured:
# - Some servers will notice that the file isn't a
# compressible file and will leave the file alone
# and with an empty Content-Encoding
# - Some servers will notice that the file is
# already compressed and will leave the file
# alone and will add a Content-Encoding: gzip
# header
# - Some servers won't notice anything at all and
# will take a file that's already been compressed
# and compress it again and set the
# Content-Encoding: gzip header
#
# By setting this not to decode automatically we
# hope to eliminate problems with the second case.
decode_content=False):
yield chunk
except IncompleteRead as e:
raise ChunkedEncodingError(e)
Expand Down Expand Up @@ -582,7 +605,30 @@ def unpack_http_url(link, location, download_cache, download_dir=None,
# let's download to a tmp dir
if not temp_location:
try:
resp = session.get(target_url, stream=True)
resp = session.get(
target_url,
# We use Accept-Encoding: identity here because requests
# defaults to accepting compressed responses. This breaks in
# a variety of ways depending on how the server is configured.
# - Some servers will notice that the file isn't a compressible
# file and will leave the file alone and with an empty
# Content-Encoding
# - Some servers will notice that the file is already
# compressed and will leave the file alone and will add a
# Content-Encoding: gzip header
# - Some servers won't notice anything at all and will take
# a file that's already been compressed and compress it again
# and set the Content-Encoding: gzip header
# By setting this to request only the identity encoding, we're
# hoping to eliminate the third case. Hopefully there does not
# exist a server which, when given a file, will notice it is
# already compressed and that you're not asking for a
# compressed file, and will then decompress it before sending;
# if that's the case, I don't think it'll ever be
# possible to make this work.
headers={"Accept-Encoding": "identity"},
stream=True,
)
resp.raise_for_status()
except requests.HTTPError as exc:
logger.fatal("HTTP error %s while getting %s" %
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/test_download.py
Expand Up @@ -109,6 +109,7 @@ def test_unpack_http_url_bad_cache_checksum(mock_unpack_file):
# despite existence of cached file with bad hash, downloaded again
session.get.assert_called_once_with(
"http://www.example.com/somepackage.tgz",
headers={"Accept-Encoding": "identity"},
stream=True,
)
# cached file is replaced with newly downloaded file
Expand Down Expand Up @@ -151,6 +152,7 @@ def test_unpack_http_url_bad_downloaded_checksum(mock_unpack_file):
# despite existence of downloaded file with bad hash, downloaded again
session.get.assert_called_once_with(
'http://www.example.com/somepackage.tgz',
headers={"Accept-Encoding": "identity"},
stream=True,
)
# cached file is replaced with newly downloaded file
Expand Down

0 comments on commit aca5182

Please sign in to comment.