Skip to content

Commit

Permalink
Refactor stream_decode_gzip and _deflate into one function, stream_de…
Browse files Browse the repository at this point in the history
…compress.

stream_decompress now falls back to iterating over the raw data if decompression fails.
Remove gzip decoding from Response.content, as urllib3 already does it.
  • Loading branch information
gazpachoking committed Dec 31, 2011
1 parent f80984f commit 9c6ffc5
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 31 deletions.
17 changes: 5 additions & 12 deletions requests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
ConnectionError, HTTPError, RequestException, Timeout, TooManyRedirects,
URLRequired, SSLError)
from .utils import (
get_encoding_from_headers, stream_decode_response_unicode, decode_gzip,
stream_decode_gzip, stream_decode_deflate, guess_filename, requote_path)
get_encoding_from_headers, stream_decode_response_unicode,
stream_decompress, guess_filename, requote_path)


REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved)
Expand Down Expand Up @@ -488,7 +488,7 @@ def send(self, anyway=False, prefetch=False):
redirect=False,
assert_same_host=False,
preload_content=False,
decode_content=False,
decode_content=True,
retries=self.config.get('max_retries', 0),
timeout=self.timeout,
)
Expand Down Expand Up @@ -614,9 +614,9 @@ def generate():
gen = generate()

if 'gzip' in self.headers.get('content-encoding', ''):
gen = stream_decode_gzip(gen)
gen = stream_decompress(gen, mode='gzip')
elif 'deflate' in self.headers.get('content-encoding', ''):
gen = stream_decode_deflate(gen)
gen = stream_decompress(gen, mode='deflate')

if decode_unicode is None:
decode_unicode = self.config.get('decode_unicode')
Expand Down Expand Up @@ -675,13 +675,6 @@ def content(self):

content = self._content

# Decode GZip'd content.
if 'gzip' in self.headers.get('content-encoding', ''):
try:
content = decode_gzip(self._content)
except zlib.error:
pass

# Decode unicode content.
if self.config.get('decode_unicode'):

Expand Down
40 changes: 21 additions & 19 deletions requests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,36 +354,38 @@ def decode_gzip(content):
return zlib.decompress(content, 16 + zlib.MAX_WBITS)


def stream_decode_gzip(iterator):
    """Stream decodes a gzip-encoded iterator.

    :param iterator: An iterable of gzip-compressed byte chunks
    :return: A generator over the non-empty decompressed chunks

    Decoding is best-effort: if zlib reports an error the generator simply
    stops, so anything already yielded is kept and the rest is dropped.
    """
    try:
        # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
        dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
        for chunk in iterator:
            rv = dec.decompress(chunk)
            if rv:
                yield rv
        # Drain whatever is still buffered in the decompressor. The empty
        # argument has to be bytes; a text '' raises TypeError on Python 3.
        rv = dec.decompress(b'') + dec.flush()
        if rv:
            yield rv
    except zlib.error:
        # Deliberately swallowed: corrupt input ends the stream quietly.
        pass
def stream_decompress(iterator, mode='gzip'):
    """
    Stream decodes an iterator over compressed data

    :param iterator: An iterator over compressed data
    :param mode: 'gzip' or 'deflate'
    :return: An iterator over decompressed data

    This is a generator, so the ValueError for an unknown mode is raised
    when iteration starts rather than when the function is called.

    If zlib fails on a chunk, that chunk and every later one are yielded
    unchanged. Output decompressed before the failure has already been
    yielded, so the consumer may see decompressed data followed by raw data.
    """
    if mode not in ('gzip', 'deflate'):
        raise ValueError('stream_decompress mode must be gzip or deflate')

    # Take a single iterator up front so the raw-data fallback below resumes
    # after the failing chunk even when the caller passed a re-iterable
    # object such as a list (which would otherwise restart from the top).
    iterator = iter(iterator)

    # 16 + MAX_WBITS expects a gzip wrapper; -MAX_WBITS expects a raw
    # deflate stream with no header.
    zlib_mode = 16 + zlib.MAX_WBITS if mode == 'gzip' else -zlib.MAX_WBITS
    dec = zlib.decompressobj(zlib_mode)
    try:
        for chunk in iterator:
            rv = dec.decompress(chunk)
            if rv:
                yield rv
    except zlib.error:
        # If there was an error decompressing, just return the raw chunk
        yield chunk
        # Continue to return the rest of the raw data
        for chunk in iterator:
            yield chunk
    else:
        # Make sure everything has been returned from the decompression
        # object. The empty argument has to be bytes; a text '' raises
        # TypeError on Python 3.
        rv = dec.decompress(b'') + dec.flush()
        if rv:
            yield rv


def stream_decode_deflate(iterator):
    """Stream decodes a deflate-encoded iterator.

    :param iterator: An iterable of raw-deflate byte chunks
    :return: A generator over the non-empty decompressed chunks

    Unlike stream_decompress, a zlib error here ends the stream silently
    instead of falling back to the raw data.
    """
    try:
        dec = zlib.decompressobj(-zlib.MAX_WBITS)
        for chunk in iterator:
            rv = dec.decompress(chunk)
            if rv:
                yield rv
        # Drain the decompressor; the empty argument must be bytes.
        rv = dec.decompress(b'') + dec.flush()
        if rv:
            yield rv
    except zlib.error:
        # Deliberately swallowed: corrupt input ends the stream quietly.
        pass


def requote_path(path):
"""Re-quote the given URL path component.
Expand Down

0 comments on commit 9c6ffc5

Please sign in to comment.