Skip to content

Commit

Permalink
bpo-39389: gzip: fix compression level metadata (GH-18077)
Browse files Browse the repository at this point in the history
As described in RFC 1952, section 2.3.1, the XFL (eXtra FLags) byte of a
gzip member header should indicate whether the DEFLATE algorithm was
tuned for speed or compression ratio. Prior to this patch, archives
emitted by the `gzip` module always indicated maximum compression.
(cherry picked from commit eab3b3f)

Co-authored-by: William Chargin <wchargin@gmail.com>
  • Loading branch information
miss-islington and wchargin committed Jan 21, 2020
1 parent 060ad2f commit ab0d8e3
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 3 deletions.
12 changes: 9 additions & 3 deletions Lib/gzip.py
Expand Up @@ -201,7 +201,7 @@ def __init__(self, filename=None, mode=None,
self.fileobj = fileobj

if self.mode == WRITE:
self._write_gzip_header()
self._write_gzip_header(compresslevel)

@property
def filename(self):
Expand All @@ -228,7 +228,7 @@ def _init_write(self, filename):
self.bufsize = 0
self.offset = 0 # Current file offset for seek(), tell(), etc

def _write_gzip_header(self):
def _write_gzip_header(self, compresslevel):
self.fileobj.write(b'\037\213') # magic header
self.fileobj.write(b'\010') # compression method
try:
Expand All @@ -249,7 +249,13 @@ def _write_gzip_header(self):
if mtime is None:
mtime = time.time()
write32u(self.fileobj, int(mtime))
self.fileobj.write(b'\002')
if compresslevel == _COMPRESS_LEVEL_BEST:
xfl = b'\002'
elif compresslevel == _COMPRESS_LEVEL_FAST:
xfl = b'\004'
else:
xfl = b'\000'
self.fileobj.write(xfl)
self.fileobj.write(b'\377')
if fname:
self.fileobj.write(fname + b'\000')
Expand Down
20 changes: 20 additions & 0 deletions Lib/test/test_gzip.py
Expand Up @@ -358,6 +358,26 @@ def test_metadata(self):
isizeBytes = fRead.read(4)
self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))

def test_compresslevel_metadata(self):
    """Check that the XFL header byte reflects the requested compresslevel."""
    # RFC 1952, section 2.3.1 (http://www.faqs.org/rfcs/rfc1952.html):
    # XFL is 2 for maximum compression, 4 for fastest, and 0 otherwise.
    # It sits right after ID1, ID2, CM, FLG and the four-byte MTIME field.
    xfl_offset = 8
    expectations = (
        ('fast', 1, b'\x04'),
        ('best', 9, b'\x02'),
        ('tradeoff', 6, b'\x00'),
    )

    for label, compresslevel, expected_xfl in expectations:
        with self.subTest(label):
            # Write a fresh archive at this level; leaving the context
            # closes the member before the raw bytes are inspected.
            with gzip.GzipFile(self.filename, 'w',
                               compresslevel=compresslevel) as writer:
                writer.write(data1)
            with open(self.filename, 'rb') as raw:
                raw.seek(xfl_offset)
                actual_xfl = raw.read(1)
                self.assertEqual(actual_xfl, expected_xfl)

def test_with_open(self):
# GzipFile supports the context management protocol
with gzip.GzipFile(self.filename, "wb") as f:
Expand Down
@@ -0,0 +1,2 @@
Write accurate compression level metadata in :mod:`gzip` archives, rather
than always signaling maximum compression.

0 comments on commit ab0d8e3

Please sign in to comment.