Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-26253: Add compressionlevel to tarfile stream #2962

Merged
merged 5 commits into from
Jun 25, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Doc/library/tarfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ Some facts and figures:
If *fileobj* is specified, it is used as an alternative to a :term:`file object`
opened in binary mode for *name*. It is supposed to be at position 0.

For modes ``'w:gz'``, ``'r:gz'``, ``'w:bz2'``, ``'r:bz2'``, ``'x:gz'``,
``'x:bz2'``, :func:`tarfile.open` accepts the keyword argument
For modes ``'w:gz'``, ``'x:gz'``, ``'w|gz'``, ``'w:bz2'``, ``'x:bz2'``,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add the versionchanged directive.

Document the change in the What's New document.

Copy link
Contributor Author

@jarondl jarondl Aug 1, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

versionchanged - Done

``'w|bz2'``, :func:`tarfile.open` accepts the keyword argument
*compresslevel* (default ``9``) to specify the compression level of the file.

For modes ``'w:xz'`` and ``'x:xz'``, :func:`tarfile.open` accepts the
Expand Down Expand Up @@ -152,6 +152,9 @@ Some facts and figures:
.. versionchanged:: 3.6
The *name* parameter accepts a :term:`path-like object`.

.. versionchanged:: 3.12
The *compresslevel* keyword argument also works for streams.


.. class:: TarFile
:noindex:
Expand Down
22 changes: 13 additions & 9 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,8 @@ class _Stream:
_Stream is intended to be used only internally.
"""

def __init__(self, name, mode, comptype, fileobj, bufsize):
def __init__(self, name, mode, comptype, fileobj, bufsize,
compresslevel):
"""Construct a _Stream object.
"""
self._extfileobj = True
Expand Down Expand Up @@ -371,7 +372,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize):
self._init_read_gz()
self.exception = zlib.error
else:
self._init_write_gz()
self._init_write_gz(compresslevel)

elif comptype == "bz2":
try:
Expand All @@ -383,7 +384,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize):
self.cmp = bz2.BZ2Decompressor()
self.exception = OSError
else:
self.cmp = bz2.BZ2Compressor()
self.cmp = bz2.BZ2Compressor(compresslevel)

elif comptype == "xz":
try:
Expand All @@ -410,13 +411,14 @@ def __del__(self):
if hasattr(self, "closed") and not self.closed:
self.close()

def _init_write_gz(self):
def _init_write_gz(self, compresslevel):
"""Initialize for writing with gzip compression.
"""
self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
-self.zlib.MAX_WBITS,
self.zlib.DEF_MEM_LEVEL,
0)
self.cmp = self.zlib.compressobj(compresslevel,
self.zlib.DEFLATED,
-self.zlib.MAX_WBITS,
self.zlib.DEF_MEM_LEVEL,
0)
timestamp = struct.pack("<L", int(time.time()))
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
if self.name.endswith(".gz"):
Expand Down Expand Up @@ -1649,7 +1651,9 @@ def not_compressed(comptype):
if filemode not in ("r", "w"):
raise ValueError("mode must be 'r' or 'w'")

stream = _Stream(name, filemode, comptype, fileobj, bufsize)
compresslevel = kwargs.pop("compresslevel", 9)
stream = _Stream(name, filemode, comptype, fileobj, bufsize,
compresslevel)
try:
t = cls(name, filemode, stream, **kwargs)
except:
Expand Down
68 changes: 68 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1532,6 +1532,74 @@ class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
class LzmaStreamWriteTest(LzmaTest, StreamWriteTest):
decompressor = lzma.LZMADecompressor if lzma else None

class _CompressedWriteTest(TarTest):
    # Shared helpers for the compresslevel tests; this class is not a
    # standalone test case.  WriteTest is deliberately not inherited because
    # these checks only make sense for the gz and bz2 compressors.
    source = (b"And we move to Bristol where they have a special, "
              b"Very Silly candidate")

    def _compressed_tar(self, compresslevel):
        """Write a one-member archive into memory and return the buffer.

        The archive is opened with ``self.mode`` and the requested
        *compresslevel*; the returned ``io.BytesIO`` receives whatever
        tarfile wrote to it.
        """
        buffer = io.BytesIO()
        archive = tarfile.open(tmpname, self.mode, buffer,
                               compresslevel=compresslevel)
        with archive:
            member = tarfile.TarInfo("foo")
            archive.addfile(member, io.BytesIO(self.source))
        return buffer

    def _test_bz2_header(self, compresslevel):
        """Assert that the first ten bytes written embed *compresslevel*.

        The output is expected to start with ``BZh``, the level digit and
        the ``1AY&SY`` marker.
        """
        written = self._compressed_tar(compresslevel).getvalue()
        expected = b"BZh%d1AY&SY" % compresslevel
        self.assertEqual(written[:10], expected)

    def _test_gz_header(self, compresslevel):
        """Assert that the output starts with the bytes ``1f 8b 08``.

        The asserted bytes do not vary with *compresslevel*, so this mainly
        verifies that an archive can be written at the given level.
        """
        written = self._compressed_tar(compresslevel).getvalue()
        self.assertEqual(written[:3], b"\x1f\x8b\x08")

class Bz2CompressWriteTest(Bz2Test, _CompressedWriteTest, unittest.TestCase):
    # bz2 header check for the regular (seekable) write mode.
    prefix = "w:"

    def test_compression_levels(self):
        # Sample the lowest, a middle and the highest level.
        for level in (1, 5, 9):
            self._test_bz2_header(level)

class Bz2CompressStreamWriteTest(Bz2Test, _CompressedWriteTest,
                                 unittest.TestCase):
    # bz2 header check for the stream write mode.
    prefix = "w|"

    def test_compression_levels(self):
        # Sample the lowest, a middle and the highest level.
        for level in (1, 5, 9):
            self._test_bz2_header(level)

class GzCompressWriteTest(GzipTest, _CompressedWriteTest, unittest.TestCase):
    # gzip header check for the regular (seekable) write mode.
    prefix = "w:"

    def test_compression_levels(self):
        # Sample the lowest, a middle and the highest level.
        for level in (1, 5, 9):
            self._test_gz_header(level)

class GzCompressStreamWriteTest(GzipTest, _CompressedWriteTest,
                                unittest.TestCase):
    # gzip header check for the stream write mode.
    prefix = "w|"

    def test_compression_levels(self):
        # Sample the lowest, a middle and the highest level.
        for level in (1, 5, 9):
            self._test_gz_header(level)

class CompressLevelRaises(unittest.TestCase):
    # Error handling for the compresslevel keyword of tarfile.open().

    def test_compresslevel_wrong_modes(self):
        # Opening an uncompressed archive ("w:") with compresslevel is
        # expected to fail with TypeError.
        target = io.BytesIO()
        level = 5
        with self.assertRaises(TypeError):
            tarfile.open(tmpname, "w:", target, compresslevel=level)

    def test_wrong_compresslevels(self):
        # bz2 validates that compresslevel lies in [1, 9]; gz does not,
        # so only bz2 modes are exercised here.
        target = io.BytesIO()
        rejected = (
            ("w:bz2", 0),
            ("w:bz2", 10),
            ("w|bz2", 10),
        )
        for mode, level in rejected:
            with self.assertRaises(ValueError):
                tarfile.open(tmpname, mode, target, compresslevel=level)

class GNUWriteTest(unittest.TestCase):
# This testcase checks for correct creation of GNU Longname
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Allow adjustable compression level for tarfile streams in
:func:`tarfile.open`.
serhiy-storchaka marked this conversation as resolved.
Show resolved Hide resolved