Skip to content

Commit

Permalink
bpo-36832: add zipfile.Path (#13153)
Browse files Browse the repository at this point in the history
* bpo-36832: add zipfile.Path

* bpo-36832: add documentation for zipfile.Path

* 📜🤖 Added by blurb_it.

* Remove module reference from blurb.

* Sort the imports

* Update docstrings and docs per recommendations.

* Rely on test.support.temp_dir

* Signal that 'root' is the parameter.

* Correct spelling of 'mod'

* Convert docstring to comment for brevity.

* Fix more errors in the docs
  • Loading branch information
jaraco authored and warsaw committed May 8, 2019
1 parent 70b8054 commit b2758ff
Show file tree
Hide file tree
Showing 4 changed files with 320 additions and 8 deletions.
67 changes: 67 additions & 0 deletions Doc/library/zipfile.rst
Expand Up @@ -52,6 +52,15 @@ The module defines the following items:
:ref:`zipfile-objects` for constructor details.


.. class:: Path
:noindex:

A pathlib-compatible wrapper for zip files. See section
:ref:`path-objects` for details.

.. versionadded:: 3.8


.. class:: PyZipFile
:noindex:

Expand Down Expand Up @@ -456,6 +465,64 @@ The following data attributes are also available:
truncated.


.. _path-objects:

Path Objects
------------

.. class:: Path(root, at='')

Construct a Path object from a ``root`` zipfile (which may be a
:class:`ZipFile` instance or ``file`` suitable for passing to
the :class:`ZipFile` constructor).

``at`` specifies the location of this Path within the zipfile,
e.g. 'dir/file.txt', 'dir/', or ''. Defaults to the empty string,
indicating the root.

Path objects expose the following features of :class:`pathlib.Path`
objects:

Path objects are traversable using the ``/`` operator.

.. attribute:: Path.name

The final path component.

.. method:: Path.open(*, **)

Invoke :meth:`ZipFile.open` on the current path. Accepts
the same arguments as :meth:`ZipFile.open`.

.. method:: Path.iterdir()

Enumerate the children of the current directory.

.. method:: Path.is_dir()

Return ``True`` if the current context references a directory.

.. method:: Path.is_file()

Return ``True`` if the current context references a file.

.. method:: Path.exists()

Return ``True`` if the current context references a file or
directory in the zip file.

.. method:: Path.read_text(*, **)

Read the current file as unicode text. Positional and
keyword arguments are passed through to
:class:`io.TextIOWrapper` (except ``buffer``, which is
implied by the context).

.. method:: Path.read_bytes()

Read the current file as bytes.


.. _pyzipfile-objects:

PyZipFile Objects
Expand Down
116 changes: 113 additions & 3 deletions Lib/test/test_zipfile.py
@@ -1,13 +1,15 @@
import contextlib
import importlib.util
import io
import os
import importlib.util
import pathlib
import posixpath
import time
import shutil
import struct
import zipfile
import tempfile
import time
import unittest
import zipfile


from tempfile import TemporaryFile
Expand Down Expand Up @@ -2392,5 +2394,113 @@ def test_extract_command(self):
with open(path, 'rb') as f:
self.assertEqual(f.read(), zf.read(zi))


# Poor man's technique to consume a (smallish) iterable.
# Calling ``consume(iterable)`` exhausts the iterable by materializing it
# into a tuple, so it only suits small inputs whose items can be discarded.
consume = tuple


def add_dirs(zipfile):
    """
    Given a writable zipfile, inject directory entries for
    any directories implied by the presence of children.

    Every missing ancestor directory of every member receives exactly one
    empty entry (e.g. ``a/b/c.txt`` implies both ``a/b/`` and ``a/``).
    Entries already present in the archive are left untouched.

    Returns the same zipfile object so calls can be chained.
    """
    # Snapshot the member list first: entries are appended while we walk it.
    names = zipfile.namelist()
    # Track every name already present so no directory is written twice,
    # even when several members share the same parent.
    known = set(names)
    for name in names:
        # Strip trailing slashes so explicit directory entries ("b/") are
        # treated like any other member when looking up their parent.
        parent = posixpath.dirname(name.rstrip("/")).rstrip("/")
        while parent:
            entry = parent + "/"
            if entry not in known:
                known.add(entry)
                zipfile.writestr(entry, b"")
            parent = posixpath.dirname(parent).rstrip("/")
    return zipfile


def build_abcde_files():
    """
    Build an in-memory, writable zip archive named ``abcde.zip``
    containing only file entries, laid out as follows:

    .
    ├── a.txt
    └── b
        ├── c.txt
        └── d
            └── e.txt

    Each file holds the bytes ``content of <letter>``.  No explicit
    directory entries are written.  The open ZipFile is returned.
    """
    members = (
        ("a.txt", b"content of a"),
        ("b/c.txt", b"content of c"),
        ("b/d/e.txt", b"content of e"),
    )
    archive = zipfile.ZipFile(io.BytesIO(), "w")
    for arcname, payload in members:
        archive.writestr(arcname, payload)
    # The archive is backed by a buffer, so give it a name explicitly.
    archive.filename = "abcde.zip"
    return archive


class TestPath(unittest.TestCase):
    """
    Tests for ``zipfile.Path``.

    Each test runs against two variants of the sample archive: one with
    file entries only, and one with explicit directory entries added.
    """

    def setUp(self):
        # Fixtures entered on this stack (e.g. temporary directories) are
        # released automatically when the test finishes.
        self.fixtures = contextlib.ExitStack()
        self.addCleanup(self.fixtures.close)

    def zipfile_abcde(self):
        """Yield the sample archive without, then with, directory entries."""
        with self.subTest():
            yield build_abcde_files()
        with self.subTest():
            yield add_dirs(build_abcde_files())

    def zipfile_ondisk(self):
        """Yield filesystem paths to on-disk copies of the sample archives."""
        tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
        for zipfile_abcde in self.zipfile_abcde():
            # Grab the backing buffer before closing; close() finalizes the
            # archive contents and detaches the file object.
            buffer = zipfile_abcde.fp
            zipfile_abcde.close()
            path = tmpdir / zipfile_abcde.filename
            with path.open("wb") as strm:
                strm.write(buffer.getvalue())
            yield path

    # The tests below use self.assert* rather than bare ``assert`` so the
    # checks still run under ``python -O`` and report useful failures.

    def test_iterdir_istype(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            self.assertTrue(root.is_dir())
            a, b = root.iterdir()
            self.assertTrue(a.is_file())
            self.assertTrue(b.is_dir())
            c, d = b.iterdir()
            self.assertTrue(c.is_file())
            e, = d.iterdir()
            self.assertTrue(e.is_file())

    def test_open(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            a, b = root.iterdir()
            with a.open() as strm:
                data = strm.read()
            self.assertEqual(data, b"content of a")

    def test_read(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            a, b = root.iterdir()
            self.assertEqual(a.read_text(), "content of a")
            self.assertEqual(a.read_bytes(), b"content of a")

    def test_traverse_truediv(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            # Traverse to a member that actually exists in the archive;
            # "a" alone names nothing, which made the check vacuous.
            a = root / "a.txt"
            self.assertTrue(a.is_file())
            e = root / "b" / "d" / "e.txt"
            self.assertEqual(e.read_text(), "content of e")

    def test_pathlike_construction(self):
        """
        zipfile.Path should be constructable from a path-like object
        """
        for zipfile_ondisk in self.zipfile_ondisk():
            pathlike = pathlib.Path(str(zipfile_ondisk))
            zipfile.Path(pathlike)

    def test_traverse_pathlike(self):
        """zipfile.Path should accept a path-like right operand for ``/``."""
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            root / pathlib.Path("a")

# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
144 changes: 139 additions & 5 deletions Lib/zipfile.py
Expand Up @@ -3,16 +3,18 @@
XXX references to utf-8 need further investigation.
"""
import binascii
import functools
import importlib.util
import io
import os
import importlib.util
import sys
import time
import stat
import posixpath
import shutil
import stat
import struct
import binascii
import sys
import threading
import time

try:
import zlib # We may need its compression method
Expand Down Expand Up @@ -2102,6 +2104,138 @@ def _compile(file, optimize=-1):
return (fname, archivename)


class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # Template for __repr__; name-mangled so it stays private to this class.
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Construct a Path from ``root`` and a location ``at`` inside it.

        ``root`` is either a ZipFile instance, used as-is, or anything
        accepted by the ZipFile constructor.  ``at`` is the member name
        within the archive ('dir/file.txt', 'dir/' or '' for the root).
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """A callable that invokes ``ZipFile.open`` on the current path."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """
        Read the current file as text.

        Positional and keyword arguments are passed through to
        ``io.TextIOWrapper`` after the stream itself.
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Read the current file as bytes."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # ``path`` is a direct child when its parent directory equals this
        # location; trailing slashes are ignored on both sides.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build a sibling Path sharing the same underlying archive.
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root or a name ending in '/'."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True if the current location is not a directory."""
        return not self.is_dir()

    def exists(self):
        """Return True if the location is a stored or implied archive name."""
        return self.at in self._names()

    def iterdir(self):
        """
        Iterate over the direct children of the current directory.

        Raises ValueError if the current location is a file.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def __truediv__(self, add):
        """
        Return the Path for ``add`` joined beneath the current location.

        The directory form (with trailing slash) is chosen only when the
        plain name is absent from the archive and the directory is present.
        """
        next = posixpath.join(self.at, add)
        next_dir = posixpath.join(self.at, add, "")
        names = self._names()
        return self._next(next_dir if next not in names and next_dir in names else next)

    @staticmethod
    def _add_implied_dirs(names):
        """
        Return ``names`` followed by every directory the names imply.

        Each missing ancestor of each name is appended exactly once, so
        ``a/b/c.txt`` implies both ``a/b/`` and ``a/``, and members that
        share a parent do not produce duplicate directory names.
        """
        known = set(names)
        implied = []
        for name in names:
            # Strip trailing slashes so explicit directory entries also
            # contribute their own ancestors.
            parent = posixpath.dirname(name.rstrip("/")).rstrip("/")
            while parent:
                entry = parent + "/"
                if entry not in known:
                    known.add(entry)
                    implied.append(entry)
                parent = posixpath.dirname(parent).rstrip("/")
        return names + implied

    def _names(self):
        # All archive member names plus the directories they imply.
        return self._add_implied_dirs(self.root.namelist())


def main(args=None):
import argparse

Expand Down
@@ -0,0 +1 @@
Introducing ``zipfile.Path``, a pathlib-compatible wrapper for traversing zip files.

0 comments on commit b2758ff

Please sign in to comment.