Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 67 additions & 6 deletions Lib/test/test_zipfile/_path/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pickle
import stat
import sys
import time
import unittest
import zipfile
import zipfile._path
Expand Down Expand Up @@ -592,7 +593,11 @@ def test_getinfo_missing(self, alpharep):

def test_malformed_paths(self):
"""
Path should handle malformed paths.
Path should handle malformed paths gracefully.

Paths with leading slashes are not visible.

Paths with dots are treated like regular files.
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
Expand All @@ -601,15 +606,71 @@ def test_malformed_paths(self):
zf.writestr("../parent.txt", b"content")
zf.filename = ''
root = zipfile.Path(zf)
assert list(map(str, root.iterdir())) == [
'one-slash.txt',
'two-slash.txt',
'parent.txt',
]
assert list(map(str, root.iterdir())) == ['../']
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'

def test_unsupported_names(self):
"""
Path segments with special characters are readable.

On some platforms or file systems, characters like
``:`` and ``?`` are not allowed, but they are valid
in the zip file.
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
zf.writestr("path?", b"content")
zf.writestr("V: NMS.flac", b"fLaC...")
zf.filename = ''
root = zipfile.Path(zf)
contents = root.iterdir()
assert next(contents).name == 'path?'
assert next(contents).name == 'V: NMS.flac'
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."

def test_backslash_not_separator(self):
    """
    A backslash inside a zip member name is not a path separator.
    """
    buffer = io.BytesIO()
    archive = zipfile.ZipFile(buffer, "w")
    # DirtyZipInfo keeps the backslash in the stored name untouched.
    info = DirtyZipInfo.for_name("foo\\bar", archive)
    archive.writestr(info, b"content")
    archive.filename = ''
    root = zipfile.Path(archive)
    # Unpacking also verifies that exactly one entry is listed.
    (only,) = root.iterdir()
    assert not only.is_dir()
    assert only.name == 'foo\\bar'

@pass_alpharep
def test_interface(self, alpharep):
    """
    A zipfile.Path is an instance of importlib's Traversable.
    """
    from importlib.resources.abc import Traversable

    assert isinstance(zipfile.Path(alpharep), Traversable)


class DirtyZipInfo(zipfile.ZipInfo):
    """
    ZipInfo that bypasses name sanitization.

    The filename is re-assigned after the base initializer runs, so
    the stored name is exactly the one the caller supplied.
    """

    def __init__(self, filename, *args, **kwargs):
        super().__init__(filename, *args, **kwargs)
        # Restore the caller's name verbatim.
        self.filename = filename

    @classmethod
    def for_name(cls, name, archive):
        """
        Build an instance the same way that ZipFile.writestr does.

        The compression settings are copied from ``archive`` and the
        timestamp is the current local time.

        TODO: extract this functionality and re-use
        """
        timestamp = time.localtime(time.time())[:6]
        info = cls(filename=name, date_time=timestamp)
        info.compress_type = archive.compression
        info.compress_level = archive.compresslevel
        if info.filename.endswith('/'):  # pragma: no cover
            # drwxrwxr-x combined with the MS-DOS directory flag
            info.external_attr = (0o40775 << 16) | 0x10
        else:
            info.external_attr = 0o600 << 16  # ?rw-------
        return info
82 changes: 17 additions & 65 deletions Lib/zipfile/_path/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
"""
A Path-like interface for zipfiles.

This codebase is shared between zipfile.Path in the stdlib
and zipp in PyPI. See
https://github.com/python/importlib_metadata/wiki/Development-Methodology
for more detail.
"""

import io
import posixpath
import zipfile
Expand Down Expand Up @@ -36,7 +45,7 @@ def _parents(path):
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
posixpath.sep, generate all elements of that path.

>>> list(_ancestry('b/d'))
['b/d', 'b']
Expand All @@ -48,9 +57,14 @@ def _ancestry(path):
['b']
>>> list(_ancestry(''))
[]

Multiple separators are treated like a single.

>>> list(_ancestry('//b//d///f//'))
['//b//d///f', '//b//d', '//b']
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
while path.rstrip(posixpath.sep):
yield path
path, tail = posixpath.split(path)

Expand Down Expand Up @@ -85,69 +99,7 @@ def __setstate__(self, state):
super().__init__(*args, **kwargs)


class SanitizedNames:
    """
    Mix-in for ZipFile whose namelist reports sanitized names.
    """

    def namelist(self):
        raw_names = super().namelist()
        return [self._sanitize(raw) for raw in raw_names]

    @staticmethod
    def _sanitize(name):
        r"""
        Produce a relative, posix-separated path free of dot names.

        Modeled after
        https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
        but provides consistent cross-platform behavior.

        >>> san = SanitizedNames._sanitize
        >>> san('/foo/bar')
        'foo/bar'
        >>> san('//foo.txt')
        'foo.txt'
        >>> san('foo/.././bar.txt')
        'foo/bar.txt'
        >>> san('foo../.bar.txt')
        'foo../.bar.txt'
        >>> san('\\foo\\bar.txt')
        'foo/bar.txt'
        >>> san('D:\\foo.txt')
        'D/foo.txt'
        >>> san('\\\\server\\share\\file.txt')
        'server/share/file.txt'
        >>> san('\\\\?\\GLOBALROOT\\Volume3')
        '?/GLOBALROOT/Volume3'
        >>> san('\\\\.\\PhysicalDrive1\\root')
        'PhysicalDrive1/root'

        Retain any trailing slash.
        >>> san('abc/')
        'abc/'

        Raises a ValueError if the result is empty.
        >>> san('../..')
        Traceback (most recent call last):
        ...
        ValueError: Empty filename
        """
        # Drop the colon after a leading drive letter.
        # ntpath.splitdrive is avoided because it also strips UNC paths.
        without_drive = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
        segments = without_drive.replace('\\', '/').split('/')
        # Discard empty segments and the '.' / '..' dot names.
        kept = [seg for seg in segments if seg and seg not in {'..', '.'}]
        if not kept:
            raise ValueError("Empty filename")
        # Only a forward slash at the end of the original name is retained.
        suffix = '/' if name.endswith('/') else ''
        return '/'.join(kept) + suffix


class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile):
class CompleteDirs(InitializedState, zipfile.ZipFile):
"""
A ZipFile subclass that ensures that implied directories
are always included in the namelist.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
causing infinite loops (gh-122905) without breaking contents using
legitimate characters.