Skip to content

Commit

Permalink
GH-104114: Fix pathlib.WindowsPath.glob() use of literal pattern segment case (GH-104116)
Browse files Browse the repository at this point in the history

We now use `_WildcardSelector` to evaluate literal pattern segments, which
allows us to retrieve the real filesystem case.

This change is necessary in order to implement a *case_sensitive* argument
(see GH-81079) and a *follow_symlinks* argument (see GH-77609).
  • Loading branch information
barneygale committed May 3, 2023
1 parent 38dc3f2 commit da1980a
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 41 deletions.
52 changes: 13 additions & 39 deletions Lib/pathlib.py
Expand Up @@ -54,11 +54,6 @@ def _ignore_error(exception):
getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)


def _is_wildcard_pattern(pat):
# Whether this pattern needs actual matching using fnmatch, or can
# be looked up directly as a file.
return "*" in pat or "?" in pat or "[" in pat

def _is_case_sensitive(flavour):
return flavour.normcase('Aa') == 'Aa'

Expand All @@ -78,10 +73,8 @@ def _make_selector(pattern_parts, flavour):
cls = _ParentSelector
elif '**' in pat:
raise ValueError("Invalid pattern: '**' can only be an entire path component")
elif _is_wildcard_pattern(pat):
cls = _WildcardSelector
else:
cls = _PreciseSelector
cls = _WildcardSelector
return cls(pat, child_parts, flavour)


Expand All @@ -102,55 +95,36 @@ def select_from(self, parent_path):
"""Iterate over all child paths of `parent_path` matched by this
selector. This can contain parent_path itself."""
path_cls = type(parent_path)
is_dir = path_cls.is_dir
exists = path_cls.exists
scandir = path_cls._scandir
if not is_dir(parent_path):
if not parent_path.is_dir():
return iter([])
return self._select_from(parent_path, is_dir, exists, scandir)
return self._select_from(parent_path, scandir)


class _TerminatingSelector:

def _select_from(self, parent_path, is_dir, exists, scandir):
def _select_from(self, parent_path, scandir):
yield parent_path


class _ParentSelector(_Selector):
def __init__(self, name, child_parts, flavour):
_Selector.__init__(self, child_parts, flavour)

def _select_from(self, parent_path, is_dir, exists, scandir):
def _select_from(self, parent_path, scandir):
path = parent_path._make_child_relpath('..')
for p in self.successor._select_from(path, is_dir, exists, scandir):
for p in self.successor._select_from(path, scandir):
yield p


class _PreciseSelector(_Selector):

def __init__(self, name, child_parts, flavour):
self.name = name
_Selector.__init__(self, child_parts, flavour)

def _select_from(self, parent_path, is_dir, exists, scandir):
try:
path = parent_path._make_child_relpath(self.name)
follow = is_dir(path) if self.dironly else exists(path, follow_symlinks=False)
if follow:
for p in self.successor._select_from(path, is_dir, exists, scandir):
yield p
except PermissionError:
return


class _WildcardSelector(_Selector):

def __init__(self, pat, child_parts, flavour):
flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE
self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
_Selector.__init__(self, child_parts, flavour)

def _select_from(self, parent_path, is_dir, exists, scandir):
def _select_from(self, parent_path, scandir):
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
Expand All @@ -171,7 +145,7 @@ def _select_from(self, parent_path, is_dir, exists, scandir):
name = entry.name
if self.match(name):
path = parent_path._make_child_relpath(name)
for p in self.successor._select_from(path, is_dir, exists, scandir):
for p in self.successor._select_from(path, scandir):
yield p
except PermissionError:
return
Expand All @@ -182,7 +156,7 @@ class _RecursiveWildcardSelector(_Selector):
def __init__(self, pat, child_parts, flavour):
_Selector.__init__(self, child_parts, flavour)

def _iterate_directories(self, parent_path, is_dir, scandir):
def _iterate_directories(self, parent_path, scandir):
yield parent_path
try:
# We must close the scandir() object before proceeding to
Expand All @@ -198,18 +172,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir):
raise
if entry_is_dir and not entry.is_symlink():
path = parent_path._make_child_relpath(entry.name)
for p in self._iterate_directories(path, is_dir, scandir):
for p in self._iterate_directories(path, scandir):
yield p
except PermissionError:
return

def _select_from(self, parent_path, is_dir, exists, scandir):
def _select_from(self, parent_path, scandir):
try:
yielded = set()
try:
successor_select = self.successor._select_from
for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
for p in successor_select(starting_point, is_dir, exists, scandir):
for starting_point in self._iterate_directories(parent_path, scandir):
for p in successor_select(starting_point, scandir):
if p not in yielded:
yield p
yielded.add(p)
Expand Down
4 changes: 2 additions & 2 deletions Lib/test/test_pathlib.py
Expand Up @@ -3122,15 +3122,15 @@ def test_glob(self):
self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") })
self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") })
self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") })
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"})
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"})
self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"})

def test_rglob(self):
P = self.cls
p = P(BASE, "dirC")
self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") })
self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") })
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"})
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})

def test_expanduser(self):
P = self.cls
Expand Down
@@ -0,0 +1,3 @@
Fix issue where :meth:`pathlib.Path.glob` returned paths whose segments used
the case of the matching non-wildcard pattern segments, rather than the real
filesystem case.

0 comments on commit da1980a

Please sign in to comment.